Classes
class	Accuracy

class	AdaDeltaOptimizer

class	AdaGradOptimizer

class	AdamOptimizer

class	Bilinear

class	Constant

class	Context
	Context interface. More...

class	DataBatch
	Default object for holding a mini-batch of data and related information. More...

class	DataIter

class	EvalMetric

class	Executor
	Executor interface. More...

class	FactorScheduler

class	FeedForward

struct	FeedForwardConfig

class	Initializer

class	KVStore

class	LogLoss

class	LRScheduler
	Learning rate (lr) scheduler interface. More...

class	MAE

class	Monitor
	Monitor interface. More...

class	MSE

class	MSRAPrelu

class	MXDataIter

struct	MXDataIterBlob

class	MXDataIterMap

class	NDArray
	NDArray interface. More...

struct	NDBlob
	struct to store NDArrayHandle More...

class	Normal

class	One

class	Operator
	Operator interface. More...

class	OpMap
	OpMap instance holds a map of all the symbol creators so we can get symbol creators by name. This is used internally by Symbol and Operator. More...

class	Optimizer
	Optimizer interface. More...

class	OptimizerRegistry

class	PSNR

class	RMSE

class	RMSPropOptimizer

class	SGDOptimizer

struct	Shape
	Dynamic shape class that can hold a shape of arbitrary dimension. More...

class	SignumOptimizer

struct	SymBlob
	struct to store SymbolHandle More...

class	Symbol
	Symbol interface. More...

class	Uniform

class	Xavier

class	Zero

Typedefs
typedef unsigned	index_t

typedef std::function< Optimizer *()>	OptimizerCreator

Enumerations
enum	OpReqType { kNullOp, kWriteTo, kWriteInplace, kAddTo }

enum	DeviceType { kCPU = 1, kGPU = 2, kCPUPinned = 3 }

enum	PickMode { PickMode::kClip = 0, PickMode::kWrap = 1 }

enum	DotForwardStype { DotForwardStype::kNone = 0, DotForwardStype::kCsr = 1, DotForwardStype::kDefault = 2, DotForwardStype::kRow_sparse = 3 }

enum	Batch_dotForwardStype { Batch_dotForwardStype::kNone = 0, Batch_dotForwardStype::kCsr = 1, Batch_dotForwardStype::kDefault = 2, Batch_dotForwardStype::kRow_sparse = 3 }

enum	CastDtype { CastDtype::kFloat16 = 0, CastDtype::kFloat32 = 1, CastDtype::kFloat64 = 2, CastDtype::kInt32 = 3, CastDtype::kInt64 = 4, CastDtype::kInt8 = 5, CastDtype::kUint8 = 6 }

enum	TopkRetTyp { TopkRetTyp::kBoth = 0, TopkRetTyp::kIndices = 1, TopkRetTyp::kMask = 2, TopkRetTyp::kValue = 3 }

enum	TopkDtype { TopkDtype::kFloat16 = 0, TopkDtype::kFloat32 = 1, TopkDtype::kFloat64 = 2, TopkDtype::kInt32 = 3, TopkDtype::kUint8 = 4 }

enum	ArgsortDtype { ArgsortDtype::kFloat16 = 0, ArgsortDtype::kFloat32 = 1, ArgsortDtype::kFloat64 = 2, ArgsortDtype::kInt32 = 3, ArgsortDtype::kUint8 = 4 }

enum	EmbeddingDtype { EmbeddingDtype::kFloat16 = 0, EmbeddingDtype::kFloat32 = 1, EmbeddingDtype::kFloat64 = 2, EmbeddingDtype::kInt32 = 3, EmbeddingDtype::kInt64 = 4, EmbeddingDtype::kInt8 = 5, EmbeddingDtype::kUint8 = 6 }

enum	TakeMode { TakeMode::kClip = 0, TakeMode::kRaise = 1, TakeMode::kWrap = 2 }

enum	One_hotDtype { One_hotDtype::kFloat16 = 0, One_hotDtype::kFloat32 = 1, One_hotDtype::kFloat64 = 2, One_hotDtype::kInt32 = 3, One_hotDtype::kInt64 = 4, One_hotDtype::kInt8 = 5, One_hotDtype::kUint8 = 6 }

enum	Cast_storageStype { Cast_storageStype::kCsr = 0, Cast_storageStype::kDefault = 1, Cast_storageStype::kRow_sparse = 2 }

enum	PoolingPoolType { PoolingPoolType::kAvg = 0, PoolingPoolType::kLp = 1, PoolingPoolType::kMax = 2, PoolingPoolType::kSum = 3 }

enum	PoolingPoolingConvention { PoolingPoolingConvention::kFull = 0, PoolingPoolingConvention::kValid = 1 }

enum	DeconvolutionCudnnTune { DeconvolutionCudnnTune::kNone = 0, DeconvolutionCudnnTune::kFastest = 1, DeconvolutionCudnnTune::kLimited_workspace = 2, DeconvolutionCudnnTune::kOff = 3 }

enum	DeconvolutionLayout { DeconvolutionLayout::kNone = 0, DeconvolutionLayout::kNCDHW = 1, DeconvolutionLayout::kNCHW = 2, DeconvolutionLayout::kNCW = 3, DeconvolutionLayout::kNDHWC = 4, DeconvolutionLayout::kNHWC = 5 }

enum	ActivationActType { ActivationActType::kRelu = 0, ActivationActType::kSigmoid = 1, ActivationActType::kSoftrelu = 2, ActivationActType::kSoftsign = 3, ActivationActType::kTanh = 4 }

enum	ConvolutionCudnnTune { ConvolutionCudnnTune::kNone = 0, ConvolutionCudnnTune::kFastest = 1, ConvolutionCudnnTune::kLimited_workspace = 2, ConvolutionCudnnTune::kOff = 3 }

enum	ConvolutionLayout { ConvolutionLayout::kNone = 0, ConvolutionLayout::kNCDHW = 1, ConvolutionLayout::kNCHW = 2, ConvolutionLayout::kNCW = 3, ConvolutionLayout::kNDHWC = 4, ConvolutionLayout::kNHWC = 5 }

enum	UpSamplingSampleType { UpSamplingSampleType::kBilinear = 0, UpSamplingSampleType::kNearest = 1 }

enum	UpSamplingMultiInputMode { UpSamplingMultiInputMode::kConcat = 0, UpSamplingMultiInputMode::kSum = 1 }

enum	DropoutMode { DropoutMode::kAlways = 0, DropoutMode::kTraining = 1 }

enum	SoftmaxActivationMode { SoftmaxActivationMode::kChannel = 0, SoftmaxActivationMode::kInstance = 1 }

enum	PadMode { PadMode::kConstant = 0, PadMode::kEdge = 1, PadMode::kReflect = 2 }

enum	LeakyReLUActType { LeakyReLUActType::kElu = 0, LeakyReLUActType::kLeaky = 1, LeakyReLUActType::kPrelu = 2, LeakyReLUActType::kRrelu = 3, LeakyReLUActType::kSelu = 4 }

enum	GridGeneratorTransformType { GridGeneratorTransformType::kAffine = 0, GridGeneratorTransformType::kWarp = 1 }

enum	Pooling_v1PoolType { Pooling_v1PoolType::kAvg = 0, Pooling_v1PoolType::kMax = 1, Pooling_v1PoolType::kSum = 2 }

enum	Pooling_v1PoolingConvention { Pooling_v1PoolingConvention::kFull = 0, Pooling_v1PoolingConvention::kValid = 1 }

enum	RNNMode { RNNMode::kGru = 0, RNNMode::kLstm = 1, RNNMode::kRnn_relu = 2, RNNMode::kRnn_tanh = 3 }

enum	Convolution_v1CudnnTune { Convolution_v1CudnnTune::kNone = 0, Convolution_v1CudnnTune::kFastest = 1, Convolution_v1CudnnTune::kLimited_workspace = 2, Convolution_v1CudnnTune::kOff = 3 }

enum	Convolution_v1Layout { Convolution_v1Layout::kNone = 0, Convolution_v1Layout::kNCDHW = 1, Convolution_v1Layout::kNCHW = 2, Convolution_v1Layout::kNDHWC = 3, Convolution_v1Layout::kNHWC = 4 }

enum	SpatialTransformerTransformType { SpatialTransformerTransformType::kAffine = 0 }

enum	SpatialTransformerSamplerType { SpatialTransformerSamplerType::kBilinear = 0 }

enum	SoftmaxOutputNormalization { SoftmaxOutputNormalization::kBatch = 0, SoftmaxOutputNormalization::kNull = 1, SoftmaxOutputNormalization::kValid = 2 }

enum	SoftmaxNormalization { SoftmaxNormalization::kBatch = 0, SoftmaxNormalization::kNull = 1, SoftmaxNormalization::kValid = 2 }

enum	L2NormalizationMode { L2NormalizationMode::kChannel = 0, L2NormalizationMode::kInstance = 1, L2NormalizationMode::kSpatial = 2 }

enum	MakeLossNormalization { MakeLossNormalization::kBatch = 0, MakeLossNormalization::kNull = 1, MakeLossNormalization::kValid = 2 }

Functions
NDArray	_default_monitor_func (const NDArray &x)
	Default function for monitor that computes statistics of the input tensor, which is the mean absolute |x|/size(x). More...

std::ostream &	operator<< (std::ostream &out, const NDArray &ndarray)

Symbol	khatri_rao (const std::string &symbol_name, const std::vector< Symbol > &args)

Symbol	Custom (const std::string &symbol_name, const std::vector< Symbol > &data, const std::string &op_type)

Symbol	broadcast_power (const std::string &symbol_name, Symbol lhs, Symbol rhs)

Symbol	broadcast_maximum (const std::string &symbol_name, Symbol lhs, Symbol rhs)

Symbol	broadcast_minimum (const std::string &symbol_name, Symbol lhs, Symbol rhs)

Symbol	broadcast_hypot (const std::string &symbol_name, Symbol lhs, Symbol rhs)

Symbol	Reshape (const std::string &symbol_name, Symbol data, Shape shape=Shape(), bool reverse=false, Shape target_shape=Shape(), bool keep_highest=false)

Symbol	Flatten (const std::string &symbol_name, Symbol data)

Symbol	transpose (const std::string &symbol_name, Symbol data, Shape axes=Shape())

Symbol	expand_dims (const std::string &symbol_name, Symbol data, int axis)

Symbol	slice (const std::string &symbol_name, Symbol data, Shape begin, Shape end, Shape step=Shape())

Symbol	slice_axis (const std::string &symbol_name, Symbol data, int axis, int begin, dmlc::optional< int > end)

Symbol	slice_like (const std::string &symbol_name, Symbol data, Symbol shape_like, Shape axes=Shape())

Symbol	clip (const std::string &symbol_name, Symbol data, mx_float a_min, mx_float a_max)

Symbol	repeat (const std::string &symbol_name, Symbol data, int repeats, dmlc::optional< int > axis=dmlc::optional< int >())

Symbol	tile (const std::string &symbol_name, Symbol data, Shape reps)

Symbol	reverse (const std::string &symbol_name, Symbol data, Shape axis)

Symbol	stack (const std::string &symbol_name, const std::vector< Symbol > &data, int num_args, int axis=0)

Symbol	squeeze (const std::string &symbol_name, const std::vector< Symbol > &data, dmlc::optional< Shape > axis=dmlc::optional< Shape >())

Symbol	depth_to_space (const std::string &symbol_name, Symbol data, int block_size)

Symbol	space_to_depth (const std::string &symbol_name, Symbol data, int block_size)

Symbol	zeros_like (const std::string &symbol_name, Symbol data)

Symbol	ones_like (const std::string &symbol_name, Symbol data)

Symbol	broadcast_add (const std::string &symbol_name, Symbol lhs, Symbol rhs)

Symbol	broadcast_sub (const std::string &symbol_name, Symbol lhs, Symbol rhs)

Symbol	broadcast_mul (const std::string &symbol_name, Symbol lhs, Symbol rhs)

Symbol	broadcast_div (const std::string &symbol_name, Symbol lhs, Symbol rhs)

Symbol	broadcast_mod (const std::string &symbol_name, Symbol lhs, Symbol rhs)

Symbol	add_n (const std::string &symbol_name, const std::vector< Symbol > &args)

Symbol	argmax (const std::string &symbol_name, Symbol data, dmlc::optional< int > axis=dmlc::optional< int >(), bool keepdims=false)

Symbol	argmin (const std::string &symbol_name, Symbol data, dmlc::optional< int > axis=dmlc::optional< int >(), bool keepdims=false)

Symbol	argmax_channel (const std::string &symbol_name, Symbol data)

Symbol	pick (const std::string &symbol_name, Symbol data, Symbol index, dmlc::optional< int > axis=dmlc::optional< int >(-1), bool keepdims=false, PickMode mode=PickMode::kClip)

Symbol	dot (const std::string &symbol_name, Symbol lhs, Symbol rhs, bool transpose_a=false, bool transpose_b=false, DotForwardStype forward_stype=DotForwardStype::kNone)

Symbol	batch_dot (const std::string &symbol_name, Symbol lhs, Symbol rhs, bool transpose_a=false, bool transpose_b=false, Batch_dotForwardStype forward_stype=Batch_dotForwardStype::kNone)

Symbol	relu (const std::string &symbol_name, Symbol data)

Symbol	sigmoid (const std::string &symbol_name, Symbol data)

Symbol	hard_sigmoid (const std::string &symbol_name, Symbol data, mx_float alpha=0.2, mx_float beta=0.5)

Symbol	softsign (const std::string &symbol_name, Symbol data)

Symbol	BlockGrad (const std::string &symbol_name, Symbol data)

Symbol	make_loss (const std::string &symbol_name, Symbol data)

Symbol	reshape_like (const std::string &symbol_name, Symbol lhs, Symbol rhs)

Symbol	shape_array (const std::string &symbol_name, Symbol data, dmlc::optional< int > lhs_begin=dmlc::optional< int >(), dmlc::optional< int > lhs_end=dmlc::optional< int >(), dmlc::optional< int > rhs_begin=dmlc::optional< int >(), dmlc::optional< int > rhs_end=dmlc::optional< int >())

Symbol	size_array (const std::string &symbol_name, Symbol data)

Symbol	Cast (const std::string &symbol_name, Symbol data, CastDtype dtype)

Symbol	negative (const std::string &symbol_name, Symbol data)

Symbol	reciprocal (const std::string &symbol_name, Symbol data)

Symbol	abs (const std::string &symbol_name, Symbol data)

Symbol	sign (const std::string &symbol_name, Symbol data)

Symbol	round (const std::string &symbol_name, Symbol data)

Symbol	rint (const std::string &symbol_name, Symbol data)

Symbol	ceil (const std::string &symbol_name, Symbol data)

Symbol	floor (const std::string &symbol_name, Symbol data)

Symbol	trunc (const std::string &symbol_name, Symbol data)

Symbol	fix (const std::string &symbol_name, Symbol data)

Symbol	square (const std::string &symbol_name, Symbol data)

Symbol	sqrt (const std::string &symbol_name, Symbol data)

Symbol	rsqrt (const std::string &symbol_name, Symbol data)

Symbol	cbrt (const std::string &symbol_name, Symbol data)

Symbol	rcbrt (const std::string &symbol_name, Symbol data)

Symbol	exp (const std::string &symbol_name, Symbol data)

Symbol	log (const std::string &symbol_name, Symbol data)

Symbol	log10 (const std::string &symbol_name, Symbol data)

Symbol	log2 (const std::string &symbol_name, Symbol data)

Symbol	log1p (const std::string &symbol_name, Symbol data)

Symbol	expm1 (const std::string &symbol_name, Symbol data)

Symbol	gamma (const std::string &symbol_name, Symbol data)

Symbol	gammaln (const std::string &symbol_name, Symbol data)

Symbol	logical_not (const std::string &symbol_name, Symbol data)

Symbol	sum (const std::string &symbol_name, Symbol data, dmlc::optional< Shape > axis=dmlc::optional< Shape >(), bool keepdims=false, bool exclude=false)

Symbol	mean (const std::string &symbol_name, Symbol data, dmlc::optional< Shape > axis=dmlc::optional< Shape >(), bool keepdims=false, bool exclude=false)

Symbol	prod (const std::string &symbol_name, Symbol data, dmlc::optional< Shape > axis=dmlc::optional< Shape >(), bool keepdims=false, bool exclude=false)

Symbol	nansum (const std::string &symbol_name, Symbol data, dmlc::optional< Shape > axis=dmlc::optional< Shape >(), bool keepdims=false, bool exclude=false)

Symbol	nanprod (const std::string &symbol_name, Symbol data, dmlc::optional< Shape > axis=dmlc::optional< Shape >(), bool keepdims=false, bool exclude=false)

Symbol	max (const std::string &symbol_name, Symbol data, dmlc::optional< Shape > axis=dmlc::optional< Shape >(), bool keepdims=false, bool exclude=false)

Symbol	min (const std::string &symbol_name, Symbol data, dmlc::optional< Shape > axis=dmlc::optional< Shape >(), bool keepdims=false, bool exclude=false)

Symbol	broadcast_axis (const std::string &symbol_name, Symbol data, Shape axis=Shape(), Shape size=Shape())

Symbol	broadcast_to (const std::string &symbol_name, Symbol data, Shape shape=Shape())

Symbol	broadcast_like (const std::string &symbol_name, Symbol lhs, Symbol rhs)

Symbol	norm (const std::string &symbol_name, Symbol data, int ord=2, dmlc::optional< Shape > axis=dmlc::optional< Shape >(), bool keepdims=false)

Symbol	topk (const std::string &symbol_name, Symbol data, dmlc::optional< int > axis=dmlc::optional< int >(-1), int k=1, TopkRetTyp ret_typ=TopkRetTyp::kIndices, bool is_ascend=false, TopkDtype dtype=TopkDtype::kFloat32)

Symbol	sort (const std::string &symbol_name, Symbol data, dmlc::optional< int > axis=dmlc::optional< int >(-1), bool is_ascend=true)

Symbol	argsort (const std::string &symbol_name, Symbol data, dmlc::optional< int > axis=dmlc::optional< int >(-1), bool is_ascend=true, ArgsortDtype dtype=ArgsortDtype::kFloat32)

Symbol	elemwise_add (const std::string &symbol_name, Symbol lhs, Symbol rhs)

Symbol	elemwise_sub (const std::string &symbol_name, Symbol lhs, Symbol rhs)

Symbol	elemwise_mul (const std::string &symbol_name, Symbol lhs, Symbol rhs)

Symbol	elemwise_div (const std::string &symbol_name, Symbol lhs, Symbol rhs)

Symbol	Embedding (const std::string &symbol_name, Symbol data, Symbol weight, int input_dim, int output_dim, EmbeddingDtype dtype=EmbeddingDtype::kFloat32, bool sparse_grad=false)

Symbol	take (const std::string &symbol_name, Symbol a, Symbol indices, int axis=0, TakeMode mode=TakeMode::kClip)

Symbol	batch_take (const std::string &symbol_name, Symbol a, Symbol indices)

Symbol	one_hot (const std::string &symbol_name, Symbol indices, int depth, double on_value=1, double off_value=0, One_hotDtype dtype=One_hotDtype::kFloat32)

Symbol	gather_nd (const std::string &symbol_name, Symbol data, Symbol indices)

Symbol	scatter_nd (const std::string &symbol_name, Symbol data, Symbol indices, Shape shape)

Symbol	broadcast_equal (const std::string &symbol_name, Symbol lhs, Symbol rhs)

Symbol	broadcast_not_equal (const std::string &symbol_name, Symbol lhs, Symbol rhs)

Symbol	broadcast_greater (const std::string &symbol_name, Symbol lhs, Symbol rhs)

Symbol	broadcast_greater_equal (const std::string &symbol_name, Symbol lhs, Symbol rhs)

Symbol	broadcast_lesser (const std::string &symbol_name, Symbol lhs, Symbol rhs)

Symbol	broadcast_lesser_equal (const std::string &symbol_name, Symbol lhs, Symbol rhs)

Symbol	broadcast_logical_and (const std::string &symbol_name, Symbol lhs, Symbol rhs)

Symbol	broadcast_logical_or (const std::string &symbol_name, Symbol lhs, Symbol rhs)

Symbol	broadcast_logical_xor (const std::string &symbol_name, Symbol lhs, Symbol rhs)

Symbol	diag (const std::string &symbol_name, Symbol data, dmlc::optional< int > k=dmlc::optional< int >(0))

Symbol	where (const std::string &symbol_name, Symbol condition, Symbol x, Symbol y)

Symbol	smooth_l1 (const std::string &symbol_name, Symbol data, mx_float scalar)

Symbol	cast_storage (const std::string &symbol_name, Symbol data, Cast_storageStype stype)

Symbol	sin (const std::string &symbol_name, Symbol data)

Symbol	cos (const std::string &symbol_name, Symbol data)

Symbol	tan (const std::string &symbol_name, Symbol data)

Symbol	arcsin (const std::string &symbol_name, Symbol data)

Symbol	arccos (const std::string &symbol_name, Symbol data)

Symbol	arctan (const std::string &symbol_name, Symbol data)

Symbol	degrees (const std::string &symbol_name, Symbol data)

Symbol	radians (const std::string &symbol_name, Symbol data)

Symbol	sinh (const std::string &symbol_name, Symbol data)

Symbol	cosh (const std::string &symbol_name, Symbol data)

Symbol	tanh (const std::string &symbol_name, Symbol data)

Symbol	arcsinh (const std::string &symbol_name, Symbol data)

Symbol	arccosh (const std::string &symbol_name, Symbol data)

Symbol	arctanh (const std::string &symbol_name, Symbol data)

Symbol	Pooling (const std::string &symbol_name, Symbol data, Shape kernel=Shape(), PoolingPoolType pool_type=PoolingPoolType::kMax, bool global_pool=false, bool cudnn_off=false, PoolingPoolingConvention pooling_convention=PoolingPoolingConvention::kValid, Shape stride=Shape(), Shape pad=Shape(), dmlc::optional< int > p_value=dmlc::optional< int >(), dmlc::optional< bool > count_include_pad=dmlc::optional< bool >())

Symbol	softmax (const std::string &symbol_name, Symbol data, int axis=-1, dmlc::optional< double > temperature=dmlc::optional< double >())

Symbol	log_softmax (const std::string &symbol_name, Symbol data, int axis=-1, dmlc::optional< double > temperature=dmlc::optional< double >())

Symbol	Deconvolution (const std::string &symbol_name, Symbol data, Symbol weight, Symbol bias, Shape kernel, uint32_t num_filter, Shape stride=Shape(), Shape dilate=Shape(), Shape pad=Shape(), Shape adj=Shape(), Shape target_shape=Shape(), uint32_t num_group=1, uint64_t workspace=512, bool no_bias=true, DeconvolutionCudnnTune cudnn_tune=DeconvolutionCudnnTune::kNone, bool cudnn_off=false, DeconvolutionLayout layout=DeconvolutionLayout::kNone)

Symbol	Activation (const std::string &symbol_name, Symbol data, ActivationActType act_type)

Symbol	BatchNorm (const std::string &symbol_name, Symbol data, Symbol gamma, Symbol beta, Symbol moving_mean, Symbol moving_var, double eps=0.001, mx_float momentum=0.9, bool fix_gamma=true, bool use_global_stats=false, bool output_mean_var=false, int axis=1, bool cudnn_off=false)

Symbol	Convolution (const std::string &symbol_name, Symbol data, Symbol weight, Symbol bias, Shape kernel, uint32_t num_filter, Shape stride=Shape(), Shape dilate=Shape(), Shape pad=Shape(), uint32_t num_group=1, uint64_t workspace=1024, bool no_bias=false, ConvolutionCudnnTune cudnn_tune=ConvolutionCudnnTune::kNone, bool cudnn_off=false, ConvolutionLayout layout=ConvolutionLayout::kNone)

Symbol	UpSampling (const std::string &symbol_name, const std::vector< Symbol > &data, uint32_t scale, UpSamplingSampleType sample_type, int num_args, uint32_t num_filter=0, UpSamplingMultiInputMode multi_input_mode=UpSamplingMultiInputMode::kConcat, uint64_t workspace=512)

Symbol	Concat (const std::string &symbol_name, const std::vector< Symbol > &data, int num_args, int dim=1)

Symbol	LayerNorm (const std::string &symbol_name, Symbol data, Symbol gamma, Symbol beta, int axis=-1, mx_float eps=1e-05, bool output_mean_var=false)

Symbol	LRN (const std::string &symbol_name, Symbol data, uint32_t nsize, mx_float alpha=0.0001, mx_float beta=0.75, mx_float knorm=2)

Symbol	Dropout (const std::string &symbol_name, Symbol data, mx_float p=0.5, DropoutMode mode=DropoutMode::kTraining, Shape axes=Shape())

Symbol	SoftmaxActivation (const std::string &symbol_name, Symbol data, SoftmaxActivationMode mode=SoftmaxActivationMode::kInstance)

Symbol	FullyConnected (const std::string &symbol_name, Symbol data, Symbol weight, Symbol bias, int num_hidden, bool no_bias=false, bool flatten=true)

Symbol	Pad (const std::string &symbol_name, Symbol data, PadMode mode, Shape pad_width, double constant_value=0)

Symbol	LeakyReLU (const std::string &symbol_name, Symbol data, Symbol gamma, LeakyReLUActType act_type=LeakyReLUActType::kLeaky, mx_float slope=0.25, mx_float lower_bound=0.125, mx_float upper_bound=0.334)

Symbol	SwapAxis (const std::string &symbol_name, Symbol data, uint32_t dim1=0, uint32_t dim2=0)

Symbol	BatchNorm_v1 (const std::string &symbol_name, Symbol data, Symbol gamma, Symbol beta, mx_float eps=0.001, mx_float momentum=0.9, bool fix_gamma=true, bool use_global_stats=false, bool output_mean_var=false)

Symbol	softmax_cross_entropy (const std::string &symbol_name, Symbol data, Symbol label)

Symbol	LinearRegressionOutput (const std::string &symbol_name, Symbol data, Symbol label, mx_float grad_scale=1)

Symbol	MAERegressionOutput (const std::string &symbol_name, Symbol data, Symbol label, mx_float grad_scale=1)

Symbol	LogisticRegressionOutput (const std::string &symbol_name, Symbol data, Symbol label, mx_float grad_scale=1)

Symbol	IdentityAttachKLSparseReg (const std::string &symbol_name, Symbol data, mx_float sparseness_target=0.1, mx_float penalty=0.001, mx_float momentum=0.9)

Symbol	signsgd_update (const std::string &symbol_name, Symbol weight, Symbol grad, mx_float lr, mx_float wd=0, mx_float rescale_grad=1, mx_float clip_gradient=-1)

Symbol	signum_update (const std::string &symbol_name, Symbol weight, Symbol grad, Symbol mom, mx_float lr, mx_float momentum=0, mx_float wd=0, mx_float rescale_grad=1, mx_float clip_gradient=-1, mx_float wd_lh=0)

Symbol	sgd_update (const std::string &symbol_name, Symbol weight, Symbol grad, mx_float lr, mx_float wd=0, mx_float rescale_grad=1, mx_float clip_gradient=-1, bool lazy_update=true)

Symbol	sgd_mom_update (const std::string &symbol_name, Symbol weight, Symbol grad, Symbol mom, mx_float lr, mx_float momentum=0, mx_float wd=0, mx_float rescale_grad=1, mx_float clip_gradient=-1, bool lazy_update=true)

Symbol	mp_sgd_update (const std::string &symbol_name, Symbol weight, Symbol grad, Symbol weight32, mx_float lr, mx_float wd=0, mx_float rescale_grad=1, mx_float clip_gradient=-1, bool lazy_update=true)

Symbol	mp_sgd_mom_update (const std::string &symbol_name, Symbol weight, Symbol grad, Symbol mom, Symbol weight32, mx_float lr, mx_float momentum=0, mx_float wd=0, mx_float rescale_grad=1, mx_float clip_gradient=-1, bool lazy_update=true)

Symbol	ftml_update (const std::string &symbol_name, Symbol weight, Symbol grad, Symbol d, Symbol v, Symbol z, mx_float lr, int t, mx_float beta1=0.6, mx_float beta2=0.999, double epsilon=1e-08, mx_float wd=0, mx_float rescale_grad=1, mx_float clip_grad=-1)

Symbol	adam_update (const std::string &symbol_name, Symbol weight, Symbol grad, Symbol mean, Symbol var, mx_float lr, mx_float beta1=0.9, mx_float beta2=0.999, mx_float epsilon=1e-08, mx_float wd=0, mx_float rescale_grad=1, mx_float clip_gradient=-1, bool lazy_update=true)

Symbol	rmsprop_update (const std::string &symbol_name, Symbol weight, Symbol grad, Symbol n, mx_float lr, mx_float gamma1=0.95, mx_float epsilon=1e-08, mx_float wd=0, mx_float rescale_grad=1, mx_float clip_gradient=-1, mx_float clip_weights=-1)

Symbol	rmspropalex_update (const std::string &symbol_name, Symbol weight, Symbol grad, Symbol n, Symbol g, Symbol delta, mx_float lr, mx_float gamma1=0.95, mx_float gamma2=0.9, mx_float epsilon=1e-08, mx_float wd=0, mx_float rescale_grad=1, mx_float clip_gradient=-1, mx_float clip_weights=-1)

Symbol	ftrl_update (const std::string &symbol_name, Symbol weight, Symbol grad, Symbol z, Symbol n, mx_float lr, mx_float lamda1=0.01, mx_float beta=1, mx_float wd=0, mx_float rescale_grad=1, mx_float clip_gradient=-1)

Symbol	SliceChannel (const std::string &symbol_name, Symbol data, int num_outputs, int axis=1, bool squeeze_axis=false)

Symbol	InstanceNorm (const std::string &symbol_name, Symbol data, Symbol gamma, Symbol beta, mx_float eps=0.001)

Symbol	GridGenerator (const std::string &symbol_name, Symbol data, GridGeneratorTransformType transform_type, Shape target_shape=Shape(0, 0))

Symbol	Pooling_v1 (const std::string &symbol_name, Symbol data, Shape kernel=Shape(), Pooling_v1PoolType pool_type=Pooling_v1PoolType::kMax, bool global_pool=false, Pooling_v1PoolingConvention pooling_convention=Pooling_v1PoolingConvention::kValid, Shape stride=Shape(), Shape pad=Shape())

Symbol	RNN (const std::string &symbol_name, Symbol data, Symbol parameters, Symbol state, Symbol state_cell, uint32_t state_size, uint32_t num_layers, RNNMode mode, bool bidirectional=false, mx_float p=0, bool state_outputs=false, dmlc::optional< int > projection_size=dmlc::optional< int >(), dmlc::optional< double > lstm_state_clip_min=dmlc::optional< double >(), dmlc::optional< double > lstm_state_clip_max=dmlc::optional< double >(), bool lstm_state_clip_nan=false)

Symbol	Convolution_v1 (const std::string &symbol_name, Symbol data, Symbol weight, Symbol bias, Shape kernel, uint32_t num_filter, Shape stride=Shape(), Shape dilate=Shape(), Shape pad=Shape(), uint32_t num_group=1, uint64_t workspace=1024, bool no_bias=false, Convolution_v1CudnnTune cudnn_tune=Convolution_v1CudnnTune::kNone, bool cudnn_off=false, Convolution_v1Layout layout=Convolution_v1Layout::kNone)

Symbol	Crop (const std::string &symbol_name, const std::vector< Symbol > &data, int num_args, Shape offset=Shape(0, 0), Shape h_w=Shape(0, 0), bool center_crop=false)

Symbol	SequenceReverse (const std::string &symbol_name, Symbol data, Symbol sequence_length, bool use_sequence_length=false, int axis=0)

Symbol	SpatialTransformer (const std::string &symbol_name, Symbol data, Symbol loc, SpatialTransformerTransformType transform_type, SpatialTransformerSamplerType sampler_type, Shape target_shape=Shape(0, 0))

Symbol	SoftmaxOutput (const std::string &symbol_name, Symbol data, Symbol label, mx_float grad_scale=1, mx_float ignore_label=-1, bool multi_output=false, bool use_ignore=false, bool preserve_shape=false, SoftmaxOutputNormalization normalization=SoftmaxOutputNormalization::kNull, bool out_grad=false, mx_float smooth_alpha=0)

Symbol	Softmax (const std::string &symbol_name, Symbol data, mx_float grad_scale=1, mx_float ignore_label=-1, bool multi_output=false, bool use_ignore=false, bool preserve_shape=false, SoftmaxNormalization normalization=SoftmaxNormalization::kNull, bool out_grad=false, mx_float smooth_alpha=0)

Symbol	BilinearSampler (const std::string &symbol_name, Symbol data, Symbol grid)

Symbol	ROIPooling (const std::string &symbol_name, Symbol data, Symbol rois, Shape pooled_size, mx_float spatial_scale)

Symbol	SequenceLast (const std::string &symbol_name, Symbol data, Symbol sequence_length, bool use_sequence_length=false, int axis=0)

Symbol	L2Normalization (const std::string &symbol_name, Symbol data, mx_float eps=1e-10, L2NormalizationMode mode=L2NormalizationMode::kInstance)

Symbol	MakeLoss (const std::string &symbol_name, Symbol data, mx_float grad_scale=1, mx_float valid_thresh=0, MakeLossNormalization normalization=MakeLossNormalization::kNull)

Symbol	SVMOutput (const std::string &symbol_name, Symbol data, Symbol label, mx_float margin=1, mx_float regularization_coefficient=1, bool use_linear=false)

Symbol	Correlation (const std::string &symbol_name, Symbol data1, Symbol data2, uint32_t kernel_size=1, uint32_t max_displacement=1, uint32_t stride1=1, uint32_t stride2=1, uint32_t pad_size=0, bool is_multiply=true)

Symbol	SequenceMask (const std::string &symbol_name, Symbol data, Symbol sequence_length, bool use_sequence_length=false, mx_float value=0, int axis=0)

Symbol	choose_element_0index (const std::string &symbol_name, Symbol lhs, Symbol rhs)

Symbol	fill_element_0index (const std::string &symbol_name, Symbol lhs, Symbol mhs, Symbol rhs)

Symbol	khatri_rao (const std::vector< Symbol > &args)

Symbol	Custom (const std::vector< Symbol > &data, const std::string &op_type)

Symbol	broadcast_power (Symbol lhs, Symbol rhs)

Symbol	broadcast_maximum (Symbol lhs, Symbol rhs)

Symbol	broadcast_minimum (Symbol lhs, Symbol rhs)

Symbol	broadcast_hypot (Symbol lhs, Symbol rhs)

Symbol	Reshape (Symbol data, Shape shape=Shape(), bool reverse=false, Shape target_shape=Shape(), bool keep_highest=false)

Symbol	Flatten (Symbol data)

Symbol	transpose (Symbol data, Shape axes=Shape())

Symbol	expand_dims (Symbol data, int axis)

Symbol	slice (Symbol data, Shape begin, Shape end, Shape step=Shape())

Symbol	slice_axis (Symbol data, int axis, int begin, dmlc::optional< int > end)

Symbol	slice_like (Symbol data, Symbol shape_like, Shape axes=Shape())

Symbol	clip (Symbol data, mx_float a_min, mx_float a_max)

Symbol	repeat (Symbol data, int repeats, dmlc::optional< int > axis=dmlc::optional< int >())

Symbol	tile (Symbol data, Shape reps)

Symbol	reverse (Symbol data, Shape axis)

Symbol	stack (const std::vector< Symbol > &data, int num_args, int axis=0)

Symbol	squeeze (const std::vector< Symbol > &data, dmlc::optional< Shape > axis=dmlc::optional< Shape >())

Symbol	depth_to_space (Symbol data, int block_size)

Symbol	space_to_depth (Symbol data, int block_size)

Symbol	zeros_like (Symbol data)

Symbol	ones_like (Symbol data)

Symbol	broadcast_add (Symbol lhs, Symbol rhs)

Symbol	broadcast_sub (Symbol lhs, Symbol rhs)

Symbol	broadcast_mul (Symbol lhs, Symbol rhs)

Symbol	broadcast_div (Symbol lhs, Symbol rhs)

Symbol	broadcast_mod (Symbol lhs, Symbol rhs)

Symbol	add_n (const std::vector< Symbol > &args)

Symbol	argmax (Symbol data, dmlc::optional< int > axis=dmlc::optional< int >(), bool keepdims=false)

Symbol	argmin (Symbol data, dmlc::optional< int > axis=dmlc::optional< int >(), bool keepdims=false)

Symbol	argmax_channel (Symbol data)

Symbol	pick (Symbol data, Symbol index, dmlc::optional< int > axis=dmlc::optional< int >(-1), bool keepdims=false, PickMode mode=PickMode::kClip)

Symbol	dot (Symbol lhs, Symbol rhs, bool transpose_a=false, bool transpose_b=false, DotForwardStype forward_stype=DotForwardStype::kNone)

Symbol	batch_dot (Symbol lhs, Symbol rhs, bool transpose_a=false, bool transpose_b=false, Batch_dotForwardStype forward_stype=Batch_dotForwardStype::kNone)

Symbol	relu (Symbol data)

Symbol	sigmoid (Symbol data)

Symbol	hard_sigmoid (Symbol data, mx_float alpha=0.2, mx_float beta=0.5)

Symbol	softsign (Symbol data)

Symbol	BlockGrad (Symbol data)

Symbol	make_loss (Symbol data)

Symbol	reshape_like (Symbol lhs, Symbol rhs)

Symbol	shape_array (Symbol data, dmlc::optional< int > lhs_begin=dmlc::optional< int >(), dmlc::optional< int > lhs_end=dmlc::optional< int >(), dmlc::optional< int > rhs_begin=dmlc::optional< int >(), dmlc::optional< int > rhs_end=dmlc::optional< int >())

Symbol	size_array (Symbol data)

Symbol	Cast (Symbol data, CastDtype dtype)

Symbol	negative (Symbol data)

Symbol	reciprocal (Symbol data)

Symbol	abs (Symbol data)

Symbol	sign (Symbol data)

Symbol	round (Symbol data)

Symbol	rint (Symbol data)

Symbol	ceil (Symbol data)

Symbol	floor (Symbol data)

Symbol	trunc (Symbol data)

Symbol	fix (Symbol data)

Symbol	square (Symbol data)

Symbol	sqrt (Symbol data)

Symbol	rsqrt (Symbol data)

Symbol	cbrt (Symbol data)

Symbol	rcbrt (Symbol data)

Symbol	exp (Symbol data)

Symbol	log (Symbol data)

Symbol	log10 (Symbol data)

Symbol	log2 (Symbol data)

Symbol	log1p (Symbol data)

Symbol	expm1 (Symbol data)

Symbol	gamma (Symbol data)

Symbol	gammaln (Symbol data)

Symbol	logical_not (Symbol data)

Symbol	sum (Symbol data, dmlc::optional< Shape > axis=dmlc::optional< Shape >(), bool keepdims=false, bool exclude=false)

Symbol	mean (Symbol data, dmlc::optional< Shape > axis=dmlc::optional< Shape >(), bool keepdims=false, bool exclude=false)

Symbol	prod (Symbol data, dmlc::optional< Shape > axis=dmlc::optional< Shape >(), bool keepdims=false, bool exclude=false)

Symbol	nansum (Symbol data, dmlc::optional< Shape > axis=dmlc::optional< Shape >(), bool keepdims=false, bool exclude=false)

Symbol	nanprod (Symbol data, dmlc::optional< Shape > axis=dmlc::optional< Shape >(), bool keepdims=false, bool exclude=false)

Symbol	max (Symbol data, dmlc::optional< Shape > axis=dmlc::optional< Shape >(), bool keepdims=false, bool exclude=false)

Symbol	min (Symbol data, dmlc::optional< Shape > axis=dmlc::optional< Shape >(), bool keepdims=false, bool exclude=false)

Symbol	broadcast_axis (Symbol data, Shape axis=Shape(), Shape size=Shape())

Symbol	broadcast_to (Symbol data, Shape shape=Shape())

Symbol	broadcast_like (Symbol lhs, Symbol rhs)

Symbol	norm (Symbol data, int ord=2, dmlc::optional< Shape > axis=dmlc::optional< Shape >(), bool keepdims=false)

Symbol	topk (Symbol data, dmlc::optional< int > axis=dmlc::optional< int >(-1), int k=1, TopkRetTyp ret_typ=TopkRetTyp::kIndices, bool is_ascend=false, TopkDtype dtype=TopkDtype::kFloat32)

Symbol	sort (Symbol data, dmlc::optional< int > axis=dmlc::optional< int >(-1), bool is_ascend=true)

Symbol	argsort (Symbol data, dmlc::optional< int > axis=dmlc::optional< int >(-1), bool is_ascend=true, ArgsortDtype dtype=ArgsortDtype::kFloat32)

Symbol	elemwise_add (Symbol lhs, Symbol rhs)

Symbol	elemwise_sub (Symbol lhs, Symbol rhs)

Symbol	elemwise_mul (Symbol lhs, Symbol rhs)

Symbol	elemwise_div (Symbol lhs, Symbol rhs)

Symbol	Embedding (Symbol data, Symbol weight, int input_dim, int output_dim, EmbeddingDtype dtype=EmbeddingDtype::kFloat32, bool sparse_grad=false)

Symbol	take (Symbol a, Symbol indices, int axis=0, TakeMode mode=TakeMode::kClip)

Symbol	batch_take (Symbol a, Symbol indices)

Symbol	one_hot (Symbol indices, int depth, double on_value=1, double off_value=0, One_hotDtype dtype=One_hotDtype::kFloat32)

Symbol	gather_nd (Symbol data, Symbol indices)

Symbol	scatter_nd (Symbol data, Symbol indices, Shape shape)

Symbol	broadcast_equal (Symbol lhs, Symbol rhs)

Symbol	broadcast_not_equal (Symbol lhs, Symbol rhs)

Symbol	broadcast_greater (Symbol lhs, Symbol rhs)

Symbol	broadcast_greater_equal (Symbol lhs, Symbol rhs)

Symbol	broadcast_lesser (Symbol lhs, Symbol rhs)

Symbol	broadcast_lesser_equal (Symbol lhs, Symbol rhs)

Symbol	broadcast_logical_and (Symbol lhs, Symbol rhs)

Symbol	broadcast_logical_or (Symbol lhs, Symbol rhs)

Symbol	broadcast_logical_xor (Symbol lhs, Symbol rhs)

Symbol	diag (Symbol data, dmlc::optional< int > k=dmlc::optional< int >(0))

Symbol	where (Symbol condition, Symbol x, Symbol y)

Symbol	smooth_l1 (Symbol data, mx_float scalar)

Symbol	cast_storage (Symbol data, Cast_storageStype stype)

Symbol	sin (Symbol data)

Symbol	cos (Symbol data)

Symbol	tan (Symbol data)

Symbol	arcsin (Symbol data)

Symbol	arccos (Symbol data)

Symbol	arctan (Symbol data)

Symbol	degrees (Symbol data)

Symbol	radians (Symbol data)

Symbol	sinh (Symbol data)

Symbol	cosh (Symbol data)

Symbol	tanh (Symbol data)

Symbol	arcsinh (Symbol data)

Symbol	arccosh (Symbol data)

Symbol	arctanh (Symbol data)

Symbol	Pooling (Symbol data, Shape kernel=Shape(), PoolingPoolType pool_type=PoolingPoolType::kMax, bool global_pool=false, bool cudnn_off=false, PoolingPoolingConvention pooling_convention=PoolingPoolingConvention::kValid, Shape stride=Shape(), Shape pad=Shape(), dmlc::optional< int > p_value=dmlc::optional< int >(), dmlc::optional< bool > count_include_pad=dmlc::optional< bool >())

Symbol	softmax (Symbol data, int axis=-1, dmlc::optional< double > temperature=dmlc::optional< double >())

Symbol	log_softmax (Symbol data, int axis=-1, dmlc::optional< double > temperature=dmlc::optional< double >())

Symbol	Deconvolution (Symbol data, Symbol weight, Symbol bias, Shape kernel, uint32_t num_filter, Shape stride=Shape(), Shape dilate=Shape(), Shape pad=Shape(), Shape adj=Shape(), Shape target_shape=Shape(), uint32_t num_group=1, uint64_t workspace=512, bool no_bias=true, DeconvolutionCudnnTune cudnn_tune=DeconvolutionCudnnTune::kNone, bool cudnn_off=false, DeconvolutionLayout layout=DeconvolutionLayout::kNone)

Symbol	Activation (Symbol data, ActivationActType act_type)

Symbol	BatchNorm (Symbol data, Symbol gamma, Symbol beta, Symbol moving_mean, Symbol moving_var, double eps=0.001, mx_float momentum=0.9, bool fix_gamma=true, bool use_global_stats=false, bool output_mean_var=false, int axis=1, bool cudnn_off=false)

Symbol	Convolution (Symbol data, Symbol weight, Symbol bias, Shape kernel, uint32_t num_filter, Shape stride=Shape(), Shape dilate=Shape(), Shape pad=Shape(), uint32_t num_group=1, uint64_t workspace=1024, bool no_bias=false, ConvolutionCudnnTune cudnn_tune=ConvolutionCudnnTune::kNone, bool cudnn_off=false, ConvolutionLayout layout=ConvolutionLayout::kNone)

Symbol	UpSampling (const std::vector< Symbol > &data, uint32_t scale, UpSamplingSampleType sample_type, int num_args, uint32_t num_filter=0, UpSamplingMultiInputMode multi_input_mode=UpSamplingMultiInputMode::kConcat, uint64_t workspace=512)

Symbol	Concat (const std::vector< Symbol > &data, int num_args, int dim=1)

Symbol	LayerNorm (Symbol data, Symbol gamma, Symbol beta, int axis=-1, mx_float eps=1e-05, bool output_mean_var=false)

Symbol	LRN (Symbol data, uint32_t nsize, mx_float alpha=0.0001, mx_float beta=0.75, mx_float knorm=2)

Symbol	Dropout (Symbol data, mx_float p=0.5, DropoutMode mode=DropoutMode::kTraining, Shape axes=Shape())

Symbol	SoftmaxActivation (Symbol data, SoftmaxActivationMode mode=SoftmaxActivationMode::kInstance)

Symbol	FullyConnected (Symbol data, Symbol weight, Symbol bias, int num_hidden, bool no_bias=false, bool flatten=true)

Symbol	Pad (Symbol data, PadMode mode, Shape pad_width, double constant_value=0)

Symbol	LeakyReLU (Symbol data, Symbol gamma, LeakyReLUActType act_type=LeakyReLUActType::kLeaky, mx_float slope=0.25, mx_float lower_bound=0.125, mx_float upper_bound=0.334)

Symbol	SwapAxis (Symbol data, uint32_t dim1=0, uint32_t dim2=0)

Symbol	BatchNorm_v1 (Symbol data, Symbol gamma, Symbol beta, mx_float eps=0.001, mx_float momentum=0.9, bool fix_gamma=true, bool use_global_stats=false, bool output_mean_var=false)

Symbol	softmax_cross_entropy (Symbol data, Symbol label)

Symbol	LinearRegressionOutput (Symbol data, Symbol label, mx_float grad_scale=1)

Symbol	MAERegressionOutput (Symbol data, Symbol label, mx_float grad_scale=1)

Symbol	LogisticRegressionOutput (Symbol data, Symbol label, mx_float grad_scale=1)

Symbol	IdentityAttachKLSparseReg (Symbol data, mx_float sparseness_target=0.1, mx_float penalty=0.001, mx_float momentum=0.9)

Symbol	signsgd_update (Symbol weight, Symbol grad, mx_float lr, mx_float wd=0, mx_float rescale_grad=1, mx_float clip_gradient=-1)

Symbol	signum_update (Symbol weight, Symbol grad, Symbol mom, mx_float lr, mx_float momentum=0, mx_float wd=0, mx_float rescale_grad=1, mx_float clip_gradient=-1, mx_float wd_lh=0)

Symbol	sgd_update (Symbol weight, Symbol grad, mx_float lr, mx_float wd=0, mx_float rescale_grad=1, mx_float clip_gradient=-1, bool lazy_update=true)

Symbol	sgd_mom_update (Symbol weight, Symbol grad, Symbol mom, mx_float lr, mx_float momentum=0, mx_float wd=0, mx_float rescale_grad=1, mx_float clip_gradient=-1, bool lazy_update=true)

Symbol	mp_sgd_update (Symbol weight, Symbol grad, Symbol weight32, mx_float lr, mx_float wd=0, mx_float rescale_grad=1, mx_float clip_gradient=-1, bool lazy_update=true)

Symbol	mp_sgd_mom_update (Symbol weight, Symbol grad, Symbol mom, Symbol weight32, mx_float lr, mx_float momentum=0, mx_float wd=0, mx_float rescale_grad=1, mx_float clip_gradient=-1, bool lazy_update=true)

Symbol	ftml_update (Symbol weight, Symbol grad, Symbol d, Symbol v, Symbol z, mx_float lr, int t, mx_float beta1=0.6, mx_float beta2=0.999, double epsilon=1e-08, mx_float wd=0, mx_float rescale_grad=1, mx_float clip_grad=-1)

Symbol	adam_update (Symbol weight, Symbol grad, Symbol mean, Symbol var, mx_float lr, mx_float beta1=0.9, mx_float beta2=0.999, mx_float epsilon=1e-08, mx_float wd=0, mx_float rescale_grad=1, mx_float clip_gradient=-1, bool lazy_update=true)

Symbol	rmsprop_update (Symbol weight, Symbol grad, Symbol n, mx_float lr, mx_float gamma1=0.95, mx_float epsilon=1e-08, mx_float wd=0, mx_float rescale_grad=1, mx_float clip_gradient=-1, mx_float clip_weights=-1)

Symbol	rmspropalex_update (Symbol weight, Symbol grad, Symbol n, Symbol g, Symbol delta, mx_float lr, mx_float gamma1=0.95, mx_float gamma2=0.9, mx_float epsilon=1e-08, mx_float wd=0, mx_float rescale_grad=1, mx_float clip_gradient=-1, mx_float clip_weights=-1)

Symbol	ftrl_update (Symbol weight, Symbol grad, Symbol z, Symbol n, mx_float lr, mx_float lamda1=0.01, mx_float beta=1, mx_float wd=0, mx_float rescale_grad=1, mx_float clip_gradient=-1)

Symbol	SliceChannel (Symbol data, int num_outputs, int axis=1, bool squeeze_axis=false)

Symbol	InstanceNorm (Symbol data, Symbol gamma, Symbol beta, mx_float eps=0.001)

Symbol	GridGenerator (Symbol data, GridGeneratorTransformType transform_type, Shape target_shape=Shape(0, 0))

Symbol	Pooling_v1 (Symbol data, Shape kernel=Shape(), Pooling_v1PoolType pool_type=Pooling_v1PoolType::kMax, bool global_pool=false, Pooling_v1PoolingConvention pooling_convention=Pooling_v1PoolingConvention::kValid, Shape stride=Shape(), Shape pad=Shape())

Symbol	RNN (Symbol data, Symbol parameters, Symbol state, Symbol state_cell, uint32_t state_size, uint32_t num_layers, RNNMode mode, bool bidirectional=false, mx_float p=0, bool state_outputs=false, dmlc::optional< int > projection_size=dmlc::optional< int >(), dmlc::optional< double > lstm_state_clip_min=dmlc::optional< double >(), dmlc::optional< double > lstm_state_clip_max=dmlc::optional< double >(), bool lstm_state_clip_nan=false)

Symbol	Convolution_v1 (Symbol data, Symbol weight, Symbol bias, Shape kernel, uint32_t num_filter, Shape stride=Shape(), Shape dilate=Shape(), Shape pad=Shape(), uint32_t num_group=1, uint64_t workspace=1024, bool no_bias=false, Convolution_v1CudnnTune cudnn_tune=Convolution_v1CudnnTune::kNone, bool cudnn_off=false, Convolution_v1Layout layout=Convolution_v1Layout::kNone)

Symbol	Crop (const std::vector< Symbol > &data, int num_args, Shape offset=Shape(0, 0), Shape h_w=Shape(0, 0), bool center_crop=false)

Symbol	SequenceReverse (Symbol data, Symbol sequence_length, bool use_sequence_length=false, int axis=0)

Symbol	SpatialTransformer (Symbol data, Symbol loc, SpatialTransformerTransformType transform_type, SpatialTransformerSamplerType sampler_type, Shape target_shape=Shape(0, 0))

Symbol	SoftmaxOutput (Symbol data, Symbol label, mx_float grad_scale=1, mx_float ignore_label=-1, bool multi_output=false, bool use_ignore=false, bool preserve_shape=false, SoftmaxOutputNormalization normalization=SoftmaxOutputNormalization::kNull, bool out_grad=false, mx_float smooth_alpha=0)

Symbol	Softmax (Symbol data, mx_float grad_scale=1, mx_float ignore_label=-1, bool multi_output=false, bool use_ignore=false, bool preserve_shape=false, SoftmaxNormalization normalization=SoftmaxNormalization::kNull, bool out_grad=false, mx_float smooth_alpha=0)

Symbol	BilinearSampler (Symbol data, Symbol grid)

Symbol	ROIPooling (Symbol data, Symbol rois, Shape pooled_size, mx_float spatial_scale)

Symbol	SequenceLast (Symbol data, Symbol sequence_length, bool use_sequence_length=false, int axis=0)

Symbol	L2Normalization (Symbol data, mx_float eps=1e-10, L2NormalizationMode mode=L2NormalizationMode::kInstance)

Symbol	MakeLoss (Symbol data, mx_float grad_scale=1, mx_float valid_thresh=0, MakeLossNormalization normalization=MakeLossNormalization::kNull)

Symbol	SVMOutput (Symbol data, Symbol label, mx_float margin=1, mx_float regularization_coefficient=1, bool use_linear=false)

Symbol	Correlation (Symbol data1, Symbol data2, uint32_t kernel_size=1, uint32_t max_displacement=1, uint32_t stride1=1, uint32_t stride2=1, uint32_t pad_size=0, bool is_multiply=true)

Symbol	SequenceMask (Symbol data, Symbol sequence_length, bool use_sequence_length=false, mx_float value=0, int axis=0)

Symbol	choose_element_0index (Symbol lhs, Symbol rhs)

Symbol	fill_element_0index (Symbol lhs, Symbol mhs, Symbol rhs)

Symbol	_Plus (Symbol lhs, Symbol rhs)

Symbol	_Mul (Symbol lhs, Symbol rhs)

Symbol	_Minus (Symbol lhs, Symbol rhs)

Symbol	_Div (Symbol lhs, Symbol rhs)

Symbol	_Mod (Symbol lhs, Symbol rhs)

Symbol	_Power (Symbol lhs, Symbol rhs)

Symbol	_Maximum (Symbol lhs, Symbol rhs)

Symbol	_Minimum (Symbol lhs, Symbol rhs)

Symbol	_PlusScalar (Symbol lhs, mx_float scalar)

Symbol	_MinusScalar (Symbol lhs, mx_float scalar)

Symbol	_RMinusScalar (mx_float scalar, Symbol rhs)

Symbol	_MulScalar (Symbol lhs, mx_float scalar)

Symbol	_DivScalar (Symbol lhs, mx_float scalar)

Symbol	_RDivScalar (mx_float scalar, Symbol rhs)

Symbol	_ModScalar (Symbol lhs, mx_float scalar)

Symbol	_RModScalar (mx_float scalar, Symbol rhs)

Symbol	_PowerScalar (Symbol lhs, mx_float scalar)

Symbol	_RPowerScalar (mx_float scalar, Symbol rhs)

Symbol	_MaximumScalar (Symbol lhs, mx_float scalar)

Symbol	_MinimumScalar (Symbol lhs, mx_float scalar)

Symbol	Crop (const std::string &symbol_name, int num_args, Symbol data, Symbol crop_like, Shape offset=Shape(0, 0), Shape h_w=Shape(0, 0), bool center_crop=false)

Symbol	Activation (const std::string &symbol_name, Symbol data, const std::string &act_type)
	Apply activation function to input. Softmax Activation is only available with CUDNN on GPU and will be computed at each location across channel if input is 4D. More...

std::ostream &	operator<< (std::ostream &os, const Shape &shape)
	allow string printing of the shape More...

std::istream &	operator>> (std::istream &is, Shape &shape)
	read shape from the istream More...

Symbol	operator+ (mx_float lhs, const Symbol &rhs)

Symbol	operator- (mx_float lhs, const Symbol &rhs)

Symbol	operator* (mx_float lhs, const Symbol &rhs)

Symbol	operator/ (mx_float lhs, const Symbol &rhs)

Symbol	operator% (mx_float lhs, const Symbol &rhs)

Typedef Documentation

typedef unsigned mxnet::cpp::index_t

typedef std::function<Optimizer*()> mxnet::cpp::OptimizerCreator

Enumeration Type Documentation

enum mxnet::cpp::ActivationActType

strong

Activation function to be applied.

Enumerator
kRelu
kSigmoid
kSoftrelu
kSoftsign
kTanh

enum mxnet::cpp::ArgsortDtype

strong

DType of the output indices. It is only valid when ret_typ is "indices" or "both". An error will be raised if the selected data type cannot precisely represent the indices.

Enumerator
kFloat16
kFloat32
kFloat64
kInt32
kUint8

enum mxnet::cpp::Batch_dotForwardStype

strong

The desired storage type of the forward output given by user, if the combination of input storage types and this hint does not match any implemented ones, the dot operator will perform fallback operation and still produce an output of the desired storage type.

Enumerator
kNone
kCsr
kDefault
kRow_sparse

enum mxnet::cpp::Cast_storageStype

strong

Output storage type.

Enumerator
kCsr
kDefault
kRow_sparse

enum mxnet::cpp::CastDtype

strong

Output data type.

Enumerator
kFloat16
kFloat32
kFloat64
kInt32
kInt64
kInt8
kUint8

enum mxnet::cpp::Convolution_v1CudnnTune

strong

Whether to pick convolution algo by running performance test. Leads to higher startup time but may give faster speed. Options are: 'off': no tuning. 'limited_workspace': run test and pick the fastest algorithm that doesn't exceed workspace limit. 'fastest': pick the fastest algorithm and ignore workspace limit. If set to None (default), behavior is determined by environment variable MXNET_CUDNN_AUTOTUNE_DEFAULT: 0 for off, 1 for limited workspace (default), 2 for fastest.

Enumerator
kNone
kFastest
kLimited_workspace
kOff

enum mxnet::cpp::Convolution_v1Layout

strong

Set layout for input, output and weight. Empty for default layout: NCHW for 2d and NCDHW for 3d.

Enumerator
kNone
kNCDHW
kNCHW
kNDHWC
kNHWC

enum mxnet::cpp::ConvolutionCudnnTune

strong

Whether to pick convolution algo by running performance test.

Enumerator
kNone
kFastest
kLimited_workspace
kOff

enum mxnet::cpp::ConvolutionLayout

strong

Set layout for input, output and weight. Empty for default layout: NCW for 1d, NCHW for 2d and NCDHW for 3d. NHWC and NDHWC are only supported on GPU.

Enumerator
kNone
kNCDHW
kNCHW
kNCW
kNDHWC
kNHWC

enum mxnet::cpp::DeconvolutionCudnnTune

strong

Whether to pick convolution algorithm by running performance test.

Enumerator
kNone
kFastest
kLimited_workspace
kOff

enum mxnet::cpp::DeconvolutionLayout

strong

Set layout for input, output and weight. Empty for default layout, NCW for 1d, NCHW for 2d and NCDHW for 3d. NHWC and NDHWC are only supported on GPU.

Enumerator
kNone
kNCDHW
kNCHW
kNCW
kNDHWC
kNHWC

enum mxnet::cpp::DeviceType

Enumerator
kCPU
kGPU
kCPUPinned

enum mxnet::cpp::DotForwardStype

strong

The desired storage type of the forward output given by user, if the combination of input storage types and this hint does not match any implemented ones, the dot operator will perform fallback operation and still produce an output of the desired storage type.

Enumerator
kNone
kCsr
kDefault
kRow_sparse

enum mxnet::cpp::DropoutMode

strong

Whether to only turn on dropout during training or to also turn on for inference.

Enumerator
kAlways
kTraining

enum mxnet::cpp::EmbeddingDtype

strong

Data type of weight.

Enumerator
kFloat16
kFloat32
kFloat64
kInt32
kInt64
kInt8
kUint8

enum mxnet::cpp::GridGeneratorTransformType

strong

The type of transformation. For affine, input data should be an affine matrix of size (batch, 6). For warp, input data should be an optical flow of size (batch, 2, h, w).

Enumerator
kAffine
kWarp

enum mxnet::cpp::L2NormalizationMode

strong

Specify the dimension along which to compute L2 norm.

Enumerator
kChannel
kInstance
kSpatial

enum mxnet::cpp::LeakyReLUActType

strong

Activation function to be applied.

Enumerator
kElu
kLeaky
kPrelu
kRrelu
kSelu

enum mxnet::cpp::MakeLossNormalization

strong

If this is set to null, the output gradient will not be normalized. If this is set to batch, the output gradient will be divided by the batch size. If this is set to valid, the output gradient will be divided by the number of valid input elements.

Enumerator
kBatch
kNull
kValid

enum mxnet::cpp::One_hotDtype

strong

DType of the output

Enumerator
kFloat16
kFloat32
kFloat64
kInt32
kInt64
kInt8
kUint8

enum mxnet::cpp::OpReqType

Enumerator
kNullOp	no operation, do not write anything
kWriteTo	write gradient to provided space
kWriteInplace	perform an inplace write, Target shares memory with one of input arguments. This option only happen when
kAddTo	add to the provided space

enum mxnet::cpp::PadMode

strong

Padding type to use. "constant" pads with constant_value. "edge" pads using the edge values of the input array. "reflect" pads by reflecting values with respect to the edges.

Enumerator
kConstant
kEdge
kReflect

enum mxnet::cpp::PickMode

strong

Specify how out-of-bound indices behave. Default is "clip". "clip" means clip to the range. So, if all indices mentioned are too large, they are replaced by the index that addresses the last element along an axis. "wrap" means to wrap around.

Enumerator
kClip
kWrap

enum mxnet::cpp::Pooling_v1PoolingConvention

strong

Pooling convention to be applied.

Enumerator
kFull
kValid

enum mxnet::cpp::Pooling_v1PoolType

strong

Pooling type to be applied.

Enumerator
kAvg
kMax
kSum

enum mxnet::cpp::PoolingPoolingConvention

strong

Pooling convention to be applied.

Enumerator
kFull
kValid

enum mxnet::cpp::PoolingPoolType

strong

Pooling type to be applied.

Enumerator
kAvg
kLp
kMax
kSum

enum mxnet::cpp::RNNMode

strong

the type of RNN to compute

Enumerator
kGru
kLstm
kRnn_relu
kRnn_tanh

enum mxnet::cpp::SoftmaxActivationMode

strong

Specifies how to compute the softmax. If set to instance, it computes softmax for each instance. If set to channel, it computes cross channel softmax for each position of each instance.

Enumerator
kChannel
kInstance

enum mxnet::cpp::SoftmaxNormalization

strong

Normalizes the gradient.

Enumerator
kBatch
kNull
kValid

enum mxnet::cpp::SoftmaxOutputNormalization

strong

Normalizes the gradient.

Enumerator
kBatch
kNull
kValid

enum mxnet::cpp::SpatialTransformerSamplerType

strong

sampling type

Enumerator
kBilinear

enum mxnet::cpp::SpatialTransformerTransformType

strong

transformation type

Enumerator
kAffine

enum mxnet::cpp::TakeMode

strong

Specify how out-of-bound indices behave. Default is "clip". "clip" means clip to the range. So, if all indices mentioned are too large, they are replaced by the index that addresses the last element along an axis. "wrap" means to wrap around.

Enumerator
kClip
kRaise
kWrap

enum mxnet::cpp::TopkDtype

strong

DType of the output indices when ret_typ is "indices" or "both". An error will be raised if the selected data type cannot precisely represent the indices.

Enumerator
kFloat16
kFloat32
kFloat64
kInt32
kUint8

enum mxnet::cpp::TopkRetTyp

strong

The return type. "value" means to return the top k values, "indices" means to return the indices of the top k values, "mask" means to return a mask array containing 0 and 1. 1 means the top k values. "both" means to return a list of both values and indices of top k elements.

Enumerator
kBoth
kIndices
kMask
kValue

enum mxnet::cpp::UpSamplingMultiInputMode

strong

How to handle multiple input. concat means concatenate upsampled images along the channel dimension. sum means add all images together, only available for nearest neighbor upsampling.

Enumerator
kConcat
kSum

enum mxnet::cpp::UpSamplingSampleType

strong

upsampling method

Enumerator
kBilinear
kNearest

Function Documentation

NDArray mxnet::cpp::_default_monitor_func ( const NDArray & x )

Default function for monitor that computes statistics of the input tensor, which is the mean absolute |x|/size(x)

Parameters

x	The input tensor

Returns: The statistics of the input tensor

Symbol mxnet::cpp::_Div	(	Symbol	lhs,
		Symbol	rhs
	)

inline

Symbol mxnet::cpp::_DivScalar	(	Symbol	lhs,
		mx_float	scalar
	)

inline

Symbol mxnet::cpp::_Maximum	(	Symbol	lhs,
		Symbol	rhs
	)

inline

Symbol mxnet::cpp::_MaximumScalar	(	Symbol	lhs,
		mx_float	scalar
	)

inline

Symbol mxnet::cpp::_Minimum	(	Symbol	lhs,
		Symbol	rhs
	)

inline

Symbol mxnet::cpp::_MinimumScalar	(	Symbol	lhs,
		mx_float	scalar
	)

inline

Symbol mxnet::cpp::_Minus	(	Symbol	lhs,
		Symbol	rhs
	)

inline

Symbol mxnet::cpp::_MinusScalar	(	Symbol	lhs,
		mx_float	scalar
	)

inline

Symbol mxnet::cpp::_Mod	(	Symbol	lhs,
		Symbol	rhs
	)

inline

Symbol mxnet::cpp::_ModScalar	(	Symbol	lhs,
		mx_float	scalar
	)

inline

Symbol mxnet::cpp::_Mul	(	Symbol	lhs,
		Symbol	rhs
	)

inline

Symbol mxnet::cpp::_MulScalar	(	Symbol	lhs,
		mx_float	scalar
	)

inline

Symbol mxnet::cpp::_Plus	(	Symbol	lhs,
		Symbol	rhs
	)

inline

Symbol mxnet::cpp::_PlusScalar	(	Symbol	lhs,
		mx_float	scalar
	)

inline

Symbol mxnet::cpp::_Power	(	Symbol	lhs,
		Symbol	rhs
	)

inline

Symbol mxnet::cpp::_PowerScalar	(	Symbol	lhs,
		mx_float	scalar
	)

inline

Symbol mxnet::cpp::_RDivScalar	(	mx_float	scalar,
		Symbol	rhs
	)

inline

Symbol mxnet::cpp::_RMinusScalar	(	mx_float	scalar,
		Symbol	rhs
	)

inline

Symbol mxnet::cpp::_RModScalar	(	mx_float	scalar,
		Symbol	rhs
	)

inline

Symbol mxnet::cpp::_RPowerScalar	(	mx_float	scalar,
		Symbol	rhs
	)

inline

Symbol mxnet::cpp::abs	(	const std::string &	symbol_name,
		Symbol	data
	)

inline

Returns element-wise absolute value of the input.

   Example::

   abs([-2, 0, 3]) = [2, 0, 3]

   The storage type of ``abs`` output depends upon the input storage type:

   - abs(default) = default
   - abs(row_sparse) = row_sparse
   - abs(csr) = csr



   Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L668

Parameters

symbol_name	name of the resulting symbol
data	The input array.

Returns: new symbol

Symbol mxnet::cpp::abs ( Symbol data )

inline

Returns element-wise absolute value of the input.

   Example::

   abs([-2, 0, 3]) = [2, 0, 3]

   The storage type of ``abs`` output depends upon the input storage type:

   - abs(default) = default
   - abs(row_sparse) = row_sparse
   - abs(csr) = csr



   Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L668

Parameters

data	The input array.

Returns: new symbol

Symbol mxnet::cpp::Activation	(	const std::string &	symbol_name,
		Symbol	data,
		const std::string &	act_type
	)

inline

Apply activation function to input. Softmax Activation is only available with CUDNN on GPU and will be computed at each location across channel if input is 4D.

Parameters

symbol_name	name of the resulting symbol.
data	Input data to activation function.
act_type	Activation function to be applied.

Returns: new symbol

Symbol mxnet::cpp::Activation	(	const std::string &	symbol_name,
		Symbol	data,
		ActivationActType	act_type
	)

inline

Applies an activation function element-wise to the input.

   The following activation functions are supported:

   - `relu`: Rectified Linear Unit, :math:`y = max(x, 0)`
   - `sigmoid`: :math:`y = \frac{1}{1 + exp(-x)}`
   - `tanh`: Hyperbolic tangent, :math:`y = \frac{exp(x) - exp(-x)}{exp(x) + exp(-x)}`
   - `softrelu`: Soft ReLU, or SoftPlus, :math:`y = log(1 + exp(x))`
   - `softsign`: :math:`y = \frac{x}{1 + abs(x)}`



   Defined in src/operator/nn/activation.cc:L184

Parameters

symbol_name	name of the resulting symbol
data	The input array.
act_type	Activation function to be applied.

Returns: new symbol

Symbol mxnet::cpp::Activation	(	Symbol	data,
		ActivationActType	act_type
	)

inline

Applies an activation function element-wise to the input.

   The following activation functions are supported:

   - `relu`: Rectified Linear Unit, :math:`y = max(x, 0)`
   - `sigmoid`: :math:`y = \frac{1}{1 + exp(-x)}`
   - `tanh`: Hyperbolic tangent, :math:`y = \frac{exp(x) - exp(-x)}{exp(x) + exp(-x)}`
   - `softrelu`: Soft ReLU, or SoftPlus, :math:`y = log(1 + exp(x))`
   - `softsign`: :math:`y = \frac{x}{1 + abs(x)}`



   Defined in src/operator/nn/activation.cc:L184

Parameters

data	The input array.
act_type	Activation function to be applied.

Returns: new symbol

Symbol mxnet::cpp::adam_update	(	const std::string &	symbol_name,
		Symbol	weight,
		Symbol	grad,
		Symbol	mean,
		Symbol	var,
		mx_float	lr,
		mx_float	beta1 = `0.9`,
		mx_float	beta2 = `0.999`,
		mx_float	epsilon = `1e-08`,
		mx_float	wd = `0`,
		mx_float	rescale_grad = `1`,
		mx_float	clip_gradient = `-1`,
		bool	lazy_update = `true`
	)

inline

Update function for Adam optimizer. Adam is seen as a generalization of AdaGrad.

Adam update consists of the following steps, where g represents gradient and m, v are 1st and 2nd order moment estimates (mean and variance).

.. math::

g_t = \nabla J(W_{t-1})\\ m_t = \beta_1 m_{t-1} + (1 - \beta_1) g_t\\ v_t = \beta_2 v_{t-1} + (1 - \beta_2) g_t^2\\ W_t = W_{t-1} - \alpha \frac{ m_t }{ \sqrt{ v_t } + \epsilon }

It updates the weights using::

   m = beta1*m + (1-beta1)*grad
   v = beta2*v + (1-beta2)*(grad**2)
   w += - learning_rate * m / (sqrt(v) + epsilon)

However, if grad's storage type is row_sparse, lazy_update is True and type of weight is the same as those of m and v, only the row slices whose indices appear in grad.indices are updated (for w, m and v)::

   for row in grad.indices:
       m[row] = beta1*m[row] + (1-beta1)*grad[row]
       v[row] = beta2*v[row] + (1-beta2)*(grad[row]**2)
       w[row] += - learning_rate * m[row] / (sqrt(v[row]) + epsilon)

   Defined in src/operator/optimizer_op.cc:L495

Parameters

symbol_name	name of the resulting symbol
weight	Weight
grad	Gradient
mean	Moving mean
var	Moving variance
lr	Learning rate
beta1	The decay rate for the 1st moment estimates.
beta2	The decay rate for the 2nd moment estimates.
epsilon	A small constant for numerical stability.
wd	Weight decay augments the objective function with a regularization term that penalizes large weights. The penalty scales with the square of the magnitude of each weight.
rescale_grad	Rescale gradient to grad = rescale_grad*grad.
clip_gradient	Clip gradient to the range of [-clip_gradient, clip_gradient]. If clip_gradient <= 0, gradient clipping is turned off. grad = max(min(grad, clip_gradient), -clip_gradient).
lazy_update	If true, lazy updates are applied if gradient's stype is row_sparse

Returns: new symbol

Symbol mxnet::cpp::adam_update	(	Symbol	weight,
		Symbol	grad,
		Symbol	mean,
		Symbol	var,
		mx_float	lr,
		mx_float	beta1 = `0.9`,
		mx_float	beta2 = `0.999`,
		mx_float	epsilon = `1e-08`,
		mx_float	wd = `0`,
		mx_float	rescale_grad = `1`,
		mx_float	clip_gradient = `-1`,
		bool	lazy_update = `true`
	)

inline

Update function for Adam optimizer. Adam is seen as a generalization of AdaGrad.

Adam update consists of the following steps, where g represents gradient and m, v are 1st and 2nd order moment estimates (mean and variance).

.. math::

g_t = \nabla J(W_{t-1})\\ m_t = \beta_1 m_{t-1} + (1 - \beta_1) g_t\\ v_t = \beta_2 v_{t-1} + (1 - \beta_2) g_t^2\\ W_t = W_{t-1} - \alpha \frac{ m_t }{ \sqrt{ v_t } + \epsilon }

It updates the weights using::

   m = beta1*m + (1-beta1)*grad
   v = beta2*v + (1-beta2)*(grad**2)
   w += - learning_rate * m / (sqrt(v) + epsilon)

However, if grad's storage type is row_sparse, lazy_update is True and type of weight is the same as those of m and v, only the row slices whose indices appear in grad.indices are updated (for w, m and v)::

   for row in grad.indices:
       m[row] = beta1*m[row] + (1-beta1)*grad[row]
       v[row] = beta2*v[row] + (1-beta2)*(grad[row]**2)
       w[row] += - learning_rate * m[row] / (sqrt(v[row]) + epsilon)

   Defined in src/operator/optimizer_op.cc:L495

Parameters

weight	Weight
grad	Gradient
mean	Moving mean
var	Moving variance
lr	Learning rate
beta1	The decay rate for the 1st moment estimates.
beta2	The decay rate for the 2nd moment estimates.
epsilon	A small constant for numerical stability.
wd	Weight decay augments the objective function with a regularization term that penalizes large weights. The penalty scales with the square of the magnitude of each weight.
rescale_grad	Rescale gradient to grad = rescale_grad*grad.
clip_gradient	Clip gradient to the range of [-clip_gradient, clip_gradient]. If clip_gradient <= 0, gradient clipping is turned off. grad = max(min(grad, clip_gradient), -clip_gradient).
lazy_update	If true, lazy updates are applied if gradient's stype is row_sparse

Returns: new symbol

Symbol mxnet::cpp::add_n	(	const std::string &	symbol_name,
		const std::vector< Symbol > &	args
	)

inline

Adds all input arguments element-wise.

   .. math::
   add\_n(a_1, a_2, ..., a_n) = a_1 + a_2 + ... + a_n

   ``add_n`` is potentially more efficient than calling ``add`` by `n` times.

   The storage type of ``add_n`` output depends on storage types of inputs

   - add_n(row_sparse, row_sparse, ..) = row_sparse
   - add_n(default, csr, default) = default
   - add_n(any input combinations longer than 4 (>4) with at least one default type) = default
   - otherwise, ``add_n`` falls all inputs back to default storage and generates default storage



   Defined in src/operator/tensor/elemwise_sum.cc:L156

Parameters

symbol_name	name of the resulting symbol
args	Positional input arguments

Returns: new symbol

Symbol mxnet::cpp::add_n ( const std::vector< Symbol > & args )

inline

Adds all input arguments element-wise.

   .. math::
   add\_n(a_1, a_2, ..., a_n) = a_1 + a_2 + ... + a_n

   ``add_n`` is potentially more efficient than calling ``add`` by `n` times.

   The storage type of ``add_n`` output depends on storage types of inputs

   - add_n(row_sparse, row_sparse, ..) = row_sparse
   - add_n(default, csr, default) = default
   - add_n(any input combinations longer than 4 (>4) with at least one default type) = default
   - otherwise, ``add_n`` falls all inputs back to default storage and generates default storage



   Defined in src/operator/tensor/elemwise_sum.cc:L156

Parameters

args	Positional input arguments

Returns: new symbol

Symbol mxnet::cpp::arccos	(	const std::string &	symbol_name,
		Symbol	data
	)

inline

Returns element-wise inverse cosine of the input array.

   The input should be in range `[-1, 1]`.
   The output is in the closed interval :math:`[0, \pi]`

   .. math::
   arccos([-1, -.707, 0, .707, 1]) = [\pi, 3\pi/4, \pi/2, \pi/4, 0]

   The storage type of ``arccos`` output is always dense



   Defined in src/operator/tensor/elemwise_unary_op_trig.cc:L123

Parameters

symbol_name	name of the resulting symbol
data	The input array.

Returns: new symbol

Symbol mxnet::cpp::arccos ( Symbol data )

inline

Returns element-wise inverse cosine of the input array.

   The input should be in range `[-1, 1]`.
   The output is in the closed interval :math:`[0, \pi]`

   .. math::
   arccos([-1, -.707, 0, .707, 1]) = [\pi, 3\pi/4, \pi/2, \pi/4, 0]

   The storage type of ``arccos`` output is always dense



   Defined in src/operator/tensor/elemwise_unary_op_trig.cc:L123

Parameters

data	The input array.

Returns: new symbol

Symbol mxnet::cpp::arccosh	(	const std::string &	symbol_name,
		Symbol	data
	)

inline

Returns the element-wise inverse hyperbolic cosine of the input array, computed element-wise.

The storage type of arccosh output is always dense

   Defined in src/operator/tensor/elemwise_unary_op_trig.cc:L264

Parameters

symbol_name	name of the resulting symbol
data	The input array.

Returns: new symbol

Symbol mxnet::cpp::arccosh ( Symbol data )

inline

Returns the element-wise inverse hyperbolic cosine of the input array, computed element-wise.

The storage type of arccosh output is always dense

   Defined in src/operator/tensor/elemwise_unary_op_trig.cc:L264

Parameters

data	The input array.

Returns: new symbol

Symbol mxnet::cpp::arcsin	(	const std::string &	symbol_name,
		Symbol	data
	)

inline

Returns element-wise inverse sine of the input array.

   The input should be in the range `[-1, 1]`.
   The output is in the closed interval of [:math:`-\pi/2`, :math:`\pi/2`].

   .. math::
   arcsin([-1, -.707, 0, .707, 1]) = [-\pi/2, -\pi/4, 0, \pi/4, \pi/2]

   The storage type of ``arcsin`` output depends upon the input storage type:

   - arcsin(default) = default
   - arcsin(row_sparse) = row_sparse
   - arcsin(csr) = csr



   Defined in src/operator/tensor/elemwise_unary_op_trig.cc:L104

Parameters

symbol_name	name of the resulting symbol
data	The input array.

Returns: new symbol

Symbol mxnet::cpp::arcsin ( Symbol data )

inline

Returns element-wise inverse sine of the input array.

   The input should be in the range `[-1, 1]`.
   The output is in the closed interval of [:math:`-\pi/2`, :math:`\pi/2`].

   .. math::
   arcsin([-1, -.707, 0, .707, 1]) = [-\pi/2, -\pi/4, 0, \pi/4, \pi/2]

   The storage type of ``arcsin`` output depends upon the input storage type:

   - arcsin(default) = default
   - arcsin(row_sparse) = row_sparse
   - arcsin(csr) = csr



   Defined in src/operator/tensor/elemwise_unary_op_trig.cc:L104

Parameters

data	The input array.

Returns: new symbol

Symbol mxnet::cpp::arcsinh	(	const std::string &	symbol_name,
		Symbol	data
	)

inline

Returns the element-wise inverse hyperbolic sine of the input array, computed element-wise.

The storage type of arcsinh output depends upon the input storage type:

arcsinh(default) = default
arcsinh(row_sparse) = row_sparse
arcsinh(csr) = csr

   Defined in src/operator/tensor/elemwise_unary_op_trig.cc:L250

Parameters

symbol_name	name of the resulting symbol
data	The input array.

Returns: new symbol

Symbol mxnet::cpp::arcsinh ( Symbol data )

inline

Returns the element-wise inverse hyperbolic sine of the input array, computed element-wise.

The storage type of arcsinh output depends upon the input storage type:

arcsinh(default) = default
arcsinh(row_sparse) = row_sparse
arcsinh(csr) = csr

   Defined in src/operator/tensor/elemwise_unary_op_trig.cc:L250

Parameters

data	The input array.

Returns: new symbol

Symbol mxnet::cpp::arctan	(	const std::string &	symbol_name,
		Symbol	data
	)

inline

Returns element-wise inverse tangent of the input array.

   The output is in the closed interval :math:`[-\pi/2, \pi/2]`

   .. math::
   arctan([-1, 0, 1]) = [-\pi/4, 0, \pi/4]

   The storage type of ``arctan`` output depends upon the input storage type:

   - arctan(default) = default
   - arctan(row_sparse) = row_sparse
   - arctan(csr) = csr



   Defined in src/operator/tensor/elemwise_unary_op_trig.cc:L144

Parameters

symbol_name	name of the resulting symbol
data	The input array.

Returns: new symbol

Symbol mxnet::cpp::arctan ( Symbol data )

inline

Returns element-wise inverse tangent of the input array.

   The output is in the closed interval :math:`[-\pi/2, \pi/2]`

   .. math::
   arctan([-1, 0, 1]) = [-\pi/4, 0, \pi/4]

   The storage type of ``arctan`` output depends upon the input storage type:

   - arctan(default) = default
   - arctan(row_sparse) = row_sparse
   - arctan(csr) = csr



   Defined in src/operator/tensor/elemwise_unary_op_trig.cc:L144

Parameters

data	The input array.

Returns: new symbol

Symbol mxnet::cpp::arctanh	(	const std::string &	symbol_name,
		Symbol	data
	)

inline

Returns the element-wise inverse hyperbolic tangent of the input array, computed element-wise.

The storage type of arctanh output depends upon the input storage type:

arctanh(default) = default
arctanh(row_sparse) = row_sparse
arctanh(csr) = csr

   Defined in src/operator/tensor/elemwise_unary_op_trig.cc:L281

Parameters

symbol_name	name of the resulting symbol
data	The input array.

Returns: new symbol

Symbol mxnet::cpp::arctanh ( Symbol data )

inline

Returns the element-wise inverse hyperbolic tangent of the input array, computed element-wise.

The storage type of arctanh output depends upon the input storage type:

arctanh(default) = default
arctanh(row_sparse) = row_sparse
arctanh(csr) = csr

   Defined in src/operator/tensor/elemwise_unary_op_trig.cc:L281

Parameters

data	The input array.

Returns: new symbol

Symbol mxnet::cpp::argmax	(	const std::string &	symbol_name,
		Symbol	data,
		dmlc::optional< int >	axis = `dmlc::optional<int>()`,
		bool	keepdims = `false`
	)

inline

Returns indices of the maximum values along an axis.

   In the case of multiple occurrences of maximum values, the indices corresponding to the first occurrence
   are returned.

   Examples::

   x = [[ 0.,  1.,  2.],
   [ 3.,  4.,  5.]]

   // argmax along axis 0
   argmax(x, axis=0) = [ 1.,  1.,  1.]

   // argmax along axis 1
   argmax(x, axis=1) = [ 2.,  2.]

   // argmax along axis 1 keeping same dims as an input array
   argmax(x, axis=1, keepdims=True) = [[ 2.],
   [ 2.]]



   Defined in src/operator/tensor/broadcast_reduce_op_index.cc:L52

Parameters

symbol_name	name of the resulting symbol
data	The input
axis	The axis along which to perform the reduction. Negative values mean indexing from right to left. ``Requires axis to be set as int, because global reduction is not supported yet.``
keepdims	If this is set to `True`, the reduced axis is left in the result as dimension with size one.

Returns: new symbol

Symbol mxnet::cpp::argmax	(	Symbol	data,
		dmlc::optional< int >	axis = `dmlc::optional<int>()`,
		bool	keepdims = `false`
	)

inline

Returns indices of the maximum values along an axis.

   In the case of multiple occurrences of maximum values, the indices corresponding to the first occurrence
   are returned.

   Examples::

   x = [[ 0.,  1.,  2.],
   [ 3.,  4.,  5.]]

   // argmax along axis 0
   argmax(x, axis=0) = [ 1.,  1.,  1.]

   // argmax along axis 1
   argmax(x, axis=1) = [ 2.,  2.]

   // argmax along axis 1 keeping same dims as an input array
   argmax(x, axis=1, keepdims=True) = [[ 2.],
   [ 2.]]



   Defined in src/operator/tensor/broadcast_reduce_op_index.cc:L52

Parameters

data	The input
axis	The axis along which to perform the reduction. Negative values mean indexing from right to left. ``Requires axis to be set as int, because global reduction is not supported yet.``
keepdims	If this is set to `True`, the reduced axis is left in the result as dimension with size one.

Returns: new symbol

Symbol mxnet::cpp::argmax_channel	(	const std::string &	symbol_name,
		Symbol	data
	)

inline

Returns argmax indices of each channel from the input array.

   The result will be an NDArray of shape (num_channel,).

   In case of multiple occurrences of the maximum values, the indices corresponding to the first occurrence
   are returned.

   Examples::

   x = [[ 0.,  1.,  2.],
   [ 3.,  4.,  5.]]

   argmax_channel(x) = [ 2.,  2.]



   Defined in src/operator/tensor/broadcast_reduce_op_index.cc:L97

Parameters

symbol_name	name of the resulting symbol
data	The input array

Returns: new symbol

Symbol mxnet::cpp::argmax_channel ( Symbol data )

inline

Returns argmax indices of each channel from the input array.

   The result will be an NDArray of shape (num_channel,).

   In case of multiple occurrences of the maximum values, the indices corresponding to the first occurrence
   are returned.

   Examples::

   x = [[ 0.,  1.,  2.],
   [ 3.,  4.,  5.]]

   argmax_channel(x) = [ 2.,  2.]



   Defined in src/operator/tensor/broadcast_reduce_op_index.cc:L97

Parameters

data	The input array

Returns: new symbol

Symbol mxnet::cpp::argmin	(	const std::string &	symbol_name,
		Symbol	data,
		dmlc::optional< int >	axis = `dmlc::optional<int>()`,
		bool	keepdims = `false`
	)

inline

Returns indices of the minimum values along an axis.

   In the case of multiple occurrences of minimum values, the indices corresponding to the first occurrence
   are returned.

   Examples::

   x = [[ 0.,  1.,  2.],
   [ 3.,  4.,  5.]]

   // argmin along axis 0
   argmin(x, axis=0) = [ 0.,  0.,  0.]

   // argmin along axis 1
   argmin(x, axis=1) = [ 0.,  0.]

   // argmin along axis 1 keeping same dims as an input array
   argmin(x, axis=1, keepdims=True) = [[ 0.],
   [ 0.]]



   Defined in src/operator/tensor/broadcast_reduce_op_index.cc:L77

Parameters

symbol_name	name of the resulting symbol
data	The input
axis	The axis along which to perform the reduction. Negative values mean indexing from right to left. ``Requires axis to be set as int, because global reduction is not supported yet.``
keepdims	If this is set to `True`, the reduced axis is left in the result as dimension with size one.

Returns: new symbol

Symbol mxnet::cpp::argmin	(	Symbol	data,
		dmlc::optional< int >	axis = `dmlc::optional<int>()`,
		bool	keepdims = `false`
	)

inline

Returns indices of the minimum values along an axis.

   In the case of multiple occurrences of minimum values, the indices corresponding to the first occurrence
   are returned.

   Examples::

   x = [[ 0.,  1.,  2.],
   [ 3.,  4.,  5.]]

   // argmin along axis 0
   argmin(x, axis=0) = [ 0.,  0.,  0.]

   // argmin along axis 1
   argmin(x, axis=1) = [ 0.,  0.]

   // argmin along axis 1 keeping same dims as an input array
   argmin(x, axis=1, keepdims=True) = [[ 0.],
   [ 0.]]



   Defined in src/operator/tensor/broadcast_reduce_op_index.cc:L77

Parameters

data	The input
axis	The axis along which to perform the reduction. Negative values mean indexing from right to left. ``Requires axis to be set as int, because global reduction is not supported yet.``
keepdims	If this is set to `True`, the reduced axis is left in the result as dimension with size one.

Returns: new symbol

Symbol mxnet::cpp::argsort	(	const std::string &	symbol_name,
		Symbol	data,
		dmlc::optional< int >	axis = `dmlc::optional<int>(-1)`,
		bool	is_ascend = `true`,
		ArgsortDtype	dtype = `ArgsortDtype::kFloat32`
	)

inline

Returns the indices that would sort an input array along the given axis.

   This function performs sorting along the given axis and returns an array of indices having same shape
   as an input array that index data in sorted order.

   Examples::

   x = [[ 0.3,  0.2,  0.4],
   [ 0.1,  0.3,  0.2]]

   // sort along axis -1
   argsort(x) = [[ 1.,  0.,  2.],
   [ 0.,  2.,  1.]]

   // sort along axis 0
   argsort(x, axis=0) = [[ 1.,  0.,  1.]
   [ 0.,  1.,  0.]]

   // flatten and then sort
   argsort(x, axis=None) = [ 3.,  1.,  5.,  0.,  4.,  2.]


   Defined in src/operator/tensor/ordering_op.cc:L177

Parameters

symbol_name	name of the resulting symbol
data	The input array
axis	Axis along which to sort the input tensor. If not given, the flattened array is used. Default is -1.
is_ascend	Whether to sort in ascending or descending order.
dtype	DType of the output indices. It is only valid when ret_typ is "indices" or "both". An error will be raised if the selected data type cannot precisely represent the indices.

Returns: new symbol

Symbol mxnet::cpp::argsort	(	Symbol	data,
		dmlc::optional< int >	axis = `dmlc::optional<int>(-1)`,
		bool	is_ascend = `true`,
		ArgsortDtype	dtype = `ArgsortDtype::kFloat32`
	)

inline

Returns the indices that would sort an input array along the given axis.

   This function performs sorting along the given axis and returns an array of indices having same shape
   as an input array that index data in sorted order.

   Examples::

   x = [[ 0.3,  0.2,  0.4],
   [ 0.1,  0.3,  0.2]]

   // sort along axis -1
   argsort(x) = [[ 1.,  0.,  2.],
   [ 0.,  2.,  1.]]

   // sort along axis 0
   argsort(x, axis=0) = [[ 1.,  0.,  1.]
   [ 0.,  1.,  0.]]

   // flatten and then sort
   argsort(x, axis=None) = [ 3.,  1.,  5.,  0.,  4.,  2.]


   Defined in src/operator/tensor/ordering_op.cc:L177

Parameters

data	The input array
axis	Axis along which to sort the input tensor. If not given, the flattened array is used. Default is -1.
is_ascend	Whether to sort in ascending or descending order.
dtype	DType of the output indices. It is only valid when ret_typ is "indices" or "both". An error will be raised if the selected data type cannot precisely represent the indices.

Returns: new symbol

Symbol mxnet::cpp::batch_dot	(	const std::string &	symbol_name,
		Symbol	lhs,
		Symbol	rhs,
		bool	transpose_a = `false`,
		bool	transpose_b = `false`,
		Batch_dotForwardStype	forward_stype = `Batch_dotForwardStype::kNone`
	)

inline

Batchwise dot product.

   ``batch_dot`` is used to compute dot product of ``x`` and ``y`` when ``x`` and
   ``y`` are data in batch, namely 3D arrays in shape of `(batch_size, :, :)`.

   For example, given ``x`` with shape `(batch_size, n, m)` and ``y`` with shape
   `(batch_size, m, k)`, the result array will have shape `(batch_size, n, k)`,
   which is computed by::

   batch_dot(x,y)[i,:,:] = dot(x[i,:,:], y[i,:,:])



   Defined in src/operator/tensor/dot.cc:L125

Parameters

symbol_name	name of the resulting symbol
lhs	The first input
rhs	The second input
transpose_a	If true then transpose the first input before dot.
transpose_b	If true then transpose the second input before dot.
forward_stype	The desired storage type of the forward output given by user. If the combination of input storage types and this hint does not match any implemented ones, the dot operator will perform fallback operation and still produce an output of the desired storage type.

Returns: new symbol

Symbol mxnet::cpp::batch_dot	(	Symbol	lhs,
		Symbol	rhs,
		bool	transpose_a = `false`,
		bool	transpose_b = `false`,
		Batch_dotForwardStype	forward_stype = `Batch_dotForwardStype::kNone`
	)

inline

Batchwise dot product.

   ``batch_dot`` is used to compute dot product of ``x`` and ``y`` when ``x`` and
   ``y`` are data in batch, namely 3D arrays in shape of `(batch_size, :, :)`.

   For example, given ``x`` with shape `(batch_size, n, m)` and ``y`` with shape
   `(batch_size, m, k)`, the result array will have shape `(batch_size, n, k)`,
   which is computed by::

   batch_dot(x,y)[i,:,:] = dot(x[i,:,:], y[i,:,:])



   Defined in src/operator/tensor/dot.cc:L125

Parameters

lhs	The first input
rhs	The second input
transpose_a	If true then transpose the first input before dot.
transpose_b	If true then transpose the second input before dot.
forward_stype	The desired storage type of the forward output given by user. If the combination of input storage types and this hint does not match any implemented ones, the dot operator will perform fallback operation and still produce an output of the desired storage type.

Returns: new symbol

Symbol mxnet::cpp::batch_take	(	const std::string &	symbol_name,
		Symbol	a,
		Symbol	indices
	)

inline

Takes elements from a data batch.

   .. note::
   `batch_take` is deprecated. Use `pick` instead.

   Given an input array of shape ``(d0, d1)`` and indices of shape ``(i0,)``, the result will be
   an output array of shape ``(i0,)`` with::

   output[i] = input[i, indices[i]]

   Examples::

   x = [[ 1.,  2.],
   [ 3.,  4.],
   [ 5.,  6.]]

   // takes elements with specified indices
   batch_take(x, [0,1,0]) = [ 1.  4.  5.]



   Defined in src/operator/tensor/indexing_op.cc:L490

Parameters

symbol_name	name of the resulting symbol
a	The input array
indices	The index array

Returns: new symbol

Symbol mxnet::cpp::batch_take	(	Symbol	a,
		Symbol	indices
	)

inline

Takes elements from a data batch.

   .. note::
   `batch_take` is deprecated. Use `pick` instead.

   Given an input array of shape ``(d0, d1)`` and indices of shape ``(i0,)``, the result will be
   an output array of shape ``(i0,)`` with::

   output[i] = input[i, indices[i]]

   Examples::

   x = [[ 1.,  2.],
   [ 3.,  4.],
   [ 5.,  6.]]

   // takes elements with specified indices
   batch_take(x, [0,1,0]) = [ 1.  4.  5.]



   Defined in src/operator/tensor/indexing_op.cc:L490

Parameters

a	The input array
indices	The index array

Returns: new symbol

Symbol mxnet::cpp::BatchNorm	(	const std::string &	symbol_name,
		Symbol	data,
		Symbol	gamma,
		Symbol	beta,
		Symbol	moving_mean,
		Symbol	moving_var,
		double	eps = `0.001`,
		mx_float	momentum = `0.9`,
		bool	fix_gamma = `true`,
		bool	use_global_stats = `false`,
		bool	output_mean_var = `false`,
		int	axis = `1`,
		bool	cudnn_off = `false`
	)

inline

Batch normalization.

   Normalizes a data batch by mean and variance, and applies a scale ``gamma`` as
   well as offset ``beta``.

   Assume the input has more than one dimension and we normalize along axis 1.
   We first compute the mean and variance along this axis:

   .. math::

   data\_mean[i] = mean(data[:,i,:,...]) \\
   data\_var[i] = var(data[:,i,:,...])

   Then compute the normalized output, which has the same shape as input, as

   .. math::

   out[:,i,:,...] = \frac{data[:,i,:,...] - data\_mean[i]}{\sqrt{data\_var[i]+\epsilon}} * gamma[i] + beta[i]

   Both *mean* and *var* returns a scalar by treating the input as a vector.

   Assume the input has size *k* on axis 1, then both ``gamma`` and ``beta``
   have shape *(k,)*. If ``output_mean_var`` is set to be true, then outputs both ``data_mean`` and
   the inverse of ``data_var``, which are needed for the backward pass. Note that the gradients of these
   two outputs are blocked.

   Besides the inputs and the outputs, this operator accepts two auxiliary
   states, ``moving_mean`` and ``moving_var``, which are *k*-length
   vectors. They are global statistics for the whole dataset, which are updated
   by::

   moving_mean = moving_mean * momentum + data_mean * (1 - momentum)
   moving_var = moving_var * momentum + data_var * (1 - momentum)

   If ``use_global_stats`` is set to be true, then ``moving_mean`` and
   ``moving_var`` are used instead of ``data_mean`` and ``data_var`` to compute
   the output. It is often used during inference.

   The parameter ``axis`` specifies which axis of the input shape denotes
   the 'channel' (separately normalized groups).  The default is 1.  Specifying -1 sets the channel
   axis to be the last item in the input shape.

   Both ``gamma`` and ``beta`` are learnable parameters. But if ``fix_gamma`` is true,
   then set ``gamma`` to 1 and its gradient to 0.

   Note::

   When fix_gamma is set to True, no sparse support is provided. If fix_gamma is set to False,
   the sparse tensors will fallback.



   Defined in src/operator/nn/batch_norm.cc:L574

Parameters

symbol_name	name of the resulting symbol
data	Input data to batch normalization
gamma	gamma array
beta	beta array
moving_mean	running mean of input
moving_var	running variance of input
eps	Epsilon to prevent div 0. Must be no less than CUDNN_BN_MIN_EPSILON defined in cudnn.h when using cudnn (usually 1e-5)
momentum	Momentum for moving average
fix_gamma	Fix gamma while training
use_global_stats	Whether use global moving statistics instead of local
output_mean_var	Output the mean and inverse std
axis	Specify which shape axis the channel is specified
cudnn_off	Do not select CUDNN operator, if available

Returns: new symbol

Symbol mxnet::cpp::BatchNorm	(	Symbol	data,
		Symbol	gamma,
		Symbol	beta,
		Symbol	moving_mean,
		Symbol	moving_var,
		double	eps = `0.001`,
		mx_float	momentum = `0.9`,
		bool	fix_gamma = `true`,
		bool	use_global_stats = `false`,
		bool	output_mean_var = `false`,
		int	axis = `1`,
		bool	cudnn_off = `false`
	)

inline

Batch normalization.

   Normalizes a data batch by mean and variance, and applies a scale ``gamma`` as
   well as offset ``beta``.

   Assume the input has more than one dimension and we normalize along axis 1.
   We first compute the mean and variance along this axis:

   .. math::

   data\_mean[i] = mean(data[:,i,:,...]) \\
   data\_var[i] = var(data[:,i,:,...])

   Then compute the normalized output, which has the same shape as input, as

   .. math::

   out[:,i,:,...] = \frac{data[:,i,:,...] - data\_mean[i]}{\sqrt{data\_var[i]+\epsilon}} * gamma[i] + beta[i]

   Both *mean* and *var* returns a scalar by treating the input as a vector.

   Assume the input has size *k* on axis 1, then both ``gamma`` and ``beta``
   have shape *(k,)*. If ``output_mean_var`` is set to be true, then outputs both ``data_mean`` and
   the inverse of ``data_var``, which are needed for the backward pass. Note that the gradients of these
   two outputs are blocked.

   Besides the inputs and the outputs, this operator accepts two auxiliary
   states, ``moving_mean`` and ``moving_var``, which are *k*-length
   vectors. They are global statistics for the whole dataset, which are updated
   by::

   moving_mean = moving_mean * momentum + data_mean * (1 - momentum)
   moving_var = moving_var * momentum + data_var * (1 - momentum)

   If ``use_global_stats`` is set to be true, then ``moving_mean`` and
   ``moving_var`` are used instead of ``data_mean`` and ``data_var`` to compute
   the output. It is often used during inference.

   The parameter ``axis`` specifies which axis of the input shape denotes
   the 'channel' (separately normalized groups).  The default is 1.  Specifying -1 sets the channel
   axis to be the last item in the input shape.

   Both ``gamma`` and ``beta`` are learnable parameters. But if ``fix_gamma`` is true,
   then set ``gamma`` to 1 and its gradient to 0.

   Note::

   When fix_gamma is set to True, no sparse support is provided. If fix_gamma is set to False,
   the sparse tensors will fallback.



   Defined in src/operator/nn/batch_norm.cc:L574

Parameters

data	Input data to batch normalization
gamma	gamma array
beta	beta array
moving_mean	running mean of input
moving_var	running variance of input
eps	Epsilon to prevent div 0. Must be no less than CUDNN_BN_MIN_EPSILON defined in cudnn.h when using cudnn (usually 1e-5)
momentum	Momentum for moving average
fix_gamma	Fix gamma while training
use_global_stats	Whether use global moving statistics instead of local
output_mean_var	Output the mean and inverse std
axis	Specify which shape axis the channel is specified
cudnn_off	Do not select CUDNN operator, if available

Returns: new symbol

Symbol mxnet::cpp::BatchNorm_v1	(	const std::string &	symbol_name,
		Symbol	data,
		Symbol	gamma,
		Symbol	beta,
		mx_float	eps = `0.001`,
		mx_float	momentum = `0.9`,
		bool	fix_gamma = `true`,
		bool	use_global_stats = `false`,
		bool	output_mean_var = `false`
	)

inline

Batch normalization.

   This operator is DEPRECATED. Perform BatchNorm on the input.

   Normalizes a data batch by mean and variance, and applies a scale ``gamma`` as
   well as offset ``beta``.

   Assume the input has more than one dimension and we normalize along axis 1.
   We first compute the mean and variance along this axis:

   .. math::

   data\_mean[i] = mean(data[:,i,:,...]) \\
   data\_var[i] = var(data[:,i,:,...])

   Then compute the normalized output, which has the same shape as input, as

   .. math::

   out[:,i,:,...] = \frac{data[:,i,:,...] - data\_mean[i]}{\sqrt{data\_var[i]+\epsilon}} * gamma[i] + beta[i]

   Both *mean* and *var* returns a scalar by treating the input as a vector.

   Assume the input has size *k* on axis 1, then both ``gamma`` and ``beta``
   have shape *(k,)*. If ``output_mean_var`` is set to be true, then outputs both ``data_mean`` and
   ``data_var`` as well, which are needed for the backward pass.

   Besides the inputs and the outputs, this operator accepts two auxiliary
   states, ``moving_mean`` and ``moving_var``, which are *k*-length
   vectors. They are global statistics for the whole dataset, which are updated
   by::

   moving_mean = moving_mean * momentum + data_mean * (1 - momentum)
   moving_var = moving_var * momentum + data_var * (1 - momentum)

   If ``use_global_stats`` is set to be true, then ``moving_mean`` and
   ``moving_var`` are used instead of ``data_mean`` and ``data_var`` to compute
   the output. It is often used during inference.

   Both ``gamma`` and ``beta`` are learnable parameters. But if ``fix_gamma`` is true,
   then set ``gamma`` to 1 and its gradient to 0.

   There's no sparse support for this operator, and it will exhibit problematic behavior if used with
   sparse tensors.



   Defined in src/operator/batch_norm_v1.cc:L95

Parameters

symbol_name	name of the resulting symbol
data	Input data to batch normalization
gamma	gamma array
beta	beta array
eps	Epsilon to prevent div 0
momentum	Momentum for moving average
fix_gamma	Fix gamma while training
use_global_stats	Whether use global moving statistics instead of local
output_mean_var	Output All,normal mean and var

Returns: new symbol

Symbol mxnet::cpp::BatchNorm_v1	(	Symbol	data,
		Symbol	gamma,
		Symbol	beta,
		mx_float	eps = `0.001`,
		mx_float	momentum = `0.9`,
		bool	fix_gamma = `true`,
		bool	use_global_stats = `false`,
		bool	output_mean_var = `false`
	)

inline

Batch normalization.

   This operator is DEPRECATED. Perform BatchNorm on the input.

   Normalizes a data batch by mean and variance, and applies a scale ``gamma`` as
   well as offset ``beta``.

   Assume the input has more than one dimension and we normalize along axis 1.
   We first compute the mean and variance along this axis:

   .. math::

   data\_mean[i] = mean(data[:,i,:,...]) \\
   data\_var[i] = var(data[:,i,:,...])

   Then compute the normalized output, which has the same shape as input, as

   .. math::

   out[:,i,:,...] = \frac{data[:,i,:,...] - data\_mean[i]}{\sqrt{data\_var[i]+\epsilon}} * gamma[i] + beta[i]

   Both *mean* and *var* returns a scalar by treating the input as a vector.

   Assume the input has size *k* on axis 1, then both ``gamma`` and ``beta``
   have shape *(k,)*. If ``output_mean_var`` is set to be true, then outputs both ``data_mean`` and
   ``data_var`` as well, which are needed for the backward pass.

   Besides the inputs and the outputs, this operator accepts two auxiliary
   states, ``moving_mean`` and ``moving_var``, which are *k*-length
   vectors. They are global statistics for the whole dataset, which are updated
   by::

   moving_mean = moving_mean * momentum + data_mean * (1 - momentum)
   moving_var = moving_var * momentum + data_var * (1 - momentum)

   If ``use_global_stats`` is set to be true, then ``moving_mean`` and
   ``moving_var`` are used instead of ``data_mean`` and ``data_var`` to compute
   the output. It is often used during inference.

   Both ``gamma`` and ``beta`` are learnable parameters. But if ``fix_gamma`` is true,
   then set ``gamma`` to 1 and its gradient to 0.

   There's no sparse support for this operator, and it will exhibit problematic behavior if used with
   sparse tensors.



   Defined in src/operator/batch_norm_v1.cc:L95

Parameters

data	Input data to batch normalization
gamma	gamma array
beta	beta array
eps	Epsilon to prevent div 0
momentum	Momentum for moving average
fix_gamma	Fix gamma while training
use_global_stats	Whether use global moving statistics instead of local
output_mean_var	Output All,normal mean and var

Returns: new symbol

Symbol mxnet::cpp::BilinearSampler	(	const std::string &	symbol_name,
		Symbol	data,
		Symbol	grid
	)

inline

Applies bilinear sampling to input feature map.

   Bilinear Sampling is the key of [NIPS2015] "Spatial Transformer Networks". The usage of the
   operator is very similar to the remap function in OpenCV,
   except that the operator has the backward pass.

   Given :math:`data` and :math:`grid`, then the output is computed by

   .. math::
   x_{src} = grid[batch, 0, y_{dst}, x_{dst}] \\
   y_{src} = grid[batch, 1, y_{dst}, x_{dst}] \\
   output[batch, channel, y_{dst}, x_{dst}] = G(data[batch, channel, y_{src}, x_{src}])

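   :math:`x_{dst}`, :math:`y_{dst}` enumerate all spatial locations in :math:`output`, and :math:`G()` denotes the bilinear interpolation kernel.
   The out-boundary points will be padded with zeros. The shape of the output will be (data.shape[0], data.shape[1], grid.shape[2], grid.shape[3]).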

   The operator assumes that :math:`data` has 'NCHW' layout and :math:`grid` has been normalized to [-1, 1].

   BilinearSampler often cooperates with GridGenerator which generates sampling grids for BilinearSampler.
   GridGenerator supports two kinds of transformation: ``affine`` and ``warp``.
   If users want to design a CustomOp to manipulate :math:`grid`, please firstly refer to the code of GridGenerator.

   Example 1::

   ## Zoom out data two times
   data = array([[[[1, 4, 3, 6],
   [1, 8, 8, 9],
   [0, 4, 1, 5],
   [1, 0, 1, 3]]]])

   affine_matrix = array([[2, 0, 0],
   [0, 2, 0]])

   affine_matrix = reshape(affine_matrix, shape=(1, 6))

   grid = GridGenerator(data=affine_matrix, transform_type='affine', target_shape=(4, 4))

   out = BilinearSampler(data, grid)

   out
   [[[[ 0,   0,     0,   0],
   [ 0,   3.5,   6.5, 0],
   [ 0,   1.25,  2.5, 0],
   [ 0,   0,     0,   0]]]


   Example 2::

   ## shift data horizontally by -1 pixel

   data = array([[[[1, 4, 3, 6],
   [1, 8, 8, 9],
   [0, 4, 1, 5],
   [1, 0, 1, 3]]]])

   warp_matrix = array([[[[1, 1, 1, 1],
   [1, 1, 1, 1],
   [1, 1, 1, 1],
   [1, 1, 1, 1]],
   [[0, 0, 0, 0],
   [0, 0, 0, 0],
   [0, 0, 0, 0],
   [0, 0, 0, 0]]]])

   grid = GridGenerator(data=warp_matrix, transform_type='warp')
   out = BilinearSampler(data, grid)

   out
   [[[[ 4,  3,  6,  0],
   [ 8,  8,  9,  0],
   [ 4,  1,  5,  0],
   [ 0,  1,  3,  0]]]


   Defined in src/operator/bilinear_sampler.cc:L245

Parameters

symbol_name	name of the resulting symbol
data	Input data to the BilinearsamplerOp.
grid	Input grid to the BilinearsamplerOp. grid has two channels: x_src, y_src

Returns: new symbol

Symbol mxnet::cpp::BilinearSampler	(	Symbol	data,
		Symbol	grid
	)

inline

Applies bilinear sampling to input feature map.

   Bilinear Sampling is the key of [NIPS2015] "Spatial Transformer Networks". The usage of the
   operator is very similar to the remap function in OpenCV,
   except that the operator has the backward pass.

   Given :math:`data` and :math:`grid`, then the output is computed by

   .. math::
   x_{src} = grid[batch, 0, y_{dst}, x_{dst}] \\
   y_{src} = grid[batch, 1, y_{dst}, x_{dst}] \\
   output[batch, channel, y_{dst}, x_{dst}] = G(data[batch, channel, y_{src}, x_{src}])

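   :math:`x_{dst}`, :math:`y_{dst}` enumerate all spatial locations in :math:`output`, and :math:`G()` denotes the bilinear interpolation kernel.
   The out-boundary points will be padded with zeros. The shape of the output will be (data.shape[0], data.shape[1], grid.shape[2], grid.shape[3]).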

   The operator assumes that :math:`data` has 'NCHW' layout and :math:`grid` has been normalized to [-1, 1].

   BilinearSampler often cooperates with GridGenerator which generates sampling grids for BilinearSampler.
   GridGenerator supports two kinds of transformation: ``affine`` and ``warp``.
   If users want to design a CustomOp to manipulate :math:`grid`, please firstly refer to the code of GridGenerator.

   Example 1::

   ## Zoom out data two times
   data = array([[[[1, 4, 3, 6],
   [1, 8, 8, 9],
   [0, 4, 1, 5],
   [1, 0, 1, 3]]]])

   affine_matrix = array([[2, 0, 0],
   [0, 2, 0]])

   affine_matrix = reshape(affine_matrix, shape=(1, 6))

   grid = GridGenerator(data=affine_matrix, transform_type='affine', target_shape=(4, 4))

   out = BilinearSampler(data, grid)

   out
   [[[[ 0,   0,     0,   0],
   [ 0,   3.5,   6.5, 0],
   [ 0,   1.25,  2.5, 0],
   [ 0,   0,     0,   0]]]


   Example 2::

   ## shift data horizontally by -1 pixel

   data = array([[[[1, 4, 3, 6],
   [1, 8, 8, 9],
   [0, 4, 1, 5],
   [1, 0, 1, 3]]]])

   warp_matrix = array([[[[1, 1, 1, 1],
   [1, 1, 1, 1],
   [1, 1, 1, 1],
   [1, 1, 1, 1]],
   [[0, 0, 0, 0],
   [0, 0, 0, 0],
   [0, 0, 0, 0],
   [0, 0, 0, 0]]]])

   grid = GridGenerator(data=warp_matrix, transform_type='warp')
   out = BilinearSampler(data, grid)

   out
   [[[[ 4,  3,  6,  0],
   [ 8,  8,  9,  0],
   [ 4,  1,  5,  0],
   [ 0,  1,  3,  0]]]


   Defined in src/operator/bilinear_sampler.cc:L245

Parameters

data	Input data to the BilinearsamplerOp.
grid	Input grid to the BilinearsamplerOp. grid has two channels: x_src, y_src

Returns: new symbol

Symbol mxnet::cpp::BlockGrad	(	const std::string &	symbol_name,
		Symbol	data
	)

inline

Stops gradient computation.

   Stops the accumulated gradient of the inputs from flowing through this operator
   in the backward direction. In other words, this operator prevents the contribution
   of its inputs to be taken into account for computing gradients.

   Example::

   v1 = [1, 2]
   v2 = [0, 1]
   a = Variable('a')
   b = Variable('b')
   b_stop_grad = stop_gradient(3 * b)
   loss = MakeLoss(b_stop_grad + a)

   executor = loss.simple_bind(ctx=cpu(), a=(1,2), b=(1,2))
   executor.forward(is_train=True, a=v1, b=v2)
   executor.outputs
   [ 1.  5.]

   executor.backward()
   executor.grad_arrays
   [ 0.  0.]
   [ 1.  1.]



   Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L265

Parameters

symbol_name	name of the resulting symbol
data	The input array.

Returns: new symbol

Symbol mxnet::cpp::BlockGrad ( Symbol data )

inline

Stops gradient computation.

   Stops the accumulated gradient of the inputs from flowing through this operator
   in the backward direction. In other words, this operator prevents the contribution
   of its inputs to be taken into account for computing gradients.

   Example::

   v1 = [1, 2]
   v2 = [0, 1]
   a = Variable('a')
   b = Variable('b')
   b_stop_grad = stop_gradient(3 * b)
   loss = MakeLoss(b_stop_grad + a)

   executor = loss.simple_bind(ctx=cpu(), a=(1,2), b=(1,2))
   executor.forward(is_train=True, a=v1, b=v2)
   executor.outputs
   [ 1.  5.]

   executor.backward()
   executor.grad_arrays
   [ 0.  0.]
   [ 1.  1.]



   Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L265

Parameters

data	The input array.

Returns: new symbol

Symbol mxnet::cpp::broadcast_add	(	const std::string &	symbol_name,
		Symbol	lhs,
		Symbol	rhs
	)

inline

Returns element-wise sum of the input arrays with broadcasting.

   `broadcast_plus` is an alias to the function `broadcast_add`.

   Example::

   x = [[ 1.,  1.,  1.],
   [ 1.,  1.,  1.]]

   y = [[ 0.],
   [ 1.]]

   broadcast_add(x, y) = [[ 1.,  1.,  1.],
   [ 2.,  2.,  2.]]

   broadcast_plus(x, y) = [[ 1.,  1.,  1.],
   [ 2.,  2.,  2.]]

   Supported sparse operations:

   broadcast_add(csr, dense(1D)) = dense
   broadcast_add(dense(1D), csr) = dense



   Defined in src/operator/tensor/elemwise_binary_broadcast_op_basic.cc:L58

Parameters

symbol_name	name of the resulting symbol
lhs	First input to the function
rhs	Second input to the function

Returns: new symbol

Symbol mxnet::cpp::broadcast_add	(	Symbol	lhs,
		Symbol	rhs
	)

inline

Returns element-wise sum of the input arrays with broadcasting.

   `broadcast_plus` is an alias to the function `broadcast_add`.

   Example::

   x = [[ 1.,  1.,  1.],
   [ 1.,  1.,  1.]]

   y = [[ 0.],
   [ 1.]]

   broadcast_add(x, y) = [[ 1.,  1.,  1.],
   [ 2.,  2.,  2.]]

   broadcast_plus(x, y) = [[ 1.,  1.,  1.],
   [ 2.,  2.,  2.]]

   Supported sparse operations:

   broadcast_add(csr, dense(1D)) = dense
   broadcast_add(dense(1D), csr) = dense



   Defined in src/operator/tensor/elemwise_binary_broadcast_op_basic.cc:L58

Parameters

lhs	First input to the function
rhs	Second input to the function

Returns: new symbol

Symbol mxnet::cpp::broadcast_axis	(	const std::string &	symbol_name,
		Symbol	data,
		Shape	axis = `Shape()`,
		Shape	size = `Shape()`
	)

inline

Broadcasts the input array over particular axes.

   Broadcasting is allowed on axes with size 1, such as from `(2,1,3,1)` to
   `(2,8,3,9)`. Elements will be duplicated on the broadcasted axes.

   Example::

   // given x of shape (1,2,1)
   x = [[[ 1.],
   [ 2.]]]

   // broadcast x on axis 2
   broadcast_axis(x, axis=2, size=3) = [[[ 1.,  1.,  1.],
   [ 2.,  2.,  2.]]]
   // broadcast x on axes 0 and 2
   broadcast_axis(x, axis=(0,2), size=(2,3)) = [[[ 1.,  1.,  1.],
   [ 2.,  2.,  2.]],
   [[ 1.,  1.,  1.],
   [ 2.,  2.,  2.]]]


   Defined in src/operator/tensor/broadcast_reduce_op_value.cc:L237

Parameters

symbol_name	name of the resulting symbol
data	The input
axis	The axes to perform the broadcasting.
size	Target sizes of the broadcasting axes.

Returns: new symbol

Symbol mxnet::cpp::broadcast_axis	(	Symbol	data,
		Shape	axis = `Shape()`,
		Shape	size = `Shape()`
	)

inline

Broadcasts the input array over particular axes.

   Broadcasting is allowed on axes with size 1, such as from `(2,1,3,1)` to
   `(2,8,3,9)`. Elements will be duplicated on the broadcasted axes.

   Example::

   // given x of shape (1,2,1)
   x = [[[ 1.],
   [ 2.]]]

   // broadcast x on axis 2
   broadcast_axis(x, axis=2, size=3) = [[[ 1.,  1.,  1.],
   [ 2.,  2.,  2.]]]
   // broadcast x on axes 0 and 2
   broadcast_axis(x, axis=(0,2), size=(2,3)) = [[[ 1.,  1.,  1.],
   [ 2.,  2.,  2.]],
   [[ 1.,  1.,  1.],
   [ 2.,  2.,  2.]]]


   Defined in src/operator/tensor/broadcast_reduce_op_value.cc:L237

Parameters

data	The input
axis	The axes to perform the broadcasting.
size	Target sizes of the broadcasting axes.

Returns: new symbol

Symbol mxnet::cpp::broadcast_div	(	const std::string &	symbol_name,
		Symbol	lhs,
		Symbol	rhs
	)

inline

Returns element-wise division of the input arrays with broadcasting.

   Example::

   x = [[ 6.,  6.,  6.],
   [ 6.,  6.,  6.]]

   y = [[ 2.],
   [ 3.]]

   broadcast_div(x, y) = [[ 3.,  3.,  3.],
   [ 2.,  2.,  2.]]

   Supported sparse operations:

   broadcast_div(csr, dense(1D)) = csr



   Defined in src/operator/tensor/elemwise_binary_broadcast_op_basic.cc:L187

Parameters

symbol_name	name of the resulting symbol
lhs	First input to the function
rhs	Second input to the function

Returns: new symbol

Symbol mxnet::cpp::broadcast_div	(	Symbol	lhs,
		Symbol	rhs
	)

inline

Returns element-wise division of the input arrays with broadcasting.

   Example::

   x = [[ 6.,  6.,  6.],
   [ 6.,  6.,  6.]]

   y = [[ 2.],
   [ 3.]]

   broadcast_div(x, y) = [[ 3.,  3.,  3.],
   [ 2.,  2.,  2.]]

   Supported sparse operations:

   broadcast_div(csr, dense(1D)) = csr



   Defined in src/operator/tensor/elemwise_binary_broadcast_op_basic.cc:L187

Parameters

lhs	First input to the function
rhs	Second input to the function

Returns: new symbol

Symbol mxnet::cpp::broadcast_equal	(	const std::string &	symbol_name,
		Symbol	lhs,
		Symbol	rhs
	)

inline

Returns the result of element-wise equal to (==) comparison operation with broadcasting.

   Example::

   x = [[ 1.,  1.,  1.],
   [ 1.,  1.,  1.]]

   y = [[ 0.],
   [ 1.]]

   broadcast_equal(x, y) = [[ 0.,  0.,  0.],
   [ 1.,  1.,  1.]]



   Defined in src/operator/tensor/elemwise_binary_broadcast_op_logic.cc:L46

Parameters

symbol_name	name of the resulting symbol
lhs	First input to the function
rhs	Second input to the function

Returns: new symbol

Symbol mxnet::cpp::broadcast_equal	(	Symbol	lhs,
		Symbol	rhs
	)

inline

Returns the result of element-wise equal to (==) comparison operation with broadcasting.

   Example::

   x = [[ 1.,  1.,  1.],
   [ 1.,  1.,  1.]]

   y = [[ 0.],
   [ 1.]]

   broadcast_equal(x, y) = [[ 0.,  0.,  0.],
   [ 1.,  1.,  1.]]



   Defined in src/operator/tensor/elemwise_binary_broadcast_op_logic.cc:L46

Parameters

lhs	First input to the function
rhs	Second input to the function

Returns: new symbol

Symbol mxnet::cpp::broadcast_greater	(	const std::string &	symbol_name,
		Symbol	lhs,
		Symbol	rhs
	)

inline

Returns the result of element-wise greater than (>) comparison operation with broadcasting.

   Example::

   x = [[ 1.,  1.,  1.],
   [ 1.,  1.,  1.]]

   y = [[ 0.],
   [ 1.]]

   broadcast_greater(x, y) = [[ 1.,  1.,  1.],
   [ 0.,  0.,  0.]]



   Defined in src/operator/tensor/elemwise_binary_broadcast_op_logic.cc:L82

Parameters

symbol_name	name of the resulting symbol
lhs	First input to the function
rhs	Second input to the function

Returns: new symbol

Symbol mxnet::cpp::broadcast_greater	(	Symbol	lhs,
		Symbol	rhs
	)

inline

Returns the result of element-wise greater than (>) comparison operation with broadcasting.

   Example::

   x = [[ 1.,  1.,  1.],
   [ 1.,  1.,  1.]]

   y = [[ 0.],
   [ 1.]]

   broadcast_greater(x, y) = [[ 1.,  1.,  1.],
   [ 0.,  0.,  0.]]



   Defined in src/operator/tensor/elemwise_binary_broadcast_op_logic.cc:L82

Parameters

lhs	First input to the function
rhs	Second input to the function

Returns: new symbol

Symbol mxnet::cpp::broadcast_greater_equal	(	const std::string &	symbol_name,
		Symbol	lhs,
		Symbol	rhs
	)

inline

Returns the result of element-wise greater than or equal to (>=) comparison operation with broadcasting.

   Example::

   x = [[ 1.,  1.,  1.],
   [ 1.,  1.,  1.]]

   y = [[ 0.],
   [ 1.]]

   broadcast_greater_equal(x, y) = [[ 1.,  1.,  1.],
   [ 1.,  1.,  1.]]



   Defined in src/operator/tensor/elemwise_binary_broadcast_op_logic.cc:L100

Parameters

symbol_name	name of the resulting symbol
lhs	First input to the function
rhs	Second input to the function

Returns: new symbol

Symbol mxnet::cpp::broadcast_greater_equal	(	Symbol	lhs,
		Symbol	rhs
	)

inline

Returns the result of element-wise greater than or equal to (>=) comparison operation with broadcasting.

   Example::

   x = [[ 1.,  1.,  1.],
   [ 1.,  1.,  1.]]

   y = [[ 0.],
   [ 1.]]

   broadcast_greater_equal(x, y) = [[ 1.,  1.,  1.],
   [ 1.,  1.,  1.]]



   Defined in src/operator/tensor/elemwise_binary_broadcast_op_logic.cc:L100

Parameters

lhs	First input to the function
rhs	Second input to the function

Returns: new symbol

Symbol mxnet::cpp::broadcast_hypot	(	const std::string &	symbol_name,
		Symbol	lhs,
		Symbol	rhs
	)

inline

Returns the hypotenuse of a right angled triangle, given its "legs" with broadcasting.

It is equivalent to doing :math:`sqrt(x_1^2 + x_2^2)`.

Example::

x = [[ 3., 3., 3.]]

y = [[ 4.], [ 4.]]

broadcast_hypot(x, y) = [[ 5., 5., 5.], [ 5., 5., 5.]]

z = [[ 0.], [ 4.]]

broadcast_hypot(x, z) = [[ 3., 3., 3.], [ 5., 5., 5.]]

   Defined in src/operator/tensor/elemwise_binary_broadcast_op_extended.cc:L156

Parameters

symbol_name	name of the resulting symbol
lhs	First input to the function
rhs	Second input to the function

Returns: new symbol

Symbol mxnet::cpp::broadcast_hypot	(	Symbol	lhs,
		Symbol	rhs
	)

inline

Returns the hypotenuse of a right angled triangle, given its "legs" with broadcasting.

It is equivalent to doing :math:`sqrt(x_1^2 + x_2^2)`.

Example::

x = [[ 3., 3., 3.]]

y = [[ 4.], [ 4.]]

broadcast_hypot(x, y) = [[ 5., 5., 5.], [ 5., 5., 5.]]

z = [[ 0.], [ 4.]]

broadcast_hypot(x, z) = [[ 3., 3., 3.], [ 5., 5., 5.]]

   Defined in src/operator/tensor/elemwise_binary_broadcast_op_extended.cc:L156

Parameters

lhs	First input to the function
rhs	Second input to the function

Returns: new symbol

Symbol mxnet::cpp::broadcast_lesser	(	const std::string &	symbol_name,
		Symbol	lhs,
		Symbol	rhs
	)

inline

Returns the result of element-wise lesser than (<) comparison operation with broadcasting.

   Example::

   x = [[ 1.,  1.,  1.],
   [ 1.,  1.,  1.]]

   y = [[ 0.],
   [ 1.]]

   broadcast_lesser(x, y) = [[ 0.,  0.,  0.],
   [ 0.,  0.,  0.]]



   Defined in src/operator/tensor/elemwise_binary_broadcast_op_logic.cc:L118

Parameters

symbol_name	name of the resulting symbol
lhs	First input to the function
rhs	Second input to the function

Returns: new symbol

Symbol mxnet::cpp::broadcast_lesser	(	Symbol	lhs,
		Symbol	rhs
	)

inline

Returns the result of element-wise lesser than (<) comparison operation with broadcasting.

   Example::

   x = [[ 1.,  1.,  1.],
   [ 1.,  1.,  1.]]

   y = [[ 0.],
   [ 1.]]

   broadcast_lesser(x, y) = [[ 0.,  0.,  0.],
   [ 0.,  0.,  0.]]



   Defined in src/operator/tensor/elemwise_binary_broadcast_op_logic.cc:L118

Parameters

lhs	First input to the function
rhs	Second input to the function

Returns: new symbol

Symbol mxnet::cpp::broadcast_lesser_equal	(	const std::string &	symbol_name,
		Symbol	lhs,
		Symbol	rhs
	)

inline

Returns the result of element-wise lesser than or equal to (<=) comparison operation with broadcasting.

   Example::

   x = [[ 1.,  1.,  1.],
   [ 1.,  1.,  1.]]

   y = [[ 0.],
   [ 1.]]

   broadcast_lesser_equal(x, y) = [[ 0.,  0.,  0.],
   [ 1.,  1.,  1.]]



   Defined in src/operator/tensor/elemwise_binary_broadcast_op_logic.cc:L136

Parameters

symbol_name	name of the resulting symbol
lhs	First input to the function
rhs	Second input to the function

Returns: new symbol

Symbol mxnet::cpp::broadcast_lesser_equal	(	Symbol	lhs,
		Symbol	rhs
	)

inline

Returns the result of element-wise lesser than or equal to (<=) comparison operation with broadcasting.

   Example::

   x = [[ 1.,  1.,  1.],
   [ 1.,  1.,  1.]]

   y = [[ 0.],
   [ 1.]]

   broadcast_lesser_equal(x, y) = [[ 0.,  0.,  0.],
   [ 1.,  1.,  1.]]



   Defined in src/operator/tensor/elemwise_binary_broadcast_op_logic.cc:L136

Parameters

lhs	First input to the function
rhs	Second input to the function

Returns: new symbol

Symbol mxnet::cpp::broadcast_like	(	const std::string &	symbol_name,
		Symbol	lhs,
		Symbol	rhs
	)

inline

Broadcasts lhs to have the same shape as rhs.

   Broadcasting is a mechanism that allows NDArrays to perform arithmetic
   with arrays of different shapes efficiently without creating multiple copies of arrays.
   Also see, `Broadcasting
   <https://docs.scipy.org/doc/numpy/user/basics.broadcasting.html>`_ for more explanation.

   Broadcasting is allowed on axes with size 1, such as from `(2,1,3,1)` to
   `(2,8,3,9)`. Elements will be duplicated on the broadcasted axes.

   For example::

   broadcast_like([[1,2,3]], [[5,6,7],[7,8,9]]) = [[ 1.,  2.,  3.],
   [ 1.,  2.,  3.]]



   Defined in src/operator/tensor/broadcast_reduce_op_value.cc:L312

Parameters

symbol_name	name of the resulting symbol
lhs	First input.
rhs	Second input.

Returns: new symbol

Symbol mxnet::cpp::broadcast_like	(	Symbol	lhs,
		Symbol	rhs
	)

inline

Broadcasts lhs to have the same shape as rhs.

   Broadcasting is a mechanism that allows NDArrays to perform arithmetic
   with arrays of different shapes efficiently without creating multiple copies of arrays.
   Also see, `Broadcasting
   <https://docs.scipy.org/doc/numpy/user/basics.broadcasting.html>`_ for more explanation.

   Broadcasting is allowed on axes with size 1, such as from `(2,1,3,1)` to
   `(2,8,3,9)`. Elements will be duplicated on the broadcasted axes.

   For example::

   broadcast_like([[1,2,3]], [[5,6,7],[7,8,9]]) = [[ 1.,  2.,  3.],
   [ 1.,  2.,  3.]]



   Defined in src/operator/tensor/broadcast_reduce_op_value.cc:L312

Parameters

lhs	First input.
rhs	Second input.

Returns: new symbol

Symbol mxnet::cpp::broadcast_logical_and	(	const std::string &	symbol_name,
		Symbol	lhs,
		Symbol	rhs
	)

inline

Returns the result of element-wise logical and with broadcasting.

   Example::

   x = [[ 1.,  1.,  1.],
   [ 1.,  1.,  1.]]

   y = [[ 0.],
   [ 1.]]

   broadcast_logical_and(x, y) = [[ 0.,  0.,  0.],
   [ 1.,  1.,  1.]]



   Defined in src/operator/tensor/elemwise_binary_broadcast_op_logic.cc:L154

Parameters

symbol_name	name of the resulting symbol
lhs	First input to the function
rhs	Second input to the function

Returns: new symbol

Symbol mxnet::cpp::broadcast_logical_and	(	Symbol	lhs,
		Symbol	rhs
	)

inline

Returns the result of element-wise logical and with broadcasting.

   Example::

   x = [[ 1.,  1.,  1.],
   [ 1.,  1.,  1.]]

   y = [[ 0.],
   [ 1.]]

   broadcast_logical_and(x, y) = [[ 0.,  0.,  0.],
   [ 1.,  1.,  1.]]



   Defined in src/operator/tensor/elemwise_binary_broadcast_op_logic.cc:L154

Parameters

lhs	First input to the function
rhs	Second input to the function

Returns: new symbol

Symbol mxnet::cpp::broadcast_logical_or	(	const std::string &	symbol_name,
		Symbol	lhs,
		Symbol	rhs
	)

inline

Returns the result of element-wise logical or with broadcasting.

   Example::

   x = [[ 1.,  1.,  0.],
   [ 1.,  1.,  0.]]

   y = [[ 1.],
   [ 0.]]

   broadcast_logical_or(x, y) = [[ 1.,  1.,  1.],
   [ 1.,  1.,  0.]]



   Defined in src/operator/tensor/elemwise_binary_broadcast_op_logic.cc:L172

Parameters

symbol_name	name of the resulting symbol
lhs	First input to the function
rhs	Second input to the function

Returns: new symbol

Symbol mxnet::cpp::broadcast_logical_or	(	Symbol	lhs,
		Symbol	rhs
	)

inline

Returns the result of element-wise logical or with broadcasting.

   Example::

   x = [[ 1.,  1.,  0.],
   [ 1.,  1.,  0.]]

   y = [[ 1.],
   [ 0.]]

   broadcast_logical_or(x, y) = [[ 1.,  1.,  1.],
   [ 1.,  1.,  0.]]



   Defined in src/operator/tensor/elemwise_binary_broadcast_op_logic.cc:L172

Parameters

lhs	First input to the function
rhs	Second input to the function

Returns: new symbol

Symbol mxnet::cpp::broadcast_logical_xor	(	const std::string &	symbol_name,
		Symbol	lhs,
		Symbol	rhs
	)

inline

Returns the result of element-wise logical xor with broadcasting.

   Example::

   x = [[ 1.,  1.,  0.],
   [ 1.,  1.,  0.]]

   y = [[ 1.],
   [ 0.]]

   broadcast_logical_xor(x, y) = [[ 0.,  0.,  1.],
   [ 1.,  1.,  0.]]



   Defined in src/operator/tensor/elemwise_binary_broadcast_op_logic.cc:L190

Parameters

symbol_name	name of the resulting symbol
lhs	First input to the function
rhs	Second input to the function

Returns: new symbol

Symbol mxnet::cpp::broadcast_logical_xor	(	Symbol	lhs,
		Symbol	rhs
	)

inline

Returns the result of element-wise logical xor with broadcasting.

   Example::

   x = [[ 1.,  1.,  0.],
   [ 1.,  1.,  0.]]

   y = [[ 1.],
   [ 0.]]

   broadcast_logical_xor(x, y) = [[ 0.,  0.,  1.],
   [ 1.,  1.,  0.]]



   Defined in src/operator/tensor/elemwise_binary_broadcast_op_logic.cc:L190

Parameters

lhs	First input to the function
rhs	Second input to the function

Returns: new symbol

Symbol mxnet::cpp::broadcast_maximum	(	const std::string &	symbol_name,
		Symbol	lhs,
		Symbol	rhs
	)

inline

Returns element-wise maximum of the input arrays with broadcasting.

   This function compares two input arrays and returns a new array having the element-wise maxima.

   Example::

   x = [[ 1.,  1.,  1.],
   [ 1.,  1.,  1.]]

   y = [[ 0.],
   [ 1.]]

   broadcast_maximum(x, y) = [[ 1.,  1.,  1.],
   [ 1.,  1.,  1.]]



   Defined in src/operator/tensor/elemwise_binary_broadcast_op_extended.cc:L80

Parameters

symbol_name	name of the resulting symbol
lhs	First input to the function
rhs	Second input to the function

Returns: new symbol

Symbol mxnet::cpp::broadcast_maximum	(	Symbol	lhs,
		Symbol	rhs
	)

inline

Returns element-wise maximum of the input arrays with broadcasting.

   This function compares two input arrays and returns a new array having the element-wise maxima.

   Example::

   x = [[ 1.,  1.,  1.],
   [ 1.,  1.,  1.]]

   y = [[ 0.],
   [ 1.]]

   broadcast_maximum(x, y) = [[ 1.,  1.,  1.],
   [ 1.,  1.,  1.]]



   Defined in src/operator/tensor/elemwise_binary_broadcast_op_extended.cc:L80

Parameters

lhs	First input to the function
rhs	Second input to the function

Returns: new symbol

Symbol mxnet::cpp::broadcast_minimum	(	const std::string &	symbol_name,
		Symbol	lhs,
		Symbol	rhs
	)

inline

Returns element-wise minimum of the input arrays with broadcasting.

   This function compares two input arrays and returns a new array having the element-wise minima.

   Example::

   x = [[ 1.,  1.,  1.],
   [ 1.,  1.,  1.]]

   y = [[ 0.],
   [ 1.]]

   broadcast_minimum(x, y) = [[ 0.,  0.,  0.],
   [ 1.,  1.,  1.]]



   Defined in src/operator/tensor/elemwise_binary_broadcast_op_extended.cc:L115

Parameters

symbol_name	name of the resulting symbol
lhs	First input to the function
rhs	Second input to the function

Returns: new symbol

Symbol mxnet::cpp::broadcast_minimum	(	Symbol	lhs,
		Symbol	rhs
	)

inline

Returns element-wise minimum of the input arrays with broadcasting.

   This function compares two input arrays and returns a new array having the element-wise minima.

   Example::

   x = [[ 1.,  1.,  1.],
   [ 1.,  1.,  1.]]

   y = [[ 0.],
   [ 1.]]

   broadcast_minimum(x, y) = [[ 0.,  0.,  0.],
   [ 1.,  1.,  1.]]



   Defined in src/operator/tensor/elemwise_binary_broadcast_op_extended.cc:L115

Parameters

lhs	First input to the function
rhs	Second input to the function

Returns: new symbol

Symbol mxnet::cpp::broadcast_mod	(	const std::string &	symbol_name,
		Symbol	lhs,
		Symbol	rhs
	)

inline

Returns element-wise modulo of the input arrays with broadcasting.

   Example::

   x = [[ 8.,  8.,  8.],
   [ 8.,  8.,  8.]]

   y = [[ 2.],
   [ 3.]]

   broadcast_mod(x, y) = [[ 0.,  0.,  0.],
   [ 2.,  2.,  2.]]



   Defined in src/operator/tensor/elemwise_binary_broadcast_op_basic.cc:L222

Parameters

symbol_name	name of the resulting symbol
lhs	First input to the function
rhs	Second input to the function

Returns: new symbol

Symbol mxnet::cpp::broadcast_mod	(	Symbol	lhs,
		Symbol	rhs
	)

inline

Returns element-wise modulo of the input arrays with broadcasting.

   Example::

   x = [[ 8.,  8.,  8.],
   [ 8.,  8.,  8.]]

   y = [[ 2.],
   [ 3.]]

   broadcast_mod(x, y) = [[ 0.,  0.,  0.],
   [ 2.,  2.,  2.]]



   Defined in src/operator/tensor/elemwise_binary_broadcast_op_basic.cc:L222

Parameters

lhs	First input to the function
rhs	Second input to the function

Returns: new symbol

Symbol mxnet::cpp::broadcast_mul	(	const std::string &	symbol_name,
		Symbol	lhs,
		Symbol	rhs
	)

inline

Returns element-wise product of the input arrays with broadcasting.

   Example::

   x = [[ 1.,  1.,  1.],
   [ 1.,  1.,  1.]]

   y = [[ 0.],
   [ 1.]]

   broadcast_mul(x, y) = [[ 0.,  0.,  0.],
   [ 1.,  1.,  1.]]

   Supported sparse operations:

   broadcast_mul(csr, dense(1D)) = csr



   Defined in src/operator/tensor/elemwise_binary_broadcast_op_basic.cc:L146

Parameters

symbol_name	name of the resulting symbol
lhs	First input to the function
rhs	Second input to the function

Returns: new symbol

Symbol mxnet::cpp::broadcast_mul	(	Symbol	lhs,
		Symbol	rhs
	)

inline

Returns element-wise product of the input arrays with broadcasting.

   Example::

   x = [[ 1.,  1.,  1.],
   [ 1.,  1.,  1.]]

   y = [[ 0.],
   [ 1.]]

   broadcast_mul(x, y) = [[ 0.,  0.,  0.],
   [ 1.,  1.,  1.]]

   Supported sparse operations:

   broadcast_mul(csr, dense(1D)) = csr



   Defined in src/operator/tensor/elemwise_binary_broadcast_op_basic.cc:L146

Parameters

lhs	First input to the function
rhs	Second input to the function

Returns: new symbol

Symbol mxnet::cpp::broadcast_not_equal	(	const std::string &	symbol_name,
		Symbol	lhs,
		Symbol	rhs
	)

inline

Returns the result of element-wise not equal to (!=) comparison operation with broadcasting.

   Example::

   x = [[ 1.,  1.,  1.],
   [ 1.,  1.,  1.]]

   y = [[ 0.],
   [ 1.]]

   broadcast_not_equal(x, y) = [[ 1.,  1.,  1.],
   [ 0.,  0.,  0.]]



   Defined in src/operator/tensor/elemwise_binary_broadcast_op_logic.cc:L64

Parameters

symbol_name	name of the resulting symbol
lhs	First input to the function
rhs	Second input to the function

Returns: new symbol

Symbol mxnet::cpp::broadcast_not_equal	(	Symbol	lhs,
		Symbol	rhs
	)

inline

Returns the result of element-wise not equal to (!=) comparison operation with broadcasting.

   Example::

   x = [[ 1.,  1.,  1.],
   [ 1.,  1.,  1.]]

   y = [[ 0.],
   [ 1.]]

   broadcast_not_equal(x, y) = [[ 1.,  1.,  1.],
   [ 0.,  0.,  0.]]



   Defined in src/operator/tensor/elemwise_binary_broadcast_op_logic.cc:L64

Parameters

lhs	First input to the function
rhs	Second input to the function

Returns: new symbol

Symbol mxnet::cpp::broadcast_power	(	const std::string &	symbol_name,
		Symbol	lhs,
		Symbol	rhs
	)

inline

Returns result of first array elements raised to powers from second array, element-wise with broadcasting.

   Example::

   x = [[ 2.,  2.,  2.],
   [ 2.,  2.,  2.]]

   y = [[ 1.],
   [ 2.]]

   broadcast_power(x, y) = [[ 2.,  2.,  2.],
   [ 4.,  4.,  4.]]



   Defined in src/operator/tensor/elemwise_binary_broadcast_op_extended.cc:L45

Parameters

symbol_name	name of the resulting symbol
lhs	First input to the function
rhs	Second input to the function

Returns: new symbol

Symbol mxnet::cpp::broadcast_power	(	Symbol	lhs,
		Symbol	rhs
	)

inline

Returns result of first array elements raised to powers from second array, element-wise with broadcasting.

   Example::

   x = [[ 2.,  2.,  2.],
   [ 2.,  2.,  2.]]

   y = [[ 1.],
   [ 2.]]

   broadcast_power(x, y) = [[ 2.,  2.,  2.],
   [ 4.,  4.,  4.]]



   Defined in src/operator/tensor/elemwise_binary_broadcast_op_extended.cc:L45

Parameters

lhs	First input to the function
rhs	Second input to the function

Returns: new symbol

Symbol mxnet::cpp::broadcast_sub	(	const std::string &	symbol_name,
		Symbol	lhs,
		Symbol	rhs
	)

inline

Returns element-wise difference of the input arrays with broadcasting.

   `broadcast_minus` is an alias to the function `broadcast_sub`.

   Example::

   x = [[ 1.,  1.,  1.],
   [ 1.,  1.,  1.]]

   y = [[ 0.],
   [ 1.]]

   broadcast_sub(x, y) = [[ 1.,  1.,  1.],
   [ 0.,  0.,  0.]]

   broadcast_minus(x, y) = [[ 1.,  1.,  1.],
   [ 0.,  0.,  0.]]

   Supported sparse operations:

   broadcast_sub/minus(csr, dense(1D)) = dense
   broadcast_sub/minus(dense(1D), csr) = dense



   Defined in src/operator/tensor/elemwise_binary_broadcast_op_basic.cc:L106

Parameters

symbol_name	name of the resulting symbol
lhs	First input to the function
rhs	Second input to the function

Returns: new symbol

Symbol mxnet::cpp::broadcast_sub	(	Symbol	lhs,
		Symbol	rhs
	)

inline

Returns element-wise difference of the input arrays with broadcasting.

   `broadcast_minus` is an alias to the function `broadcast_sub`.

   Example::

   x = [[ 1.,  1.,  1.],
   [ 1.,  1.,  1.]]

   y = [[ 0.],
   [ 1.]]

   broadcast_sub(x, y) = [[ 1.,  1.,  1.],
   [ 0.,  0.,  0.]]

   broadcast_minus(x, y) = [[ 1.,  1.,  1.],
   [ 0.,  0.,  0.]]

   Supported sparse operations:

   broadcast_sub/minus(csr, dense(1D)) = dense
   broadcast_sub/minus(dense(1D), csr) = dense



   Defined in src/operator/tensor/elemwise_binary_broadcast_op_basic.cc:L106

Parameters

lhs	First input to the function
rhs	Second input to the function

Returns: new symbol

Symbol mxnet::cpp::broadcast_to	(	const std::string &	symbol_name,
		Symbol	data,
		Shape	shape = `Shape()`
	)

inline

Broadcasts the input array to a new shape.

   Broadcasting is a mechanism that allows NDArrays to perform arithmetic
   with arrays of different shapes efficiently without creating multiple copies of arrays.
   Also see, `Broadcasting
   <https://docs.scipy.org/doc/numpy/user/basics.broadcasting.html>`_ for more explanation.

   Broadcasting is allowed on axes with size 1, such as from `(2,1,3,1)` to
   `(2,8,3,9)`. Elements will be duplicated on the broadcasted axes.

   For example::

   broadcast_to([[1,2,3]], shape=(2,3)) = [[ 1.,  2.,  3.],
   [ 1.,  2.,  3.]]

   The dimension which you do not want to change can also be kept as `0` which means copy the original value.
   So with `shape=(2,0)`, we will obtain the same result as in the above example.



   Defined in src/operator/tensor/broadcast_reduce_op_value.cc:L261

Parameters

symbol_name	name of the resulting symbol
data	The input
shape	The shape of the desired array. We can set the dim to zero if it's same as the original. E.g `A = broadcast_to(B, shape=(10, 0, 0))` has the same meaning as `A = broadcast_axis(B, axis=0, size=10)`.

Returns: new symbol

Symbol mxnet::cpp::broadcast_to	(	Symbol	data,
		Shape	shape = `Shape()`
	)

inline

Broadcasts the input array to a new shape.

   Broadcasting is a mechanism that allows NDArrays to perform arithmetic
   with arrays of different shapes efficiently without creating multiple copies of arrays.
   Also see, `Broadcasting
   <https://docs.scipy.org/doc/numpy/user/basics.broadcasting.html>`_ for more explanation.

   Broadcasting is allowed on axes with size 1, such as from `(2,1,3,1)` to
   `(2,8,3,9)`. Elements will be duplicated on the broadcasted axes.

   For example::

   broadcast_to([[1,2,3]], shape=(2,3)) = [[ 1.,  2.,  3.],
   [ 1.,  2.,  3.]]

   The dimension which you do not want to change can also be kept as `0` which means copy the original value.
   So with `shape=(2,0)`, we will obtain the same result as in the above example.



   Defined in src/operator/tensor/broadcast_reduce_op_value.cc:L261

Parameters

data	The input
shape	The shape of the desired array. We can set the dim to zero if it's same as the original. E.g `A = broadcast_to(B, shape=(10, 0, 0))` has the same meaning as `A = broadcast_axis(B, axis=0, size=10)`.

Returns: new symbol

Symbol mxnet::cpp::Cast	(	const std::string &	symbol_name,
		Symbol	data,
		CastDtype	dtype
	)

inline

Casts all elements of the input to a new type.

   .. note:: ``Cast`` is deprecated. Use ``cast`` instead.

   Example::

   cast([0.9, 1.3], dtype='int32') = [0, 1]
   cast([1e20, 11.1], dtype='float16') = [inf, 11.09375]
   cast([300, 11.1, 10.9, -1, -3], dtype='uint8') = [44, 11, 10, 255, 253]



   Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L594

Parameters

symbol_name	name of the resulting symbol
data	The input.
dtype	Output data type.

Returns: new symbol

Symbol mxnet::cpp::Cast	(	Symbol	data,
		CastDtype	dtype
	)

inline

Casts all elements of the input to a new type.

   .. note:: ``Cast`` is deprecated. Use ``cast`` instead.

   Example::

   cast([0.9, 1.3], dtype='int32') = [0, 1]
   cast([1e20, 11.1], dtype='float16') = [inf, 11.09375]
   cast([300, 11.1, 10.9, -1, -3], dtype='uint8') = [44, 11, 10, 255, 253]



   Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L594

Parameters

data	The input.
dtype	Output data type.

Returns: new symbol

Symbol mxnet::cpp::cast_storage	(	const std::string &	symbol_name,
		Symbol	data,
		Cast_storageStype	stype
	)

inline

Casts tensor storage type to the new type.

   When an NDArray with default storage type is cast to csr or row_sparse storage,
   the result is compact, which means:

   - for csr, zero values will not be retained
   - for row_sparse, row slices of all zeros will not be retained

   The storage type of ``cast_storage`` output depends on stype parameter:

   - cast_storage(csr, 'default') = default
   - cast_storage(row_sparse, 'default') = default
   - cast_storage(default, 'csr') = csr
   - cast_storage(default, 'row_sparse') = row_sparse
   - cast_storage(csr, 'csr') = csr
   - cast_storage(row_sparse, 'row_sparse') = row_sparse

   Example::

   dense = [[ 0.,  1.,  0.],
   [ 2.,  0.,  3.],
   [ 0.,  0.,  0.],
   [ 0.,  0.,  0.]]

   # cast to row_sparse storage type
   rsp = cast_storage(dense, 'row_sparse')
   rsp.indices = [0, 1]
   rsp.values = [[ 0.,  1.,  0.],
   [ 2.,  0.,  3.]]

   # cast to csr storage type
   csr = cast_storage(dense, 'csr')
   csr.indices = [1, 0, 2]
   csr.values = [ 1.,  2.,  3.]
   csr.indptr = [0, 1, 3, 3, 3]



   Defined in src/operator/tensor/cast_storage.cc:L71

Parameters

symbol_name	name of the resulting symbol
data	The input.
stype	Output storage type.

Returns: new symbol

Symbol mxnet::cpp::cast_storage	(	Symbol	data,
		Cast_storageStype	stype
	)

inline

Casts tensor storage type to the new type.

   When an NDArray with default storage type is cast to csr or row_sparse storage,
   the result is compact, which means:

   - for csr, zero values will not be retained
   - for row_sparse, row slices of all zeros will not be retained

   The storage type of ``cast_storage`` output depends on stype parameter:

   - cast_storage(csr, 'default') = default
   - cast_storage(row_sparse, 'default') = default
   - cast_storage(default, 'csr') = csr
   - cast_storage(default, 'row_sparse') = row_sparse
   - cast_storage(csr, 'csr') = csr
   - cast_storage(row_sparse, 'row_sparse') = row_sparse

   Example::

   dense = [[ 0.,  1.,  0.],
   [ 2.,  0.,  3.],
   [ 0.,  0.,  0.],
   [ 0.,  0.,  0.]]

   # cast to row_sparse storage type
   rsp = cast_storage(dense, 'row_sparse')
   rsp.indices = [0, 1]
   rsp.values = [[ 0.,  1.,  0.],
   [ 2.,  0.,  3.]]

   # cast to csr storage type
   csr = cast_storage(dense, 'csr')
   csr.indices = [1, 0, 2]
   csr.values = [ 1.,  2.,  3.]
   csr.indptr = [0, 1, 3, 3, 3]



   Defined in src/operator/tensor/cast_storage.cc:L71

Parameters

data	The input.
stype	Output storage type.

Returns: new symbol

Symbol mxnet::cpp::cbrt	(	const std::string &	symbol_name,
		Symbol	data
	)

inline

Returns element-wise cube-root value of the input.

   .. math::
   cbrt(x) = \sqrt[3]{x}

   Example::

   cbrt([1, 8, -125]) = [1, 2, -5]

   The storage type of ``cbrt`` output depends upon the input storage type:

   - cbrt(default) = default
   - cbrt(row_sparse) = row_sparse
   - cbrt(csr) = csr



   Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L889

Parameters

symbol_name	name of the resulting symbol
data	The input array.

Returns: new symbol

Symbol mxnet::cpp::cbrt ( Symbol data )

inline

Returns element-wise cube-root value of the input.

   .. math::
   cbrt(x) = \sqrt[3]{x}

   Example::

   cbrt([1, 8, -125]) = [1, 2, -5]

   The storage type of ``cbrt`` output depends upon the input storage type:

   - cbrt(default) = default
   - cbrt(row_sparse) = row_sparse
   - cbrt(csr) = csr



   Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L889

Parameters

data	The input array.

Returns: new symbol

Symbol mxnet::cpp::ceil	(	const std::string &	symbol_name,
		Symbol	data
	)

inline

Returns element-wise ceiling of the input.

   The ceil of the scalar x is the smallest integer i, such that i >= x.

   Example::

   ceil([-2.1, -1.9, 1.5, 1.9, 2.1]) = [-2., -1.,  2.,  2.,  3.]

   The storage type of ``ceil`` output depends upon the input storage type:

   - ceil(default) = default
   - ceil(row_sparse) = row_sparse
   - ceil(csr) = csr



   Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L746

Parameters

symbol_name	name of the resulting symbol
data	The input array.

Returns: new symbol

Symbol mxnet::cpp::ceil ( Symbol data )

inline

Returns element-wise ceiling of the input.

   The ceil of the scalar x is the smallest integer i, such that i >= x.

   Example::

   ceil([-2.1, -1.9, 1.5, 1.9, 2.1]) = [-2., -1.,  2.,  2.,  3.]

   The storage type of ``ceil`` output depends upon the input storage type:

   - ceil(default) = default
   - ceil(row_sparse) = row_sparse
   - ceil(csr) = csr



   Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L746

Parameters

data	The input array.

Returns: new symbol

Symbol mxnet::cpp::choose_element_0index	(	const std::string &	symbol_name,
		Symbol	lhs,
		Symbol	rhs
	)

inline

Choose one element from each line (row for Python, column for R/Julia) in lhs according to the index indicated by rhs. This function assumes rhs uses 0-based indexing.

Parameters

symbol_name	name of the resulting symbol
lhs	Left operand to the function.
rhs	Right operand to the function.

Returns: new symbol

Symbol mxnet::cpp::choose_element_0index	(	Symbol	lhs,
		Symbol	rhs
	)

inline

Choose one element from each line (row for Python, column for R/Julia) in lhs according to the index indicated by rhs. This function assumes rhs uses 0-based indexing.

Parameters

lhs	Left operand to the function.
rhs	Right operand to the function.

Returns: new symbol

Symbol mxnet::cpp::clip	(	const std::string &	symbol_name,
		Symbol	data,
		mx_float	a_min,
		mx_float	a_max
	)

inline

Clips (limits) the values in an array.

   Given an interval, values outside the interval are clipped to the interval edges.
   Clipping ``x`` between `a_min` and `a_max` would be::

   clip(x, a_min, a_max) = max(min(x, a_max), a_min)

   Example::

   x = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

   clip(x,1,8) = [ 1.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  8.]

   The storage type of ``clip`` output depends on storage types of inputs and the
   parameter values:

   - clip(default) = default
   - clip(row_sparse, a_min <= 0, a_max >= 0) = row_sparse
   - clip(csr, a_min <= 0, a_max >= 0) = csr
   - clip(row_sparse, a_min < 0, a_max < 0) = default
   - clip(row_sparse, a_min > 0, a_max > 0) = default
   - clip(csr, a_min < 0, a_max < 0) = csr
   - clip(csr, a_min > 0, a_max > 0) = csr



   Defined in src/operator/tensor/matrix_op.cc:L618

Parameters

symbol_name	name of the resulting symbol
data	Input array.
a_min	Minimum value
a_max	Maximum value

Returns: new symbol

Symbol mxnet::cpp::clip	(	Symbol	data,
		mx_float	a_min,
		mx_float	a_max
	)

inline

Clips (limits) the values in an array.

   Given an interval, values outside the interval are clipped to the interval edges.
   Clipping ``x`` between `a_min` and `a_max` would be::

   clip(x, a_min, a_max) = max(min(x, a_max), a_min)

   Example::

   x = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

   clip(x,1,8) = [ 1.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  8.]

   The storage type of ``clip`` output depends on storage types of inputs and the
   parameter values:

   - clip(default) = default
   - clip(row_sparse, a_min <= 0, a_max >= 0) = row_sparse
   - clip(csr, a_min <= 0, a_max >= 0) = csr
   - clip(row_sparse, a_min < 0, a_max < 0) = default
   - clip(row_sparse, a_min > 0, a_max > 0) = default
   - clip(csr, a_min < 0, a_max < 0) = csr
   - clip(csr, a_min > 0, a_max > 0) = csr



   Defined in src/operator/tensor/matrix_op.cc:L618

Parameters

data	Input array.
a_min	Minimum value
a_max	Maximum value

Returns: new symbol

Symbol mxnet::cpp::Concat	(	const std::string &	symbol_name,
		const std::vector< Symbol > &	data,
		int	num_args,
		int	dim = `1`
	)

inline

Joins input arrays along a given axis.

   .. note:: `Concat` is deprecated. Use `concat` instead.

   The dimensions of the input arrays should be the same except the axis along
   which they will be concatenated.
   The dimension of the output array along the concatenated axis will be equal
   to the sum of the corresponding dimensions of the input arrays.

   The storage type of ``concat`` output depends on storage types of inputs

   - concat(csr, csr, ..., csr, dim=0) = csr
   - otherwise, ``concat`` generates output with default storage

   Example::

   x = [[1,1],[2,2]]
   y = [[3,3],[4,4],[5,5]]
   z = [[6,6], [7,7],[8,8]]

   concat(x,y,z,dim=0) = [[ 1.,  1.],
   [ 2.,  2.],
   [ 3.,  3.],
   [ 4.,  4.],
   [ 5.,  5.],
   [ 6.,  6.],
   [ 7.,  7.],
   [ 8.,  8.]]

   Note that you cannot concat x,y,z along dimension 1 since dimension
   0 is not the same for all the input arrays.

   concat(y,z,dim=1) = [[ 3.,  3.,  6.,  6.],
   [ 4.,  4.,  7.,  7.],
   [ 5.,  5.,  8.,  8.]]



   Defined in src/operator/nn/concat.cc:L368

Parameters

symbol_name	name of the resulting symbol
data	List of arrays to concatenate
num_args	Number of inputs to be concatenated.
dim	The dimension to be concatenated.

Returns: new symbol

Symbol mxnet::cpp::Concat	(	const std::vector< Symbol > &	data,
		int	num_args,
		int	dim = `1`
	)

inline

Joins input arrays along a given axis.

   .. note:: `Concat` is deprecated. Use `concat` instead.

   The dimensions of the input arrays should be the same except the axis along
   which they will be concatenated.
   The dimension of the output array along the concatenated axis will be equal
   to the sum of the corresponding dimensions of the input arrays.

   The storage type of ``concat`` output depends on storage types of inputs

   - concat(csr, csr, ..., csr, dim=0) = csr
   - otherwise, ``concat`` generates output with default storage

   Example::

   x = [[1,1],[2,2]]
   y = [[3,3],[4,4],[5,5]]
   z = [[6,6], [7,7],[8,8]]

   concat(x,y,z,dim=0) = [[ 1.,  1.],
   [ 2.,  2.],
   [ 3.,  3.],
   [ 4.,  4.],
   [ 5.,  5.],
   [ 6.,  6.],
   [ 7.,  7.],
   [ 8.,  8.]]

   Note that you cannot concat x,y,z along dimension 1 since dimension
   0 is not the same for all the input arrays.

   concat(y,z,dim=1) = [[ 3.,  3.,  6.,  6.],
   [ 4.,  4.,  7.,  7.],
   [ 5.,  5.,  8.,  8.]]



   Defined in src/operator/nn/concat.cc:L368

Parameters

data	List of arrays to concatenate
num_args	Number of inputs to be concatenated.
dim	The dimension to be concatenated.

Returns: new symbol

Symbol mxnet::cpp::Convolution	(	const std::string &	symbol_name,
		Symbol	data,
		Symbol	weight,
		Symbol	bias,
		Shape	kernel,
		uint32_t	num_filter,
		Shape	stride = `Shape()`,
		Shape	dilate = `Shape()`,
		Shape	pad = `Shape()`,
		uint32_t	num_group = `1`,
		uint64_t	workspace = `1024`,
		bool	no_bias = `false`,
		ConvolutionCudnnTune	cudnn_tune = `ConvolutionCudnnTune::kNone`,
		bool	cudnn_off = `false`,
		ConvolutionLayout	layout = `ConvolutionLayout::kNone`
	)

inline

Compute N-D convolution on *(N+2)*-D input.

   In the 2-D convolution, given input data with shape *(batch_size,
   channel, height, width)*, the output is computed by

   .. math::

   out[n,i,:,:] = bias[i] + \sum_{j=0}^{channel} data[n,j,:,:] \star
   weight[i,j,:,:]

   where :math:`\star` is the 2-D cross-correlation operator.

   For general 2-D convolution, the shapes are

   - **data**: *(batch_size, channel, height, width)*
   - **weight**: *(num_filter, channel, kernel[0], kernel[1])*
   - **bias**: *(num_filter,)*
   - **out**: *(batch_size, num_filter, out_height, out_width)*.

   Define::

   f(x,k,p,s,d) = floor((x+2*p-d*(k-1)-1)/s)+1

   then we have::

   out_height=f(height, kernel[0], pad[0], stride[0], dilate[0])
   out_width=f(width, kernel[1], pad[1], stride[1], dilate[1])

   If ``no_bias`` is set to be true, then the ``bias`` term is ignored.

   The default data ``layout`` is *NCHW*, namely *(batch_size, channel, height,
   width)*. We can choose other layouts such as *NWC*.

   If ``num_group`` is larger than 1, denoted by *g*, then split the input ``data``
   evenly into *g* parts along the channel axis, and also evenly split ``weight``
   along the first dimension. Next compute the convolution on the *i*-th part of
   the data with the *i*-th weight part. The output is obtained by concatenating
   the *g* results.

   1-D convolution does not have *height* dimension but only *width* in space.

   - **data**: *(batch_size, channel, width)*
   - **weight**: *(num_filter, channel, kernel[0])*
   - **bias**: *(num_filter,)*
   - **out**: *(batch_size, num_filter, out_width)*.

   3-D convolution adds an additional *depth* dimension besides *height* and
   *width*. The shapes are

   - **data**: *(batch_size, channel, depth, height, width)*
   - **weight**: *(num_filter, channel, kernel[0], kernel[1], kernel[2])*
   - **bias**: *(num_filter,)*
   - **out**: *(batch_size, num_filter, out_depth, out_height, out_width)*.

   Both ``weight`` and ``bias`` are learnable parameters.

   There are other options to tune the performance.

   - **cudnn_tune**: enabling this option leads to higher startup time but may give
   faster speed. Options are

   - **off**: no tuning
   - **limited_workspace**: run test and pick the fastest algorithm that doesn't
   exceed workspace limit.
   - **fastest**: pick the fastest algorithm and ignore workspace limit.
   - **None** (default): the behavior is determined by environment variable
   ``MXNET_CUDNN_AUTOTUNE_DEFAULT``. 0 for off, 1 for limited workspace
   (default), 2 for fastest.

   - **workspace**: A large number leads to more (GPU) memory usage but may improve
   the performance.



   Defined in src/operator/nn/convolution.cc:L474

Parameters

symbol_name	name of the resulting symbol
data	Input data to the ConvolutionOp.
weight	Weight matrix.
bias	Bias parameter.
kernel	Convolution kernel size: (w,), (h, w) or (d, h, w)
num_filter	Convolution filter(channel) number
stride	Convolution stride: (w,), (h, w) or (d, h, w). Defaults to 1 for each dimension.
dilate	Convolution dilate: (w,), (h, w) or (d, h, w). Defaults to 1 for each dimension.
pad	Zero pad for convolution: (w,), (h, w) or (d, h, w). Defaults to no padding.
num_group	Number of group partitions.
workspace	Maximum temporary workspace allowed (MB) in convolution. This parameter has two usages. When CUDNN is not used, it determines the effective batch size of the convolution kernel. When CUDNN is used, it controls the maximum temporary storage used for tuning the best CUDNN kernel when the `limited_workspace` strategy is used.
no_bias	Whether to disable bias parameter.
cudnn_tune	Whether to pick convolution algo by running performance test.
cudnn_off	Turn off cudnn for this layer.
layout	Set layout for input, output and weight. Empty for default layout: NCW for 1d, NCHW for 2d and NCDHW for 3d. NHWC and NDHWC are only supported on GPU.

Returns: new symbol

Symbol mxnet::cpp::Convolution	(	Symbol	data,
		Symbol	weight,
		Symbol	bias,
		Shape	kernel,
		uint32_t	num_filter,
		Shape	stride = `Shape()`,
		Shape	dilate = `Shape()`,
		Shape	pad = `Shape()`,
		uint32_t	num_group = `1`,
		uint64_t	workspace = `1024`,
		bool	no_bias = `false`,
		ConvolutionCudnnTune	cudnn_tune = `ConvolutionCudnnTune::kNone`,
		bool	cudnn_off = `false`,
		ConvolutionLayout	layout = `ConvolutionLayout::kNone`
	)

inline

Compute N-D convolution on *(N+2)*-D input.

   In the 2-D convolution, given input data with shape *(batch_size,
   channel, height, width)*, the output is computed by

   .. math::

   out[n,i,:,:] = bias[i] + \sum_{j=0}^{channel} data[n,j,:,:] \star
   weight[i,j,:,:]

   where :math:`\star` is the 2-D cross-correlation operator.

   For general 2-D convolution, the shapes are

   - **data**: *(batch_size, channel, height, width)*
   - **weight**: *(num_filter, channel, kernel[0], kernel[1])*
   - **bias**: *(num_filter,)*
   - **out**: *(batch_size, num_filter, out_height, out_width)*.

   Define::

   f(x,k,p,s,d) = floor((x+2*p-d*(k-1)-1)/s)+1

   then we have::

   out_height=f(height, kernel[0], pad[0], stride[0], dilate[0])
   out_width=f(width, kernel[1], pad[1], stride[1], dilate[1])

   If ``no_bias`` is set to be true, then the ``bias`` term is ignored.

   The default data ``layout`` is *NCHW*, namely *(batch_size, channel, height,
   width)*. We can choose other layouts such as *NWC*.

   If ``num_group`` is larger than 1, denoted by *g*, then split the input ``data``
   evenly into *g* parts along the channel axis, and also evenly split ``weight``
   along the first dimension. Next compute the convolution on the *i*-th part of
   the data with the *i*-th weight part. The output is obtained by concatenating
   the *g* results.

   1-D convolution does not have *height* dimension but only *width* in space.

   - **data**: *(batch_size, channel, width)*
   - **weight**: *(num_filter, channel, kernel[0])*
   - **bias**: *(num_filter,)*
   - **out**: *(batch_size, num_filter, out_width)*.

   3-D convolution adds an additional *depth* dimension besides *height* and
   *width*. The shapes are

   - **data**: *(batch_size, channel, depth, height, width)*
   - **weight**: *(num_filter, channel, kernel[0], kernel[1], kernel[2])*
   - **bias**: *(num_filter,)*
   - **out**: *(batch_size, num_filter, out_depth, out_height, out_width)*.

   Both ``weight`` and ``bias`` are learnable parameters.

   There are other options to tune the performance.

   - **cudnn_tune**: enabling this option leads to higher startup time but may give
   faster speed. Options are

   - **off**: no tuning
   - **limited_workspace**: run test and pick the fastest algorithm that doesn't
   exceed workspace limit.
   - **fastest**: pick the fastest algorithm and ignore workspace limit.
   - **None** (default): the behavior is determined by environment variable
   ``MXNET_CUDNN_AUTOTUNE_DEFAULT``. 0 for off, 1 for limited workspace
   (default), 2 for fastest.

   - **workspace**: A large number leads to more (GPU) memory usage but may improve
   the performance.



   Defined in src/operator/nn/convolution.cc:L474

Parameters

data	Input data to the ConvolutionOp.
weight	Weight matrix.
bias	Bias parameter.
kernel	Convolution kernel size: (w,), (h, w) or (d, h, w)
num_filter	Convolution filter(channel) number
stride	Convolution stride: (w,), (h, w) or (d, h, w). Defaults to 1 for each dimension.
dilate	Convolution dilate: (w,), (h, w) or (d, h, w). Defaults to 1 for each dimension.
pad	Zero pad for convolution: (w,), (h, w) or (d, h, w). Defaults to no padding.
num_group	Number of group partitions.
workspace	Maximum temporary workspace allowed (MB) in convolution. This parameter has two usages. When CUDNN is not used, it determines the effective batch size of the convolution kernel. When CUDNN is used, it controls the maximum temporary storage used for tuning the best CUDNN kernel when the `limited_workspace` strategy is used.
no_bias	Whether to disable bias parameter.
cudnn_tune	Whether to pick convolution algo by running performance test.
cudnn_off	Turn off cudnn for this layer.
layout	Set layout for input, output and weight. Empty for default layout: NCW for 1d, NCHW for 2d and NCDHW for 3d. NHWC and NDHWC are only supported on GPU.

Returns: new symbol

Symbol mxnet::cpp::Convolution_v1	(	const std::string &	symbol_name,
		Symbol	data,
		Symbol	weight,
		Symbol	bias,
		Shape	kernel,
		uint32_t	num_filter,
		Shape	stride = `Shape()`,
		Shape	dilate = `Shape()`,
		Shape	pad = `Shape()`,
		uint32_t	num_group = `1`,
		uint64_t	workspace = `1024`,
		bool	no_bias = `false`,
		Convolution_v1CudnnTune	cudnn_tune = `Convolution_v1CudnnTune::kNone`,
		bool	cudnn_off = `false`,
		Convolution_v1Layout	layout = `Convolution_v1Layout::kNone`
	)

inline

This operator is DEPRECATED. Apply convolution to input then add a bias.

Parameters

symbol_name	name of the resulting symbol
data	Input data to the ConvolutionV1Op.
weight	Weight matrix.
bias	Bias parameter.
kernel	convolution kernel size: (h, w) or (d, h, w)
num_filter	convolution filter(channel) number
stride	convolution stride: (h, w) or (d, h, w)
dilate	convolution dilate: (h, w) or (d, h, w)
pad	pad for convolution: (h, w) or (d, h, w)
num_group	Number of group partitions. Equivalent to slicing input into num_group partitions, apply convolution on each, then concatenate the results
workspace	Maximum temporary workspace allowed for convolution (MB). This parameter determines the effective batch size of the convolution kernel, which may be smaller than the given batch size. Also, the workspace will be automatically enlarged to make sure that we can run the kernel with batch_size=1.
no_bias	Whether to disable bias parameter.
cudnn_tune	Whether to pick convolution algo by running performance test. Leads to higher startup time but may give faster speed. Options are: 'off': no tuning; 'limited_workspace': run test and pick the fastest algorithm that doesn't exceed workspace limit; 'fastest': pick the fastest algorithm and ignore workspace limit. If set to None (default), behavior is determined by environment variable MXNET_CUDNN_AUTOTUNE_DEFAULT: 0 for off, 1 for limited workspace (default), 2 for fastest.
cudnn_off	Turn off cudnn for this layer.
layout	Set layout for input, output and weight. Empty for default layout: NCHW for 2d and NCDHW for 3d.

Returns: new symbol

Symbol mxnet::cpp::Convolution_v1	(	Symbol	data,
		Symbol	weight,
		Symbol	bias,
		Shape	kernel,
		uint32_t	num_filter,
		Shape	stride = `Shape()`,
		Shape	dilate = `Shape()`,
		Shape	pad = `Shape()`,
		uint32_t	num_group = `1`,
		uint64_t	workspace = `1024`,
		bool	no_bias = `false`,
		Convolution_v1CudnnTune	cudnn_tune = `Convolution_v1CudnnTune::kNone`,
		bool	cudnn_off = `false`,
		Convolution_v1Layout	layout = `Convolution_v1Layout::kNone`
	)

inline

This operator is DEPRECATED. Apply convolution to input then add a bias.

Parameters

data	Input data to the ConvolutionV1Op.
weight	Weight matrix.
bias	Bias parameter.
kernel	convolution kernel size: (h, w) or (d, h, w)
num_filter	convolution filter(channel) number
stride	convolution stride: (h, w) or (d, h, w)
dilate	convolution dilate: (h, w) or (d, h, w)
pad	pad for convolution: (h, w) or (d, h, w)
num_group	Number of group partitions. Equivalent to slicing input into num_group partitions, apply convolution on each, then concatenate the results
workspace	Maximum temporary workspace allowed for convolution (MB). This parameter determines the effective batch size of the convolution kernel, which may be smaller than the given batch size. Also, the workspace will be automatically enlarged to make sure that we can run the kernel with batch_size=1.
no_bias	Whether to disable bias parameter.
cudnn_tune	Whether to pick convolution algo by running performance test. Leads to higher startup time but may give faster speed. Options are: 'off': no tuning; 'limited_workspace': run test and pick the fastest algorithm that doesn't exceed workspace limit; 'fastest': pick the fastest algorithm and ignore workspace limit. If set to None (default), behavior is determined by environment variable MXNET_CUDNN_AUTOTUNE_DEFAULT: 0 for off, 1 for limited workspace (default), 2 for fastest.
cudnn_off	Turn off cudnn for this layer.
layout	Set layout for input, output and weight. Empty for default layout: NCHW for 2d and NCDHW for 3d.

Returns: new symbol

Symbol mxnet::cpp::Correlation	(	const std::string &	symbol_name,
		Symbol	data1,
		Symbol	data2,
		uint32_t	kernel_size = `1`,
		uint32_t	max_displacement = `1`,
		uint32_t	stride1 = `1`,
		uint32_t	stride2 = `1`,
		uint32_t	pad_size = `0`,
		bool	is_multiply = `true`
	)

inline

Applies correlation to inputs.

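   The correlation layer performs multiplicative patch comparisons between two
   feature maps.

   Given two multi-channel feature maps :math:`f_{1}, f_{2}`, with :math:`w`,
   :math:`h`, and :math:`c` being their width, height, and number of channels,
   the correlation layer lets the network compare each patch from :math:`f_{1}`
   with each patch from :math:`f_{2}`.

   For now we consider only a single comparison of two patches. The 'correlation'
   of two patches centered at :math:`x_{1}` in the first map and
   :math:`x_{2}` in the second map is then defined as: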

   .. math::

   c(x_{1}, x_{2}) = \sum_{o \in [-k,k] \times [-k,k]} <f_{1}(x_{1} + o), f_{2}(x_{2} + o)>

   for a square patch of size :math:`K:=2k+1`.

   Note that the equation above is identical to one step of a convolution in
   neural networks, but instead of convolving data with a filter, it convolves
   data with other data. For this reason, it has no training weights.

   Computing :math:`c(x_{1}, x_{2})` involves :math:`c * K^{2}` multiplications.
   Comparing all patch combinations involves :math:`w^{2}*h^{2}` such computations.

   Given a maximum displacement :math:`d`, for each location :math:`x_{1}` it
   computes correlations :math:`c(x_{1}, x_{2})` only in a neighborhood of size
   :math:`D:=2d+1`, by limiting the range of :math:`x_{2}`. We use strides
   :math:`s_{1}, s_{2}`, to quantize :math:`x_{1}` globally and to quantize
   :math:`x_{2}` within the neighborhood centered around :math:`x_{1}`.

   The final output is defined by the following expression:

   .. math::
   out[n, q, i, j] = c(x_{i, j}, x_{q})

   where :math:`i` and :math:`j` enumerate spatial locations in :math:`f_{1}`, and
   :math:`q` denotes the :math:`q^{th}` neighborhood of :math:`x_{i,j}`.


   Defined in src/operator/correlation.cc:L198

Parameters

symbol_name	name of the resulting symbol
data1	Input data1 to the correlation.
data2	Input data2 to the correlation.
kernel_size	kernel size for Correlation must be an odd number
max_displacement	Max displacement of Correlation
stride1	stride1 quantize data1 globally
stride2	stride2 quantize data2 within the neighborhood centered around data1
pad_size	pad for Correlation
is_multiply	operation type is either multiplication or subtraction

Returns: new symbol

Symbol mxnet::cpp::Correlation	(	Symbol	data1,
		Symbol	data2,
		uint32_t	kernel_size = `1`,
		uint32_t	max_displacement = `1`,
		uint32_t	stride1 = `1`,
		uint32_t	stride2 = `1`,
		uint32_t	pad_size = `0`,
		bool	is_multiply = `true`
	)

inline

Applies correlation to inputs.

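   The correlation layer performs multiplicative patch comparisons between two
   feature maps.

   Given two multi-channel feature maps :math:`f_{1}, f_{2}`, with :math:`w`,
   :math:`h`, and :math:`c` being their width, height, and number of channels,
   the correlation layer lets the network compare each patch from :math:`f_{1}`
   with each patch from :math:`f_{2}`.

   For now we consider only a single comparison of two patches. The 'correlation'
   of two patches centered at :math:`x_{1}` in the first map and
   :math:`x_{2}` in the second map is then defined as: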

   .. math::

   c(x_{1}, x_{2}) = \sum_{o \in [-k,k] \times [-k,k]} <f_{1}(x_{1} + o), f_{2}(x_{2} + o)>

   for a square patch of size :math:`K:=2k+1`.

   Note that the equation above is identical to one step of a convolution in
   neural networks, but instead of convolving data with a filter, it convolves
   data with other data. For this reason, it has no training weights.

   Computing :math:`c(x_{1}, x_{2})` involves :math:`c * K^{2}` multiplications.
   Comparing all patch combinations involves :math:`w^{2}*h^{2}` such computations.

   Given a maximum displacement :math:`d`, for each location :math:`x_{1}` it
   computes correlations :math:`c(x_{1}, x_{2})` only in a neighborhood of size
   :math:`D:=2d+1`, by limiting the range of :math:`x_{2}`. We use strides
   :math:`s_{1}, s_{2}`, to quantize :math:`x_{1}` globally and to quantize
   :math:`x_{2}` within the neighborhood centered around :math:`x_{1}`.

   The final output is defined by the following expression:

   .. math::
   out[n, q, i, j] = c(x_{i, j}, x_{q})

   where :math:`i` and :math:`j` enumerate spatial locations in :math:`f_{1}`, and
   :math:`q` denotes the :math:`q^{th}` neighborhood of :math:`x_{i,j}`.


   Defined in src/operator/correlation.cc:L198

Parameters

data1	Input data1 to the correlation.
data2	Input data2 to the correlation.
kernel_size	kernel size for Correlation must be an odd number
max_displacement	Max displacement of Correlation
stride1	stride1 quantize data1 globally
stride2	stride2 quantize data2 within the neighborhood centered around data1
pad_size	pad for Correlation
is_multiply	operation type is either multiplication or subtraction

Returns: new symbol

Symbol mxnet::cpp::cos	(	const std::string &	symbol_name,
		Symbol	data
	)

inline

Computes the element-wise cosine of the input array.

   The input should be in radians (:math:`2\pi` rad equals 360 degrees).

   .. math::
   cos([0, \pi/4, \pi/2]) = [1, 0.707, 0]

   The storage type of ``cos`` output is always dense



   Defined in src/operator/tensor/elemwise_unary_op_trig.cc:L63

Parameters

symbol_name	name of the resulting symbol
data	The input array.

Returns: new symbol

Symbol mxnet::cpp::cos ( Symbol data )

inline

Computes the element-wise cosine of the input array.

   The input should be in radians (:math:`2\pi` rad equals 360 degrees).

   .. math::
   cos([0, \pi/4, \pi/2]) = [1, 0.707, 0]

   The storage type of ``cos`` output is always dense



   Defined in src/operator/tensor/elemwise_unary_op_trig.cc:L63

Parameters

data	The input array.

Returns: new symbol

Symbol mxnet::cpp::cosh	(	const std::string &	symbol_name,
		Symbol	data
	)

inline

Returns the hyperbolic cosine of the input array, computed element-wise.

   .. math::
   cosh(x) = 0.5\times(exp(x) + exp(-x))

   The storage type of ``cosh`` output is always dense



   Defined in src/operator/tensor/elemwise_unary_op_trig.cc:L216

Parameters

symbol_name	name of the resulting symbol
data	The input array.

Returns: new symbol

Symbol mxnet::cpp::cosh ( Symbol data )

inline

Returns the hyperbolic cosine of the input array, computed element-wise.

   .. math::
   cosh(x) = 0.5\times(exp(x) + exp(-x))

   The storage type of ``cosh`` output is always dense



   Defined in src/operator/tensor/elemwise_unary_op_trig.cc:L216

Parameters

data	The input array.

Returns: new symbol

Symbol mxnet::cpp::Crop	(	const std::string &	symbol_name,
		int	num_args,
		Symbol	data,
		Symbol	crop_like,
		Shape	offset = `Shape(0, 0)`,
		Shape	h_w = `Shape(0, 0)`,
		bool	center_crop = `false`
	)

inline

Symbol mxnet::cpp::Crop	(	const std::string &	symbol_name,
		const std::vector< Symbol > &	data,
		int	num_args,
		Shape	offset = `Shape(0,0)`,
		Shape	h_w = `Shape(0,0)`,
		bool	center_crop = `false`
	)

inline

   .. note:: `Crop` is deprecated. Use `slice` instead.

   Crop the 2nd and 3rd dim of input data, with the corresponding size of h_w or
   with width and height of the second input symbol, i.e., with one input, we need
   h_w to specify the crop height and width, otherwise the second input symbol's
   size will be used.


   Defined in src/operator/crop.cc:L50

Parameters

symbol_name	name of the resulting symbol
data	Tensor or List of Tensors, the second input will be used as crop_like
num_args	Number of inputs for crop: if it equals one, then we will use h_w for the crop height and width; else if it equals two, then we will use the height and width of the second input symbol, named crop_like here.
offset	crop offset coordinate: (y, x)
h_w	crop height and width: (h, w)
center_crop	If set to true, then center cropping will be used; otherwise it will crop using the shape of crop_like.

Returns: new symbol

Symbol mxnet::cpp::Crop	(	const std::vector< Symbol > &	data,
		int	num_args,
		Shape	offset = `Shape(0,0)`,
		Shape	h_w = `Shape(0,0)`,
		bool	center_crop = `false`
	)

inline

   .. note:: `Crop` is deprecated. Use `slice` instead.

   Crop the 2nd and 3rd dim of input data, with the corresponding size of h_w or
   with width and height of the second input symbol, i.e., with one input, we need
   h_w to specify the crop height and width, otherwise the second input symbol's
   size will be used.


   Defined in src/operator/crop.cc:L50

Parameters

data	Tensor or List of Tensors, the second input will be used as crop_like
num_args	Number of inputs for crop: if it equals one, then we will use h_w for the crop height and width; else if it equals two, then we will use the height and width of the second input symbol, named crop_like here.
offset	crop offset coordinate: (y, x)
h_w	crop height and width: (h, w)
center_crop	If set to true, then center cropping will be used; otherwise it will crop using the shape of crop_like.

Returns: new symbol

Symbol mxnet::cpp::Custom	(	const std::string &	symbol_name,
		const std::vector< Symbol > &	data,
		const std::string &	op_type
	)

inline

Apply a custom operator implemented in a frontend language (like Python).

   Custom operators should override required methods like `forward` and `backward`.
   The custom operator must be registered before it can be used.
   Please check the tutorial here: /versions/1.3.1/faq/new_op.html.



   Defined in src/operator/custom/custom.cc:L547

Parameters

symbol_name	name of the resulting symbol
data	Input data for the custom operator.
op_type	Name of the custom operator. This is the name that is passed to `mx.operator.register` to register the operator.

Returns: new symbol

Symbol mxnet::cpp::Custom	(	const std::vector< Symbol > &	data,
		const std::string &	op_type
	)

inline

Apply a custom operator implemented in a frontend language (like Python).

   Custom operators should override required methods like `forward` and `backward`.
   The custom operator must be registered before it can be used.
   Please check the tutorial here: /versions/1.3.1/faq/new_op.html.



   Defined in src/operator/custom/custom.cc:L547

Parameters

data	Input data for the custom operator.
op_type	Name of the custom operator. This is the name that is passed to `mx.operator.register` to register the operator.

Returns: new symbol

Symbol mxnet::cpp::Deconvolution	(	const std::string &	symbol_name,
		Symbol	data,
		Symbol	weight,
		Symbol	bias,
		Shape	kernel,
		uint32_t	num_filter,
		Shape	stride = `Shape()`,
		Shape	dilate = `Shape()`,
		Shape	pad = `Shape()`,
		Shape	adj = `Shape()`,
		Shape	target_shape = `Shape()`,
		uint32_t	num_group = `1`,
		uint64_t	workspace = `512`,
		bool	no_bias = `true`,
		DeconvolutionCudnnTune	cudnn_tune = `DeconvolutionCudnnTune::kNone`,
		bool	cudnn_off = `false`,
		DeconvolutionLayout	layout = `DeconvolutionLayout::kNone`
	)

inline

Computes 1D or 2D transposed convolution (aka fractionally strided convolution) of the input tensor. This operation can be seen as the gradient of Convolution operation with respect to its input. Convolution usually reduces the size of the input. Transposed convolution works the other way, going from a smaller input to a larger output while preserving the connectivity pattern.

Parameters

symbol_name	name of the resulting symbol
data	Input tensor to the deconvolution operation.
weight	Weights representing the kernel.
bias	Bias added to the result after the deconvolution operation.
kernel	Deconvolution kernel size: (w,), (h, w) or (d, h, w). This is the same as the kernel size used for the corresponding convolution.
num_filter	Number of output filters.
stride	The stride used for the corresponding convolution: (w,), (h, w) or (d, h, w). Defaults to 1 for each dimension.
dilate	Dilation factor for each dimension of the input: (w,), (h, w) or (d, h, w). Defaults to 1 for each dimension.
pad	The amount of implicit zero padding added during convolution for each dimension of the input: (w,), (h, w) or (d, h, w). `(kernel-1)/2` is usually a good choice. If `target_shape` is set, `pad` will be ignored and a padding that will generate the target shape will be used. Defaults to no padding.
adj	Adjustment for output shape: (w,), (h, w) or (d, h, w). If `target_shape` is set, `adj` will be ignored and computed accordingly.
target_shape	Shape of the output tensor: (w,), (h, w) or (d, h, w).
num_group	Number of group partitions.
workspace	Maximum temporary workspace allowed (MB) in deconvolution. This parameter has two usages. When CUDNN is not used, it determines the effective batch size of the deconvolution kernel. When CUDNN is used, it controls the maximum temporary storage used for tuning the best CUDNN kernel when the `limited_workspace` strategy is used.
no_bias	Whether to disable bias parameter.
cudnn_tune	Whether to pick convolution algorithm by running performance test.
cudnn_off	Turn off cudnn for this layer.
layout	Set layout for input, output and weight. Empty for default layout, NCW for 1d, NCHW for 2d and NCDHW for 3d. NHWC and NDHWC are only supported on GPU.

Returns: new symbol

Symbol mxnet::cpp::Deconvolution	(	Symbol	data,
		Symbol	weight,
		Symbol	bias,
		Shape	kernel,
		uint32_t	num_filter,
		Shape	stride = `Shape()`,
		Shape	dilate = `Shape()`,
		Shape	pad = `Shape()`,
		Shape	adj = `Shape()`,
		Shape	target_shape = `Shape()`,
		uint32_t	num_group = `1`,
		uint64_t	workspace = `512`,
		bool	no_bias = `true`,
		DeconvolutionCudnnTune	cudnn_tune = `DeconvolutionCudnnTune::kNone`,
		bool	cudnn_off = `false`,
		DeconvolutionLayout	layout = `DeconvolutionLayout::kNone`
	)

inline

Computes 1D or 2D transposed convolution (aka fractionally strided convolution) of the input tensor. This operation can be seen as the gradient of Convolution operation with respect to its input. Convolution usually reduces the size of the input. Transposed convolution works the other way, going from a smaller input to a larger output while preserving the connectivity pattern.

Parameters

data	Input tensor to the deconvolution operation.
weight	Weights representing the kernel.
bias	Bias added to the result after the deconvolution operation.
kernel	Deconvolution kernel size: (w,), (h, w) or (d, h, w). This is the same as the kernel size used for the corresponding convolution.
num_filter	Number of output filters.
stride	The stride used for the corresponding convolution: (w,), (h, w) or (d,
dilate	Dilation factor for each dimension of the input: (w,), (h, w) or (d, h,
pad	The amount of implicit zero padding added during convolution for each dimension of the input: (w,), (h, w) or (d, h, w). `(kernel-1)/2` is usually a good choice. If `target_shape` is set, `pad` will be ignored and a padding
adj	Adjustment for output shape: (w,), (h, w) or (d, h, w). If `target_shape`
target_shape	Shape of the output tensor: (w,), (h, w) or (d, h, w).
num_group	Number of groups partition.
workspace	Maximum temporary workspace allowed (MB) in deconvolution. This parameter has two usages. When CUDNN is not used, it determines the effective batch size of the deconvolution kernel. When CUDNN is used, it controls the maximum temporary storage used for tuning the best CUDNN kernel when the `limited_workspace` strategy is used.
no_bias	Whether to disable bias parameter.
cudnn_tune	Whether to pick convolution algorithm by running performance test.
cudnn_off	Turn off cudnn for this layer.
layout	Set layout for input, output and weight. Empty for default layout: NCW for 1d, NCHW for 2d and NCDHW for 3d. NHWC and NDHWC are only supported on GPU.

Returns: new symbol

Symbol mxnet::cpp::degrees	(	const std::string &	symbol_name,
		Symbol	data
	)

inline

Converts each element of the input array from radians to degrees.

   .. math::
   degrees([0, \pi/2, \pi, 3\pi/2, 2\pi]) = [0, 90, 180, 270, 360]

   The storage type of ``degrees`` output depends upon the input storage type:

   - degrees(default) = default
   - degrees(row_sparse) = row_sparse
   - degrees(csr) = csr



   Defined in src/operator/tensor/elemwise_unary_op_trig.cc:L163

Parameters

symbol_name	name of the resulting symbol
data	The input array.

Returns: new symbol

Symbol mxnet::cpp::degrees ( Symbol data )

inline

Converts each element of the input array from radians to degrees.

   .. math::
   degrees([0, \pi/2, \pi, 3\pi/2, 2\pi]) = [0, 90, 180, 270, 360]

   The storage type of ``degrees`` output depends upon the input storage type:

   - degrees(default) = default
   - degrees(row_sparse) = row_sparse
   - degrees(csr) = csr



   Defined in src/operator/tensor/elemwise_unary_op_trig.cc:L163

Parameters

data	The input array.

Returns: new symbol

Symbol mxnet::cpp::depth_to_space	(	const std::string &	symbol_name,
		Symbol	data,
		int	block_size
	)

inline

Rearranges (permutes) data from depth into blocks of spatial data. Similar to ONNX DepthToSpace operator: https://github.com/onnx/onnx/blob/master/docs/Operators.md#DepthToSpace. The output is a new tensor where the values from the depth dimension are moved into the height and width dimensions. The reverse of this operation is ``space_to_depth``.

.. math::

\begin{gather*}
x' = reshape(x, [N, block\_size, block\_size, C / (block\_size ^ 2), H, W]) \\
x'' = transpose(x', [0, 3, 4, 1, 5, 2]) \\
y = reshape(x'', [N, C / (block\_size ^ 2), H * block\_size, W * block\_size])
\end{gather*}

where :math:`x` is an input tensor with default layout :math:`[N, C, H, W]` (batch, channels, height, width) and :math:`y` is the output tensor of layout :math:`[N, C / (block\_size ^ 2), H * block\_size, W * block\_size]`.

Example::

x = [[[[0, 1, 2], [3, 4, 5]], [[6, 7, 8], [9, 10, 11]], [[12, 13, 14], [15, 16, 17]], [[18, 19, 20], [21, 22, 23]]]]

depth_to_space(x, 2) = [[[[0, 6, 1, 7, 2, 8], [12, 18, 13, 19, 14, 20], [3, 9, 4, 10, 5, 11], [15, 21, 16, 22, 17, 23]]]]

   Defined in src/operator/tensor/matrix_op.cc:L945

Parameters

symbol_name	name of the resulting symbol
data	Input ndarray
block_size	Blocks of [block_size, block_size] are moved

Returns: new symbol

Symbol mxnet::cpp::depth_to_space	(	Symbol	data,
		int	block_size
	)

inline

Rearranges (permutes) data from depth into blocks of spatial data. Similar to ONNX DepthToSpace operator: https://github.com/onnx/onnx/blob/master/docs/Operators.md#DepthToSpace. The output is a new tensor where the values from the depth dimension are moved into the height and width dimensions. The reverse of this operation is ``space_to_depth``.

.. math::

\begin{gather*}
x' = reshape(x, [N, block\_size, block\_size, C / (block\_size ^ 2), H, W]) \\
x'' = transpose(x', [0, 3, 4, 1, 5, 2]) \\
y = reshape(x'', [N, C / (block\_size ^ 2), H * block\_size, W * block\_size])
\end{gather*}

where :math:`x` is an input tensor with default layout :math:`[N, C, H, W]` (batch, channels, height, width) and :math:`y` is the output tensor of layout :math:`[N, C / (block\_size ^ 2), H * block\_size, W * block\_size]`.

Example::

x = [[[[0, 1, 2], [3, 4, 5]], [[6, 7, 8], [9, 10, 11]], [[12, 13, 14], [15, 16, 17]], [[18, 19, 20], [21, 22, 23]]]]

depth_to_space(x, 2) = [[[[0, 6, 1, 7, 2, 8], [12, 18, 13, 19, 14, 20], [3, 9, 4, 10, 5, 11], [15, 21, 16, 22, 17, 23]]]]

   Defined in src/operator/tensor/matrix_op.cc:L945

Parameters

data	Input ndarray
block_size	Blocks of [block_size, block_size] are moved

Returns: new symbol

Symbol mxnet::cpp::diag	(	const std::string &	symbol_name,
		Symbol	data,
		dmlc::optional< int >	k = `dmlc::optional<int>(0)`
	)

inline

Extracts a diagonal or constructs a diagonal array.

   ``diag``'s behavior depends on the input array dimensions:

   - 1-D arrays: constructs a 2-D array with the input as its diagonal, all other elements are zero
   - 2-D arrays: returns elements in the diagonal as a new 1-D array
   - N-D arrays: not supported yet

   Examples::

   x = [[1, 2, 3],
   [4, 5, 6]]

   diag(x) = [1, 5]

   diag(x, k=1) = [2, 6]

   diag(x, k=-1) = [4]

   x = [1, 2, 3]

   diag(x) = [[1, 0, 0],
   [0, 2, 0],
   [0, 0, 3]]

   diag(x, k=1) = [[0, 1, 0],
   [0, 0, 2],
   [0, 0, 0]]

   diag(x, k=-1) = [[0, 0, 0],
   [1, 0, 0],
   [0, 2, 0]]



   Defined in src/operator/tensor/diag_op.cc:L68

Parameters

symbol_name	name of the resulting symbol
data	Input ndarray
k	Diagonal in question. The default is 0. Use k>0 for diagonals above the main diagonal, and k<0 for diagonals below the main diagonal. If input has shape (S0, S1), k must be between -S0 and S1.

Returns: new symbol

Symbol mxnet::cpp::diag	(	Symbol	data,
		dmlc::optional< int >	k = `dmlc::optional<int>(0)`
	)

inline

Extracts a diagonal or constructs a diagonal array.

   ``diag``'s behavior depends on the input array dimensions:

   - 1-D arrays: constructs a 2-D array with the input as its diagonal, all other elements are zero
   - 2-D arrays: returns elements in the diagonal as a new 1-D array
   - N-D arrays: not supported yet

   Examples::

   x = [[1, 2, 3],
   [4, 5, 6]]

   diag(x) = [1, 5]

   diag(x, k=1) = [2, 6]

   diag(x, k=-1) = [4]

   x = [1, 2, 3]

   diag(x) = [[1, 0, 0],
   [0, 2, 0],
   [0, 0, 3]]

   diag(x, k=1) = [[0, 1, 0],
   [0, 0, 2],
   [0, 0, 0]]

   diag(x, k=-1) = [[0, 0, 0],
   [1, 0, 0],
   [0, 2, 0]]



   Defined in src/operator/tensor/diag_op.cc:L68

Parameters

data	Input ndarray
k	Diagonal in question. The default is 0. Use k>0 for diagonals above the main diagonal, and k<0 for diagonals below the main diagonal. If input has shape (S0, S1), k must be between -S0 and S1.

Returns: new symbol

Symbol mxnet::cpp::dot	(	const std::string &	symbol_name,
		Symbol	lhs,
		Symbol	rhs,
		bool	transpose_a = `false`,
		bool	transpose_b = `false`,
		DotForwardStype	forward_stype = `DotForwardStype::kNone`
	)

inline

Dot product of two arrays.

   ``dot``'s behavior depends on the input array dimensions:

   - 1-D arrays: inner product of vectors
   - 2-D arrays: matrix multiplication
   - N-D arrays: a sum product over the last axis of the first input and the first
   axis of the second input

   For example, given 3-D ``x`` with shape `(n,m,k)` and ``y`` with shape `(k,r,s)`, the
   result array will have shape `(n,m,r,s)`. It is computed by::

   dot(x,y)[i,j,a,b] = sum(x[i,j,:]*y[:,a,b])

   Example::

   x = reshape([0,1,2,3,4,5,6,7], shape=(2,2,2))
   y = reshape([7,6,5,4,3,2,1,0], shape=(2,2,2))
   dot(x,y)[0,0,1,1] = 0
   sum(x[0,0,:]*y[:,1,1]) = 0

   The storage type of ``dot`` output depends on storage types of inputs, transpose option and
   forward_stype option for output storage type. Implemented sparse operations include:

   - dot(default, default, transpose_a=True/False, transpose_b=True/False) = default
   - dot(csr, default, transpose_a=True) = default
   - dot(csr, default, transpose_a=True) = row_sparse
   - dot(csr, default) = default
   - dot(csr, row_sparse) = default
   - dot(default, csr) = csr (CPU only)
   - dot(default, csr, forward_stype='default') = default
   - dot(default, csr, transpose_b=True, forward_stype='default') = default

   If the combination of input storage types and forward_stype does not match any
   above patterns, ``dot`` will fallback and generate output with default storage.

   .. Note::

   If the storage type of the lhs is "csr", the storage type of gradient w.r.t rhs will be
   "row_sparse". Only a subset of optimizers support sparse gradients, including SGD, AdaGrad
   and Adam. Note that by default lazy updates are turned on, which may perform differently
   from standard updates. For more details, please check the Optimization API at:
   /api/python/optimization/optimization.html



   Defined in src/operator/tensor/dot.cc:L77

Parameters

symbol_name	name of the resulting symbol
lhs	The first input
rhs	The second input
transpose_a	If true then transpose the first input before dot.
transpose_b	If true then transpose the second input before dot.
forward_stype	The desired storage type of the forward output given by user. If the combination of input storage types and this hint does not match any implemented ones, the dot operator will perform fallback operation and still produce an output of the desired storage type.

Returns: new symbol

Symbol mxnet::cpp::dot	(	Symbol	lhs,
		Symbol	rhs,
		bool	transpose_a = `false`,
		bool	transpose_b = `false`,
		DotForwardStype	forward_stype = `DotForwardStype::kNone`
	)

inline

Dot product of two arrays.

   ``dot``'s behavior depends on the input array dimensions:

   - 1-D arrays: inner product of vectors
   - 2-D arrays: matrix multiplication
   - N-D arrays: a sum product over the last axis of the first input and the first
   axis of the second input

   For example, given 3-D ``x`` with shape `(n,m,k)` and ``y`` with shape `(k,r,s)`, the
   result array will have shape `(n,m,r,s)`. It is computed by::

   dot(x,y)[i,j,a,b] = sum(x[i,j,:]*y[:,a,b])

   Example::

   x = reshape([0,1,2,3,4,5,6,7], shape=(2,2,2))
   y = reshape([7,6,5,4,3,2,1,0], shape=(2,2,2))
   dot(x,y)[0,0,1,1] = 0
   sum(x[0,0,:]*y[:,1,1]) = 0

   The storage type of ``dot`` output depends on storage types of inputs, transpose option and
   forward_stype option for output storage type. Implemented sparse operations include:

   - dot(default, default, transpose_a=True/False, transpose_b=True/False) = default
   - dot(csr, default, transpose_a=True) = default
   - dot(csr, default, transpose_a=True) = row_sparse
   - dot(csr, default) = default
   - dot(csr, row_sparse) = default
   - dot(default, csr) = csr (CPU only)
   - dot(default, csr, forward_stype='default') = default
   - dot(default, csr, transpose_b=True, forward_stype='default') = default

   If the combination of input storage types and forward_stype does not match any
   above patterns, ``dot`` will fallback and generate output with default storage.

   .. Note::

   If the storage type of the lhs is "csr", the storage type of gradient w.r.t rhs will be
   "row_sparse". Only a subset of optimizers support sparse gradients, including SGD, AdaGrad
   and Adam. Note that by default lazy updates are turned on, which may perform differently
   from standard updates. For more details, please check the Optimization API at:
   /api/python/optimization/optimization.html



   Defined in src/operator/tensor/dot.cc:L77

Parameters

lhs	The first input
rhs	The second input
transpose_a	If true then transpose the first input before dot.
transpose_b	If true then transpose the second input before dot.
forward_stype	The desired storage type of the forward output given by user. If the combination of input storage types and this hint does not match any implemented ones, the dot operator will perform fallback operation and still produce an output of the desired storage type.

Returns: new symbol

Symbol mxnet::cpp::Dropout	(	const std::string &	symbol_name,
		Symbol	data,
		mx_float	p = `0.5`,
		DropoutMode	mode = `DropoutMode::kTraining`,
		Shape	axes = `Shape()`
	)

inline

Applies dropout operation to input array.

   - During training, each element of the input is set to zero with probability p.
   The whole array is rescaled by :math:`1/(1-p)` to keep the expected
   sum of the input unchanged.

   - During testing, this operator does not change the input if mode is 'training'.
   If mode is 'always', the same computation as during training will be applied.

   Example::

   random.seed(998)
   input_array = array([[3., 0.5,  -0.5,  2., 7.],
   [2., -0.4,   7.,  3., 0.2]])
   a = symbol.Variable('a')
   dropout = symbol.Dropout(a, p = 0.2)
   executor = dropout.simple_bind(a = input_array.shape)

   ## If training
   executor.forward(is_train = True, a = input_array)
   executor.outputs
   [[ 3.75   0.625 -0.     2.5    8.75 ]
   [ 2.5   -0.5    8.75   3.75   0.   ]]

   ## If testing
   executor.forward(is_train = False, a = input_array)
   executor.outputs
   [[ 3.     0.5   -0.5    2.     7.   ]
   [ 2.    -0.4    7.     3.     0.2  ]]


   Defined in src/operator/nn/dropout.cc:L76

Parameters

symbol_name	name of the resulting symbol
data	Input array to which dropout will be applied.
p	Fraction of the input that gets dropped out during training time.
mode	Whether to only turn on dropout during training or to also turn on for inference.
axes	Axes for variational dropout kernel.

Returns: new symbol

Symbol mxnet::cpp::Dropout	(	Symbol	data,
		mx_float	p = `0.5`,
		DropoutMode	mode = `DropoutMode::kTraining`,
		Shape	axes = `Shape()`
	)

inline

Applies dropout operation to input array.

   - During training, each element of the input is set to zero with probability p.
   The whole array is rescaled by :math:`1/(1-p)` to keep the expected
   sum of the input unchanged.

   - During testing, this operator does not change the input if mode is 'training'.
   If mode is 'always', the same computation as during training will be applied.

   Example::

   random.seed(998)
   input_array = array([[3., 0.5,  -0.5,  2., 7.],
   [2., -0.4,   7.,  3., 0.2]])
   a = symbol.Variable('a')
   dropout = symbol.Dropout(a, p = 0.2)
   executor = dropout.simple_bind(a = input_array.shape)

   ## If training
   executor.forward(is_train = True, a = input_array)
   executor.outputs
   [[ 3.75   0.625 -0.     2.5    8.75 ]
   [ 2.5   -0.5    8.75   3.75   0.   ]]

   ## If testing
   executor.forward(is_train = False, a = input_array)
   executor.outputs
   [[ 3.     0.5   -0.5    2.     7.   ]
   [ 2.    -0.4    7.     3.     0.2  ]]


   Defined in src/operator/nn/dropout.cc:L76

Parameters

data	Input array to which dropout will be applied.
p	Fraction of the input that gets dropped out during training time.
mode	Whether to only turn on dropout during training or to also turn on for inference.
axes	Axes for variational dropout kernel.

Returns: new symbol

Symbol mxnet::cpp::elemwise_add	(	const std::string &	symbol_name,
		Symbol	lhs,
		Symbol	rhs
	)

inline

Adds arguments element-wise.

   The storage type of ``elemwise_add`` output depends on storage types of inputs

   - elemwise_add(row_sparse, row_sparse) = row_sparse
   - elemwise_add(csr, csr) = csr
   - elemwise_add(default, csr) = default
   - elemwise_add(csr, default) = default
   - elemwise_add(default, rsp) = default
   - elemwise_add(rsp, default) = default
   - otherwise, ``elemwise_add`` generates output with default storage

Parameters

symbol_name	name of the resulting symbol
lhs	first input
rhs	second input

Returns: new symbol

Symbol mxnet::cpp::elemwise_add	(	Symbol	lhs,
		Symbol	rhs
	)

inline

Adds arguments element-wise.

   The storage type of ``elemwise_add`` output depends on storage types of inputs

   - elemwise_add(row_sparse, row_sparse) = row_sparse
   - elemwise_add(csr, csr) = csr
   - elemwise_add(default, csr) = default
   - elemwise_add(csr, default) = default
   - elemwise_add(default, rsp) = default
   - elemwise_add(rsp, default) = default
   - otherwise, ``elemwise_add`` generates output with default storage

Parameters

lhs	first input
rhs	second input

Returns: new symbol

Symbol mxnet::cpp::elemwise_div	(	const std::string &	symbol_name,
		Symbol	lhs,
		Symbol	rhs
	)

inline

Divides arguments element-wise.

   The storage type of ``elemwise_div`` output is always dense

Parameters

symbol_name	name of the resulting symbol
lhs	first input
rhs	second input

Returns: new symbol

Symbol mxnet::cpp::elemwise_div	(	Symbol	lhs,
		Symbol	rhs
	)

inline

Divides arguments element-wise.

   The storage type of ``elemwise_div`` output is always dense

Parameters

lhs	first input
rhs	second input

Returns: new symbol

Symbol mxnet::cpp::elemwise_mul	(	const std::string &	symbol_name,
		Symbol	lhs,
		Symbol	rhs
	)

inline

Multiplies arguments element-wise.

   The storage type of ``elemwise_mul`` output depends on storage types of inputs

   - elemwise_mul(default, default) = default
   - elemwise_mul(row_sparse, row_sparse) = row_sparse
   - elemwise_mul(default, row_sparse) = row_sparse
   - elemwise_mul(row_sparse, default) = row_sparse
   - elemwise_mul(csr, csr) = csr
   - otherwise, ``elemwise_mul`` generates output with default storage

Parameters

symbol_name	name of the resulting symbol
lhs	first input
rhs	second input

Returns: new symbol

Symbol mxnet::cpp::elemwise_mul	(	Symbol	lhs,
		Symbol	rhs
	)

inline

Multiplies arguments element-wise.

   The storage type of ``elemwise_mul`` output depends on storage types of inputs

   - elemwise_mul(default, default) = default
   - elemwise_mul(row_sparse, row_sparse) = row_sparse
   - elemwise_mul(default, row_sparse) = row_sparse
   - elemwise_mul(row_sparse, default) = row_sparse
   - elemwise_mul(csr, csr) = csr
   - otherwise, ``elemwise_mul`` generates output with default storage

Parameters

lhs	first input
rhs	second input

Returns: new symbol

Symbol mxnet::cpp::elemwise_sub	(	const std::string &	symbol_name,
		Symbol	lhs,
		Symbol	rhs
	)

inline

Subtracts arguments element-wise.

   The storage type of ``elemwise_sub`` output depends on storage types of inputs

   - elemwise_sub(row_sparse, row_sparse) = row_sparse
   - elemwise_sub(csr, csr) = csr
   - elemwise_sub(default, csr) = default
   - elemwise_sub(csr, default) = default
   - elemwise_sub(default, rsp) = default
   - elemwise_sub(rsp, default) = default
   - otherwise, ``elemwise_sub`` generates output with default storage

Parameters

symbol_name	name of the resulting symbol
lhs	first input
rhs	second input

Returns: new symbol

Symbol mxnet::cpp::elemwise_sub	(	Symbol	lhs,
		Symbol	rhs
	)

inline

Subtracts arguments element-wise.

   The storage type of ``elemwise_sub`` output depends on storage types of inputs

   - elemwise_sub(row_sparse, row_sparse) = row_sparse
   - elemwise_sub(csr, csr) = csr
   - elemwise_sub(default, csr) = default
   - elemwise_sub(csr, default) = default
   - elemwise_sub(default, rsp) = default
   - elemwise_sub(rsp, default) = default
   - otherwise, ``elemwise_sub`` generates output with default storage

Parameters

lhs	first input
rhs	second input

Returns: new symbol

Symbol mxnet::cpp::Embedding	(	const std::string &	symbol_name,
		Symbol	data,
		Symbol	weight,
		int	input_dim,
		int	output_dim,
		EmbeddingDtype	dtype = `EmbeddingDtype::kFloat32`,
		bool	sparse_grad = `false`
	)

inline

Maps integer indices to vector representations (embeddings).

   This operator maps words to real-valued vectors in a high-dimensional space,
   called word embeddings. These embeddings can capture semantic and syntactic
   properties of the words. For example, it has been noted that in the learned
   embedding spaces, similar words tend to be close to each other and dissimilar
   words far apart.

   For an input array of shape (d1, ..., dK),
   the shape of an output array is (d1, ..., dK, output_dim).
   All the input values should be integers in the range [0, input_dim).

   If the input_dim is ip0 and output_dim is op0, then the shape of the embedding
   weight matrix must be (ip0, op0).

   By default, if any index mentioned is too large, it is replaced by the index that
   addresses the last vector in an embedding matrix.

   Examples::

   input_dim = 4
   output_dim = 5

   // Each row in weight matrix y represents a word. So, y = (w0,w1,w2,w3)
   y = [[  0.,   1.,   2.,   3.,   4.],
   [  5.,   6.,   7.,   8.,   9.],
   [ 10.,  11.,  12.,  13.,  14.],
   [ 15.,  16.,  17.,  18.,  19.]]

   // Input array x represents n-grams(2-gram). So, x = [(w1,w3), (w0,w2)]
   x = [[ 1.,  3.],
   [ 0.,  2.]]

   // Mapped input x to its vector representation y.
   Embedding(x, y, 4, 5) = [[[  5.,   6.,   7.,   8.,   9.],
   [ 15.,  16.,  17.,  18.,  19.]],

   [[  0.,   1.,   2.,   3.,   4.],
   [ 10.,  11.,  12.,  13.,  14.]]]


   The storage type of weight can be either row_sparse or default.

   .. Note::

   If "sparse_grad" is set to True, the storage type of gradient w.r.t weights will be
   "row_sparse". Only a subset of optimizers support sparse gradients, including SGD, AdaGrad
   and Adam. Note that by default lazy updates are turned on, which may perform differently
   from standard updates. For more details, please check the Optimization API at:
   /api/python/optimization/optimization.html



   Defined in src/operator/tensor/indexing_op.cc:L267

Parameters

symbol_name	name of the resulting symbol
data	The input array to the embedding operator.
weight	The embedding weight matrix.
input_dim	Vocabulary size of the input indices.
output_dim	Dimension of the embedding vectors.
dtype	Data type of weight.
sparse_grad	Compute row sparse gradient in the backward calculation. If set to True, the grad's storage type is row_sparse.

Returns: new symbol

Symbol mxnet::cpp::Embedding	(	Symbol	data,
		Symbol	weight,
		int	input_dim,
		int	output_dim,
		EmbeddingDtype	dtype = `EmbeddingDtype::kFloat32`,
		bool	sparse_grad = `false`
	)

inline

Maps integer indices to vector representations (embeddings).

   This operator maps words to real-valued vectors in a high-dimensional space,
   called word embeddings. These embeddings can capture semantic and syntactic
   properties of the words. For example, it has been noted that in the learned
   embedding spaces, similar words tend to be close to each other and dissimilar
   words far apart.

   For an input array of shape (d1, ..., dK),
   the shape of an output array is (d1, ..., dK, output_dim).
   All the input values should be integers in the range [0, input_dim).

   If the input_dim is ip0 and output_dim is op0, then the shape of the embedding
   weight matrix must be (ip0, op0).

   By default, if any index mentioned is too large, it is replaced by the index that
   addresses the last vector in an embedding matrix.

   Examples::

   input_dim = 4
   output_dim = 5

   // Each row in weight matrix y represents a word. So, y = (w0,w1,w2,w3)
   y = [[  0.,   1.,   2.,   3.,   4.],
   [  5.,   6.,   7.,   8.,   9.],
   [ 10.,  11.,  12.,  13.,  14.],
   [ 15.,  16.,  17.,  18.,  19.]]

   // Input array x represents n-grams(2-gram). So, x = [(w1,w3), (w0,w2)]
   x = [[ 1.,  3.],
   [ 0.,  2.]]

   // Mapped input x to its vector representation y.
   Embedding(x, y, 4, 5) = [[[  5.,   6.,   7.,   8.,   9.],
   [ 15.,  16.,  17.,  18.,  19.]],

   [[  0.,   1.,   2.,   3.,   4.],
   [ 10.,  11.,  12.,  13.,  14.]]]


   The storage type of weight can be either row_sparse or default.

   .. Note::

   If "sparse_grad" is set to True, the storage type of gradient w.r.t weights will be
   "row_sparse". Only a subset of optimizers support sparse gradients, including SGD, AdaGrad
   and Adam. Note that by default lazy updates are turned on, which may perform differently
   from standard updates. For more details, please check the Optimization API at:
   /api/python/optimization/optimization.html



   Defined in src/operator/tensor/indexing_op.cc:L267

Parameters

data	The input array to the embedding operator.
weight	The embedding weight matrix.
input_dim	Vocabulary size of the input indices.
output_dim	Dimension of the embedding vectors.
dtype	Data type of weight.
sparse_grad	Compute row sparse gradient in the backward calculation. If set to True, the grad's storage type is row_sparse.

Returns: new symbol

Symbol mxnet::cpp::exp	(	const std::string &	symbol_name,
		Symbol	data
	)

inline

Returns element-wise exponential value of the input.

   .. math::
   exp(x) = e^x \approx 2.718^x

   Example::

   exp([0, 1, 2]) = [1., 2.71828175, 7.38905621]

   The storage type of ``exp`` output is always dense



   Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L929

Parameters

symbol_name	name of the resulting symbol
data	The input array.

Returns: new symbol

Symbol mxnet::cpp::exp ( Symbol data )

inline

Returns element-wise exponential value of the input.

   .. math::
   exp(x) = e^x \approx 2.718^x

   Example::

   exp([0, 1, 2]) = [1., 2.71828175, 7.38905621]

   The storage type of ``exp`` output is always dense



   Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L929

Parameters

data	The input array.

Returns: new symbol

Symbol mxnet::cpp::expand_dims	(	const std::string &	symbol_name,
		Symbol	data,
		int	axis
	)

inline

Inserts a new axis of size 1 into the array shape

   For example, given ``x`` with shape ``(2,3,4)``, then ``expand_dims(x, axis=1)``
   will return a new array with shape ``(2,1,3,4)``.



   Defined in src/operator/tensor/matrix_op.cc:L347

Parameters

symbol_name	name of the resulting symbol
data	Source input
axis	Position where new axis is to be inserted. Suppose that the input `NDArray`'s dimension is `ndim`, the range of the inserted axis is `[-ndim, ndim]`.

Returns: new symbol

Symbol mxnet::cpp::expand_dims	(	Symbol	data,
		int	axis
	)

inline

Inserts a new axis of size 1 into the array shape

   For example, given ``x`` with shape ``(2,3,4)``, then ``expand_dims(x, axis=1)``
   will return a new array with shape ``(2,1,3,4)``.



   Defined in src/operator/tensor/matrix_op.cc:L347

Parameters

data	Source input
axis	Position where new axis is to be inserted. Suppose that the input `NDArray`'s dimension is `ndim`, the range of the inserted axis is `[-ndim, ndim]`.

Returns: new symbol

Symbol mxnet::cpp::expm1	(	const std::string &	symbol_name,
		Symbol	data
	)

inline

Returns exp(x) - 1 computed element-wise on the input.

   This function provides greater precision than ``exp(x) - 1`` for small values of ``x``.

   The storage type of ``expm1`` output depends upon the input storage type:

   - expm1(default) = default
   - expm1(row_sparse) = row_sparse
   - expm1(csr) = csr



   Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L1008

Parameters

symbol_name	name of the resulting symbol
data	The input array.

Returns: new symbol

Symbol mxnet::cpp::expm1 ( Symbol data )

inline

Returns exp(x) - 1 computed element-wise on the input.

   This function provides greater precision than ``exp(x) - 1`` for small values of ``x``.

   The storage type of ``expm1`` output depends upon the input storage type:

   - expm1(default) = default
   - expm1(row_sparse) = row_sparse
   - expm1(csr) = csr



   Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L1008

Parameters

data	The input array.

Returns: new symbol

Symbol mxnet::cpp::fill_element_0index	(	const std::string &	symbol_name,
		Symbol	lhs,
		Symbol	mhs,
		Symbol	rhs
	)

inline

Fill one element of each line (row for Python, column for R/Julia) in lhs according to the index indicated by rhs and the values indicated by mhs. This function assumes rhs uses 0-based indexing.

Parameters

symbol_name	name of the resulting symbol
lhs	Left operand to the function.
mhs	Middle operand to the function.
rhs	Right operand to the function.

Returns: new symbol

Symbol mxnet::cpp::fill_element_0index	(	Symbol	lhs,
		Symbol	mhs,
		Symbol	rhs
	)

inline

Fill one element of each line (row for Python, column for R/Julia) in lhs according to the index indicated by rhs and the values indicated by mhs. This function assumes rhs uses 0-based indexing.

Parameters

lhs	Left operand to the function.
mhs	Middle operand to the function.
rhs	Right operand to the function.

Returns: new symbol

Symbol mxnet::cpp::fix	(	const std::string &	symbol_name,
		Symbol	data
	)

inline

Returns element-wise rounded value to the nearest integer towards zero of the input.

Example::

fix([-2.1, -1.9, 1.9, 2.1]) = [-2., -1., 1., 2.]

The storage type of fix output depends upon the input storage type:

fix(default) = default
fix(row_sparse) = row_sparse
fix(csr) = csr

   Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L803

Parameters

symbol_name	name of the resulting symbol
data	The input array.

Returns: new symbol

Symbol mxnet::cpp::fix ( Symbol data )

inline

Returns element-wise rounded value to the nearest integer towards zero of the input.

Example::

fix([-2.1, -1.9, 1.9, 2.1]) = [-2., -1., 1., 2.]

The storage type of fix output depends upon the input storage type:

fix(default) = default
fix(row_sparse) = row_sparse
fix(csr) = csr

   Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L803

Parameters

data	The input array.

Returns: new symbol

Symbol mxnet::cpp::Flatten	(	const std::string &	symbol_name,
		Symbol	data
	)

inline

Flattens the input array into a 2-D array by collapsing the higher dimensions.

   .. note:: `Flatten` is deprecated. Use `flatten` instead.

   For an input array with shape ``(d1, d2, ..., dk)``, the `flatten` operation reshapes
   the input array into an output array of shape ``(d1, d2*...*dk)``.

   Note that the behavior of this function is different from numpy.ndarray.flatten,
   which behaves similar to mxnet.ndarray.reshape((-1,)).

   Example::

   x = [[
   [1,2,3],
   [4,5,6],
   [7,8,9]
   ],
   [    [1,2,3],
   [4,5,6],
   [7,8,9]
   ]],

   flatten(x) = [[ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9.],
   [ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9.]]



   Defined in src/operator/tensor/matrix_op.cc:L259

Parameters

symbol_name	name of the resulting symbol
data	Input array.

Returns: new symbol

Symbol mxnet::cpp::Flatten ( Symbol data )

inline

Flattens the input array into a 2-D array by collapsing the higher dimensions.

   .. note:: `Flatten` is deprecated. Use `flatten` instead.

   For an input array with shape ``(d1, d2, ..., dk)``, the `flatten` operation reshapes
   the input array into an output array of shape ``(d1, d2*...*dk)``.

   Note that the behavior of this function is different from numpy.ndarray.flatten,
   which behaves similar to mxnet.ndarray.reshape((-1,)).

   Example::

   x = [[
   [1,2,3],
   [4,5,6],
   [7,8,9]
   ],
   [    [1,2,3],
   [4,5,6],
   [7,8,9]
   ]],

   flatten(x) = [[ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9.],
   [ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9.]]



   Defined in src/operator/tensor/matrix_op.cc:L259

Parameters

data	Input array.

Returns: new symbol

Symbol mxnet::cpp::floor	(	const std::string &	symbol_name,
		Symbol	data
	)

inline

Returns element-wise floor of the input.

   The floor of the scalar x is the largest integer i, such that i <= x.

   Example::

   floor([-2.1, -1.9, 1.5, 1.9, 2.1]) = [-3., -2.,  1.,  1.,  2.]

   The storage type of ``floor`` output depends upon the input storage type:

   - floor(default) = default
   - floor(row_sparse) = row_sparse
   - floor(csr) = csr



   Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L765

Parameters

symbol_name	name of the resulting symbol
data	The input array.

Returns: new symbol

Symbol mxnet::cpp::floor ( Symbol data )

inline

Returns element-wise floor of the input.

   The floor of the scalar x is the largest integer i, such that i <= x.

   Example::

   floor([-2.1, -1.9, 1.5, 1.9, 2.1]) = [-3., -2.,  1.,  1.,  2.]

   The storage type of ``floor`` output depends upon the input storage type:

   - floor(default) = default
   - floor(row_sparse) = row_sparse
   - floor(csr) = csr



   Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L765

Parameters

data	The input array.

Returns: new symbol

Symbol mxnet::cpp::ftml_update	(	const std::string &	symbol_name,
		Symbol	weight,
		Symbol	grad,
		Symbol	d,
		Symbol	v,
		Symbol	z,
		mx_float	lr,
		int	t,
		mx_float	beta1 = `0.6`,
		mx_float	beta2 = `0.999`,
		double	epsilon = `1e-08`,
		mx_float	wd = `0`,
		mx_float	rescale_grad = `1`,
		mx_float	clip_grad = `-1`
	)

inline

The FTML optimizer described in FTML - Follow the Moving Leader in Deep Learning, available at http://proceedings.mlr.press/v70/zheng17a/zheng17a.pdf.

.. math::

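g_t = \nabla J(W_{t-1}) \\
v_t = \beta_2 v_{t-1} + (1 - \beta_2) g_t^2 \\
d_t = \frac{ 1 - \beta_1^t }{ \eta_t } \left( \sqrt{ \frac{ v_t }{ 1 - \beta_2^t } } + \epsilon \right) \\
\sigma_t = d_t - \beta_1 d_{t-1} \\
z_t = \beta_1 z_{ t-1 } + (1 - \beta_1^t) g_t - \sigma_t W_{t-1} \\
W_t = - \frac{ z_t }{ d_t }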

   Defined in src/operator/optimizer_op.cc:L447

Parameters

symbol_name	name of the resulting symbol
weight	Weight
grad	Gradient
d	Internal state `d_t`
v	Internal state `v_t`
z	Internal state `z_t`
lr	Learning rate.
t	Number of update.
beta1	Generally close to 0.5.
beta2	Generally close to 1.
epsilon	Epsilon to prevent div 0.
wd	Weight decay augments the objective function with a regularization term that penalizes large weights. The penalty scales with the square of the magnitude of each weight.
rescale_grad	Rescale gradient to grad = rescale_grad*grad.
clip_grad	Clip gradient to the range of [-clip_grad, clip_grad]. If clip_grad <= 0, gradient clipping is turned off. grad = max(min(grad, clip_grad), -clip_grad).

Returns: new symbol

Symbol mxnet::cpp::ftml_update	(	Symbol	weight,
		Symbol	grad,
		Symbol	d,
		Symbol	v,
		Symbol	z,
		mx_float	lr,
		int	t,
		mx_float	beta1 = `0.6`,
		mx_float	beta2 = `0.999`,
		double	epsilon = `1e-08`,
		mx_float	wd = `0`,
		mx_float	rescale_grad = `1`,
		mx_float	clip_grad = `-1`
	)

inline

The FTML optimizer described in FTML - Follow the Moving Leader in Deep Learning, available at http://proceedings.mlr.press/v70/zheng17a/zheng17a.pdf.

.. math::

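g_t = \nabla J(W_{t-1}) \\
v_t = \beta_2 v_{t-1} + (1 - \beta_2) g_t^2 \\
d_t = \frac{1 - \beta_1^t}{\eta_t} \left( \sqrt{\frac{v_t}{1 - \beta_2^t}} + \epsilon \right) \\
\sigma_t = d_t - \beta_1 d_{t-1} \\
z_t = \beta_1 z_{t-1} + (1 - \beta_1^t) g_t - \sigma_t W_{t-1} \\
W_t = - \frac{z_t}{d_t}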

   Defined in src/operator/optimizer_op.cc:L447

Parameters

weight	Weight
grad	Gradient
d	Internal state `d_t`
v	Internal state `v_t`
z	Internal state `z_t`
lr	Learning rate.
t	Number of update.
beta1	Generally close to 0.5.
beta2	Generally close to 1.
epsilon	Epsilon to prevent div 0.
wd	Weight decay augments the objective function with a regularization term that penalizes large weights. The penalty scales with the square of the magnitude of each weight.
rescale_grad	Rescale gradient to grad = rescale_grad*grad.
clip_grad	Clip gradient to the range of [-clip_grad, clip_grad]. If clip_grad <= 0, gradient clipping is turned off. grad = max(min(grad, clip_grad), -clip_grad).

Returns: new symbol

Symbol mxnet::cpp::ftrl_update	(	const std::string &	symbol_name,
		Symbol	weight,
		Symbol	grad,
		Symbol	z,
		Symbol	n,
		mx_float	lr,
		mx_float	lamda1 = `0.01`,
		mx_float	beta = `1`,
		mx_float	wd = `0`,
		mx_float	rescale_grad = `1`,
		mx_float	clip_gradient = `-1`
	)

inline

Update function for Ftrl optimizer. Referenced from Ad Click Prediction: a View from the Trenches, available at http://dl.acm.org/citation.cfm?id=2488200.

It updates the weights using::

rescaled_grad = clip(grad * rescale_grad, clip_gradient)
z += rescaled_grad - (sqrt(n + rescaled_grad**2) - sqrt(n)) * weight / learning_rate
n += rescaled_grad**2
w = (sign(z) * lamda1 - z) / ((beta + sqrt(n)) / learning_rate + wd) * (abs(z) > lamda1)

If w, z and n are all of row_sparse storage type, only the row slices whose indices appear in grad.indices are updated (for w, z and n)::

for row in grad.indices:
    rescaled_grad[row] = clip(grad[row] * rescale_grad, clip_gradient)
    z[row] += rescaled_grad[row] - (sqrt(n[row] + rescaled_grad[row]**2) - sqrt(n[row])) * weight[row] / learning_rate
    n[row] += rescaled_grad[row]**2
    w[row] = (sign(z[row]) * lamda1 - z[row]) / ((beta + sqrt(n[row])) / learning_rate + wd) * (abs(z[row]) > lamda1)

   Defined in src/operator/optimizer_op.cc:L632

Parameters

symbol_name	name of the resulting symbol
weight	Weight
grad	Gradient
z	z
n	Square of grad
lr	Learning rate
lamda1	The L1 regularization coefficient.
beta	Per-Coordinate Learning Rate beta.
wd	Weight decay augments the objective function with a regularization term that penalizes large weights. The penalty scales with the square of the magnitude of each weight.
rescale_grad	Rescale gradient to grad = rescale_grad*grad.
clip_gradient	Clip gradient to the range of [-clip_gradient, clip_gradient]. If clip_gradient <= 0, gradient clipping is turned off. grad = max(min(grad, clip_gradient), -clip_gradient).

Returns: new symbol

Symbol mxnet::cpp::ftrl_update	(	Symbol	weight,
		Symbol	grad,
		Symbol	z,
		Symbol	n,
		mx_float	lr,
		mx_float	lamda1 = `0.01`,
		mx_float	beta = `1`,
		mx_float	wd = `0`,
		mx_float	rescale_grad = `1`,
		mx_float	clip_gradient = `-1`
	)

inline

Update function for Ftrl optimizer. Referenced from Ad Click Prediction: a View from the Trenches, available at http://dl.acm.org/citation.cfm?id=2488200.

It updates the weights using::

rescaled_grad = clip(grad * rescale_grad, clip_gradient)
z += rescaled_grad - (sqrt(n + rescaled_grad**2) - sqrt(n)) * weight / learning_rate
n += rescaled_grad**2
w = (sign(z) * lamda1 - z) / ((beta + sqrt(n)) / learning_rate + wd) * (abs(z) > lamda1)

If w, z and n are all of row_sparse storage type, only the row slices whose indices appear in grad.indices are updated (for w, z and n)::

for row in grad.indices:
    rescaled_grad[row] = clip(grad[row] * rescale_grad, clip_gradient)
    z[row] += rescaled_grad[row] - (sqrt(n[row] + rescaled_grad[row]**2) - sqrt(n[row])) * weight[row] / learning_rate
    n[row] += rescaled_grad[row]**2
    w[row] = (sign(z[row]) * lamda1 - z[row]) / ((beta + sqrt(n[row])) / learning_rate + wd) * (abs(z[row]) > lamda1)

   Defined in src/operator/optimizer_op.cc:L632

Parameters

weight	Weight
grad	Gradient
z	z
n	Square of grad
lr	Learning rate
lamda1	The L1 regularization coefficient.
beta	Per-Coordinate Learning Rate beta.
wd	Weight decay augments the objective function with a regularization term that penalizes large weights. The penalty scales with the square of the magnitude of each weight.
rescale_grad	Rescale gradient to grad = rescale_grad*grad.
clip_gradient	Clip gradient to the range of [-clip_gradient, clip_gradient]. If clip_gradient <= 0, gradient clipping is turned off. grad = max(min(grad, clip_gradient), -clip_gradient).

Returns: new symbol

Symbol mxnet::cpp::FullyConnected	(	const std::string &	symbol_name,
		Symbol	data,
		Symbol	weight,
		Symbol	bias,
		int	num_hidden,
		bool	no_bias = `false`,
		bool	flatten = `true`
	)

inline

Applies a linear transformation: :math:Y = XW^T + b.

   If ``flatten`` is set to be true, then the shapes are:

   - **data**: `(batch_size, x1, x2, ..., xn)`
   - **weight**: `(num_hidden, x1 * x2 * ... * xn)`
   - **bias**: `(num_hidden,)`
   - **out**: `(batch_size, num_hidden)`

   If ``flatten`` is set to be false, then the shapes are:

   - **data**: `(x1, x2, ..., xn, input_dim)`
   - **weight**: `(num_hidden, input_dim)`
   - **bias**: `(num_hidden,)`
   - **out**: `(x1, x2, ..., xn, num_hidden)`

   The learnable parameters include both ``weight`` and ``bias``.

   If ``no_bias`` is set to be true, then the ``bias`` term is ignored.

   .. Note::

   The sparse support for FullyConnected is limited to forward evaluation with `row_sparse`
   weight and bias, where the length of `weight.indices` and `bias.indices` must be equal
   to `num_hidden`. This could be useful for model inference with `row_sparse` weights
   trained with importance sampling or noise contrastive estimation.

   To compute linear transformation with 'csr' sparse data, sparse.dot is recommended instead
   of sparse.FullyConnected.



   Defined in src/operator/nn/fully_connected.cc:L272

Parameters

symbol_name	name of the resulting symbol
data	Input data.
weight	Weight matrix.
bias	Bias parameter.
num_hidden	Number of hidden nodes of the output.
no_bias	Whether to disable bias parameter.
flatten	Whether to collapse all but the first axis of the input data tensor.

Returns: new symbol

Symbol mxnet::cpp::FullyConnected	(	Symbol	data,
		Symbol	weight,
		Symbol	bias,
		int	num_hidden,
		bool	no_bias = `false`,
		bool	flatten = `true`
	)

inline

Applies a linear transformation: :math:Y = XW^T + b.

   If ``flatten`` is set to be true, then the shapes are:

   - **data**: `(batch_size, x1, x2, ..., xn)`
   - **weight**: `(num_hidden, x1 * x2 * ... * xn)`
   - **bias**: `(num_hidden,)`
   - **out**: `(batch_size, num_hidden)`

   If ``flatten`` is set to be false, then the shapes are:

   - **data**: `(x1, x2, ..., xn, input_dim)`
   - **weight**: `(num_hidden, input_dim)`
   - **bias**: `(num_hidden,)`
   - **out**: `(x1, x2, ..., xn, num_hidden)`

   The learnable parameters include both ``weight`` and ``bias``.

   If ``no_bias`` is set to be true, then the ``bias`` term is ignored.

   .. Note::

   The sparse support for FullyConnected is limited to forward evaluation with `row_sparse`
   weight and bias, where the length of `weight.indices` and `bias.indices` must be equal
   to `num_hidden`. This could be useful for model inference with `row_sparse` weights
   trained with importance sampling or noise contrastive estimation.

   To compute linear transformation with 'csr' sparse data, sparse.dot is recommended instead
   of sparse.FullyConnected.



   Defined in src/operator/nn/fully_connected.cc:L272

Parameters

data	Input data.
weight	Weight matrix.
bias	Bias parameter.
num_hidden	Number of hidden nodes of the output.
no_bias	Whether to disable bias parameter.
flatten	Whether to collapse all but the first axis of the input data tensor.

Returns: new symbol

Symbol mxnet::cpp::gamma	(	const std::string &	symbol_name,
		Symbol	data
	)

inline

Returns the gamma function (extension of the factorial function to the reals), computed element-wise on the input array.

The storage type of gamma output is always dense

Parameters

symbol_name	name of the resulting symbol
data	The input array.

Returns: new symbol

Symbol mxnet::cpp::gamma ( Symbol data )

inline

Returns the gamma function (extension of the factorial function to the reals), computed element-wise on the input array.

The storage type of gamma output is always dense

Parameters

data	The input array.

Returns: new symbol

Symbol mxnet::cpp::gammaln	(	const std::string &	symbol_name,
		Symbol	data
	)

inline

Returns element-wise log of the absolute value of the gamma function of the input.

The storage type of gammaln output is always dense

Parameters

symbol_name	name of the resulting symbol
data	The input array.

Returns: new symbol

Symbol mxnet::cpp::gammaln ( Symbol data )

inline

Returns element-wise log of the absolute value of the gamma function of the input.

The storage type of gammaln output is always dense

Parameters

data	The input array.

Returns: new symbol

Symbol mxnet::cpp::gather_nd	(	const std::string &	symbol_name,
		Symbol	data,
		Symbol	indices
	)

inline

Gather elements or slices from data and store to a tensor whose shape is defined by indices.

Given `data` with shape `(X_0, X_1, ..., X_{N-1})` and `indices` with shape `(M, Y_0, ..., Y_{K-1})`, the output will have shape `(Y_0, ..., Y_{K-1}, X_M, ..., X_{N-1})`, where `M <= N`. If `M == N`, output shape will simply be `(Y_0, ..., Y_{K-1})`.

The elements in output are defined as follows::

output[y_0, ..., y_{K-1}, x_M, ..., x_{N-1}] = data[indices[0, y_0, ..., y_{K-1}], ..., indices[M-1, y_0, ..., y_{K-1}], x_M, ..., x_{N-1}]

Examples::

data = [[0, 1], [2, 3]] indices = [[1, 1, 0], [0, 1, 0]] gather_nd(data, indices) = [2, 3, 0]

data = [[[1, 2], [3, 4]], [[5, 6], [7, 8]]] indices = [[0, 1], [1, 0]] gather_nd(data, indices) = [[3, 4], [5, 6]]

Parameters

symbol_name	name of the resulting symbol
data	data
indices	indices

Returns: new symbol

Symbol mxnet::cpp::gather_nd	(	Symbol	data,
		Symbol	indices
	)

inline

Gather elements or slices from data and store to a tensor whose shape is defined by indices.

Given `data` with shape `(X_0, X_1, ..., X_{N-1})` and `indices` with shape `(M, Y_0, ..., Y_{K-1})`, the output will have shape `(Y_0, ..., Y_{K-1}, X_M, ..., X_{N-1})`, where `M <= N`. If `M == N`, output shape will simply be `(Y_0, ..., Y_{K-1})`.

The elements in output are defined as follows::

output[y_0, ..., y_{K-1}, x_M, ..., x_{N-1}] = data[indices[0, y_0, ..., y_{K-1}], ..., indices[M-1, y_0, ..., y_{K-1}], x_M, ..., x_{N-1}]

Examples::

data = [[0, 1], [2, 3]] indices = [[1, 1, 0], [0, 1, 0]] gather_nd(data, indices) = [2, 3, 0]

data = [[[1, 2], [3, 4]], [[5, 6], [7, 8]]] indices = [[0, 1], [1, 0]] gather_nd(data, indices) = [[3, 4], [5, 6]]

Parameters

data	data
indices	indices

Returns: new symbol

Symbol mxnet::cpp::GridGenerator	(	const std::string &	symbol_name,
		Symbol	data,
		GridGeneratorTransformType	transform_type,
		Shape	target_shape = `Shape(0,0)`
	)

inline

Generates 2D sampling grid for bilinear sampling.

Parameters

symbol_name	name of the resulting symbol
data	Input data to the function.
transform_type	The type of transformation. For `affine`, input data should be an affine matrix of size (batch, 6). For `warp`, input data should be an optical flow of size (batch, 2, h, w).
target_shape	Specifies the output shape (H, W). This is required if transformation type is `affine`. If transformation type is `warp`, this parameter is ignored.

Returns: new symbol

Symbol mxnet::cpp::GridGenerator	(	Symbol	data,
		GridGeneratorTransformType	transform_type,
		Shape	target_shape = `Shape(0,0)`
	)

inline

Generates 2D sampling grid for bilinear sampling.

Parameters

data	Input data to the function.
transform_type	The type of transformation. For `affine`, input data should be an affine matrix of size (batch, 6). For `warp`, input data should be an optical flow of size (batch, 2, h, w).
target_shape	Specifies the output shape (H, W). This is required if transformation type is `affine`. If transformation type is `warp`, this parameter is ignored.

Returns: new symbol

Symbol mxnet::cpp::hard_sigmoid	(	const std::string &	symbol_name,
		Symbol	data,
		mx_float	alpha = `0.2`,
		mx_float	beta = `0.5`
	)

inline

Computes hard sigmoid of x element-wise.

   .. math::
   y = max(0, min(1, alpha * x + beta))



   Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L115

Parameters

symbol_name	name of the resulting symbol
data	The input array.
alpha	Slope of hard sigmoid
beta	Bias of hard sigmoid.

Returns: new symbol

Symbol mxnet::cpp::hard_sigmoid	(	Symbol	data,
		mx_float	alpha = `0.2`,
		mx_float	beta = `0.5`
	)

inline

Computes hard sigmoid of x element-wise.

   .. math::
   y = max(0, min(1, alpha * x + beta))



   Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L115

Parameters

data	The input array.
alpha	Slope of hard sigmoid
beta	Bias of hard sigmoid.

Returns: new symbol

Symbol mxnet::cpp::IdentityAttachKLSparseReg	(	const std::string &	symbol_name,
		Symbol	data,
		mx_float	sparseness_target = `0.1`,
		mx_float	penalty = `0.001`,
		mx_float	momentum = `0.9`
	)

inline

Apply a sparse regularization to the output of a sigmoid activation function.

Parameters

symbol_name	name of the resulting symbol
data	Input data.
sparseness_target	The sparseness target
penalty	The tradeoff parameter for the sparseness penalty
momentum	The momentum for running average

Returns: new symbol

Symbol mxnet::cpp::IdentityAttachKLSparseReg	(	Symbol	data,
		mx_float	sparseness_target = `0.1`,
		mx_float	penalty = `0.001`,
		mx_float	momentum = `0.9`
	)

inline

Apply a sparse regularization to the output of a sigmoid activation function.

Parameters

data	Input data.
sparseness_target	The sparseness target
penalty	The tradeoff parameter for the sparseness penalty
momentum	The momentum for running average

Returns: new symbol

Symbol mxnet::cpp::InstanceNorm	(	const std::string &	symbol_name,
		Symbol	data,
		Symbol	gamma,
		Symbol	beta,
		mx_float	eps = `0.001`
	)

inline

Applies instance normalization to the n-dimensional input array.

   This operator takes an n-dimensional input array where (n>2) and normalizes
   the input using the following formula:

   .. math::

   out = \frac{x - mean[data]}{ \sqrt{Var[data]} + \epsilon} * gamma + beta

   This layer is similar to batch normalization layer (`BatchNorm`)
   with two differences: first, the normalization is
   carried out per example (instance), not over a batch. Second, the
   same normalization is applied both at test and train time. This
   operation is also known as `contrast normalization`.

   If the input data is of shape [batch, channel, spatial_dim1, spatial_dim2, ...],
   `gamma` and `beta` parameters must be vectors of shape [channel].

   This implementation is based on paper:

   .. [1] Instance Normalization: The Missing Ingredient for Fast Stylization,
   D. Ulyanov, A. Vedaldi, V. Lempitsky, 2016 (arXiv:1607.08022v2).

   Examples::

   // Input of shape (2,1,2)
   x = [[[ 1.1,  2.2]],
   [[ 3.3,  4.4]]]

   // gamma parameter of length 1
   gamma = [1.5]

   // beta parameter of length 1
   beta = [0.5]

   // Instance normalization is calculated with the above formula
   InstanceNorm(x,gamma,beta) = [[[-0.997527  ,  1.99752665]],
   [[-0.99752653,  1.99752724]]]



   Defined in src/operator/instance_norm.cc:L95

Parameters

symbol_name	name of the resulting symbol
data	An n-dimensional input array (n > 2) of the form [batch, channel, spatial_dim1, spatial_dim2, ...].
gamma	A vector of length 'channel', which multiplies the normalized input.
beta	A vector of length 'channel', which is added to the product of the normalized input and the weight.
eps	An `epsilon` parameter to prevent division by 0.

Returns: new symbol

Symbol mxnet::cpp::InstanceNorm	(	Symbol	data,
		Symbol	gamma,
		Symbol	beta,
		mx_float	eps = `0.001`
	)

inline

Applies instance normalization to the n-dimensional input array.

   This operator takes an n-dimensional input array where (n>2) and normalizes
   the input using the following formula:

   .. math::

   out = \frac{x - mean[data]}{ \sqrt{Var[data]} + \epsilon} * gamma + beta

   This layer is similar to batch normalization layer (`BatchNorm`)
   with two differences: first, the normalization is
   carried out per example (instance), not over a batch. Second, the
   same normalization is applied both at test and train time. This
   operation is also known as `contrast normalization`.

   If the input data is of shape [batch, channel, spatial_dim1, spatial_dim2, ...],
   `gamma` and `beta` parameters must be vectors of shape [channel].

   This implementation is based on paper:

   .. [1] Instance Normalization: The Missing Ingredient for Fast Stylization,
   D. Ulyanov, A. Vedaldi, V. Lempitsky, 2016 (arXiv:1607.08022v2).

   Examples::

   // Input of shape (2,1,2)
   x = [[[ 1.1,  2.2]],
   [[ 3.3,  4.4]]]

   // gamma parameter of length 1
   gamma = [1.5]

   // beta parameter of length 1
   beta = [0.5]

   // Instance normalization is calculated with the above formula
   InstanceNorm(x,gamma,beta) = [[[-0.997527  ,  1.99752665]],
   [[-0.99752653,  1.99752724]]]



   Defined in src/operator/instance_norm.cc:L95

Parameters

data	An n-dimensional input array (n > 2) of the form [batch, channel, spatial_dim1, spatial_dim2, ...].
gamma	A vector of length 'channel', which multiplies the normalized input.
beta	A vector of length 'channel', which is added to the product of the normalized input and the weight.
eps	An `epsilon` parameter to prevent division by 0.

Returns: new symbol

Symbol mxnet::cpp::khatri_rao	(	const std::string &	symbol_name,
		const std::vector< Symbol > &	args
	)

inline

Computes the Khatri-Rao product of the input matrices.

   Given a collection of :math:`n` input matrices,

   .. math::
   A_1 \in \mathbb{R}^{M_1 \times N}, \ldots, A_n \in \mathbb{R}^{M_n \times N},

   the (column-wise) Khatri-Rao product is defined as the matrix,

   .. math::
   X = A_1 \otimes \cdots \otimes A_n \in \mathbb{R}^{(M_1 \cdots M_n) \times N},

   where the :math:`k` th column is equal to the column-wise outer product
   :math:`{A_1}_k \otimes \cdots \otimes {A_n}_k` where :math:`{A_i}_k` is the kth
   column of the ith matrix.

   Example::

   >>> A = mx.nd.array([[1, -1],
   >>>                  [2, -3]])
   >>> B = mx.nd.array([[1, 4],
   >>>                  [2, 5],
   >>>                  [3, 6]])
   >>> C = mx.nd.khatri_rao(A, B)
   >>> print(C.asnumpy())
   [[  1.  -4.]
   [  2.  -5.]
   [  3.  -6.]
   [  2. -12.]
   [  4. -15.]
   [  6. -18.]]



   Defined in src/operator/contrib/krprod.cc:L108

Parameters

symbol_name	name of the resulting symbol
args	Positional input matrices

Returns: new symbol

Symbol mxnet::cpp::khatri_rao ( const std::vector< Symbol > & args )

inline

Computes the Khatri-Rao product of the input matrices.

   Given a collection of :math:`n` input matrices,

   .. math::
   A_1 \in \mathbb{R}^{M_1 \times N}, \ldots, A_n \in \mathbb{R}^{M_n \times N},

   the (column-wise) Khatri-Rao product is defined as the matrix,

   .. math::
   X = A_1 \otimes \cdots \otimes A_n \in \mathbb{R}^{(M_1 \cdots M_n) \times N},

   where the :math:`k` th column is equal to the column-wise outer product
   :math:`{A_1}_k \otimes \cdots \otimes {A_n}_k` where :math:`{A_i}_k` is the kth
   column of the ith matrix.

   Example::

   >>> A = mx.nd.array([[1, -1],
   >>>                  [2, -3]])
   >>> B = mx.nd.array([[1, 4],
   >>>                  [2, 5],
   >>>                  [3, 6]])
   >>> C = mx.nd.khatri_rao(A, B)
   >>> print(C.asnumpy())
   [[  1.  -4.]
   [  2.  -5.]
   [  3.  -6.]
   [  2. -12.]
   [  4. -15.]
   [  6. -18.]]



   Defined in src/operator/contrib/krprod.cc:L108

Parameters

args	Positional input matrices

Returns: new symbol

Symbol mxnet::cpp::L2Normalization	(	const std::string &	symbol_name,
		Symbol	data,
		mx_float	eps = `1e-10`,
		L2NormalizationMode	mode = `L2NormalizationMode::kInstance`
	)

inline

Normalize the input array using the L2 norm.

   For 1-D NDArray, it computes::

   out = data / sqrt(sum(data ** 2) + eps)

   For N-D NDArray, if the input array has shape (N, N, ..., N),

   with ``mode`` = ``instance``, it normalizes each instance in the
   array by its L2 norm.::

   for i in 0...N
   out[i,:,:,...,:] = data[i,:,:,...,:] / sqrt(sum(data[i,:,:,...,:] ** 2) + eps)

   with ``mode`` = ``channel``, it normalizes each channel in the array by its L2 norm.::

   for i in 0...N
   out[:,i,:,...,:] = data[:,i,:,...,:] / sqrt(sum(data[:,i,:,...,:] ** 2) + eps)

   with ``mode`` = ``spatial``, it normalizes the cross channel norm for each position
   in the array by its L2 norm.::

   for dim in 2...N
   for i in 0...N
   out[.....,i,...] = take(out, indices=i, axis=dim) / sqrt(sum(take(out, indices=i, axis=dim) ** 2) + eps)
   -dim-

   Example::

   x = [[[1,2],
   [3,4]],
   [[2,2],
   [5,6]]]

   L2Normalization(x, mode='instance')
   =[[[ 0.18257418  0.36514837]
   [ 0.54772252  0.73029673]]
   [[ 0.24077171  0.24077171]
   [ 0.60192931  0.72231513]]]

   L2Normalization(x, mode='channel')
   =[[[ 0.31622776  0.44721359]
   [ 0.94868326  0.89442718]]
   [[ 0.37139067  0.31622776]
   [ 0.92847669  0.94868326]]]

   L2Normalization(x, mode='spatial')
   =[[[ 0.44721359  0.89442718]
   [ 0.60000002  0.80000001]]
   [[ 0.70710677  0.70710677]
   [ 0.6401844   0.76822126]]]



   Defined in src/operator/l2_normalization.cc:L98

Parameters

symbol_name	name of the resulting symbol
data	Input array to normalize.
eps	A small constant for numerical stability.
mode	Specify the dimension along which to compute L2 norm.

Returns: new symbol

Symbol mxnet::cpp::L2Normalization	(	Symbol	data,
		mx_float	eps = `1e-10`,
		L2NormalizationMode	mode = `L2NormalizationMode::kInstance`
	)

inline

Normalize the input array using the L2 norm.

   For 1-D NDArray, it computes::

   out = data / sqrt(sum(data ** 2) + eps)

   For N-D NDArray, if the input array has shape (N, N, ..., N),

   with ``mode`` = ``instance``, it normalizes each instance in the
   array by its L2 norm.::

   for i in 0...N
   out[i,:,:,...,:] = data[i,:,:,...,:] / sqrt(sum(data[i,:,:,...,:] ** 2) + eps)

   with ``mode`` = ``channel``, it normalizes each channel in the array by its L2 norm.::

   for i in 0...N
   out[:,i,:,...,:] = data[:,i,:,...,:] / sqrt(sum(data[:,i,:,...,:] ** 2) + eps)

   with ``mode`` = ``spatial``, it normalizes the cross channel norm for each position
   in the array by its L2 norm.::

   for dim in 2...N
   for i in 0...N
   out[.....,i,...] = take(out, indices=i, axis=dim) / sqrt(sum(take(out, indices=i, axis=dim) ** 2) + eps)
   -dim-

   Example::

   x = [[[1,2],
   [3,4]],
   [[2,2],
   [5,6]]]

   L2Normalization(x, mode='instance')
   =[[[ 0.18257418  0.36514837]
   [ 0.54772252  0.73029673]]
   [[ 0.24077171  0.24077171]
   [ 0.60192931  0.72231513]]]

   L2Normalization(x, mode='channel')
   =[[[ 0.31622776  0.44721359]
   [ 0.94868326  0.89442718]]
   [[ 0.37139067  0.31622776]
   [ 0.92847669  0.94868326]]]

   L2Normalization(x, mode='spatial')
   =[[[ 0.44721359  0.89442718]
   [ 0.60000002  0.80000001]]
   [[ 0.70710677  0.70710677]
   [ 0.6401844   0.76822126]]]



   Defined in src/operator/l2_normalization.cc:L98

Parameters

data	Input array to normalize.
eps	A small constant for numerical stability.
mode	Specify the dimension along which to compute L2 norm.

Returns: new symbol

Symbol mxnet::cpp::LayerNorm	(	const std::string &	symbol_name,
		Symbol	data,
		Symbol	gamma,
		Symbol	beta,
		int	axis = `-1`,
		mx_float	eps = `1e-05`,
		bool	output_mean_var = `false`
	)

inline

Layer normalization.

   Normalizes the channels of the input tensor by mean and variance, and applies a scale ``gamma`` as
   well as offset ``beta``.

   Assume the input has more than one dimension and we normalize along axis 1.
   We first compute the mean and variance along this axis and then
   compute the normalized output, which has the same shape as input, as following:

   .. math::

   out = \frac{data - mean(data, axis)}{\sqrt{var(data, axis) + \epsilon}} * gamma + beta

   Both ``gamma`` and ``beta`` are learnable parameters.

   Unlike BatchNorm and InstanceNorm,  the *mean* and *var* are computed along the channel dimension.

   Assume the input has size *k* on axis 1, then both ``gamma`` and ``beta``
   have shape *(k,)*. If ``output_mean_var`` is set to be true, then outputs both ``data_mean`` and
   ``data_std``. Note that no gradient will be passed through these two outputs.

   The parameter ``axis`` specifies which axis of the input shape denotes
   the 'channel' (separately normalized groups).  The default is -1, which sets
   axis to be the last item in the input shape.



   Defined in src/operator/nn/layer_norm.cc:L94

Parameters

symbol_name	name of the resulting symbol
data	Input data to layer normalization
gamma	gamma array
beta	beta array
axis	The axis to perform layer normalization. Usually, this should be the axis of the channel dimension. Negative values mean indexing from right to left.
eps	An `epsilon` parameter to prevent division by 0.
output_mean_var	Output the mean and std calculated along the given axis.

Returns: new symbol

Symbol mxnet::cpp::LayerNorm	(	Symbol	data,
		Symbol	gamma,
		Symbol	beta,
		int	axis = `-1`,
		mx_float	eps = `1e-05`,
		bool	output_mean_var = `false`
	)

inline

Layer normalization.

   Normalizes the channels of the input tensor by mean and variance, and applies a scale ``gamma`` as
   well as offset ``beta``.

   Assume the input has more than one dimension and we normalize along axis 1.
   We first compute the mean and variance along this axis and then
   compute the normalized output, which has the same shape as input, as following:

   .. math::

   out = \frac{data - mean(data, axis)}{\sqrt{var(data, axis) + \epsilon}} * gamma + beta

   Both ``gamma`` and ``beta`` are learnable parameters.

   Unlike BatchNorm and InstanceNorm,  the *mean* and *var* are computed along the channel dimension.

   Assume the input has size *k* on axis 1, then both ``gamma`` and ``beta``
   have shape *(k,)*. If ``output_mean_var`` is set to be true, then outputs both ``data_mean`` and
   ``data_std``. Note that no gradient will be passed through these two outputs.

   The parameter ``axis`` specifies which axis of the input shape denotes
   the 'channel' (separately normalized groups).  The default is -1, which sets
   axis to be the last item in the input shape.



   Defined in src/operator/nn/layer_norm.cc:L94

Parameters

data	Input data to layer normalization
gamma	gamma array
beta	beta array
axis	The axis to perform layer normalization. Usually, this should be the axis of the channel dimension. Negative values mean indexing from right to left.
eps	An `epsilon` parameter to prevent division by 0.
output_mean_var	Output the mean and std calculated along the given axis.

Returns: new symbol

Symbol mxnet::cpp::LeakyReLU	(	const std::string &	symbol_name,
		Symbol	data,
		Symbol	gamma,
		LeakyReLUActType	act_type = `LeakyReLUActType::kLeaky`,
		mx_float	slope = `0.25`,
		mx_float	lower_bound = `0.125`,
		mx_float	upper_bound = `0.334`
	)

inline

Applies Leaky rectified linear unit activation element-wise to the input.

   Leaky ReLUs attempt to fix the "dying ReLU" problem by allowing a small `slope`
   when the input is negative and has a slope of one when input is positive.

   The following modified ReLU Activation functions are supported:

   - *elu*: Exponential Linear Unit. `y = x > 0 ? x : slope * (exp(x)-1)`
   - *selu*: Scaled Exponential Linear Unit. `y = lambda * (x > 0 ? x : alpha * (exp(x) - 1))` where
   *lambda = 1.0507009873554804934193349852946* and *alpha = 1.6732632423543772848170429916717*.
   - *leaky*: Leaky ReLU. `y = x > 0 ? x : slope * x`
   - *prelu*: Parametric ReLU. This is same as *leaky* except that `slope` is learnt during training.
   - *rrelu*: Randomized ReLU. same as *leaky* but the `slope` is uniformly and randomly chosen from
   *[lower_bound, upper_bound)* for training, while fixed to be
   *(lower_bound+upper_bound)/2* for inference.



   Defined in src/operator/leaky_relu.cc:L65

Parameters

symbol_name	name of the resulting symbol
data	Input data to activation function.
gamma	Slope parameter for PReLU. Only required when act_type is 'prelu'. It should be either a vector of size 1, or the same size as the second dimension of data.
act_type	Activation function to be applied.
slope	Init slope for the activation. (For leaky and elu only)
lower_bound	Lower bound of random slope. (For rrelu only)
upper_bound	Upper bound of random slope. (For rrelu only)

Returns: new symbol

Symbol mxnet::cpp::LeakyReLU	(	Symbol	data,
		Symbol	gamma,
		LeakyReLUActType	act_type = `LeakyReLUActType::kLeaky`,
		mx_float	slope = `0.25`,
		mx_float	lower_bound = `0.125`,
		mx_float	upper_bound = `0.334`
	)

inline

Applies Leaky rectified linear unit activation element-wise to the input.

   Leaky ReLUs attempt to fix the "dying ReLU" problem by allowing a small `slope`
   when the input is negative and has a slope of one when input is positive.

   The following modified ReLU Activation functions are supported:

   - *elu*: Exponential Linear Unit. `y = x > 0 ? x : slope * (exp(x)-1)`
   - *selu*: Scaled Exponential Linear Unit. `y = lambda * (x > 0 ? x : alpha * (exp(x) - 1))` where
   *lambda = 1.0507009873554804934193349852946* and *alpha = 1.6732632423543772848170429916717*.
   - *leaky*: Leaky ReLU. `y = x > 0 ? x : slope * x`
   - *prelu*: Parametric ReLU. This is same as *leaky* except that `slope` is learnt during training.
   - *rrelu*: Randomized ReLU. same as *leaky* but the `slope` is uniformly and randomly chosen from
   *[lower_bound, upper_bound)* for training, while fixed to be
   *(lower_bound+upper_bound)/2* for inference.



   Defined in src/operator/leaky_relu.cc:L65

Parameters

data	Input data to activation function.
gamma	Slope parameter for PReLU. Only required when act_type is 'prelu'. It should be either a vector of size 1, or the same size as the second dimension of data.
act_type	Activation function to be applied.
slope	Init slope for the activation. (For leaky and elu only)
lower_bound	Lower bound of random slope. (For rrelu only)
upper_bound	Upper bound of random slope. (For rrelu only)

Returns: new symbol

Symbol mxnet::cpp::LinearRegressionOutput	(	const std::string &	symbol_name,
		Symbol	data,
		Symbol	label,
		mx_float	grad_scale = `1`
	)

inline

Computes and optimizes for squared loss during backward propagation. Just outputs data during forward propagation.

If :math:`\hat{y}_i` is the predicted value of the i-th sample, and :math:`y_i` is the corresponding target value, then the squared loss estimated over :math:`n` samples is defined as

:math:`\text{SquaredLoss}(\textbf{Y}, \hat{\textbf{Y}} ) = \frac{1}{n} \sum_{i=0}^{n-1} \lVert \textbf{y}_i - \hat{\textbf{y}}_i \rVert_2`

.. note:: Use the LinearRegressionOutput as the final output layer of a net.

The storage type of label can be default or csr

LinearRegressionOutput(default, default) = default
LinearRegressionOutput(default, csr) = default

By default, gradients of this loss function are scaled by factor `1/m`, where m is the number of regression outputs of a training example. The parameter `grad_scale` can be used to change this scale to `grad_scale/m`.

   Defined in src/operator/regression_output.cc:L92

Parameters

symbol_name	name of the resulting symbol
data	Input data to the function.
label	Input label to the function.
grad_scale	Scale the gradient by a float factor

Returns: new symbol

Symbol mxnet::cpp::LinearRegressionOutput	(	Symbol	data,
		Symbol	label,
		mx_float	grad_scale = `1`
	)

inline

Computes and optimizes for squared loss during backward propagation. Just outputs data during forward propagation.

If :math:`\hat{y}_i` is the predicted value of the i-th sample, and :math:`y_i` is the corresponding target value, then the squared loss estimated over :math:`n` samples is defined as

:math:`\text{SquaredLoss}(\textbf{Y}, \hat{\textbf{Y}} ) = \frac{1}{n} \sum_{i=0}^{n-1} \lVert \textbf{y}_i - \hat{\textbf{y}}_i \rVert_2`

.. note:: Use the LinearRegressionOutput as the final output layer of a net.

The storage type of label can be default or csr

LinearRegressionOutput(default, default) = default
LinearRegressionOutput(default, csr) = default

By default, gradients of this loss function are scaled by factor 1/m, where m is the number of regression outputs of a training example. The parameter grad_scale can be used to change this scale to grad_scale/m.

   Defined in src/operator/regression_output.cc:L92

Parameters

data	Input data to the function.
label	Input label to the function.
grad_scale	Scale the gradient by a float factor

Returns: new symbol

Symbol mxnet::cpp::log	(	const std::string &	symbol_name,
		Symbol	data
	)

inline

Returns element-wise Natural logarithmic value of the input.

   The natural logarithm is logarithm in base *e*, so that ``log(exp(x)) = x``

   The storage type of ``log`` output is always dense



   Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L941

Parameters

symbol_name	name of the resulting symbol
data	The input array.

Returns: new symbol

Symbol mxnet::cpp::log ( Symbol data )

inline

Returns element-wise Natural logarithmic value of the input.

   The natural logarithm is logarithm in base *e*, so that ``log(exp(x)) = x``

   The storage type of ``log`` output is always dense



   Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L941

Parameters

data	The input array.

Returns: new symbol

Symbol mxnet::cpp::log10	(	const std::string &	symbol_name,
		Symbol	data
	)

inline

Returns element-wise Base-10 logarithmic value of the input.

   ``10**log10(x) = x``

   The storage type of ``log10`` output is always dense



   Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L953

Parameters

symbol_name	name of the resulting symbol
data	The input array.

Returns: new symbol

Symbol mxnet::cpp::log10 ( Symbol data )

inline

Returns element-wise Base-10 logarithmic value of the input.

   ``10**log10(x) = x``

   The storage type of ``log10`` output is always dense



   Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L953

Parameters

data	The input array.

Returns: new symbol

Symbol mxnet::cpp::log1p	(	const std::string &	symbol_name,
		Symbol	data
	)

inline

Returns element-wise log(1 + x) value of the input.

   This function is more accurate than ``log(1 + x)``  for small ``x`` so that
   :math:`1+x\approx 1`

   The storage type of ``log1p`` output depends upon the input storage type:

   - log1p(default) = default
   - log1p(row_sparse) = row_sparse
   - log1p(csr) = csr



   Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L990

Parameters

symbol_name	name of the resulting symbol
data	The input array.

Returns: new symbol

Symbol mxnet::cpp::log1p ( Symbol data )

inline

Returns element-wise log(1 + x) value of the input.

   This function is more accurate than ``log(1 + x)``  for small ``x`` so that
   :math:`1+x\approx 1`

   The storage type of ``log1p`` output depends upon the input storage type:

   - log1p(default) = default
   - log1p(row_sparse) = row_sparse
   - log1p(csr) = csr



   Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L990

Parameters

data	The input array.

Returns: new symbol

Symbol mxnet::cpp::log2	(	const std::string &	symbol_name,
		Symbol	data
	)

inline

Returns element-wise Base-2 logarithmic value of the input.

   ``2**log2(x) = x``

   The storage type of ``log2`` output is always dense



   Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L965

Parameters

symbol_name	name of the resulting symbol
data	The input array.

Returns: new symbol

Symbol mxnet::cpp::log2 ( Symbol data )

inline

Returns element-wise Base-2 logarithmic value of the input.

   ``2**log2(x) = x``

   The storage type of ``log2`` output is always dense



   Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L965

Parameters

data	The input array.

Returns: new symbol

Symbol mxnet::cpp::log_softmax	(	const std::string &	symbol_name,
		Symbol	data,
		int	axis = `-1`,
		dmlc::optional< double >	temperature = `dmlc::optional<double>()`
	)

inline

Computes the log softmax of the input. This is equivalent to computing softmax followed by log.

Examples::

>>> x = mx.nd.array([1, 2, .1])
>>> mx.nd.log_softmax(x).asnumpy()
array([-1.41702998, -0.41702995, -2.31702995], dtype=float32)

>>> x = mx.nd.array( [[1, 2, .1],[.1, 2, 1]] )
>>> mx.nd.log_softmax(x, axis=0).asnumpy()
array([[-0.34115392, -0.69314718, -1.24115396],
       [-1.24115396, -0.69314718, -0.34115392]], dtype=float32)

Parameters

symbol_name	name of the resulting symbol
data	The input array.
axis	The axis along which to compute softmax.
temperature	Temperature parameter in softmax

Returns: new symbol

Symbol mxnet::cpp::log_softmax	(	Symbol	data,
		int	axis = `-1`,
		dmlc::optional< double >	temperature = `dmlc::optional<double>()`
	)

inline

Computes the log softmax of the input. This is equivalent to computing softmax followed by log.

Examples::

>>> x = mx.nd.array([1, 2, .1])
>>> mx.nd.log_softmax(x).asnumpy()
array([-1.41702998, -0.41702995, -2.31702995], dtype=float32)

>>> x = mx.nd.array( [[1, 2, .1],[.1, 2, 1]] )
>>> mx.nd.log_softmax(x, axis=0).asnumpy()
array([[-0.34115392, -0.69314718, -1.24115396],
       [-1.24115396, -0.69314718, -0.34115392]], dtype=float32)

Parameters

data	The input array.
axis	The axis along which to compute softmax.
temperature	Temperature parameter in softmax

Returns: new symbol

Symbol mxnet::cpp::logical_not	(	const std::string &	symbol_name,
		Symbol	data
	)

inline

Returns the result of logical NOT (!) function

   Example:
   logical_not([-2., 0., 1.]) = [0., 1., 0.]

Parameters

symbol_name	name of the resulting symbol
data	The input array.

Returns: new symbol

Symbol mxnet::cpp::logical_not ( Symbol data )

inline

Returns the result of logical NOT (!) function

   Example:
   logical_not([-2., 0., 1.]) = [0., 1., 0.]

Parameters

data	The input array.

Returns: new symbol

Symbol mxnet::cpp::LogisticRegressionOutput	(	const std::string &	symbol_name,
		Symbol	data,
		Symbol	label,
		mx_float	grad_scale = `1`
	)

inline

Applies a logistic function to the input.

   The logistic function, also known as the sigmoid function, is computed as
   :math:`\frac{1}{1+exp(-\textbf{x})}`.

   Commonly, the sigmoid is used to squash the real-valued output of a linear model
   :math:`wTx+b` into the [0,1] range so that it can be interpreted as a probability.
   It is suitable for binary classification or probability prediction tasks.

   .. note::
   Use the LogisticRegressionOutput as the final output layer of a net.

   The storage type of ``label`` can be ``default`` or ``csr``

   - LogisticRegressionOutput(default, default) = default
   - LogisticRegressionOutput(default, csr) = default

   The loss function used is the Binary Cross Entropy Loss:

   :math:`-{(y\log(p) + (1 - y)\log(1 - p))}`

   Where `y` is the ground truth probability of positive outcome for a given
   example, and `p` the probability predicted by the model. By default, gradients
   of this loss function are scaled by factor `1/m`, where m is the number of
   regression outputs of a training example.
   The parameter `grad_scale` can be used to change this scale to `grad_scale/m`.



   Defined in src/operator/regression_output.cc:L152

Parameters

symbol_name	name of the resulting symbol
data	Input data to the function.
label	Input label to the function.
grad_scale	Scale the gradient by a float factor

Returns: new symbol

Symbol mxnet::cpp::LogisticRegressionOutput	(	Symbol	data,
		Symbol	label,
		mx_float	grad_scale = `1`
	)

inline

Applies a logistic function to the input.

   The logistic function, also known as the sigmoid function, is computed as
   :math:`\frac{1}{1+exp(-\textbf{x})}`.

   Commonly, the sigmoid is used to squash the real-valued output of a linear model
   :math:`wTx+b` into the [0,1] range so that it can be interpreted as a probability.
   It is suitable for binary classification or probability prediction tasks.

   .. note::
   Use the LogisticRegressionOutput as the final output layer of a net.

   The storage type of ``label`` can be ``default`` or ``csr``

   - LogisticRegressionOutput(default, default) = default
   - LogisticRegressionOutput(default, csr) = default

   The loss function used is the Binary Cross Entropy Loss:

   :math:`-{(y\log(p) + (1 - y)\log(1 - p))}`

   Where `y` is the ground truth probability of positive outcome for a given
   example, and `p` the probability predicted by the model. By default, gradients
   of this loss function are scaled by factor `1/m`, where m is the number of
   regression outputs of a training example.
   The parameter `grad_scale` can be used to change this scale to `grad_scale/m`.



   Defined in src/operator/regression_output.cc:L152

Parameters

data	Input data to the function.
label	Input label to the function.
grad_scale	Scale the gradient by a float factor

Returns: new symbol

Symbol mxnet::cpp::LRN	(	const std::string &	symbol_name,
		Symbol	data,
		uint32_t	nsize,
		mx_float	alpha = `0.0001`,
		mx_float	beta = `0.75`,
		mx_float	knorm = `2`
	)

inline

Applies local response normalization to the input.

   The local response normalization layer performs "lateral inhibition" by normalizing
   over local input regions.

   If :math:`a_{x,y}^{i}` is the activity of a neuron computed by applying kernel :math:`i` at position
   :math:`(x, y)` and then applying the ReLU nonlinearity, the response-normalized
   activity :math:`b_{x,y}^{i}` is given by the expression:

   .. math::
   b_{x,y}^{i} = \frac{a_{x,y}^{i}}{\Bigg({k + \frac{\alpha}{n} \sum_{j=max(0, i-\frac{n}{2})}^{min(N-1, i+\frac{n}{2})} (a_{x,y}^{j})^{2}}\Bigg)^{\beta}}

   where the sum runs over :math:`n` "adjacent" kernel maps at the same spatial position, and :math:`N` is the total
   number of kernels in the layer.



   Defined in src/operator/nn/lrn.cc:L178

Parameters

symbol_name	name of the resulting symbol
data	Input data to LRN
nsize	normalization window width in elements.
alpha	The variance scaling parameter :math:`\alpha` in the LRN expression.
beta	The power parameter :math:`\beta` in the LRN expression.
knorm	The parameter :math:`k` in the LRN expression.

Returns: new symbol

Symbol mxnet::cpp::LRN	(	Symbol	data,
		uint32_t	nsize,
		mx_float	alpha = `0.0001`,
		mx_float	beta = `0.75`,
		mx_float	knorm = `2`
	)

inline

Applies local response normalization to the input.

   The local response normalization layer performs "lateral inhibition" by normalizing
   over local input regions.

   If :math:`a_{x,y}^{i}` is the activity of a neuron computed by applying kernel :math:`i` at position
   :math:`(x, y)` and then applying the ReLU nonlinearity, the response-normalized
   activity :math:`b_{x,y}^{i}` is given by the expression:

   .. math::
   b_{x,y}^{i} = \frac{a_{x,y}^{i}}{\Bigg({k + \frac{\alpha}{n} \sum_{j=max(0, i-\frac{n}{2})}^{min(N-1, i+\frac{n}{2})} (a_{x,y}^{j})^{2}}\Bigg)^{\beta}}

   where the sum runs over :math:`n` "adjacent" kernel maps at the same spatial position, and :math:`N` is the total
   number of kernels in the layer.



   Defined in src/operator/nn/lrn.cc:L178

Parameters

data	Input data to LRN
nsize	normalization window width in elements.
alpha	The variance scaling parameter :math:`\alpha` in the LRN expression.
beta	The power parameter :math:`\beta` in the LRN expression.
knorm	The parameter :math:`k` in the LRN expression.

Returns: new symbol

Symbol mxnet::cpp::MAERegressionOutput	(	const std::string &	symbol_name,
		Symbol	data,
		Symbol	label,
		mx_float	grad_scale = `1`
	)

inline

Computes mean absolute error of the input.

   MAE is a risk metric corresponding to the expected value of the absolute error.

   If :math:`\hat{y}_i` is the predicted value of the i-th sample, and :math:`y_i` is the corresponding target value,
   then the mean absolute error (MAE) estimated over :math:`n` samples is defined as

   :math:`\text{MAE}(\textbf{Y}, \hat{\textbf{Y}} ) = \frac{1}{n} \sum_{i=0}^{n-1} \lVert \textbf{y}_i - \hat{\textbf{y}}_i \rVert_1`

   .. note::
   Use the MAERegressionOutput as the final output layer of a net.

   The storage type of ``label`` can be ``default`` or ``csr``

   - MAERegressionOutput(default, default) = default
   - MAERegressionOutput(default, csr) = default

   By default, gradients of this loss function are scaled by factor `1/m`, where m
   is the number of regression outputs of a training example.
   The parameter `grad_scale` can be used to change this scale to `grad_scale/m`.



   Defined in src/operator/regression_output.cc:L120

Parameters

symbol_name	name of the resulting symbol
data	Input data to the function.
label	Input label to the function.
grad_scale	Scale the gradient by a float factor

Returns: new symbol

Symbol mxnet::cpp::MAERegressionOutput	(	Symbol	data,
		Symbol	label,
		mx_float	grad_scale = `1`
	)

inline

Computes mean absolute error of the input.

   MAE is a risk metric corresponding to the expected value of the absolute error.

   If :math:`\hat{y}_i` is the predicted value of the i-th sample, and :math:`y_i` is the corresponding target value,
   then the mean absolute error (MAE) estimated over :math:`n` samples is defined as

   :math:`\text{MAE}(\textbf{Y}, \hat{\textbf{Y}} ) = \frac{1}{n} \sum_{i=0}^{n-1} \lVert \textbf{y}_i - \hat{\textbf{y}}_i \rVert_1`

   .. note::
   Use the MAERegressionOutput as the final output layer of a net.

   The storage type of ``label`` can be ``default`` or ``csr``

   - MAERegressionOutput(default, default) = default
   - MAERegressionOutput(default, csr) = default

   By default, gradients of this loss function are scaled by factor `1/m`, where m
   is the number of regression outputs of a training example.
   The parameter `grad_scale` can be used to change this scale to `grad_scale/m`.



   Defined in src/operator/regression_output.cc:L120

Parameters

data	Input data to the function.
label	Input label to the function.
grad_scale	Scale the gradient by a float factor

Returns: new symbol

Symbol mxnet::cpp::make_loss	(	const std::string &	symbol_name,
		Symbol	data
	)

inline

Make your own loss function in network construction.

   This operator accepts a customized loss function symbol as a terminal loss and
   the symbol should be an operator with no backward dependency.
   The output of this function is the gradient of loss with respect to the input data.

   For example, if you are making a cross entropy loss function. Assume ``out`` is the
   predicted output and ``label`` is the true label, then the cross entropy can be defined as::

   cross_entropy = label * log(out) + (1 - label) * log(1 - out)
   loss = make_loss(cross_entropy)

   We will need to use ``make_loss`` when we are creating our own loss function or we want to
   combine multiple loss functions. Also we may want to stop some variables' gradients
   from backpropagation. See more detail in ``BlockGrad`` or ``stop_gradient``.

   The storage type of ``make_loss`` output depends upon the input storage type:

   - make_loss(default) = default
   - make_loss(row_sparse) = row_sparse



   Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L298

Parameters

symbol_name	name of the resulting symbol
data	The input array.

Returns: new symbol

Symbol mxnet::cpp::make_loss ( Symbol data )

inline

Make your own loss function in network construction.

   This operator accepts a customized loss function symbol as a terminal loss and
   the symbol should be an operator with no backward dependency.
   The output of this function is the gradient of loss with respect to the input data.

   For example, if you are making a cross entropy loss function. Assume ``out`` is the
   predicted output and ``label`` is the true label, then the cross entropy can be defined as::

   cross_entropy = label * log(out) + (1 - label) * log(1 - out)
   loss = make_loss(cross_entropy)

   We will need to use ``make_loss`` when we are creating our own loss function or we want to
   combine multiple loss functions. Also we may want to stop some variables' gradients
   from backpropagation. See more detail in ``BlockGrad`` or ``stop_gradient``.

   The storage type of ``make_loss`` output depends upon the input storage type:

   - make_loss(default) = default
   - make_loss(row_sparse) = row_sparse



   Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L298

Parameters

data	The input array.

Returns: new symbol

Symbol mxnet::cpp::MakeLoss	(	const std::string &	symbol_name,
		Symbol	data,
		mx_float	grad_scale = `1`,
		mx_float	valid_thresh = `0`,
		MakeLossNormalization	normalization = `MakeLossNormalization::kNull`
	)

inline

Make your own loss function in network construction.

   This operator accepts a customized loss function symbol as a terminal loss and
   the symbol should be an operator with no backward dependency.
   The output of this function is the gradient of loss with respect to the input data.

   For example, if you are making a cross entropy loss function. Assume ``out`` is the
   predicted output and ``label`` is the true label, then the cross entropy can be defined as::

   cross_entropy = label * log(out) + (1 - label) * log(1 - out)
   loss = MakeLoss(cross_entropy)

   We will need to use ``MakeLoss`` when we are creating our own loss function or we want to
   combine multiple loss functions. Also we may want to stop some variables' gradients
   from backpropagation. See more detail in ``BlockGrad`` or ``stop_gradient``.

   In addition, we can give a scale to the loss by setting ``grad_scale``,
   so that the gradient of the loss will be rescaled in the backpropagation.

   .. note:: This operator should be used as a Symbol instead of NDArray.



   Defined in src/operator/make_loss.cc:L71

Parameters

symbol_name	name of the resulting symbol
data	Input array.
grad_scale	Gradient scale as a supplement to unary and binary operators
valid_thresh	clip each element in the array to 0 when it is less than ``valid_thresh``. This is used when ``normalization`` is set to ``'valid'``.
normalization	If this is set to null, the output gradient will not be normalized. If this is set to batch, the output gradient will be divided by the batch size. If this is set to valid, the output gradient will be divided by the number of valid input elements.

Returns: new symbol

Symbol mxnet::cpp::MakeLoss	(	Symbol	data,
		mx_float	grad_scale = `1`,
		mx_float	valid_thresh = `0`,
		MakeLossNormalization	normalization = `MakeLossNormalization::kNull`
	)

inline

Make your own loss function in network construction.

   This operator accepts a customized loss function symbol as a terminal loss and
   the symbol should be an operator with no backward dependency.
   The output of this function is the gradient of loss with respect to the input data.

   For example, if you are making a cross entropy loss function. Assume ``out`` is the
   predicted output and ``label`` is the true label, then the cross entropy can be defined as::

   cross_entropy = label * log(out) + (1 - label) * log(1 - out)
   loss = MakeLoss(cross_entropy)

   We will need to use ``MakeLoss`` when we are creating our own loss function or we want to
   combine multiple loss functions. Also we may want to stop some variables' gradients
   from backpropagation. See more detail in ``BlockGrad`` or ``stop_gradient``.

   In addition, we can give a scale to the loss by setting ``grad_scale``,
   so that the gradient of the loss will be rescaled in the backpropagation.

   .. note:: This operator should be used as a Symbol instead of NDArray.



   Defined in src/operator/make_loss.cc:L71

Parameters

data	Input array.
grad_scale	Gradient scale as a supplement to unary and binary operators
valid_thresh	clip each element in the array to 0 when it is less than ``valid_thresh``. This is used when ``normalization`` is set to ``'valid'``.
normalization	If this is set to null, the output gradient will not be normalized. If this is set to batch, the output gradient will be divided by the batch size. If this is set to valid, the output gradient will be divided by the number of valid input elements.

Returns: new symbol

Symbol mxnet::cpp::max	(	const std::string &	symbol_name,
		Symbol	data,
		dmlc::optional< Shape >	axis = `dmlc::optional<Shape>()`,
		bool	keepdims = `false`,
		bool	exclude = `false`
	)

inline

Computes the max of array elements over given axes.

   Defined in src/operator/tensor/broadcast_reduce_op_value.cc:L190

Parameters

symbol_name	name of the resulting symbol
data	The input
axis	The axis or axes along which to perform the reduction. The default, `axis=()`, will compute over all elements into a scalar array with shape `(1,)`. If `axis` is int, a reduction is performed on a particular axis. If `axis` is a tuple of ints, a reduction is performed on all the axes specified in the tuple. If `exclude` is true, reduction will be performed on the axes that are NOT in axis instead. Negative values means indexing from right to left.
keepdims	If this is set to `True`, the reduced axes are left in the result as dimension with size one.
exclude	Whether to perform reduction on axis that are NOT in axis instead.

Returns: new symbol

Symbol mxnet::cpp::max	(	Symbol	data,
		dmlc::optional< Shape >	axis = `dmlc::optional<Shape>()`,
		bool	keepdims = `false`,
		bool	exclude = `false`
	)

inline

Computes the max of array elements over given axes.

   Defined in src/operator/tensor/broadcast_reduce_op_value.cc:L190

Parameters

data	The input
axis	The axis or axes along which to perform the reduction. The default, `axis=()`, will compute over all elements into a scalar array with shape `(1,)`. If `axis` is int, a reduction is performed on a particular axis. If `axis` is a tuple of ints, a reduction is performed on all the axes specified in the tuple. If `exclude` is true, reduction will be performed on the axes that are NOT in axis instead. Negative values means indexing from right to left.
keepdims	If this is set to `True`, the reduced axes are left in the result as dimension with size one.
exclude	Whether to perform reduction on axis that are NOT in axis instead.

Returns: new symbol

Symbol mxnet::cpp::mean	(	const std::string &	symbol_name,
		Symbol	data,
		dmlc::optional< Shape >	axis = `dmlc::optional<Shape>()`,
		bool	keepdims = `false`,
		bool	exclude = `false`
	)

inline

Computes the mean of array elements over given axes.

   Defined in src/operator/tensor/broadcast_reduce_op_value.cc:L131

Parameters

symbol_name	name of the resulting symbol
data	The input
axis	The axis or axes along which to perform the reduction. The default, `axis=()`, will compute over all elements into a scalar array with shape `(1,)`. If `axis` is int, a reduction is performed on a particular axis. If `axis` is a tuple of ints, a reduction is performed on all the axes specified in the tuple. If `exclude` is true, reduction will be performed on the axes that are NOT in axis instead. Negative values means indexing from right to left.
keepdims	If this is set to `True`, the reduced axes are left in the result as dimension with size one.
exclude	Whether to perform reduction on axis that are NOT in axis instead.

Returns: new symbol

Symbol mxnet::cpp::mean	(	Symbol	data,
		dmlc::optional< Shape >	axis = `dmlc::optional<Shape>()`,
		bool	keepdims = `false`,
		bool	exclude = `false`
	)

inline

Computes the mean of array elements over given axes.

   Defined in src/operator/tensor/broadcast_reduce_op_value.cc:L131

Parameters

data	The input
axis	The axis or axes along which to perform the reduction. The default, `axis=()`, will compute over all elements into a scalar array with shape `(1,)`. If `axis` is int, a reduction is performed on a particular axis. If `axis` is a tuple of ints, a reduction is performed on all the axes specified in the tuple. If `exclude` is true, reduction will be performed on the axes that are NOT in axis instead. Negative values means indexing from right to left.
keepdims	If this is set to `True`, the reduced axes are left in the result as dimension with size one.
exclude	Whether to perform reduction on axis that are NOT in axis instead.

Returns: new symbol

Symbol mxnet::cpp::min	(	const std::string &	symbol_name,
		Symbol	data,
		dmlc::optional< Shape >	axis = `dmlc::optional<Shape>()`,
		bool	keepdims = `false`,
		bool	exclude = `false`
	)

inline

Computes the min of array elements over given axes.

   Defined in src/operator/tensor/broadcast_reduce_op_value.cc:L204

Parameters

symbol_name	name of the resulting symbol
data	The input
axis	The axis or axes along which to perform the reduction. The default, `axis=()`, will compute over all elements into a scalar array with shape `(1,)`. If `axis` is int, a reduction is performed on a particular axis. If `axis` is a tuple of ints, a reduction is performed on all the axes specified in the tuple. If `exclude` is true, reduction will be performed on the axes that are NOT in axis instead. Negative values means indexing from right to left.
keepdims	If this is set to `True`, the reduced axes are left in the result as dimension with size one.
exclude	Whether to perform reduction on axis that are NOT in axis instead.

Returns: new symbol

Symbol mxnet::cpp::min	(	Symbol	data,
		dmlc::optional< Shape >	axis = `dmlc::optional<Shape>()`,
		bool	keepdims = `false`,
		bool	exclude = `false`
	)

inline

Computes the min of array elements over given axes.

   Defined in src/operator/tensor/broadcast_reduce_op_value.cc:L204

Parameters

data	The input
axis	The axis or axes along which to perform the reduction. The default, `axis=()`, will compute over all elements into a scalar array with shape `(1,)`. If `axis` is int, a reduction is performed on a particular axis. If `axis` is a tuple of ints, a reduction is performed on all the axes specified in the tuple. If `exclude` is true, reduction will be performed on the axes that are NOT in axis instead. Negative values means indexing from right to left.
keepdims	If this is set to `True`, the reduced axes are left in the result as dimension with size one.
exclude	Whether to perform reduction on axis that are NOT in axis instead.

Returns: new symbol

Symbol mxnet::cpp::mp_sgd_mom_update	(	const std::string &	symbol_name,
		Symbol	weight,
		Symbol	grad,
		Symbol	mom,
		Symbol	weight32,
		mx_float	lr,
		mx_float	momentum = `0`,
		mx_float	wd = `0`,
		mx_float	rescale_grad = `1`,
		mx_float	clip_gradient = `-1`,
		bool	lazy_update = `true`
	)

inline

Updater function for multi-precision sgd optimizer

Parameters

symbol_name	name of the resulting symbol
weight	Weight
grad	Gradient
mom	Momentum
weight32	Weight32
lr	Learning rate
momentum	The decay rate of momentum estimates at each epoch.
wd	Weight decay augments the objective function with a regularization term that penalizes large weights. The penalty scales with the square of the magnitude of each weight.
rescale_grad	Rescale gradient to grad = rescale_grad*grad.
clip_gradient	Clip gradient to the range of [-clip_gradient, clip_gradient]. If clip_gradient <= 0, gradient clipping is turned off. grad = max(min(grad, clip_gradient), -clip_gradient).
lazy_update	If true, lazy updates are applied if gradient's stype is row_sparse

Returns: new symbol

Symbol mxnet::cpp::mp_sgd_mom_update	(	Symbol	weight,
		Symbol	grad,
		Symbol	mom,
		Symbol	weight32,
		mx_float	lr,
		mx_float	momentum = `0`,
		mx_float	wd = `0`,
		mx_float	rescale_grad = `1`,
		mx_float	clip_gradient = `-1`,
		bool	lazy_update = `true`
	)

inline

Updater function for multi-precision sgd optimizer

Parameters

weight	Weight
grad	Gradient
mom	Momentum
weight32	Weight32
lr	Learning rate
momentum	The decay rate of momentum estimates at each epoch.
wd	Weight decay augments the objective function with a regularization term that penalizes large weights. The penalty scales with the square of the magnitude of each weight.
rescale_grad	Rescale gradient to grad = rescale_grad*grad.
clip_gradient	Clip gradient to the range of [-clip_gradient, clip_gradient]. If clip_gradient <= 0, gradient clipping is turned off. grad = max(min(grad, clip_gradient), -clip_gradient).
lazy_update	If true, lazy updates are applied if gradient's stype is row_sparse

Returns: new symbol

Symbol mxnet::cpp::mp_sgd_update	(	const std::string &	symbol_name,
		Symbol	weight,
		Symbol	grad,
		Symbol	weight32,
		mx_float	lr,
		mx_float	wd = `0`,
		mx_float	rescale_grad = `1`,
		mx_float	clip_gradient = `-1`,
		bool	lazy_update = `true`
	)

inline

Updater function for multi-precision sgd optimizer

Parameters

symbol_name	name of the resulting symbol
weight	Weight
grad	gradient
weight32	Weight32
lr	Learning rate
wd	Weight decay augments the objective function with a regularization term that penalizes large weights. The penalty scales with the square of the magnitude of each weight.
rescale_grad	Rescale gradient to grad = rescale_grad*grad.
clip_gradient	Clip gradient to the range of [-clip_gradient, clip_gradient]. If clip_gradient <= 0, gradient clipping is turned off. grad = max(min(grad, clip_gradient), -clip_gradient).
lazy_update	If true, lazy updates are applied if gradient's stype is row_sparse.

Returns: new symbol

Symbol mxnet::cpp::mp_sgd_update	(	Symbol	weight,
		Symbol	grad,
		Symbol	weight32,
		mx_float	lr,
		mx_float	wd = `0`,
		mx_float	rescale_grad = `1`,
		mx_float	clip_gradient = `-1`,
		bool	lazy_update = `true`
	)

inline

Updater function for multi-precision sgd optimizer

Parameters

weight	Weight
grad	gradient
weight32	Weight32
lr	Learning rate
wd	Weight decay augments the objective function with a regularization term that penalizes large weights. The penalty scales with the square of the magnitude of each weight.
rescale_grad	Rescale gradient to grad = rescale_grad*grad.
clip_gradient	Clip gradient to the range of [-clip_gradient, clip_gradient]. If clip_gradient <= 0, gradient clipping is turned off. grad = max(min(grad, clip_gradient), -clip_gradient).
lazy_update	If true, lazy updates are applied if gradient's stype is row_sparse.

Returns: new symbol

Symbol mxnet::cpp::nanprod	(	const std::string &	symbol_name,
		Symbol	data,
		dmlc::optional< Shape >	axis = `dmlc::optional<Shape>()`,
		bool	keepdims = `false`,
		bool	exclude = `false`
	)

inline

Computes the product of array elements over given axes treating Not a Numbers (``NaN``) as one.

   Defined in src/operator/tensor/broadcast_reduce_op_value.cc:L176

Parameters

symbol_name	name of the resulting symbol
data	The input
axis	The axis or axes along which to perform the reduction. The default, `axis=()`, will compute over all elements into a scalar array with shape `(1,)`. If `axis` is int, a reduction is performed on a particular axis. If `axis` is a tuple of ints, a reduction is performed on all the axes specified in the tuple. If `exclude` is true, reduction will be performed on the axes that are NOT in axis instead. Negative values means indexing from right to left.
keepdims	If this is set to `True`, the reduced axes are left in the result as dimension with size one.
exclude	Whether to perform reduction on axis that are NOT in axis instead.

Returns: new symbol

Symbol mxnet::cpp::nanprod	(	Symbol	data,
		dmlc::optional< Shape >	axis = `dmlc::optional<Shape>()`,
		bool	keepdims = `false`,
		bool	exclude = `false`
	)

inline

Computes the product of array elements over given axes treating Not a Numbers (``NaN``) as one.

   Defined in src/operator/tensor/broadcast_reduce_op_value.cc:L176

Parameters

data	The input
axis	The axis or axes along which to perform the reduction. The default, `axis=()`, will compute over all elements into a scalar array with shape `(1,)`. If `axis` is int, a reduction is performed on a particular axis. If `axis` is a tuple of ints, a reduction is performed on all the axes specified in the tuple. If `exclude` is true, reduction will be performed on the axes that are NOT in axis instead. Negative values means indexing from right to left.
keepdims	If this is set to `True`, the reduced axes are left in the result as dimension with size one.
exclude	Whether to perform reduction on axis that are NOT in axis instead.

Returns: new symbol

Symbol mxnet::cpp::nansum	(	const std::string &	symbol_name,
		Symbol	data,
		dmlc::optional< Shape >	axis = `dmlc::optional<Shape>()`,
		bool	keepdims = `false`,
		bool	exclude = `false`
	)

inline

Computes the sum of array elements over given axes treating Not a Numbers (``NaN``) as zero.

   Defined in src/operator/tensor/broadcast_reduce_op_value.cc:L161

Parameters

symbol_name	name of the resulting symbol
data	The input
axis	The axis or axes along which to perform the reduction. The default, `axis=()`, will compute over all elements into a scalar array with shape `(1,)`. If `axis` is int, a reduction is performed on a particular axis. If `axis` is a tuple of ints, a reduction is performed on all the axes specified in the tuple. If `exclude` is true, reduction will be performed on the axes that are NOT in axis instead. Negative values means indexing from right to left.
keepdims	If this is set to `True`, the reduced axes are left in the result as dimension with size one.
exclude	Whether to perform reduction on axis that are NOT in axis instead.

Returns: new symbol

Symbol mxnet::cpp::nansum	(	Symbol	data,
		dmlc::optional< Shape >	axis = `dmlc::optional<Shape>()`,
		bool	keepdims = `false`,
		bool	exclude = `false`
	)

inline

Computes the sum of array elements over given axes treating Not a Numbers (``NaN``) as zero.

   Defined in src/operator/tensor/broadcast_reduce_op_value.cc:L161

Parameters

data	The input
axis	The axis or axes along which to perform the reduction. The default, `axis=()`, will compute over all elements into a scalar array with shape `(1,)`. If `axis` is int, a reduction is performed on a particular axis. If `axis` is a tuple of ints, a reduction is performed on all the axes specified in the tuple. If `exclude` is true, reduction will be performed on the axes that are NOT in axis instead. Negative values means indexing from right to left.
keepdims	If this is set to `True`, the reduced axes are left in the result as dimension with size one.
exclude	Whether to perform reduction on axis that are NOT in axis instead.

Returns: new symbol

Symbol mxnet::cpp::negative	(	const std::string &	symbol_name,
		Symbol	data
	)

inline

Numerical negative of the argument, element-wise.

   The storage type of ``negative`` output depends upon the input storage type:

   - negative(default) = default
   - negative(row_sparse) = row_sparse
   - negative(csr) = csr

Parameters

symbol_name	name of the resulting symbol
data	The input array.

Returns: new symbol

Symbol mxnet::cpp::negative ( Symbol data )

inline

Numerical negative of the argument, element-wise.

   The storage type of ``negative`` output depends upon the input storage type:

   - negative(default) = default
   - negative(row_sparse) = row_sparse
   - negative(csr) = csr

Parameters

data	The input array.

Returns: new symbol

Symbol mxnet::cpp::norm	(	const std::string &	symbol_name,
		Symbol	data,
		int	ord = `2`,
		dmlc::optional< Shape >	axis = `dmlc::optional<Shape>()`,
		bool	keepdims = `false`
	)

inline

Computes the norm on an NDArray.

   This operator computes the norm on an NDArray with the specified axis, depending
   on the value of the ord parameter. By default, it computes the L2 norm on the
   array. Currently only ord=2 supports sparse ndarrays.

   Examples::

   x = [[[1, 2],
   [3, 4]],
   [[2, 2],
   [5, 6]]]

   norm(x, ord=2, axis=1) = [[3.1622777 4.472136 ]
   [5.3851647 6.3245554]]

   norm(x, ord=1, axis=1) = [[4., 6.],
   [7., 8.]]

   rsp = x.cast_storage('row_sparse')

   norm(rsp) = [5.47722578]

   csr = x.cast_storage('csr')

   norm(csr) = [5.47722578]



   Defined in src/operator/tensor/broadcast_reduce_op_value.cc:L345

Parameters

symbol_name	name of the resulting symbol
data	The input
ord	Order of the norm. Currently ord=1 and ord=2 are supported.
axis	The axis or axes along which to perform the reduction. The default, `axis=()`, will compute over all elements into a scalar array with shape `(1,)`. If `axis` is int, a reduction is performed on a particular axis. If `axis` is a 2-tuple, it specifies the axes that hold 2-D matrices, and the matrix norms of these matrices are computed.
keepdims	If this is set to `True`, the reduced axis is left in the result as dimension with size one.

Returns: new symbol

Symbol mxnet::cpp::norm	(	Symbol	data,
		int	ord = `2`,
		dmlc::optional< Shape >	axis = `dmlc::optional<Shape>()`,
		bool	keepdims = `false`
	)

inline

Computes the norm on an NDArray.

   This operator computes the norm on an NDArray with the specified axis, depending
   on the value of the ord parameter. By default, it computes the L2 norm on the
   array. Currently only ord=2 supports sparse ndarrays.

   Examples::

   x = [[[1, 2],
   [3, 4]],
   [[2, 2],
   [5, 6]]]

   norm(x, ord=2, axis=1) = [[3.1622777 4.472136 ]
   [5.3851647 6.3245554]]

   norm(x, ord=1, axis=1) = [[4., 6.],
   [7., 8.]]

   rsp = x.cast_storage('row_sparse')

   norm(rsp) = [5.47722578]

   csr = x.cast_storage('csr')

   norm(csr) = [5.47722578]



   Defined in src/operator/tensor/broadcast_reduce_op_value.cc:L345

Parameters

data	The input
ord	Order of the norm. Currently ord=1 and ord=2 are supported.
axis	The axis or axes along which to perform the reduction. The default, `axis=()`, will compute over all elements into a scalar array with shape `(1,)`. If `axis` is int, a reduction is performed on a particular axis. If `axis` is a 2-tuple, it specifies the axes that hold 2-D matrices, and the matrix norms of these matrices are computed.
keepdims	If this is set to `True`, the reduced axis is left in the result as dimension with size one.

Returns: new symbol

Symbol mxnet::cpp::one_hot	(	const std::string &	symbol_name,
		Symbol	indices,
		int	depth,
		double	on_value = `1`,
		double	off_value = `0`,
		One_hotDtype	dtype = `One_hotDtype::kFloat32`
	)

inline

Returns a one-hot array.

   The locations represented by `indices` take value `on_value`, while all
   other locations take value `off_value`.

   `one_hot` operation with `indices` of shape ``(i0, i1)`` and `depth` of ``d`` would result
   in an output array of shape ``(i0, i1, d)`` with::

   output[i,j,:] = off_value
   output[i,j,indices[i,j]] = on_value

   Examples::

   one_hot([1,0,2,0], 3) = [[ 0.  1.  0.]
   [ 1.  0.  0.]
   [ 0.  0.  1.]
   [ 1.  0.  0.]]

   one_hot([1,0,2,0], 3, on_value=8, off_value=1,
   dtype='int32') = [[1 8 1]
   [8 1 1]
   [1 1 8]
   [8 1 1]]

   one_hot([[1,0],[1,0],[2,0]], 3) = [[[ 0.  1.  0.]
   [ 1.  0.  0.]]

   [[ 0.  1.  0.]
   [ 1.  0.  0.]]

   [[ 0.  0.  1.]
   [ 1.  0.  0.]]]


   Defined in src/operator/tensor/indexing_op.cc:L536

Parameters

symbol_name	name of the resulting symbol
indices	array of locations where to set on_value
depth	Depth of the one hot dimension.
on_value	The value assigned to the locations represented by indices.
off_value	The value assigned to the locations not represented by indices.
dtype	DType of the output

Returns: new symbol

Symbol mxnet::cpp::one_hot	(	Symbol	indices,
		int	depth,
		double	on_value = `1`,
		double	off_value = `0`,
		One_hotDtype	dtype = `One_hotDtype::kFloat32`
	)

inline

Returns a one-hot array.

   The locations represented by `indices` take value `on_value`, while all
   other locations take value `off_value`.

   `one_hot` operation with `indices` of shape ``(i0, i1)`` and `depth` of ``d`` would result
   in an output array of shape ``(i0, i1, d)`` with::

   output[i,j,:] = off_value
   output[i,j,indices[i,j]] = on_value

   Examples::

   one_hot([1,0,2,0], 3) = [[ 0.  1.  0.]
   [ 1.  0.  0.]
   [ 0.  0.  1.]
   [ 1.  0.  0.]]

   one_hot([1,0,2,0], 3, on_value=8, off_value=1,
   dtype='int32') = [[1 8 1]
   [8 1 1]
   [1 1 8]
   [8 1 1]]

   one_hot([[1,0],[1,0],[2,0]], 3) = [[[ 0.  1.  0.]
   [ 1.  0.  0.]]

   [[ 0.  1.  0.]
   [ 1.  0.  0.]]

   [[ 0.  0.  1.]
   [ 1.  0.  0.]]]


   Defined in src/operator/tensor/indexing_op.cc:L536

Parameters

indices	array of locations where to set on_value
depth	Depth of the one hot dimension.
on_value	The value assigned to the locations represented by indices.
off_value	The value assigned to the locations not represented by indices.
dtype	DType of the output

Returns: new symbol

Symbol mxnet::cpp::ones_like	(	const std::string &	symbol_name,
		Symbol	data
	)

inline

Return an array of ones with the same shape and type as the input array.

Examples::

x = [[ 0., 0., 0.], [ 0., 0., 0.]]

ones_like(x) = [[ 1., 1., 1.], [ 1., 1., 1.]]

Parameters

symbol_name	name of the resulting symbol
data	The input

Returns: new symbol

Symbol mxnet::cpp::ones_like ( Symbol data )

inline

Return an array of ones with the same shape and type as the input array.

Examples::

x = [[ 0., 0., 0.], [ 0., 0., 0.]]

ones_like(x) = [[ 1., 1., 1.], [ 1., 1., 1.]]

Parameters

data	The input

Returns: new symbol

Symbol mxnet::cpp::operator%	(	mx_float	lhs,
		const Symbol &	rhs
	)

Symbol mxnet::cpp::operator*	(	mx_float	lhs,
		const Symbol &	rhs
	)

Symbol mxnet::cpp::operator+	(	mx_float	lhs,
		const Symbol &	rhs
	)

Symbol mxnet::cpp::operator-	(	mx_float	lhs,
		const Symbol &	rhs
	)

Symbol mxnet::cpp::operator/	(	mx_float	lhs,
		const Symbol &	rhs
	)

std::ostream& mxnet::cpp::operator<<	(	std::ostream &	os,
		const Shape &	shape
	)

inline

allow string printing of the shape

Parameters

os	the output stream
shape	the shape

Returns: the ostream

std::ostream& mxnet::cpp::operator<<	(	std::ostream &	out,
		const NDArray &	ndarray
	)

std::istream& mxnet::cpp::operator>>	(	std::istream &	is,
		Shape &	shape
	)

inline

read shape from the istream

Parameters

is	the input stream
shape	the shape

Returns: the istream

Symbol mxnet::cpp::Pad	(	const std::string &	symbol_name,
		Symbol	data,
		PadMode	mode,
		Shape	pad_width,
		double	constant_value = `0`
	)

inline

Pads an input array with a constant or edge values of the array.

   .. note:: `Pad` is deprecated. Use `pad` instead.

   .. note:: Current implementation only supports 4D and 5D input arrays with padding applied
   only on axes 1, 2 and 3. Expects axes 4 and 5 in `pad_width` to be zero.

   This operation pads an input array with either a `constant_value` or edge values
   along each axis of the input array. The amount of padding is specified by `pad_width`.

   `pad_width` is a tuple of integer padding widths for each axis of the format
   ``(before_1, after_1, ... , before_N, after_N)``. The `pad_width` should be of length ``2*N``
   where ``N`` is the number of dimensions of the array.

   For dimension ``N`` of the input array, ``before_N`` and ``after_N`` indicate how many values
   to add before and after the elements of the array along dimension ``N``.
   The widths of the higher two dimensions ``before_1``, ``after_1``, ``before_2``,
   ``after_2`` must be 0.

   Example::

   x = [[[[  1.   2.   3.]
   [  4.   5.   6.]]

   [[  7.   8.   9.]
   [ 10.  11.  12.]]]


   [[[ 11.  12.  13.]
   [ 14.  15.  16.]]

   [[ 17.  18.  19.]
   [ 20.  21.  22.]]]]

   pad(x,mode="edge", pad_width=(0,0,0,0,1,1,1,1)) =

   [[[[  1.   1.   2.   3.   3.]
   [  1.   1.   2.   3.   3.]
   [  4.   4.   5.   6.   6.]
   [  4.   4.   5.   6.   6.]]

   [[  7.   7.   8.   9.   9.]
   [  7.   7.   8.   9.   9.]
   [ 10.  10.  11.  12.  12.]
   [ 10.  10.  11.  12.  12.]]]


   [[[ 11.  11.  12.  13.  13.]
   [ 11.  11.  12.  13.  13.]
   [ 14.  14.  15.  16.  16.]
   [ 14.  14.  15.  16.  16.]]

   [[ 17.  17.  18.  19.  19.]
   [ 17.  17.  18.  19.  19.]
   [ 20.  20.  21.  22.  22.]
   [ 20.  20.  21.  22.  22.]]]]

   pad(x, mode="constant", constant_value=0, pad_width=(0,0,0,0,1,1,1,1)) =

   [[[[  0.   0.   0.   0.   0.]
   [  0.   1.   2.   3.   0.]
   [  0.   4.   5.   6.   0.]
   [  0.   0.   0.   0.   0.]]

   [[  0.   0.   0.   0.   0.]
   [  0.   7.   8.   9.   0.]
   [  0.  10.  11.  12.   0.]
   [  0.   0.   0.   0.   0.]]]


   [[[  0.   0.   0.   0.   0.]
   [  0.  11.  12.  13.   0.]
   [  0.  14.  15.  16.   0.]
   [  0.   0.   0.   0.   0.]]

   [[  0.   0.   0.   0.   0.]
   [  0.  17.  18.  19.   0.]
   [  0.  20.  21.  22.   0.]
   [  0.   0.   0.   0.   0.]]]]




   Defined in src/operator/pad.cc:L766

Parameters

symbol_name	name of the resulting symbol
data	An n-dimensional input array.
mode	Padding type to use. "constant" pads with `constant_value`; "edge" pads using the edge values of the input array; "reflect" pads by reflecting values with respect to the edges.
pad_width	Widths of the padding regions applied to the edges of each axis. It is a tuple of integer padding widths for each axis of the format `(before_1, after_1, ... , before_N, after_N)`. It should be of length `2*N` where `N` is the number of dimensions of the array. This is equivalent to pad_width in numpy.pad, but flattened.
constant_value	The value used for padding when `mode` is "constant".

Returns: new symbol

Symbol mxnet::cpp::Pad	(	Symbol	data,
		PadMode	mode,
		Shape	pad_width,
		double	constant_value = `0`
	)

inline

Pads an input array with a constant or edge values of the array.

   .. note:: `Pad` is deprecated. Use `pad` instead.

   .. note:: Current implementation only supports 4D and 5D input arrays with padding applied
   only on axes 1, 2 and 3. Expects axes 4 and 5 in `pad_width` to be zero.

   This operation pads an input array with either a `constant_value` or edge values
   along each axis of the input array. The amount of padding is specified by `pad_width`.

   `pad_width` is a tuple of integer padding widths for each axis of the format
   ``(before_1, after_1, ... , before_N, after_N)``. The `pad_width` should be of length ``2*N``
   where ``N`` is the number of dimensions of the array.

   For dimension ``N`` of the input array, ``before_N`` and ``after_N`` indicate how many values
   to add before and after the elements of the array along dimension ``N``.
   The widths of the higher two dimensions ``before_1``, ``after_1``, ``before_2``,
   ``after_2`` must be 0.

   Example::

   x = [[[[  1.   2.   3.]
   [  4.   5.   6.]]

   [[  7.   8.   9.]
   [ 10.  11.  12.]]]


   [[[ 11.  12.  13.]
   [ 14.  15.  16.]]

   [[ 17.  18.  19.]
   [ 20.  21.  22.]]]]

   pad(x,mode="edge", pad_width=(0,0,0,0,1,1,1,1)) =

   [[[[  1.   1.   2.   3.   3.]
   [  1.   1.   2.   3.   3.]
   [  4.   4.   5.   6.   6.]
   [  4.   4.   5.   6.   6.]]

   [[  7.   7.   8.   9.   9.]
   [  7.   7.   8.   9.   9.]
   [ 10.  10.  11.  12.  12.]
   [ 10.  10.  11.  12.  12.]]]


   [[[ 11.  11.  12.  13.  13.]
   [ 11.  11.  12.  13.  13.]
   [ 14.  14.  15.  16.  16.]
   [ 14.  14.  15.  16.  16.]]

   [[ 17.  17.  18.  19.  19.]
   [ 17.  17.  18.  19.  19.]
   [ 20.  20.  21.  22.  22.]
   [ 20.  20.  21.  22.  22.]]]]

   pad(x, mode="constant", constant_value=0, pad_width=(0,0,0,0,1,1,1,1)) =

   [[[[  0.   0.   0.   0.   0.]
   [  0.   1.   2.   3.   0.]
   [  0.   4.   5.   6.   0.]
   [  0.   0.   0.   0.   0.]]

   [[  0.   0.   0.   0.   0.]
   [  0.   7.   8.   9.   0.]
   [  0.  10.  11.  12.   0.]
   [  0.   0.   0.   0.   0.]]]


   [[[  0.   0.   0.   0.   0.]
   [  0.  11.  12.  13.   0.]
   [  0.  14.  15.  16.   0.]
   [  0.   0.   0.   0.   0.]]

   [[  0.   0.   0.   0.   0.]
   [  0.  17.  18.  19.   0.]
   [  0.  20.  21.  22.   0.]
   [  0.   0.   0.   0.   0.]]]]




   Defined in src/operator/pad.cc:L766

Parameters

data	An n-dimensional input array.
mode	Padding type to use. "constant" pads with `constant_value`; "edge" pads using the edge values of the input array; "reflect" pads by reflecting values with respect to the edges.
pad_width	Widths of the padding regions applied to the edges of each axis. It is a tuple of integer padding widths for each axis of the format `(before_1, after_1, ... , before_N, after_N)`. It should be of length `2*N` where `N` is the number of dimensions of the array. This is equivalent to pad_width in numpy.pad, but flattened.
constant_value	The value used for padding when `mode` is "constant".

Returns: new symbol

Symbol mxnet::cpp::pick	(	const std::string &	symbol_name,
		Symbol	data,
		Symbol	index,
		dmlc::optional< int >	axis = `dmlc::optional<int>(-1)`,
		bool	keepdims = `false`,
		PickMode	mode = `PickMode::kClip`
	)

inline

Picks elements from an input array according to the input indices along the given axis.

   Given an input array of shape ``(d0, d1)`` and indices of shape ``(i0,)``, the result will be
   an output array of shape ``(i0,)`` with::

   output[i] = input[i, indices[i]]

   By default, if any index mentioned is too large, it is replaced by the index that addresses
   the last element along an axis (the `clip` mode).

   This function supports n-dimensional input and (n-1)-dimensional indices arrays.

   Examples::

   x = [[ 1.,  2.],
   [ 3.,  4.],
   [ 5.,  6.]]

   // picks elements with specified indices along axis 0
   pick(x, y=[0,1], 0) = [ 1.,  4.]

   // picks elements with specified indices along axis 1
   pick(x, y=[0,1,0], 1) = [ 1.,  4.,  5.]

   y = [[ 1.],
   [ 0.],
   [ 2.]]

   // picks elements with specified indices along axis 1 using 'wrap' mode
   // to place indices that would normally be out of bounds
   pick(x, y=[2,-1,-2], 1, mode='wrap') = [ 1.,  4.,  5.]

   y = [[ 1.],
   [ 0.],
   [ 2.]]

   // picks elements with specified indices along axis 1 and dims are maintained
   pick(x,y, 1, keepdims=True) = [[ 2.],
   [ 3.],
   [ 6.]]



   Defined in src/operator/tensor/broadcast_reduce_op_index.cc:L153

Parameters

symbol_name	name of the resulting symbol
data	The input array
index	The index array
axis	int or None. The axis along which to pick the elements. Negative values mean indexing from right to left. If `None`, the elements in the index w.r.t. the flattened input will be picked.
keepdims	If true, the axis where we pick the elements is left in the result as dimension with size one.
mode	Specify how out-of-bound indices behave. Default is "clip". "clip" means clip to the range. So, if all indices mentioned are too large, they are replaced by the index that addresses the last element along an axis. "wrap" means to wrap around.

Returns: new symbol

Symbol mxnet::cpp::pick	(	Symbol	data,
		Symbol	index,
		dmlc::optional< int >	axis = `dmlc::optional<int>(-1)`,
		bool	keepdims = `false`,
		PickMode	mode = `PickMode::kClip`
	)

inline

Picks elements from an input array according to the input indices along the given axis.

   Given an input array of shape ``(d0, d1)`` and indices of shape ``(i0,)``, the result will be
   an output array of shape ``(i0,)`` with::

   output[i] = input[i, indices[i]]

   By default, if any index mentioned is too large, it is replaced by the index that addresses
   the last element along an axis (the `clip` mode).

   This function supports n-dimensional input and (n-1)-dimensional indices arrays.

   Examples::

   x = [[ 1.,  2.],
   [ 3.,  4.],
   [ 5.,  6.]]

   // picks elements with specified indices along axis 0
   pick(x, y=[0,1], 0) = [ 1.,  4.]

   // picks elements with specified indices along axis 1
   pick(x, y=[0,1,0], 1) = [ 1.,  4.,  5.]

   y = [[ 1.],
   [ 0.],
   [ 2.]]

   // picks elements with specified indices along axis 1 using 'wrap' mode
   // to place indices that would normally be out of bounds
   pick(x, y=[2,-1,-2], 1, mode='wrap') = [ 1.,  4.,  5.]

   y = [[ 1.],
   [ 0.],
   [ 2.]]

   // picks elements with specified indices along axis 1 and dims are maintained
   pick(x,y, 1, keepdims=True) = [[ 2.],
   [ 3.],
   [ 6.]]



   Defined in src/operator/tensor/broadcast_reduce_op_index.cc:L153

Parameters

data	The input array
index	The index array
axis	int or None. The axis along which to pick the elements. Negative values mean indexing from right to left. If `None`, the elements in the index w.r.t. the flattened input will be picked.
keepdims	If true, the axis where we pick the elements is left in the result as dimension with size one.
mode	Specify how out-of-bound indices behave. Default is "clip". "clip" means clip to the range. So, if all indices mentioned are too large, they are replaced by the index that addresses the last element along an axis. "wrap" means to wrap around.

Returns: new symbol

Symbol mxnet::cpp::Pooling	(	const std::string &	symbol_name,
		Symbol	data,
		Shape	kernel = `Shape()`,
		PoolingPoolType	pool_type = `PoolingPoolType::kMax`,
		bool	global_pool = `false`,
		bool	cudnn_off = `false`,
		PoolingPoolingConvention	pooling_convention = `PoolingPoolingConvention::kValid`,
		Shape	stride = `Shape()`,
		Shape	pad = `Shape()`,
		dmlc::optional< int >	p_value = `dmlc::optional<int>()`,
		dmlc::optional< bool >	count_include_pad = `dmlc::optional<bool>()`
	)

inline

Performs pooling on the input.

   The shapes for 1-D pooling are

   - **data**: *(batch_size, channel, width)*,
   - **out**: *(batch_size, num_filter, out_width)*.

   The shapes for 2-D pooling are

   - **data**: *(batch_size, channel, height, width)*
   - **out**: *(batch_size, num_filter, out_height, out_width)*, with::

   out_height = f(height, kernel[0], pad[0], stride[0])
   out_width = f(width, kernel[1], pad[1], stride[1])

   The definition of *f* depends on ``pooling_convention``, which has two options:

   - **valid** (default)::

   f(x, k, p, s) = floor((x+2*p-k)/s)+1

   - **full**, which is compatible with Caffe::

   f(x, k, p, s) = ceil((x+2*p-k)/s)+1

   But if ``global_pool`` is set to be true, then do a global pooling, namely reset
   ``kernel=(height, width)``.

   Four pooling options are supported by ``pool_type``:

   - **avg**: average pooling
   - **max**: max pooling
   - **sum**: sum pooling
   - **lp**: Lp pooling

   For 3-D pooling, an additional *depth* dimension is added before
   *height*. Namely the input data will have shape *(batch_size, channel, depth,
   height, width)*.

   Notes on Lp pooling:

   Lp pooling was first introduced by this paper: https://arxiv.org/pdf/1204.3968.pdf.
   L-1 pooling is simply sum pooling, while L-inf pooling is simply max pooling.
   We can see that Lp pooling stands between those two, in practice the most common value for p is 2.

   For each window ``X``, the mathematical expression for Lp pooling is:

   :math:`f(X) = \sqrt[p]{\sum_{x}^{X} x^p}`



   Defined in src/operator/nn/pooling.cc:L388

Parameters

symbol_name	name of the resulting symbol
data	Input data to the pooling operator.
kernel	Pooling kernel size: (y, x) or (d, y, x)
pool_type	Pooling type to be applied.
global_pool	Ignore kernel size, do global pooling based on current input feature map.
cudnn_off	Turn off cudnn pooling and use MXNet pooling operator.
pooling_convention	Pooling convention to be applied.
stride	Stride: for pooling (y, x) or (d, y, x). Defaults to 1 for each dimension.
pad	Pad for pooling: (y, x) or (d, y, x). Defaults to no padding.
p_value	Value of p for Lp pooling, can be 1 or 2, required for Lp Pooling.
count_include_pad	Only used for AvgPool, specify whether to count padding elements for average calculation. For example, with a 5*5 kernel on a 3*3 corner of an image, the sum of the 9 valid elements will be divided by 25 if this is set to true, or it will be divided by 9 if this is set to false. Defaults to true.

Returns: new symbol

Symbol mxnet::cpp::Pooling	(	Symbol	data,
		Shape	kernel = `Shape()`,
		PoolingPoolType	pool_type = `PoolingPoolType::kMax`,
		bool	global_pool = `false`,
		bool	cudnn_off = `false`,
		PoolingPoolingConvention	pooling_convention = `PoolingPoolingConvention::kValid`,
		Shape	stride = `Shape()`,
		Shape	pad = `Shape()`,
		dmlc::optional< int >	p_value = `dmlc::optional<int>()`,
		dmlc::optional< bool >	count_include_pad = `dmlc::optional<bool>()`
	)

inline

Performs pooling on the input.

   The shapes for 1-D pooling are

   - **data**: *(batch_size, channel, width)*,
   - **out**: *(batch_size, num_filter, out_width)*.

   The shapes for 2-D pooling are

   - **data**: *(batch_size, channel, height, width)*
   - **out**: *(batch_size, num_filter, out_height, out_width)*, with::

   out_height = f(height, kernel[0], pad[0], stride[0])
   out_width = f(width, kernel[1], pad[1], stride[1])

   The definition of *f* depends on ``pooling_convention``, which has two options:

   - **valid** (default)::

   f(x, k, p, s) = floor((x+2*p-k)/s)+1

   - **full**, which is compatible with Caffe::

   f(x, k, p, s) = ceil((x+2*p-k)/s)+1

   But if ``global_pool`` is set to be true, then do a global pooling, namely reset
   ``kernel=(height, width)``.

   Four pooling options are supported by ``pool_type``:

   - **avg**: average pooling
   - **max**: max pooling
   - **sum**: sum pooling
   - **lp**: Lp pooling

   For 3-D pooling, an additional *depth* dimension is added before
   *height*. Namely the input data will have shape *(batch_size, channel, depth,
   height, width)*.

   Notes on Lp pooling:

   Lp pooling was first introduced by this paper: https://arxiv.org/pdf/1204.3968.pdf.
   L-1 pooling is simply sum pooling, while L-inf pooling is simply max pooling.
   We can see that Lp pooling stands between those two, in practice the most common value for p is 2.

   For each window ``X``, the mathematical expression for Lp pooling is:

   :math:`f(X) = \sqrt[p]{\sum_{x}^{X} x^p}`



   Defined in src/operator/nn/pooling.cc:L388

Parameters

data	Input data to the pooling operator.
kernel	Pooling kernel size: (y, x) or (d, y, x)
pool_type	Pooling type to be applied.
global_pool	Ignore kernel size, do global pooling based on current input feature map.
cudnn_off	Turn off cudnn pooling and use MXNet pooling operator.
pooling_convention	Pooling convention to be applied.
stride	Stride: for pooling (y, x) or (d, y, x). Defaults to 1 for each dimension.
pad	Pad for pooling: (y, x) or (d, y, x). Defaults to no padding.
p_value	Value of p for Lp pooling, can be 1 or 2, required for Lp Pooling.
count_include_pad	Only used for AvgPool, specify whether to count padding elements for average calculation. For example, with a 5*5 kernel on a 3*3 corner of an image, the sum of the 9 valid elements will be divided by 25 if this is set to true, or it will be divided by 9 if this is set to false. Defaults to true.

Returns: new symbol

Symbol mxnet::cpp::Pooling_v1	(	const std::string &	symbol_name,
		Symbol	data,
		Shape	kernel = `Shape()`,
		Pooling_v1PoolType	pool_type = `Pooling_v1PoolType::kMax`,
		bool	global_pool = `false`,
		Pooling_v1PoolingConvention	pooling_convention = `Pooling_v1PoolingConvention::kValid`,
		Shape	stride = `Shape()`,
		Shape	pad = `Shape()`
	)

inline

This operator is DEPRECATED. Perform pooling on the input.

The shapes for 2-D pooling are

data: *(batch_size, channel, height, width)*
out: *(batch_size, num_filter, out_height, out_width)*, with::

out_height = f(height, kernel[0], pad[0], stride[0])
out_width = f(width, kernel[1], pad[1], stride[1])

The definition of f depends on pooling_convention, which has two options:

valid (default)::

f(x, k, p, s) = floor((x+2*p-k)/s)+1

full, which is compatible with Caffe::

f(x, k, p, s) = ceil((x+2*p-k)/s)+1

But if global_pool is set to be true, then do a global pooling, namely reset kernel=(height, width).

Three pooling options are supported by pool_type:

avg: average pooling
max: max pooling
sum: sum pooling

1-D pooling is special case of 2-D pooling with weight=1 and kernel[1]=1.

For 3-D pooling, an additional depth dimension is added before height. Namely the input data will have shape *(batch_size, channel, depth, height, width)*.

   Defined in src/operator/pooling_v1.cc:L104

Parameters

symbol_name	name of the resulting symbol
data	Input data to the pooling operator.
kernel	pooling kernel size: (y, x) or (d, y, x)
pool_type	Pooling type to be applied.
global_pool	Ignore kernel size, do global pooling based on current input feature map.
pooling_convention	Pooling convention to be applied.
stride	stride: for pooling (y, x) or (d, y, x)
pad	pad for pooling: (y, x) or (d, y, x)

Returns: new symbol

Symbol mxnet::cpp::Pooling_v1	(	Symbol	data,
		Shape	kernel = `Shape()`,
		Pooling_v1PoolType	pool_type = `Pooling_v1PoolType::kMax`,
		bool	global_pool = `false`,
		Pooling_v1PoolingConvention	pooling_convention = `Pooling_v1PoolingConvention::kValid`,
		Shape	stride = `Shape()`,
		Shape	pad = `Shape()`
	)

inline

This operator is DEPRECATED. Perform pooling on the input.

The shapes for 2-D pooling are

data: *(batch_size, channel, height, width)*
out: *(batch_size, num_filter, out_height, out_width)*, with::

out_height = f(height, kernel[0], pad[0], stride[0])
out_width = f(width, kernel[1], pad[1], stride[1])

The definition of f depends on pooling_convention, which has two options:

valid (default)::

f(x, k, p, s) = floor((x+2*p-k)/s)+1

full, which is compatible with Caffe::

f(x, k, p, s) = ceil((x+2*p-k)/s)+1

But if global_pool is set to be true, then do a global pooling, namely reset kernel=(height, width).

Three pooling options are supported by pool_type:

avg: average pooling
max: max pooling
sum: sum pooling

1-D pooling is special case of 2-D pooling with weight=1 and kernel[1]=1.

For 3-D pooling, an additional depth dimension is added before height. Namely the input data will have shape *(batch_size, channel, depth, height, width)*.

   Defined in src/operator/pooling_v1.cc:L104

Parameters

data	Input data to the pooling operator.
kernel	pooling kernel size: (y, x) or (d, y, x)
pool_type	Pooling type to be applied.
global_pool	Ignore kernel size, do global pooling based on current input feature map.
pooling_convention	Pooling convention to be applied.
stride	stride: for pooling (y, x) or (d, y, x)
pad	pad for pooling: (y, x) or (d, y, x)

Returns: new symbol

Symbol mxnet::cpp::prod	(	const std::string &	symbol_name,
		Symbol	data,
		dmlc::optional< Shape >	axis = `dmlc::optional<Shape>()`,
		bool	keepdims = `false`,
		bool	exclude = `false`
	)

inline

Computes the product of array elements over given axes.

   Defined in src/operator/tensor/broadcast_reduce_op_value.cc:L146

Parameters

symbol_name	name of the resulting symbol
data	The input
axis	The axis or axes along which to perform the reduction. The default, `axis=()`, will compute over all elements into a scalar array with shape `(1,)`. If `axis` is int, a reduction is performed on a particular axis. If `axis` is a tuple of ints, a reduction is performed on all the axes specified in the tuple. If `exclude` is true, reduction will be performed on the axes that are NOT in axis instead. Negative values means indexing from right to left.
keepdims	If this is set to `True`, the reduced axes are left in the result as dimension with size one.
exclude	Whether to perform reduction on axis that are NOT in axis instead.

Returns: new symbol

Symbol mxnet::cpp::prod	(	Symbol	data,
		dmlc::optional< Shape >	axis = `dmlc::optional<Shape>()`,
		bool	keepdims = `false`,
		bool	exclude = `false`
	)

inline

Computes the product of array elements over given axes.

   Defined in src/operator/tensor/broadcast_reduce_op_value.cc:L146

Parameters

data	The input
axis	The axis or axes along which to perform the reduction. The default, `axis=()`, will compute over all elements into a scalar array with shape `(1,)`. If `axis` is int, a reduction is performed on a particular axis. If `axis` is a tuple of ints, a reduction is performed on all the axes specified in the tuple. If `exclude` is true, reduction will be performed on the axes that are NOT in axis instead. Negative values means indexing from right to left.
keepdims	If this is set to `True`, the reduced axes are left in the result as dimension with size one.
exclude	Whether to perform reduction on axis that are NOT in axis instead.

Returns: new symbol

Symbol mxnet::cpp::radians	(	const std::string &	symbol_name,
		Symbol	data
	)

inline

Converts each element of the input array from degrees to radians.

   .. math::
   radians([0, 90, 180, 270, 360]) = [0, \pi/2, \pi, 3\pi/2, 2\pi]

   The storage type of ``radians`` output depends upon the input storage type:

   - radians(default) = default
   - radians(row_sparse) = row_sparse
   - radians(csr) = csr



   Defined in src/operator/tensor/elemwise_unary_op_trig.cc:L182

Parameters

symbol_name	name of the resulting symbol
data	The input array.

Returns: new symbol

Symbol mxnet::cpp::radians ( Symbol data )

inline

Converts each element of the input array from degrees to radians.

   .. math::
   radians([0, 90, 180, 270, 360]) = [0, \pi/2, \pi, 3\pi/2, 2\pi]

   The storage type of ``radians`` output depends upon the input storage type:

   - radians(default) = default
   - radians(row_sparse) = row_sparse
   - radians(csr) = csr



   Defined in src/operator/tensor/elemwise_unary_op_trig.cc:L182

Parameters

data	The input array.

Returns: new symbol

Symbol mxnet::cpp::rcbrt	(	const std::string &	symbol_name,
		Symbol	data
	)

inline

Returns element-wise inverse cube-root value of the input.

   .. math::
   rcbrt(x) = 1/\sqrt[3]{x}

   Example::

   rcbrt([1,8,-125]) = [1.0, 0.5, -0.2]



   Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L906

Parameters

symbol_name	name of the resulting symbol
data	The input array.

Returns: new symbol

Symbol mxnet::cpp::rcbrt ( Symbol data )

inline

Returns element-wise inverse cube-root value of the input.

   .. math::
   rcbrt(x) = 1/\sqrt[3]{x}

   Example::

   rcbrt([1,8,-125]) = [1.0, 0.5, -0.2]



   Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L906

Parameters

data	The input array.

Returns: new symbol

Symbol mxnet::cpp::reciprocal	(	const std::string &	symbol_name,
		Symbol	data
	)

inline

Returns the reciprocal of the argument, element-wise.

   Calculates 1/x.

   Example::

   reciprocal([-2, 1, 3, 1.6, 0.2]) = [-0.5, 1.0, 0.33333334, 0.625, 5.0]



   Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L646

Parameters

symbol_name	name of the resulting symbol
data	The input array.

Returns: new symbol

Symbol mxnet::cpp::reciprocal ( Symbol data )

inline

Returns the reciprocal of the argument, element-wise.

   Calculates 1/x.

   Example::

   reciprocal([-2, 1, 3, 1.6, 0.2]) = [-0.5, 1.0, 0.33333334, 0.625, 5.0]



   Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L646

Parameters

data	The input array.

Returns: new symbol

Symbol mxnet::cpp::relu	(	const std::string &	symbol_name,
		Symbol	data
	)

inline

Computes rectified linear.

   .. math::
   max(features, 0)

   The storage type of ``relu`` output depends upon the input storage type:

   - relu(default) = default
   - relu(row_sparse) = row_sparse
   - relu(csr) = csr



   Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L85

Parameters

symbol_name	name of the resulting symbol
data	The input array.

Returns: new symbol

Symbol mxnet::cpp::relu ( Symbol data )

inline

Computes rectified linear.

   .. math::
   max(features, 0)

   The storage type of ``relu`` output depends upon the input storage type:

   - relu(default) = default
   - relu(row_sparse) = row_sparse
   - relu(csr) = csr



   Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L85

Parameters

data	The input array.

Returns: new symbol

Symbol mxnet::cpp::repeat	(	const std::string &	symbol_name,
		Symbol	data,
		int	repeats,
		dmlc::optional< int >	axis = `dmlc::optional<int>()`
	)

inline

Repeats elements of an array.

   By default, ``repeat`` flattens the input array into 1-D and then repeats the
   elements::

   x = [[ 1, 2],
   [ 3, 4]]

   repeat(x, repeats=2) = [ 1.,  1.,  2.,  2.,  3.,  3.,  4.,  4.]

   The parameter ``axis`` specifies the axis along which to perform repeat::

   repeat(x, repeats=2, axis=1) = [[ 1.,  1.,  2.,  2.],
   [ 3.,  3.,  4.,  4.]]

   repeat(x, repeats=2, axis=0) = [[ 1.,  2.],
   [ 1.,  2.],
   [ 3.,  4.],
   [ 3.,  4.]]

   repeat(x, repeats=2, axis=-1) = [[ 1.,  1.,  2.,  2.],
   [ 3.,  3.,  4.,  4.]]



   Defined in src/operator/tensor/matrix_op.cc:L691

Parameters

symbol_name	name of the resulting symbol
data	Input data array
repeats	The number of repetitions for each element.
axis	The axis along which to repeat values. Negative numbers are interpreted as counting backward from the last axis. By default, use the flattened input array, and return a flat output array.

Returns: new symbol

Symbol mxnet::cpp::repeat	(	Symbol	data,
		int	repeats,
		dmlc::optional< int >	axis = `dmlc::optional<int>()`
	)

inline

Repeats elements of an array.

   By default, ``repeat`` flattens the input array into 1-D and then repeats the
   elements::

   x = [[ 1, 2],
   [ 3, 4]]

   repeat(x, repeats=2) = [ 1.,  1.,  2.,  2.,  3.,  3.,  4.,  4.]

   The parameter ``axis`` specifies the axis along which to perform repeat::

   repeat(x, repeats=2, axis=1) = [[ 1.,  1.,  2.,  2.],
   [ 3.,  3.,  4.,  4.]]

   repeat(x, repeats=2, axis=0) = [[ 1.,  2.],
   [ 1.,  2.],
   [ 3.,  4.],
   [ 3.,  4.]]

   repeat(x, repeats=2, axis=-1) = [[ 1.,  1.,  2.,  2.],
   [ 3.,  3.,  4.,  4.]]



   Defined in src/operator/tensor/matrix_op.cc:L691

Parameters

data	Input data array
repeats	The number of repetitions for each element.
axis	The axis along which to repeat values. Negative numbers are interpreted as counting backward from the last axis. By default, use the flattened input array, and return a flat output array.

Returns: new symbol

Symbol mxnet::cpp::Reshape	(	const std::string &	symbol_name,
		Symbol	data,
		Shape	shape = `Shape()`,
		bool	reverse = `false`,
		Shape	target_shape = `Shape()`,
		bool	keep_highest = `false`
	)

inline

Reshapes the input array.

   .. note:: ``Reshape`` is deprecated, use ``reshape``

   Given an array and a shape, this function returns a copy of the array in the new shape.
   The shape is a tuple of integers such as (2,3,4). The size of the new shape should be the same as the size of the input array.

   Example::

   reshape([1,2,3,4], shape=(2,2)) = [[1,2], [3,4]]

   Some dimensions of the shape can take special values from the set {0, -1, -2, -3, -4}. The significance of each is explained below:

   - ``0``  copy this dimension from the input to the output shape.

   Example::

   - input shape = (2,3,4), shape = (4,0,2), output shape = (4,3,2)
   - input shape = (2,3,4), shape = (2,0,0), output shape = (2,3,4)

   - ``-1`` infers the dimension of the output shape by using the remainder of the
   input dimensions, keeping the size of the new array same as that of the input array.
   At most one dimension of shape can be -1.

   Example::

   - input shape = (2,3,4), shape = (6,1,-1), output shape = (6,1,4)
   - input shape = (2,3,4), shape = (3,-1,8), output shape = (3,1,8)
   - input shape = (2,3,4), shape=(-1,), output shape = (24,)

   - ``-2`` copy all/remainder of the input dimensions to the output shape.

   Example::

   - input shape = (2,3,4), shape = (-2,), output shape = (2,3,4)
   - input shape = (2,3,4), shape = (2,-2), output shape = (2,3,4)
   - input shape = (2,3,4), shape = (-2,1,1), output shape = (2,3,4,1,1)

   - ``-3`` use the product of two consecutive dimensions of the input shape as the output dimension.

   Example::

   - input shape = (2,3,4), shape = (-3,4), output shape = (6,4)
   - input shape = (2,3,4,5), shape = (-3,-3), output shape = (6,20)
   - input shape = (2,3,4), shape = (0,-3), output shape = (2,12)
   - input shape = (2,3,4), shape = (-3,-2), output shape = (6,4)

   - ``-4`` split one dimension of the input into two dimensions passed subsequent to -4 in shape (can contain -1).

   Example::

   - input shape = (2,3,4), shape = (-4,1,2,-2), output shape =(1,2,3,4)
   - input shape = (2,3,4), shape = (2,-4,-1,3,-2), output shape = (2,1,3,4)

   If the argument `reverse` is set to 1, then the special values are inferred from right to left.

   Example::

   - without reverse=1, for input shape = (10,5,4), shape = (-1,0), output shape would be (40,5).
   - with reverse=1, output shape will be (50,4).



   Defined in src/operator/tensor/matrix_op.cc:L169

Parameters

symbol_name	name of the resulting symbol
data	Input data to reshape.
shape	The target shape
reverse	If true then the special values are inferred from right to left
target_shape	(Deprecated! Use `shape` instead.) Target new shape. One and only one dim can be 0, in which case it will be inferred from the rest of dims.
keep_highest	(Deprecated! Use `shape` instead.) Whether to keep the highest dim unchanged. If set to true, then the first dim in target_shape is ignored, and always fixed as input.

Returns: new symbol

Symbol mxnet::cpp::Reshape	(	Symbol	data,
		Shape	shape = `Shape()`,
		bool	reverse = `false`,
		Shape	target_shape = `Shape()`,
		bool	keep_highest = `false`
	)

inline

Reshapes the input array.

   .. note:: ``Reshape`` is deprecated, use ``reshape``

   Given an array and a shape, this function returns a copy of the array in the new shape.
   The shape is a tuple of integers such as (2,3,4). The size of the new shape should be the same as the size of the input array.

   Example::

   reshape([1,2,3,4], shape=(2,2)) = [[1,2], [3,4]]

   Some dimensions of the shape can take special values from the set {0, -1, -2, -3, -4}. The significance of each is explained below:

   - ``0``  copy this dimension from the input to the output shape.

   Example::

   - input shape = (2,3,4), shape = (4,0,2), output shape = (4,3,2)
   - input shape = (2,3,4), shape = (2,0,0), output shape = (2,3,4)

   - ``-1`` infers the dimension of the output shape by using the remainder of the
   input dimensions, keeping the size of the new array same as that of the input array.
   At most one dimension of shape can be -1.

   Example::

   - input shape = (2,3,4), shape = (6,1,-1), output shape = (6,1,4)
   - input shape = (2,3,4), shape = (3,-1,8), output shape = (3,1,8)
   - input shape = (2,3,4), shape=(-1,), output shape = (24,)

   - ``-2`` copy all/remainder of the input dimensions to the output shape.

   Example::

   - input shape = (2,3,4), shape = (-2,), output shape = (2,3,4)
   - input shape = (2,3,4), shape = (2,-2), output shape = (2,3,4)
   - input shape = (2,3,4), shape = (-2,1,1), output shape = (2,3,4,1,1)

   - ``-3`` use the product of two consecutive dimensions of the input shape as the output dimension.

   Example::

   - input shape = (2,3,4), shape = (-3,4), output shape = (6,4)
   - input shape = (2,3,4,5), shape = (-3,-3), output shape = (6,20)
   - input shape = (2,3,4), shape = (0,-3), output shape = (2,12)
   - input shape = (2,3,4), shape = (-3,-2), output shape = (6,4)

   - ``-4`` split one dimension of the input into two dimensions passed subsequent to -4 in shape (can contain -1).

   Example::

   - input shape = (2,3,4), shape = (-4,1,2,-2), output shape =(1,2,3,4)
   - input shape = (2,3,4), shape = (2,-4,-1,3,-2), output shape = (2,1,3,4)

   If the argument `reverse` is set to 1, then the special values are inferred from right to left.

   Example::

   - without reverse=1, for input shape = (10,5,4), shape = (-1,0), output shape would be (40,5).
   - with reverse=1, output shape will be (50,4).



   Defined in src/operator/tensor/matrix_op.cc:L169

Parameters

data	Input data to reshape.
shape	The target shape
reverse	If true then the special values are inferred from right to left
target_shape	(Deprecated! Use `shape` instead.) Target new shape. One and only one dim can be 0, in which case it will be inferred from the rest of dims.
keep_highest	(Deprecated! Use `shape` instead.) Whether to keep the highest dim unchanged. If set to true, then the first dim in target_shape is ignored, and always fixed as input.

Returns: new symbol

Symbol mxnet::cpp::reshape_like	(	const std::string &	symbol_name,
		Symbol	lhs,
		Symbol	rhs
	)

inline

Reshape some or all dimensions of `lhs` to have the same shape as some or all dimensions of `rhs`.

   Returns a **view** of the `lhs` array with a new shape without altering any data.

   Example::

   x = [1, 2, 3, 4, 5, 6]
   y = [[0, -4], [3, 2], [2, 2]]
   reshape_like(x, y) = [[1, 2], [3, 4], [5, 6]]

   More precise control over how dimensions are inherited is achieved by specifying
   slices over the `lhs` and `rhs` array dimensions. Only the sliced `lhs` dimensions
   are reshaped to the `rhs` sliced dimensions, with the non-sliced `lhs` dimensions staying the same.

   Examples::

   - lhs shape = (30,7), rhs shape = (15,2,4), lhs_begin=0, lhs_end=1, rhs_begin=0, rhs_end=2, output shape = (15,2,7)
   - lhs shape = (3, 5), rhs shape = (1,15,4), lhs_begin=0, lhs_end=2, rhs_begin=1, rhs_end=2, output shape = (15)

   Negative indices are supported, and `None` can be used for either `lhs_end` or `rhs_end` to indicate the end of the range.

   Example::

   - lhs shape = (30, 12), rhs shape = (4, 2, 2, 3), lhs_begin=-1, lhs_end=None, rhs_begin=1, rhs_end=None, output shape = (30, 2, 2, 3)



   Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L453

Parameters

symbol_name	name of the resulting symbol
lhs	First input.
rhs	Second input.

Returns: new symbol

Symbol mxnet::cpp::reshape_like	(	Symbol	lhs,
		Symbol	rhs
	)

inline

Reshape some or all dimensions of `lhs` to have the same shape as some or all dimensions of `rhs`.

   Returns a **view** of the `lhs` array with a new shape without altering any data.

   Example::

   x = [1, 2, 3, 4, 5, 6]
   y = [[0, -4], [3, 2], [2, 2]]
   reshape_like(x, y) = [[1, 2], [3, 4], [5, 6]]

   More precise control over how dimensions are inherited is achieved by specifying
   slices over the `lhs` and `rhs` array dimensions. Only the sliced `lhs` dimensions
   are reshaped to the `rhs` sliced dimensions, with the non-sliced `lhs` dimensions staying the same.

   Examples::

   - lhs shape = (30,7), rhs shape = (15,2,4), lhs_begin=0, lhs_end=1, rhs_begin=0, rhs_end=2, output shape = (15,2,7)
   - lhs shape = (3, 5), rhs shape = (1,15,4), lhs_begin=0, lhs_end=2, rhs_begin=1, rhs_end=2, output shape = (15)

   Negative indices are supported, and `None` can be used for either `lhs_end` or `rhs_end` to indicate the end of the range.

   Example::

   - lhs shape = (30, 12), rhs shape = (4, 2, 2, 3), lhs_begin=-1, lhs_end=None, rhs_begin=1, rhs_end=None, output shape = (30, 2, 2, 3)



   Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L453

Parameters

lhs	First input.
rhs	Second input.

Returns: new symbol

Symbol mxnet::cpp::reverse	(	const std::string &	symbol_name,
		Symbol	data,
		Shape	axis
	)

inline

Reverses the order of elements along given axis while preserving array shape.

   Note: reverse and flip are equivalent. We use reverse in the following examples.

   Examples::

   x = [[ 0.,  1.,  2.,  3.,  4.],
   [ 5.,  6.,  7.,  8.,  9.]]

   reverse(x, axis=0) = [[ 5.,  6.,  7.,  8.,  9.],
   [ 0.,  1.,  2.,  3.,  4.]]

   reverse(x, axis=1) = [[ 4.,  3.,  2.,  1.,  0.],
   [ 9.,  8.,  7.,  6.,  5.]]


   Defined in src/operator/tensor/matrix_op.cc:L793

Parameters

symbol_name	name of the resulting symbol
data	Input data array
axis	The axis along which to reverse elements.

Returns: new symbol

Symbol mxnet::cpp::reverse	(	Symbol	data,
		Shape	axis
	)

inline

Reverses the order of elements along given axis while preserving array shape.

   Note: reverse and flip are equivalent. We use reverse in the following examples.

   Examples::

   x = [[ 0.,  1.,  2.,  3.,  4.],
   [ 5.,  6.,  7.,  8.,  9.]]

   reverse(x, axis=0) = [[ 5.,  6.,  7.,  8.,  9.],
   [ 0.,  1.,  2.,  3.,  4.]]

   reverse(x, axis=1) = [[ 4.,  3.,  2.,  1.,  0.],
   [ 9.,  8.,  7.,  6.,  5.]]


   Defined in src/operator/tensor/matrix_op.cc:L793

Parameters

data	Input data array
axis	The axis along which to reverse elements.

Returns: new symbol

Symbol mxnet::cpp::rint	(	const std::string &	symbol_name,
		Symbol	data
	)

inline

Returns element-wise rounded value to the nearest integer of the input.

   .. note::
   - For input ``n.5`` ``rint`` returns ``n`` while ``round`` returns ``n+1``.
   - For input ``-n.5`` both ``rint`` and ``round`` return ``-n-1``.

   Example::

   rint([-1.5, 1.5, -1.9, 1.9, 2.1]) = [-2.,  1., -2.,  2.,  2.]

   The storage type of ``rint`` output depends upon the input storage type:

   - rint(default) = default
   - rint(row_sparse) = row_sparse
   - rint(csr) = csr



   Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L727

Parameters

symbol_name	name of the resulting symbol
data	The input array.

Returns: new symbol

Symbol mxnet::cpp::rint ( Symbol data )

inline

Returns element-wise rounded value to the nearest integer of the input.

   .. note::
   - For input ``n.5`` ``rint`` returns ``n`` while ``round`` returns ``n+1``.
   - For input ``-n.5`` both ``rint`` and ``round`` return ``-n-1``.

   Example::

   rint([-1.5, 1.5, -1.9, 1.9, 2.1]) = [-2.,  1., -2.,  2.,  2.]

   The storage type of ``rint`` output depends upon the input storage type:

   - rint(default) = default
   - rint(row_sparse) = row_sparse
   - rint(csr) = csr



   Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L727

Parameters

data	The input array.

Returns: new symbol

Symbol mxnet::cpp::rmsprop_update	(	const std::string &	symbol_name,
		Symbol	weight,
		Symbol	grad,
		Symbol	n,
		mx_float	lr,
		mx_float	gamma1 = `0.95`,
		mx_float	epsilon = `1e-08`,
		mx_float	wd = `0`,
		mx_float	rescale_grad = `1`,
		mx_float	clip_gradient = `-1`,
		mx_float	clip_weights = `-1`
	)

inline

Update function for RMSProp optimizer.

   `RMSprop` is a variant of stochastic gradient descent where the gradients are
   divided by a cache which grows with the sum of squares of recent gradients.

   `RMSProp` is similar to `AdaGrad`, a popular variant of `SGD` which adaptively
   tunes the learning rate of each parameter. `AdaGrad` lowers the learning rate for
   each parameter monotonically over the course of training.
   While this is analytically motivated for convex optimizations, it may not be ideal
   for non-convex problems. `RMSProp` deals with this heuristically by allowing the
   learning rates to rebound as the denominator decays over time.

   Define the Root Mean Square (RMS) error criterion of the gradient as
   :math:`RMS[g]_t = \sqrt{E[g^2]_t + \epsilon}`, where :math:`g` represents
   gradient and :math:`E[g^2]_t` is the decaying average over past squared gradient.

   The :math:`E[g^2]_t` is given by:

   .. math::
   E[g^2]_t = \gamma * E[g^2]_{t-1} + (1-\gamma) * g_t^2

   The update step is

   .. math::
   \theta_{t+1} = \theta_t - \frac{\eta}{RMS[g]_t} g_t

   The RMSProp code follows the version in
   http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf
   Tieleman & Hinton, 2012.

   Hinton suggests the momentum term :math:`\gamma` to be 0.9 and the learning rate
   :math:`\eta` to be 0.001.



   Defined in src/operator/optimizer_op.cc:L553

Parameters

symbol_name	name of the resulting symbol
weight	Weight
grad	Gradient
n	n
lr	Learning rate
gamma1	The decay rate of momentum estimates.
epsilon	A small constant for numerical stability.
wd	Weight decay augments the objective function with a regularization term that penalizes large weights. The penalty scales with the square of the magnitude of each weight.
rescale_grad	Rescale gradient to grad = rescale_grad*grad.
clip_gradient	Clip gradient to the range of [-clip_gradient, clip_gradient]. If clip_gradient <= 0, gradient clipping is turned off. grad = max(min(grad, clip_gradient), -clip_gradient).
clip_weights	Clip weights to the range of [-clip_weights, clip_weights]. If clip_weights <= 0, weight clipping is turned off. weights = max(min(weights, clip_weights), -clip_weights).

Returns: new symbol

Symbol mxnet::cpp::rmsprop_update	(	Symbol	weight,
		Symbol	grad,
		Symbol	n,
		mx_float	lr,
		mx_float	gamma1 = `0.95`,
		mx_float	epsilon = `1e-08`,
		mx_float	wd = `0`,
		mx_float	rescale_grad = `1`,
		mx_float	clip_gradient = `-1`,
		mx_float	clip_weights = `-1`
	)

inline

Update function for RMSProp optimizer.

   `RMSprop` is a variant of stochastic gradient descent where the gradients are
   divided by a cache which grows with the sum of squares of recent gradients.

   `RMSProp` is similar to `AdaGrad`, a popular variant of `SGD` which adaptively
   tunes the learning rate of each parameter. `AdaGrad` lowers the learning rate for
   each parameter monotonically over the course of training.
   While this is analytically motivated for convex optimizations, it may not be ideal
   for non-convex problems. `RMSProp` deals with this heuristically by allowing the
   learning rates to rebound as the denominator decays over time.

   Define the Root Mean Square (RMS) error criterion of the gradient as
   :math:`RMS[g]_t = \sqrt{E[g^2]_t + \epsilon}`, where :math:`g` represents
   gradient and :math:`E[g^2]_t` is the decaying average over past squared gradient.

   The :math:`E[g^2]_t` is given by:

   .. math::
   E[g^2]_t = \gamma * E[g^2]_{t-1} + (1-\gamma) * g_t^2

   The update step is

   .. math::
   \theta_{t+1} = \theta_t - \frac{\eta}{RMS[g]_t} g_t

   The RMSProp code follows the version in
   http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf
   Tieleman & Hinton, 2012.

   Hinton suggests the momentum term :math:`\gamma` to be 0.9 and the learning rate
   :math:`\eta` to be 0.001.



   Defined in src/operator/optimizer_op.cc:L553

Parameters

weight	Weight
grad	Gradient
n	n
lr	Learning rate
gamma1	The decay rate of momentum estimates.
epsilon	A small constant for numerical stability.
wd	Weight decay augments the objective function with a regularization term that penalizes large weights. The penalty scales with the square of the magnitude of each weight.
rescale_grad	Rescale gradient to grad = rescale_grad*grad.
clip_gradient	Clip gradient to the range of [-clip_gradient, clip_gradient]. If clip_gradient <= 0, gradient clipping is turned off. grad = max(min(grad, clip_gradient), -clip_gradient).
clip_weights	Clip weights to the range of [-clip_weights, clip_weights]. If clip_weights <= 0, weight clipping is turned off. weights = max(min(weights, clip_weights), -clip_weights).

Returns: new symbol

Symbol mxnet::cpp::rmspropalex_update	(	const std::string &	symbol_name,
		Symbol	weight,
		Symbol	grad,
		Symbol	n,
		Symbol	g,
		Symbol	delta,
		mx_float	lr,
		mx_float	gamma1 = `0.95`,
		mx_float	gamma2 = `0.9`,
		mx_float	epsilon = `1e-08`,
		mx_float	wd = `0`,
		mx_float	rescale_grad = `1`,
		mx_float	clip_gradient = `-1`,
		mx_float	clip_weights = `-1`
	)

inline

Update function for RMSPropAlex optimizer.

   `RMSPropAlex` is non-centered version of `RMSProp`.

   Define :math:`E[g^2]_t` as the decaying average over past squared gradient and
   :math:`E[g]_t` as the decaying average over past gradient.

   .. math::
   E[g^2]_t = \gamma_1 * E[g^2]_{t-1} + (1 - \gamma_1) * g_t^2\\
   E[g]_t = \gamma_1 * E[g]_{t-1} + (1 - \gamma_1) * g_t\\
   \Delta_t = \gamma_2 * \Delta_{t-1} - \frac{\eta}{\sqrt{E[g^2]_t - E[g]_t^2 + \epsilon}} g_t\\

   The update step is

   .. math::
   \theta_{t+1} = \theta_t + \Delta_t

   The RMSPropAlex code follows the version in
   http://arxiv.org/pdf/1308.0850v5.pdf Eq(38) - Eq(45) by Alex Graves, 2013.

   Graves suggests the momentum term :math:`\gamma_1` to be 0.95, :math:`\gamma_2`
   to be 0.9 and the learning rate :math:`\eta` to be 0.0001.


   Defined in src/operator/optimizer_op.cc:L592

Parameters

symbol_name	name of the resulting symbol
weight	Weight
grad	Gradient
n	n
g	g
delta	delta
lr	Learning rate
gamma1	Decay rate.
gamma2	Decay rate.
epsilon	A small constant for numerical stability.
wd	Weight decay augments the objective function with a regularization term that penalizes large weights. The penalty scales with the square of the magnitude of each weight.
rescale_grad	Rescale gradient to grad = rescale_grad*grad.
clip_gradient	Clip gradient to the range of [-clip_gradient, clip_gradient]. If clip_gradient <= 0, gradient clipping is turned off. grad = max(min(grad, clip_gradient), -clip_gradient).
clip_weights	Clip weights to the range of [-clip_weights, clip_weights]. If clip_weights <= 0, weight clipping is turned off. weights = max(min(weights, clip_weights), -clip_weights).

Returns: new symbol

Symbol mxnet::cpp::rmspropalex_update	(	Symbol	weight,
		Symbol	grad,
		Symbol	n,
		Symbol	g,
		Symbol	delta,
		mx_float	lr,
		mx_float	gamma1 = `0.95`,
		mx_float	gamma2 = `0.9`,
		mx_float	epsilon = `1e-08`,
		mx_float	wd = `0`,
		mx_float	rescale_grad = `1`,
		mx_float	clip_gradient = `-1`,
		mx_float	clip_weights = `-1`
	)

inline

Update function for RMSPropAlex optimizer.

   `RMSPropAlex` is non-centered version of `RMSProp`.

   Define :math:`E[g^2]_t` as the decaying average over past squared gradient and
   :math:`E[g]_t` as the decaying average over past gradient.

   .. math::
   E[g^2]_t = \gamma_1 * E[g^2]_{t-1} + (1 - \gamma_1) * g_t^2\\
   E[g]_t = \gamma_1 * E[g]_{t-1} + (1 - \gamma_1) * g_t\\
   \Delta_t = \gamma_2 * \Delta_{t-1} - \frac{\eta}{\sqrt{E[g^2]_t - E[g]_t^2 + \epsilon}} g_t\\

   The update step is

   .. math::
   \theta_{t+1} = \theta_t + \Delta_t

   The RMSPropAlex code follows the version in
   http://arxiv.org/pdf/1308.0850v5.pdf Eq(38) - Eq(45) by Alex Graves, 2013.

   Graves suggests the momentum term :math:`\gamma_1` to be 0.95, :math:`\gamma_2`
   to be 0.9 and the learning rate :math:`\eta` to be 0.0001.


   Defined in src/operator/optimizer_op.cc:L592

Parameters

weight	Weight
grad	Gradient
n	n
g	g
delta	delta
lr	Learning rate
gamma1	Decay rate.
gamma2	Decay rate.
epsilon	A small constant for numerical stability.
wd	Weight decay augments the objective function with a regularization term that penalizes large weights. The penalty scales with the square of the magnitude of each weight.
rescale_grad	Rescale gradient to grad = rescale_grad*grad.
clip_gradient	Clip gradient to the range of [-clip_gradient, clip_gradient]. If clip_gradient <= 0, gradient clipping is turned off. grad = max(min(grad, clip_gradient), -clip_gradient).
clip_weights	Clip weights to the range of [-clip_weights, clip_weights]. If clip_weights <= 0, weight clipping is turned off. weights = max(min(weights, clip_weights), -clip_weights).

Returns: new symbol

Symbol mxnet::cpp::RNN	(	const std::string &	symbol_name,
		Symbol	data,
		Symbol	parameters,
		Symbol	state,
		Symbol	state_cell,
		uint32_t	state_size,
		uint32_t	num_layers,
		RNNMode	mode,
		bool	bidirectional = `false`,
		mx_float	p = `0`,
		bool	state_outputs = `false`,
		dmlc::optional< int >	projection_size = `dmlc::optional<int>()`,
		dmlc::optional< double >	lstm_state_clip_min = `dmlc::optional<double>()`,
		dmlc::optional< double >	lstm_state_clip_max = `dmlc::optional<double>()`,
		bool	lstm_state_clip_nan = `false`
	)

inline

Applies recurrent layers to input data. Currently, vanilla RNN, LSTM and GRU are implemented, with both multi-layer and bidirectional support.

Vanilla RNN

Applies a single-gate recurrent layer to input X. Two kinds of activation ReLU and Tanh.

With ReLU activation function:

.. math:: h_t = relu(W_{ih} * x_t + b_{ih} + W_{hh} * h_{(t-1)} + b_{hh})

With Tanh activation function:

.. math:: h_t = \tanh(W_{ih} * x_t + b_{ih} + W_{hh} * h_{(t-1)} + b_{hh})

Reference paper: Finding structure in time - Elman, 1988. https://crl.ucsd.edu/~elman/Papers/fsit.pdf

LSTM

Long Short-Term Memory - Hochreiter, 1997.

.. math:: \begin{array}{ll} i_t = \mathrm{sigmoid}(W_{ii} x_t + b_{ii} + W_{hi} h_{(t-1)} + b_{hi}) \\ f_t = \mathrm{sigmoid}(W_{if} x_t + b_{if} + W_{hf} h_{(t-1)} + b_{hf}) \\ g_t = \tanh(W_{ig} x_t + b_{ig} + W_{hg} h_{(t-1)} + b_{hg}) \\ o_t = \mathrm{sigmoid}(W_{io} x_t + b_{io} + W_{ho} h_{(t-1)} + b_{ho}) \\ c_t = f_t * c_{(t-1)} + i_t * g_t \\ h_t = o_t * \tanh(c_t) \end{array}

GRU

Gated Recurrent Unit - Cho et al. 2014. http://arxiv.org/abs/1406.1078

The definition of GRU here is slightly different from the paper but compatible with CUDNN.

.. math:: \begin{array}{ll} r_t = \mathrm{sigmoid}(W_{ir} x_t + b_{ir} + W_{hr} h_{(t-1)} + b_{hr}) \\ z_t = \mathrm{sigmoid}(W_{iz} x_t + b_{iz} + W_{hz} h_{(t-1)} + b_{hz}) \\ n_t = \tanh(W_{in} x_t + b_{in} + r_t * (W_{hn} h_{(t-1)}+ b_{hn})) \\ h_t = (1 - z_t) * n_t + z_t * h_{(t-1)} \\ \end{array}

Parameters

symbol_name	name of the resulting symbol
data	Input data to RNN
parameters	Vector of all RNN trainable parameters concatenated
state	initial hidden state of the RNN
state_cell	initial cell state for LSTM networks (only for LSTM)
state_size	size of the state for each layer
num_layers	number of stacked layers
mode	the type of RNN to compute
bidirectional	whether to use bidirectional recurrent layers
p	drop rate of the dropout on the outputs of each RNN layer, except the last layer.
state_outputs	Whether to have the states as symbol outputs.
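projection_size	size of the projection
lstm_state_clip_min	Minimum clip value of LSTM states. This option must be used together with lstm_state_clip_max.
lstm_state_clip_max	Maximum clip value of LSTM states. This option must be used together with lstm_state_clip_min.
lstm_state_clip_nan	Whether to stop NaN from propagating in state by clipping it to min/max. If clipping range is not specified, this option is ignored.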

Returns: new symbol

Symbol mxnet::cpp::RNN	(	Symbol	data,
		Symbol	parameters,
		Symbol	state,
		Symbol	state_cell,
		uint32_t	state_size,
		uint32_t	num_layers,
		RNNMode	mode,
		bool	bidirectional = `false`,
		mx_float	p = `0`,
		bool	state_outputs = `false`,
		dmlc::optional< int >	projection_size = `dmlc::optional<int>()`,
		dmlc::optional< double >	lstm_state_clip_min = `dmlc::optional<double>()`,
		dmlc::optional< double >	lstm_state_clip_max = `dmlc::optional<double>()`,
		bool	lstm_state_clip_nan = `false`
	)

inline

Applies recurrent layers to input data. Currently, vanilla RNN, LSTM and GRU are implemented, with both multi-layer and bidirectional support.

Vanilla RNN

Applies a single-gate recurrent layer to input X. Two kinds of activation ReLU and Tanh.

With ReLU activation function:

.. math:: h_t = relu(W_{ih} * x_t + b_{ih} + W_{hh} * h_{(t-1)} + b_{hh})

With Tanh activation function:

.. math:: h_t = \tanh(W_{ih} * x_t + b_{ih} + W_{hh} * h_{(t-1)} + b_{hh})

Reference paper: Finding structure in time - Elman, 1988. https://crl.ucsd.edu/~elman/Papers/fsit.pdf

LSTM

Long Short-Term Memory - Hochreiter, 1997.

.. math:: \begin{array}{ll} i_t = \mathrm{sigmoid}(W_{ii} x_t + b_{ii} + W_{hi} h_{(t-1)} + b_{hi}) \\ f_t = \mathrm{sigmoid}(W_{if} x_t + b_{if} + W_{hf} h_{(t-1)} + b_{hf}) \\ g_t = \tanh(W_{ig} x_t + b_{ig} + W_{hg} h_{(t-1)} + b_{hg}) \\ o_t = \mathrm{sigmoid}(W_{io} x_t + b_{io} + W_{ho} h_{(t-1)} + b_{ho}) \\ c_t = f_t * c_{(t-1)} + i_t * g_t \\ h_t = o_t * \tanh(c_t) \end{array}

GRU

Gated Recurrent Unit - Cho et al. 2014. http://arxiv.org/abs/1406.1078

The definition of GRU here is slightly different from the paper but compatible with CUDNN.

.. math:: \begin{array}{ll} r_t = \mathrm{sigmoid}(W_{ir} x_t + b_{ir} + W_{hr} h_{(t-1)} + b_{hr}) \\ z_t = \mathrm{sigmoid}(W_{iz} x_t + b_{iz} + W_{hz} h_{(t-1)} + b_{hz}) \\ n_t = \tanh(W_{in} x_t + b_{in} + r_t * (W_{hn} h_{(t-1)}+ b_{hn})) \\ h_t = (1 - z_t) * n_t + z_t * h_{(t-1)} \\ \end{array}

Parameters

data	Input data to RNN
parameters	Vector of all RNN trainable parameters concatenated
state	initial hidden state of the RNN
state_cell	initial cell state for LSTM networks (only for LSTM)
state_size	size of the state for each layer
num_layers	number of stacked layers
mode	the type of RNN to compute
bidirectional	whether to use bidirectional recurrent layers
p	drop rate of the dropout on the outputs of each RNN layer, except the last layer.
state_outputs	Whether to have the states as symbol outputs.
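projection_size	size of the projection
lstm_state_clip_min	Minimum clip value of LSTM states. This option must be used together with lstm_state_clip_max.
lstm_state_clip_max	Maximum clip value of LSTM states. This option must be used together with lstm_state_clip_min.
lstm_state_clip_nan	Whether to stop NaN from propagating in state by clipping it to min/max. If clipping range is not specified, this option is ignored.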

Returns: new symbol

Symbol mxnet::cpp::ROIPooling	(	const std::string &	symbol_name,
		Symbol	data,
		Symbol	rois,
		Shape	pooled_size,
		mx_float	spatial_scale
	)

inline

Performs region of interest(ROI) pooling on the input array.

   ROI pooling is a variant of a max pooling layer, in which the output size is fixed and
   region of interest is a parameter. Its purpose is to perform max pooling on the inputs
   of non-uniform sizes to obtain fixed-size feature maps. ROI pooling is a neural-net
   layer mostly used in training a `Fast R-CNN` network for object detection.

   This operator takes a 4D feature map as an input array and region proposals as `rois`,
   then it pools over sub-regions of input and produces a fixed-sized output array
   regardless of the ROI size.

   To crop the feature map accordingly, you can resize the bounding box coordinates
   by changing the parameters `rois` and `spatial_scale`.

   The cropped feature maps are pooled by standard max pooling operation to a fixed size output
   indicated by a `pooled_size` parameter. batch_size will change to the number of region
   bounding boxes after `ROIPooling`.

   The size of each region of interest doesn't have to be perfectly divisible by
   the number of pooling sections(`pooled_size`).

   Example::

   x = [[[[  0.,   1.,   2.,   3.,   4.,   5.],
   [  6.,   7.,   8.,   9.,  10.,  11.],
   [ 12.,  13.,  14.,  15.,  16.,  17.],
   [ 18.,  19.,  20.,  21.,  22.,  23.],
   [ 24.,  25.,  26.,  27.,  28.,  29.],
   [ 30.,  31.,  32.,  33.,  34.,  35.],
   [ 36.,  37.,  38.,  39.,  40.,  41.],
   [ 42.,  43.,  44.,  45.,  46.,  47.]]]]

   // region of interest i.e. bounding box coordinates.
   y = [[0,0,0,4,4]]

   // returns array of shape (2,2) according to the given roi with max pooling.
   ROIPooling(x, y, (2,2), 1.0) = [[[[ 14.,  16.],
   [ 26.,  28.]]]]

   // region of interest is changed due to the change in `spatial_scale` parameter.
   ROIPooling(x, y, (2,2), 0.7) = [[[[  7.,   9.],
   [ 19.,  21.]]]]



   Defined in src/operator/roi_pooling.cc:L295

Parameters

symbol_name	name of the resulting symbol
data	The input array to the pooling operator, a 4D Feature maps
rois	Bounding box coordinates, a 2D array of [[batch_index, x1, y1, x2, y2]], where (x1, y1) and (x2, y2) are top left and bottom right corners of designated region of interest. `batch_index` indicates the index of corresponding image in the input array.
pooled_size	ROI pooling output shape (h,w)
spatial_scale	Ratio of input feature map height (or w) to raw image height (or w). Equals the reciprocal of total stride in convolutional layers.

Returns: new symbol

Symbol mxnet::cpp::ROIPooling	(	Symbol	data,
		Symbol	rois,
		Shape	pooled_size,
		mx_float	spatial_scale
	)

inline

Performs region of interest(ROI) pooling on the input array.

   ROI pooling is a variant of a max pooling layer, in which the output size is fixed and
   region of interest is a parameter. Its purpose is to perform max pooling on the inputs
   of non-uniform sizes to obtain fixed-size feature maps. ROI pooling is a neural-net
   layer mostly used in training a `Fast R-CNN` network for object detection.

   This operator takes a 4D feature map as an input array and region proposals as `rois`,
   then it pools over sub-regions of input and produces a fixed-sized output array
   regardless of the ROI size.

   To crop the feature map accordingly, you can resize the bounding box coordinates
   by changing the parameters `rois` and `spatial_scale`.

   The cropped feature maps are pooled by standard max pooling operation to a fixed size output
   indicated by a `pooled_size` parameter. batch_size will change to the number of region
   bounding boxes after `ROIPooling`.

   The size of each region of interest doesn't have to be perfectly divisible by
   the number of pooling sections(`pooled_size`).

   Example::

   x = [[[[  0.,   1.,   2.,   3.,   4.,   5.],
   [  6.,   7.,   8.,   9.,  10.,  11.],
   [ 12.,  13.,  14.,  15.,  16.,  17.],
   [ 18.,  19.,  20.,  21.,  22.,  23.],
   [ 24.,  25.,  26.,  27.,  28.,  29.],
   [ 30.,  31.,  32.,  33.,  34.,  35.],
   [ 36.,  37.,  38.,  39.,  40.,  41.],
   [ 42.,  43.,  44.,  45.,  46.,  47.]]]]

   // region of interest i.e. bounding box coordinates.
   y = [[0,0,0,4,4]]

   // returns array of shape (2,2) according to the given roi with max pooling.
   ROIPooling(x, y, (2,2), 1.0) = [[[[ 14.,  16.],
   [ 26.,  28.]]]]

   // region of interest is changed due to the change in `spatial_scale` parameter.
   ROIPooling(x, y, (2,2), 0.7) = [[[[  7.,   9.],
   [ 19.,  21.]]]]



   Defined in src/operator/roi_pooling.cc:L295

Parameters

data	The input array to the pooling operator, a 4D Feature maps
rois	Bounding box coordinates, a 2D array of [[batch_index, x1, y1, x2, y2]], where (x1, y1) and (x2, y2) are top left and bottom right corners of designated region of interest. `batch_index` indicates the index of corresponding image in the input array.
pooled_size	ROI pooling output shape (h,w)
spatial_scale	Ratio of input feature map height (or w) to raw image height (or w). Equals the reciprocal of total stride in convolutional layers.

Returns: new symbol

Symbol mxnet::cpp::round	(	const std::string &	symbol_name,
		Symbol	data
	)

inline

Returns element-wise rounded value to the nearest integer of the input.

   Example::

   round([-1.5, 1.5, -1.9, 1.9, 2.1]) = [-2.,  2., -2.,  2.,  2.]

   The storage type of ``round`` output depends upon the input storage type:

   - round(default) = default
   - round(row_sparse) = row_sparse
   - round(csr) = csr



   Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L706

Parameters

symbol_name	name of the resulting symbol
data	The input array.

Returns: new symbol

Symbol mxnet::cpp::round ( Symbol data )

inline

Returns element-wise rounded value to the nearest integer of the input.

   Example::

   round([-1.5, 1.5, -1.9, 1.9, 2.1]) = [-2.,  2., -2.,  2.,  2.]

   The storage type of ``round`` output depends upon the input storage type:

   - round(default) = default
   - round(row_sparse) = row_sparse
   - round(csr) = csr



   Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L706

Parameters

data	The input array.

Returns: new symbol

Symbol mxnet::cpp::rsqrt	(	const std::string &	symbol_name,
		Symbol	data
	)

inline

Returns element-wise inverse square-root value of the input.

   .. math::
   rsqrt(x) = 1/\sqrt{x}

   Example::

   rsqrt([4,9,16]) = [0.5, 0.33333334, 0.25]

   The storage type of ``rsqrt`` output is always dense



   Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L866

Parameters

symbol_name	name of the resulting symbol
data	The input array.

Returns: new symbol

Symbol mxnet::cpp::rsqrt ( Symbol data )

inline

Returns element-wise inverse square-root value of the input.

   .. math::
   rsqrt(x) = 1/\sqrt{x}

   Example::

   rsqrt([4,9,16]) = [0.5, 0.33333334, 0.25]

   The storage type of ``rsqrt`` output is always dense



   Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L866

Parameters

data	The input array.

Returns: new symbol

Symbol mxnet::cpp::scatter_nd	(	const std::string &	symbol_name,
		Symbol	data,
		Symbol	indices,
		Shape	shape
	)

inline

Scatters data into a new tensor according to indices.

   Given `data` with shape `(Y_0, ..., Y_{K-1}, X_M, ..., X_{N-1})` and indices with shape
   `(M, Y_0, ..., Y_{K-1})`, the output will have shape `(X_0, X_1, ..., X_{N-1})`,
   where `M <= N`. If `M == N`, data shape should simply be `(Y_0, ..., Y_{K-1})`.

   The elements in output are defined as follows::

   output[indices[0, y_0, ..., y_{K-1}],
   ...,
   indices[M-1, y_0, ..., y_{K-1}],
   x_M, ..., x_{N-1}] = data[y_0, ..., y_{K-1}, x_M, ..., x_{N-1}]

   all other entries in output are 0.

   .. warning::

   If the indices have duplicates, the result will be non-deterministic and
   the gradient of `scatter_nd` will not be correct!!


   Examples::

   data = [2, 3, 0]
   indices = [[1, 1, 0], [0, 1, 0]]
   shape = (2, 2)
   scatter_nd(data, indices, shape) = [[0, 0], [2, 3]]

   data = [[[1, 2], [3, 4]], [[5, 6], [7, 8]]]
   indices = [[0, 1], [1, 1]]
   shape = (2, 2, 2, 2)
   scatter_nd(data, indices, shape) = [[[[0, 0],
   [0, 0]],

   [[1, 2],
   [3, 4]]],

   [[[0, 0],
   [0, 0]],

   [[5, 6],
   [7, 8]]]]

Parameters

symbol_name	name of the resulting symbol
data	data
indices	indices
shape	Shape of output.

Returns: new symbol

Symbol mxnet::cpp::scatter_nd	(	Symbol	data,
		Symbol	indices,
		Shape	shape
	)

inline

Scatters data into a new tensor according to indices.

   Given `data` with shape `(Y_0, ..., Y_{K-1}, X_M, ..., X_{N-1})` and indices with shape
   `(M, Y_0, ..., Y_{K-1})`, the output will have shape `(X_0, X_1, ..., X_{N-1})`,
   where `M <= N`. If `M == N`, data shape should simply be `(Y_0, ..., Y_{K-1})`.

   The elements in output are defined as follows::

   output[indices[0, y_0, ..., y_{K-1}],
   ...,
   indices[M-1, y_0, ..., y_{K-1}],
   x_M, ..., x_{N-1}] = data[y_0, ..., y_{K-1}, x_M, ..., x_{N-1}]

   all other entries in output are 0.

   .. warning::

   If the indices have duplicates, the result will be non-deterministic and
   the gradient of `scatter_nd` will not be correct!!


   Examples::

   data = [2, 3, 0]
   indices = [[1, 1, 0], [0, 1, 0]]
   shape = (2, 2)
   scatter_nd(data, indices, shape) = [[0, 0], [2, 3]]

   data = [[[1, 2], [3, 4]], [[5, 6], [7, 8]]]
   indices = [[0, 1], [1, 1]]
   shape = (2, 2, 2, 2)
   scatter_nd(data, indices, shape) = [[[[0, 0],
   [0, 0]],

   [[1, 2],
   [3, 4]]],

   [[[0, 0],
   [0, 0]],

   [[5, 6],
   [7, 8]]]]

Parameters

data	data
indices	indices
shape	Shape of output.

Returns: new symbol

Symbol mxnet::cpp::SequenceLast	(	const std::string &	symbol_name,
		Symbol	data,
		Symbol	sequence_length,
		bool	use_sequence_length = `false`,
		int	axis = `0`
	)

inline

Takes the last element of a sequence.

   This function takes an n-dimensional input array of the form
   [max_sequence_length, batch_size, other_feature_dims] and returns a (n-1)-dimensional array
   of the form [batch_size, other_feature_dims].

   Parameter `sequence_length` is used to handle variable-length sequences. `sequence_length` should be
   an input array of positive ints of dimension [batch_size]. To use this parameter,
   set `use_sequence_length` to `True`, otherwise each example in the batch is assumed
   to have the max sequence length.

   .. note:: Alternatively, you can also use `take` operator.

   Example::

   x = [[[  1.,   2.,   3.],
   [  4.,   5.,   6.],
   [  7.,   8.,   9.]],

   [[ 10.,   11.,   12.],
   [ 13.,   14.,   15.],
   [ 16.,   17.,   18.]],

   [[  19.,   20.,   21.],
   [  22.,   23.,   24.],
   [  25.,   26.,   27.]]]

   // returns last sequence when sequence_length parameter is not used
   SequenceLast(x) = [[  19.,   20.,   21.],
   [  22.,   23.,   24.],
   [  25.,   26.,   27.]]

   // sequence_length is used
   SequenceLast(x, sequence_length=[1,1,1], use_sequence_length=True) =
   [[  1.,   2.,   3.],
   [  4.,   5.,   6.],
   [  7.,   8.,   9.]]

   // sequence_length is used
   SequenceLast(x, sequence_length=[1,2,3], use_sequence_length=True) =
   [[  1.,    2.,   3.],
   [  13.,  14.,  15.],
   [  25.,  26.,  27.]]



   Defined in src/operator/sequence_last.cc:L92

Parameters

symbol_name	name of the resulting symbol
data	n-dimensional input array of the form [max_sequence_length, batch_size, other_feature_dims] where n>2
sequence_length	vector of sequence lengths of the form [batch_size]
use_sequence_length	If set to true, this layer takes in an extra input parameter `sequence_length` to specify variable length sequence
axis	The sequence axis. Only values of 0 and 1 are currently supported.

Returns: new symbol

Symbol mxnet::cpp::SequenceLast	(	Symbol	data,
		Symbol	sequence_length,
		bool	use_sequence_length = `false`,
		int	axis = `0`
	)

inline

Takes the last element of a sequence.

   This function takes an n-dimensional input array of the form
   [max_sequence_length, batch_size, other_feature_dims] and returns a (n-1)-dimensional array
   of the form [batch_size, other_feature_dims].

   Parameter `sequence_length` is used to handle variable-length sequences. `sequence_length` should be
   an input array of positive ints of dimension [batch_size]. To use this parameter,
   set `use_sequence_length` to `True`, otherwise each example in the batch is assumed
   to have the max sequence length.

   .. note:: Alternatively, you can also use `take` operator.

   Example::

   x = [[[  1.,   2.,   3.],
   [  4.,   5.,   6.],
   [  7.,   8.,   9.]],

   [[ 10.,   11.,   12.],
   [ 13.,   14.,   15.],
   [ 16.,   17.,   18.]],

   [[  19.,   20.,   21.],
   [  22.,   23.,   24.],
   [  25.,   26.,   27.]]]

   // returns last sequence when sequence_length parameter is not used
   SequenceLast(x) = [[  19.,   20.,   21.],
   [  22.,   23.,   24.],
   [  25.,   26.,   27.]]

   // sequence_length is used
   SequenceLast(x, sequence_length=[1,1,1], use_sequence_length=True) =
   [[  1.,   2.,   3.],
   [  4.,   5.,   6.],
   [  7.,   8.,   9.]]

   // sequence_length is used
   SequenceLast(x, sequence_length=[1,2,3], use_sequence_length=True) =
   [[  1.,    2.,   3.],
   [  13.,  14.,  15.],
   [  25.,  26.,  27.]]



   Defined in src/operator/sequence_last.cc:L92

Parameters

data	n-dimensional input array of the form [max_sequence_length, batch_size, other_feature_dims] where n>2
sequence_length	vector of sequence lengths of the form [batch_size]
use_sequence_length	If set to true, this layer takes in an extra input parameter `sequence_length` to specify variable length sequence
axis	The sequence axis. Only values of 0 and 1 are currently supported.

Returns: new symbol

Symbol mxnet::cpp::SequenceMask	(	const std::string &	symbol_name,
		Symbol	data,
		Symbol	sequence_length,
		bool	use_sequence_length = `false`,
		mx_float	value = `0`,
		int	axis = `0`
	)

inline

Sets all elements outside the sequence to a constant value.

   This function takes an n-dimensional input array of the form
   [max_sequence_length, batch_size, other_feature_dims] and returns an array of the same shape.

   Parameter `sequence_length` is used to handle variable-length sequences. `sequence_length`
   should be an input array of positive ints of dimension [batch_size].
   To use this parameter, set `use_sequence_length` to `True`,
   otherwise each example in the batch is assumed to have the max sequence length and
   this operator works as the `identity` operator.

   Example::

   x = [[[  1.,   2.,   3.],
   [  4.,   5.,   6.]],

   [[  7.,   8.,   9.],
   [ 10.,  11.,  12.]],

   [[ 13.,  14.,   15.],
   [ 16.,  17.,   18.]]]

   // Batch 1
   B1 = [[  1.,   2.,   3.],
   [  7.,   8.,   9.],
   [ 13.,  14.,  15.]]

   // Batch 2
   B2 = [[  4.,   5.,   6.],
   [ 10.,  11.,  12.],
   [ 16.,  17.,  18.]]

   // works as identity operator when sequence_length parameter is not used
   SequenceMask(x) = [[[  1.,   2.,   3.],
   [  4.,   5.,   6.]],

   [[  7.,   8.,   9.],
   [ 10.,  11.,  12.]],

   [[ 13.,  14.,   15.],
   [ 16.,  17.,   18.]]]

   // sequence_length [1,1] means 1 of each batch will be kept
   // and other rows are masked with default mask value = 0
   SequenceMask(x, sequence_length=[1,1], use_sequence_length=True) =
   [[[  1.,   2.,   3.],
   [  4.,   5.,   6.]],

   [[  0.,   0.,   0.],
   [  0.,   0.,   0.]],

   [[  0.,   0.,   0.],
   [  0.,   0.,   0.]]]

   // sequence_length [2,3] means 2 of batch B1 and 3 of batch B2 will be kept
   // and other rows are masked with value = 1
   SequenceMask(x, sequence_length=[2,3], use_sequence_length=True, value=1) =
   [[[  1.,   2.,   3.],
   [  4.,   5.,   6.]],

   [[  7.,   8.,   9.],
   [  10.,  11.,  12.]],

   [[   1.,   1.,   1.],
   [  16.,  17.,  18.]]]



   Defined in src/operator/sequence_mask.cc:L114

Parameters

symbol_name	name of the resulting symbol
data	n-dimensional input array of the form [max_sequence_length, batch_size, other_feature_dims] where n>2
sequence_length	vector of sequence lengths of the form [batch_size]
use_sequence_length	If set to true, this layer takes in an extra input parameter `sequence_length` to specify variable length sequence
value	The value to be used as a mask.
axis	The sequence axis. Only values of 0 and 1 are currently supported.

Returns: new symbol

Symbol mxnet::cpp::SequenceMask	(	Symbol	data,
		Symbol	sequence_length,
		bool	use_sequence_length = `false`,
		mx_float	value = `0`,
		int	axis = `0`
	)

inline

Sets all elements outside the sequence to a constant value.

   This function takes an n-dimensional input array of the form
   [max_sequence_length, batch_size, other_feature_dims] and returns an array of the same shape.

   Parameter `sequence_length` is used to handle variable-length sequences. `sequence_length`
   should be an input array of positive ints of dimension [batch_size].
   To use this parameter, set `use_sequence_length` to `True`,
   otherwise each example in the batch is assumed to have the max sequence length and
   this operator works as the `identity` operator.

   Example::

   x = [[[  1.,   2.,   3.],
   [  4.,   5.,   6.]],

   [[  7.,   8.,   9.],
   [ 10.,  11.,  12.]],

   [[ 13.,  14.,   15.],
   [ 16.,  17.,   18.]]]

   // Batch 1
   B1 = [[  1.,   2.,   3.],
   [  7.,   8.,   9.],
   [ 13.,  14.,  15.]]

   // Batch 2
   B2 = [[  4.,   5.,   6.],
   [ 10.,  11.,  12.],
   [ 16.,  17.,  18.]]

   // works as identity operator when sequence_length parameter is not used
   SequenceMask(x) = [[[  1.,   2.,   3.],
   [  4.,   5.,   6.]],

   [[  7.,   8.,   9.],
   [ 10.,  11.,  12.]],

   [[ 13.,  14.,   15.],
   [ 16.,  17.,   18.]]]

   // sequence_length [1,1] means 1 of each batch will be kept
   // and other rows are masked with default mask value = 0
   SequenceMask(x, sequence_length=[1,1], use_sequence_length=True) =
   [[[  1.,   2.,   3.],
   [  4.,   5.,   6.]],

   [[  0.,   0.,   0.],
   [  0.,   0.,   0.]],

   [[  0.,   0.,   0.],
   [  0.,   0.,   0.]]]

   // sequence_length [2,3] means 2 of batch B1 and 3 of batch B2 will be kept
   // and other rows are masked with value = 1
   SequenceMask(x, sequence_length=[2,3], use_sequence_length=True, value=1) =
   [[[  1.,   2.,   3.],
   [  4.,   5.,   6.]],

   [[  7.,   8.,   9.],
   [  10.,  11.,  12.]],

   [[   1.,   1.,   1.],
   [  16.,  17.,  18.]]]



   Defined in src/operator/sequence_mask.cc:L114

Parameters

data	n-dimensional input array of the form [max_sequence_length, batch_size, other_feature_dims] where n>2
sequence_length	vector of sequence lengths of the form [batch_size]
use_sequence_length	If set to true, this layer takes in an extra input parameter `sequence_length` to specify variable length sequence
value	The value to be used as a mask.
axis	The sequence axis. Only values of 0 and 1 are currently supported.

Returns: new symbol

Symbol mxnet::cpp::SequenceReverse	(	const std::string &	symbol_name,
		Symbol	data,
		Symbol	sequence_length,
		bool	use_sequence_length = `false`,
		int	axis = `0`
	)

inline

Reverses the elements of each sequence.

   This function takes an n-dimensional input array of the form [max_sequence_length, batch_size, other_feature_dims]
   and returns an array of the same shape.

   Parameter `sequence_length` is used to handle variable-length sequences.
   `sequence_length` should be an input array of positive ints of dimension [batch_size].
   To use this parameter, set `use_sequence_length` to `True`,
   otherwise each example in the batch is assumed to have the max sequence length.

   Example::

   x = [[[  1.,   2.,   3.],
   [  4.,   5.,   6.]],

   [[  7.,   8.,   9.],
   [ 10.,  11.,  12.]],

   [[ 13.,  14.,   15.],
   [ 16.,  17.,   18.]]]

   // Batch 1
   B1 = [[  1.,   2.,   3.],
   [  7.,   8.,   9.],
   [ 13.,  14.,  15.]]

   // Batch 2
   B2 = [[  4.,   5.,   6.],
   [ 10.,  11.,  12.],
   [ 16.,  17.,  18.]]

   // returns reverse sequence when sequence_length parameter is not used
   SequenceReverse(x) = [[[ 13.,  14.,   15.],
   [ 16.,  17.,   18.]],

   [[  7.,   8.,   9.],
   [ 10.,  11.,  12.]],

   [[  1.,   2.,   3.],
   [  4.,   5.,   6.]]]

   // sequence_length [2,2] means 2 rows of
   // both batch B1 and B2 will be reversed.
   SequenceReverse(x, sequence_length=[2,2], use_sequence_length=True) =
   [[[  7.,   8.,   9.],
   [ 10.,  11.,  12.]],

   [[  1.,   2.,   3.],
   [  4.,   5.,   6.]],

   [[ 13.,  14.,   15.],
   [ 16.,  17.,   18.]]]

   // sequence_length [2,3] means 2 rows of batch B1 and 3 rows of batch B2
   // will be reversed.
   SequenceReverse(x, sequence_length=[2,3], use_sequence_length=True) =
   [[[  7.,   8.,   9.],
   [ 16.,  17.,  18.]],

   [[  1.,   2.,   3.],
   [ 10.,  11.,  12.]],

   [[ 13.,  14.,   15.],
   [  4.,   5.,   6.]]]



   Defined in src/operator/sequence_reverse.cc:L113

Parameters

symbol_name	name of the resulting symbol
data	n-dimensional input array of the form [max_sequence_length, batch_size, other dims] where n>2
sequence_length	vector of sequence lengths of the form [batch_size]
use_sequence_length	If set to true, this layer takes in an extra input parameter `sequence_length` to specify variable length sequence
axis	The sequence axis. Only 0 is currently supported.

Returns: new symbol

Symbol mxnet::cpp::SequenceReverse	(	Symbol	data,
		Symbol	sequence_length,
		bool	use_sequence_length = `false`,
		int	axis = `0`
	)

inline

Reverses the elements of each sequence.

   This function takes an n-dimensional input array of the form [max_sequence_length, batch_size, other_feature_dims]
   and returns an array of the same shape.

   Parameter `sequence_length` is used to handle variable-length sequences.
   `sequence_length` should be an input array of positive ints of dimension [batch_size].
   To use this parameter, set `use_sequence_length` to `True`,
   otherwise each example in the batch is assumed to have the max sequence length.

   Example::

   x = [[[  1.,   2.,   3.],
   [  4.,   5.,   6.]],

   [[  7.,   8.,   9.],
   [ 10.,  11.,  12.]],

   [[ 13.,  14.,   15.],
   [ 16.,  17.,   18.]]]

   // Batch 1
   B1 = [[  1.,   2.,   3.],
   [  7.,   8.,   9.],
   [ 13.,  14.,  15.]]

   // Batch 2
   B2 = [[  4.,   5.,   6.],
   [ 10.,  11.,  12.],
   [ 16.,  17.,  18.]]

   // returns reverse sequence when sequence_length parameter is not used
   SequenceReverse(x) = [[[ 13.,  14.,   15.],
   [ 16.,  17.,   18.]],

   [[  7.,   8.,   9.],
   [ 10.,  11.,  12.]],

   [[  1.,   2.,   3.],
   [  4.,   5.,   6.]]]

   // sequence_length [2,2] means 2 rows of
   // both batch B1 and B2 will be reversed.
   SequenceReverse(x, sequence_length=[2,2], use_sequence_length=True) =
   [[[  7.,   8.,   9.],
   [ 10.,  11.,  12.]],

   [[  1.,   2.,   3.],
   [  4.,   5.,   6.]],

   [[ 13.,  14.,   15.],
   [ 16.,  17.,   18.]]]

   // sequence_length [2,3] means 2 rows of batch B1 and 3 rows of batch B2
   // will be reversed.
   SequenceReverse(x, sequence_length=[2,3], use_sequence_length=True) =
   [[[  7.,   8.,   9.],
   [ 16.,  17.,  18.]],

   [[  1.,   2.,   3.],
   [ 10.,  11.,  12.]],

   [[ 13.,  14.,   15.],
   [  4.,   5.,   6.]]]



   Defined in src/operator/sequence_reverse.cc:L113

Parameters

data	n-dimensional input array of the form [max_sequence_length, batch_size, other dims] where n>2
sequence_length	vector of sequence lengths of the form [batch_size]
use_sequence_length	If set to true, this layer takes in an extra input parameter `sequence_length` to specify variable length sequence
axis	The sequence axis. Only 0 is currently supported.

Returns: new symbol

Symbol mxnet::cpp::sgd_mom_update	(	const std::string &	symbol_name,
		Symbol	weight,
		Symbol	grad,
		Symbol	mom,
		mx_float	lr,
		mx_float	momentum = `0`,
		mx_float	wd = `0`,
		mx_float	rescale_grad = `1`,
		mx_float	clip_gradient = `-1`,
		bool	lazy_update = `true`
	)

inline

Momentum update function for Stochastic Gradient Descent (SGD) optimizer.

   Momentum update has better convergence rates on neural networks. Mathematically it looks
   like below:

   .. math::

   v_1 = -\alpha * \nabla J(W_0)\\
   v_t = \gamma v_{t-1} - \alpha * \nabla J(W_{t-1})\\
   W_t = W_{t-1} + v_t

   It updates the weights using::

   v = momentum * v - learning_rate * gradient
   weight += v

   Where the parameter ``momentum`` is the decay rate of momentum estimates at each epoch.

   However, if grad's storage type is ``row_sparse``, ``lazy_update`` is True and weight's storage
   type is the same as momentum's storage type,
   only the row slices whose indices appear in grad.indices are updated (for both weight and momentum)::

   for row in gradient.indices:
   v[row] = momentum[row] * v[row] - learning_rate * gradient[row]
   weight[row] += v[row]



   Defined in src/operator/optimizer_op.cc:L372

Parameters

symbol_name	name of the resulting symbol
weight	Weight
grad	Gradient
mom	Momentum
lr	Learning rate
momentum	The decay rate of momentum estimates at each epoch.
wd	Weight decay augments the objective function with a regularization term that penalizes large weights. The penalty scales with the square of the magnitude of each weight.
rescale_grad	Rescale gradient to grad = rescale_grad*grad.
clip_gradient	Clip gradient to the range of [-clip_gradient, clip_gradient]. If clip_gradient <= 0, gradient clipping is turned off. grad = max(min(grad, clip_gradient), -clip_gradient).
lazy_update	If true, lazy updates are applied if gradient's stype is row_sparse and both weight and momentum have the same stype

Returns: new symbol

Symbol mxnet::cpp::sgd_mom_update	(	Symbol	weight,
		Symbol	grad,
		Symbol	mom,
		mx_float	lr,
		mx_float	momentum = `0`,
		mx_float	wd = `0`,
		mx_float	rescale_grad = `1`,
		mx_float	clip_gradient = `-1`,
		bool	lazy_update = `true`
	)

inline

Momentum update function for Stochastic Gradient Descent (SGD) optimizer.

   Momentum update has better convergence rates on neural networks. Mathematically it looks
   like below:

   .. math::

   v_1 = -\alpha * \nabla J(W_0)\\
   v_t = \gamma v_{t-1} - \alpha * \nabla J(W_{t-1})\\
   W_t = W_{t-1} + v_t

   It updates the weights using::

   v = momentum * v - learning_rate * gradient
   weight += v

   Where the parameter ``momentum`` is the decay rate of momentum estimates at each epoch.

   However, if grad's storage type is ``row_sparse``, ``lazy_update`` is True and weight's storage
   type is the same as momentum's storage type,
   only the row slices whose indices appear in grad.indices are updated (for both weight and momentum)::

   for row in gradient.indices:
   v[row] = momentum[row] * v[row] - learning_rate * gradient[row]
   weight[row] += v[row]



   Defined in src/operator/optimizer_op.cc:L372

Parameters

weight	Weight
grad	Gradient
mom	Momentum
lr	Learning rate
momentum	The decay rate of momentum estimates at each epoch.
wd	Weight decay augments the objective function with a regularization term that penalizes large weights. The penalty scales with the square of the magnitude of each weight.
rescale_grad	Rescale gradient to grad = rescale_grad*grad.
clip_gradient	Clip gradient to the range of [-clip_gradient, clip_gradient]. If clip_gradient <= 0, gradient clipping is turned off. grad = max(min(grad, clip_gradient), -clip_gradient).
lazy_update	If true, lazy updates are applied if gradient's stype is row_sparse and both weight and momentum have the same stype

Returns: new symbol

Symbol mxnet::cpp::sgd_update	(	const std::string &	symbol_name,
		Symbol	weight,
		Symbol	grad,
		mx_float	lr,
		mx_float	wd = `0`,
		mx_float	rescale_grad = `1`,
		mx_float	clip_gradient = `-1`,
		bool	lazy_update = `true`
	)

inline

Update function for Stochastic Gradient Descent (SGD) optimizer.

   It updates the weights using::

   weight = weight - learning_rate * (gradient + wd * weight)

   However, if gradient is of ``row_sparse`` storage type and ``lazy_update`` is True,
   only the row slices whose indices appear in grad.indices are updated::

   for row in gradient.indices:
   weight[row] = weight[row] - learning_rate * (gradient[row] + wd * weight[row])



   Defined in src/operator/optimizer_op.cc:L331

Parameters

symbol_name	name of the resulting symbol
weight	Weight
grad	Gradient
lr	Learning rate
wd	Weight decay augments the objective function with a regularization term that penalizes large weights. The penalty scales with the square of the magnitude of each weight.
rescale_grad	Rescale gradient to grad = rescale_grad*grad.
clip_gradient	Clip gradient to the range of [-clip_gradient, clip_gradient]. If clip_gradient <= 0, gradient clipping is turned off. grad = max(min(grad, clip_gradient), -clip_gradient).
lazy_update	If true, lazy updates are applied if gradient's stype is row_sparse.

Returns: new symbol

Symbol mxnet::cpp::sgd_update	(	Symbol	weight,
		Symbol	grad,
		mx_float	lr,
		mx_float	wd = `0`,
		mx_float	rescale_grad = `1`,
		mx_float	clip_gradient = `-1`,
		bool	lazy_update = `true`
	)

inline

Update function for Stochastic Gradient Descent (SGD) optimizer.

   It updates the weights using::

   weight = weight - learning_rate * (gradient + wd * weight)

   However, if gradient is of ``row_sparse`` storage type and ``lazy_update`` is True,
   only the row slices whose indices appear in grad.indices are updated::

   for row in gradient.indices:
   weight[row] = weight[row] - learning_rate * (gradient[row] + wd * weight[row])



   Defined in src/operator/optimizer_op.cc:L331

Parameters

weight	Weight
grad	Gradient
lr	Learning rate
wd	Weight decay augments the objective function with a regularization term that penalizes large weights. The penalty scales with the square of the magnitude of each weight.
rescale_grad	Rescale gradient to grad = rescale_grad*grad.
clip_gradient	Clip gradient to the range of [-clip_gradient, clip_gradient]. If clip_gradient <= 0, gradient clipping is turned off. grad = max(min(grad, clip_gradient), -clip_gradient).
lazy_update	If true, lazy updates are applied if gradient's stype is row_sparse.

Returns: new symbol

Symbol mxnet::cpp::shape_array	(	const std::string &	symbol_name,
		Symbol	data,
		dmlc::optional< int >	lhs_begin = `dmlc::optional<int>()`,
		dmlc::optional< int >	lhs_end = `dmlc::optional<int>()`,
		dmlc::optional< int >	rhs_begin = `dmlc::optional<int>()`,
		dmlc::optional< int >	rhs_end = `dmlc::optional<int>()`
	)

inline

Returns a 1D int64 array containing the shape of data.

   Example::

   shape_array([[1,2,3,4], [5,6,7,8]]) = [2,4]



   Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L504

Parameters

symbol_name	name of the resulting symbol
data	Input Array.
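lhs_begin	Defaults to 0. The beginning index along which the lhs dimensions are to be reshaped. Supports negative indices.
lhs_end	Defaults to None. The ending index along which the lhs dimensions are to be used for reshaping. Supports negative indices.
rhs_begin	Defaults to 0. The beginning index along which the rhs dimensions are to be used for reshaping. Supports negative indices.
rhs_end	Defaults to None. The ending index along which the rhs dimensions are to be used for reshaping. Supports negative indices.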

Returns: new symbol

Symbol mxnet::cpp::shape_array	(	Symbol	data,
		dmlc::optional< int >	lhs_begin = `dmlc::optional<int>()`,
		dmlc::optional< int >	lhs_end = `dmlc::optional<int>()`,
		dmlc::optional< int >	rhs_begin = `dmlc::optional<int>()`,
		dmlc::optional< int >	rhs_end = `dmlc::optional<int>()`
	)

inline

Returns a 1D int64 array containing the shape of data.

   Example::

   shape_array([[1,2,3,4], [5,6,7,8]]) = [2,4]



   Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L504

Parameters

data	Input Array.
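lhs_begin	Defaults to 0. The beginning index along which the lhs dimensions are to be reshaped. Supports negative indices.
lhs_end	Defaults to None. The ending index along which the lhs dimensions are to be used for reshaping. Supports negative indices.
rhs_begin	Defaults to 0. The beginning index along which the rhs dimensions are to be used for reshaping. Supports negative indices.
rhs_end	Defaults to None. The ending index along which the rhs dimensions are to be used for reshaping. Supports negative indices.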

Returns: new symbol

Symbol mxnet::cpp::sigmoid	(	const std::string &	symbol_name,
		Symbol	data
	)

inline

Computes sigmoid of x element-wise.

   .. math::
   y = 1 / (1 + exp(-x))

   The storage type of ``sigmoid`` output is always dense



   Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L101

Parameters

symbol_name	name of the resulting symbol
data	The input array.

Returns: new symbol

Symbol mxnet::cpp::sigmoid ( Symbol data )

inline

Computes sigmoid of x element-wise.

   .. math::
   y = 1 / (1 + exp(-x))

   The storage type of ``sigmoid`` output is always dense



   Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L101

Parameters

data	The input array.

Returns: new symbol

Symbol mxnet::cpp::sign	(	const std::string &	symbol_name,
		Symbol	data
	)

inline

Returns element-wise sign of the input.

   Example::

   sign([-2, 0, 3]) = [-1, 0, 1]

   The storage type of ``sign`` output depends upon the input storage type:

   - sign(default) = default
   - sign(row_sparse) = row_sparse
   - sign(csr) = csr



   Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L687

Parameters

symbol_name	name of the resulting symbol
data	The input array.

Returns: new symbol

Symbol mxnet::cpp::sign ( Symbol data )

inline

Returns element-wise sign of the input.

   Example::

   sign([-2, 0, 3]) = [-1, 0, 1]

   The storage type of ``sign`` output depends upon the input storage type:

   - sign(default) = default
   - sign(row_sparse) = row_sparse
   - sign(csr) = csr



   Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L687

Parameters

data	The input array.

Returns: new symbol

Symbol mxnet::cpp::signsgd_update	(	const std::string &	symbol_name,
		Symbol	weight,
		Symbol	grad,
		mx_float	lr,
		mx_float	wd = `0`,
		mx_float	rescale_grad = `1`,
		mx_float	clip_gradient = `-1`
	)

inline

Update function for SignSGD optimizer.

   .. math::

   g_t = \nabla J(W_{t-1})\\
   W_t = W_{t-1} - \eta_t \text{sign}(g_t)

   It updates the weights using::

   weight = weight - learning_rate * sign(gradient)

   .. note::
   - sparse ndarray not supported for this optimizer yet.


   Defined in src/operator/optimizer_op.cc:L57

Parameters

symbol_name	name of the resulting symbol
weight	Weight
grad	Gradient
lr	Learning rate
wd	Weight decay augments the objective function with a regularization term that penalizes large weights. The penalty scales with the square of the magnitude of each weight.
rescale_grad	Rescale gradient to grad = rescale_grad*grad.
clip_gradient	Clip gradient to the range of [-clip_gradient, clip_gradient]. If clip_gradient <= 0, gradient clipping is turned off. grad = max(min(grad, clip_gradient), -clip_gradient).

Returns: new symbol

Symbol mxnet::cpp::signsgd_update	(	Symbol	weight,
		Symbol	grad,
		mx_float	lr,
		mx_float	wd = `0`,
		mx_float	rescale_grad = `1`,
		mx_float	clip_gradient = `-1`
	)

inline

Update function for SignSGD optimizer.

   .. math::

   g_t = \nabla J(W_{t-1})\\
   W_t = W_{t-1} - \eta_t \text{sign}(g_t)

   It updates the weights using::

   weight = weight - learning_rate * sign(gradient)

   .. note::
   - sparse ndarray not supported for this optimizer yet.


   Defined in src/operator/optimizer_op.cc:L57

Parameters

weight	Weight
grad	Gradient
lr	Learning rate
wd	Weight decay augments the objective function with a regularization term that penalizes large weights. The penalty scales with the square of the magnitude of each weight.
rescale_grad	Rescale gradient to grad = rescale_grad*grad.
clip_gradient	Clip gradient to the range of [-clip_gradient, clip_gradient]. If clip_gradient <= 0, gradient clipping is turned off. grad = max(min(grad, clip_gradient), -clip_gradient).

Returns: new symbol

Symbol mxnet::cpp::signum_update	(	const std::string &	symbol_name,
		Symbol	weight,
		Symbol	grad,
		Symbol	mom,
		mx_float	lr,
		mx_float	momentum = `0`,
		mx_float	wd = `0`,
		mx_float	rescale_grad = `1`,
		mx_float	clip_gradient = `-1`,
		mx_float	wd_lh = `0`
	)

inline

SIGN momentUM (Signum) optimizer.

   .. math::

   g_t = \nabla J(W_{t-1})\\
   m_t = \beta m_{t-1} + (1 - \beta) g_t\\
   W_t = W_{t-1} - \eta_t \text{sign}(m_t)

   It updates the weights using::
   state = momentum * state + (1-momentum) * gradient
   weight = weight - learning_rate * sign(state)

   Where the parameter ``momentum`` is the decay rate of momentum estimates at each epoch.

   .. note::
   - sparse ndarray not supported for this optimizer yet.


   Defined in src/operator/optimizer_op.cc:L86

Parameters

symbol_name	name of the resulting symbol
weight	Weight
grad	Gradient
mom	Momentum
lr	Learning rate
momentum	The decay rate of momentum estimates at each epoch.
wd	Weight decay augments the objective function with a regularization term that penalizes large weights. The penalty scales with the square of the magnitude of each weight.
rescale_grad	Rescale gradient to grad = rescale_grad*grad.
clip_gradient	Clip gradient to the range of [-clip_gradient, clip_gradient]. If clip_gradient <= 0, gradient clipping is turned off. grad = max(min(grad, clip_gradient), -clip_gradient).
wd_lh	The amount of weight decay that does not go into gradient/momentum calculations otherwise do weight decay algorithmically only.

Returns: new symbol

Symbol mxnet::cpp::signum_update	(	Symbol	weight,
		Symbol	grad,
		Symbol	mom,
		mx_float	lr,
		mx_float	momentum = `0`,
		mx_float	wd = `0`,
		mx_float	rescale_grad = `1`,
		mx_float	clip_gradient = `-1`,
		mx_float	wd_lh = `0`
	)

inline

SIGN momentUM (Signum) optimizer.

   .. math::

   g_t = \nabla J(W_{t-1})\\
   m_t = \beta m_{t-1} + (1 - \beta) g_t\\
   W_t = W_{t-1} - \eta_t \text{sign}(m_t)

   It updates the weights using::
   state = momentum * state + (1-momentum) * gradient
   weight = weight - learning_rate * sign(state)

   Where the parameter ``momentum`` is the decay rate of momentum estimates at each epoch.

   .. note::
   - sparse ndarray not supported for this optimizer yet.


   Defined in src/operator/optimizer_op.cc:L86

Parameters

weight	Weight
grad	Gradient
mom	Momentum
lr	Learning rate
momentum	The decay rate of momentum estimates at each epoch.
wd	Weight decay augments the objective function with a regularization term that penalizes large weights. The penalty scales with the square of the magnitude of each weight.
rescale_grad	Rescale gradient to grad = rescale_grad*grad.
clip_gradient	Clip gradient to the range of [-clip_gradient, clip_gradient]. If clip_gradient <= 0, gradient clipping is turned off. grad = max(min(grad, clip_gradient), -clip_gradient).
wd_lh	The amount of weight decay that does not go into gradient/momentum calculations otherwise do weight decay algorithmically only.

Returns: new symbol

Symbol mxnet::cpp::sin	(	const std::string &	symbol_name,
		Symbol	data
	)

inline

Computes the element-wise sine of the input array.

   The input should be in radians (:math:`2\pi` rad equals 360 degrees).

   .. math::
   sin([0, \pi/4, \pi/2]) = [0, 0.707, 1]

   The storage type of ``sin`` output depends upon the input storage type:

   - sin(default) = default
   - sin(row_sparse) = row_sparse
   - sin(csr) = csr



   Defined in src/operator/tensor/elemwise_unary_op_trig.cc:L46

Parameters

symbol_name	name of the resulting symbol
data	The input array.

Returns: new symbol

Symbol mxnet::cpp::sin ( Symbol data )

inline

Computes the element-wise sine of the input array.

   The input should be in radians (:math:`2\pi` rad equals 360 degrees).

   .. math::
   sin([0, \pi/4, \pi/2]) = [0, 0.707, 1]

   The storage type of ``sin`` output depends upon the input storage type:

   - sin(default) = default
   - sin(row_sparse) = row_sparse
   - sin(csr) = csr



   Defined in src/operator/tensor/elemwise_unary_op_trig.cc:L46

Parameters

data	The input array.

Returns: new symbol

Symbol mxnet::cpp::sinh	(	const std::string &	symbol_name,
		Symbol	data
	)

inline

Returns the hyperbolic sine of the input array, computed element-wise.

   .. math::
   sinh(x) = 0.5\times(exp(x) - exp(-x))

   The storage type of ``sinh`` output depends upon the input storage type:

   - sinh(default) = default
   - sinh(row_sparse) = row_sparse
   - sinh(csr) = csr



   Defined in src/operator/tensor/elemwise_unary_op_trig.cc:L201

Parameters

symbol_name	name of the resulting symbol
data	The input array.

Returns: new symbol

Symbol mxnet::cpp::sinh ( Symbol data )

inline

Returns the hyperbolic sine of the input array, computed element-wise.

   .. math::
   sinh(x) = 0.5\times(exp(x) - exp(-x))

   The storage type of ``sinh`` output depends upon the input storage type:

   - sinh(default) = default
   - sinh(row_sparse) = row_sparse
   - sinh(csr) = csr



   Defined in src/operator/tensor/elemwise_unary_op_trig.cc:L201

Parameters

data	The input array.

Returns: new symbol

Symbol mxnet::cpp::size_array	(	const std::string &	symbol_name,
		Symbol	data
	)

inline

Returns a 1D int64 array containing the size of data.

   Example::

   size_array([[1,2,3,4], [5,6,7,8]]) = [8]



   Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L556

Parameters

symbol_name	name of the resulting symbol
data	Input Array.

Returns: new symbol

Symbol mxnet::cpp::size_array ( Symbol data )

inline

Returns a 1D int64 array containing the size of data.

   Example::

   size_array([[1,2,3,4], [5,6,7,8]]) = [8]



   Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L556

Parameters

data	Input Array.

Returns: new symbol

Symbol mxnet::cpp::slice	(	const std::string &	symbol_name,
		Symbol	data,
		Shape	begin,
		Shape	end,
		Shape	step = `Shape()`
	)

inline

Slices a region of the array.

   .. note:: ``crop`` is deprecated. Use ``slice`` instead.

   This function returns a sliced array between the indices given
   by `begin` and `end` with the corresponding `step`.

   For an input array of ``shape=(d_0, d_1, ..., d_n-1)``,
   slice operation with ``begin=(b_0, b_1...b_m-1)``,
   ``end=(e_0, e_1, ..., e_m-1)``, and ``step=(s_0, s_1, ..., s_m-1)``,
   where m <= n, results in an array with the shape
   ``(|e_0-b_0|/|s_0|, ..., |e_m-1-b_m-1|/|s_m-1|, d_m, ..., d_n-1)``.

   The resulting array's *k*-th dimension contains elements
   from the *k*-th dimension of the input array starting
   from index ``b_k`` (inclusive) with step ``s_k``
   until reaching ``e_k`` (exclusive).

   If the *k*-th elements are `None` in the sequence of `begin`, `end`,
   and `step`, the following rule will be used to set default values.
   If `s_k` is `None`, set `s_k=1`. If `s_k > 0`, set `b_k=0`, `e_k=d_k`;
   else, set `b_k=d_k-1`, `e_k=-1`.

   The storage type of ``slice`` output depends on storage types of inputs

   - slice(csr) = csr
   - otherwise, ``slice`` generates output with default storage

   .. note:: When input data storage type is csr, it only supports
   step=(), or step=(None,), or step=(1,) to generate a csr output.
   For other step parameter values, it falls back to slicing
   a dense tensor.

   Example::

   x = [[  1.,   2.,   3.,   4.],
   [  5.,   6.,   7.,   8.],
   [  9.,  10.,  11.,  12.]]

   slice(x, begin=(0,1), end=(2,4)) = [[ 2.,  3.,  4.],
   [ 6.,  7.,  8.]]
   slice(x, begin=(None, 0), end=(None, 3), step=(-1, 2)) = [[9., 11.],
   [5.,  7.],
   [1.,  3.]]


   Defined in src/operator/tensor/matrix_op.cc:L413

Parameters

symbol_name	name of the resulting symbol
data	Source input
begin	starting indices for the slice operation, supports negative indices.
end	ending indices for the slice operation, supports negative indices.
step	step for the slice operation, supports negative values.

Returns: new symbol

Symbol mxnet::cpp::slice	(	Symbol	data,
		Shape	begin,
		Shape	end,
		Shape	step = `Shape()`
	)

inline

Slices a region of the array.

   .. note:: ``crop`` is deprecated. Use ``slice`` instead.

   This function returns a sliced array between the indices given
   by `begin` and `end` with the corresponding `step`.

   For an input array of ``shape=(d_0, d_1, ..., d_n-1)``,
   slice operation with ``begin=(b_0, b_1...b_m-1)``,
   ``end=(e_0, e_1, ..., e_m-1)``, and ``step=(s_0, s_1, ..., s_m-1)``,
   where m <= n, results in an array with the shape
   ``(|e_0-b_0|/|s_0|, ..., |e_m-1-b_m-1|/|s_m-1|, d_m, ..., d_n-1)``.

   The resulting array's *k*-th dimension contains elements
   from the *k*-th dimension of the input array starting
   from index ``b_k`` (inclusive) with step ``s_k``
   until reaching ``e_k`` (exclusive).

   If the *k*-th elements are `None` in the sequence of `begin`, `end`,
   and `step`, the following rule will be used to set default values.
   If `s_k` is `None`, set `s_k=1`. If `s_k > 0`, set `b_k=0`, `e_k=d_k`;
   else, set `b_k=d_k-1`, `e_k=-1`.

   The storage type of ``slice`` output depends on storage types of inputs

   - slice(csr) = csr
   - otherwise, ``slice`` generates output with default storage

   .. note:: When input data storage type is csr, it only supports
   step=(), or step=(None,), or step=(1,) to generate a csr output.
   For other step parameter values, it falls back to slicing
   a dense tensor.

   Example::

   x = [[  1.,   2.,   3.,   4.],
   [  5.,   6.,   7.,   8.],
   [  9.,  10.,  11.,  12.]]

   slice(x, begin=(0,1), end=(2,4)) = [[ 2.,  3.,  4.],
   [ 6.,  7.,  8.]]
   slice(x, begin=(None, 0), end=(None, 3), step=(-1, 2)) = [[9., 11.],
   [5.,  7.],
   [1.,  3.]]


   Defined in src/operator/tensor/matrix_op.cc:L413

Parameters

data	Source input
begin	starting indices for the slice operation, supports negative indices.
end	ending indices for the slice operation, supports negative indices.
step	step for the slice operation, supports negative values.

Returns: new symbol

Symbol mxnet::cpp::slice_axis	(	const std::string &	symbol_name,
		Symbol	data,
		int	axis,
		int	begin,
		dmlc::optional< int >	end
	)

inline

Slices along a given axis.

   Returns an array slice along a given `axis` starting from the `begin` index
   to the `end` index.

   Examples::

   x = [[  1.,   2.,   3.,   4.],
   [  5.,   6.,   7.,   8.],
   [  9.,  10.,  11.,  12.]]

   slice_axis(x, axis=0, begin=1, end=3) = [[  5.,   6.,   7.,   8.],
   [  9.,  10.,  11.,  12.]]

   slice_axis(x, axis=1, begin=0, end=2) = [[  1.,   2.],
   [  5.,   6.],
   [  9.,  10.]]

   slice_axis(x, axis=1, begin=-3, end=-1) = [[  2.,   3.],
   [  6.,   7.],
   [ 10.,  11.]]


   Defined in src/operator/tensor/matrix_op.cc:L500

Parameters

symbol_name	name of the resulting symbol
data	Source input
axis	Axis along which to be sliced, supports negative indexes.
begin	The beginning index along the axis to be sliced, supports negative indexes.
end	The ending index along the axis to be sliced, supports negative indexes.

Returns: new symbol

Symbol mxnet::cpp::slice_axis	(	Symbol	data,
		int	axis,
		int	begin,
		dmlc::optional< int >	end
	)

inline

Slices along a given axis.

   Returns an array slice along a given `axis` starting from the `begin` index
   to the `end` index.

   Examples::

   x = [[  1.,   2.,   3.,   4.],
   [  5.,   6.,   7.,   8.],
   [  9.,  10.,  11.,  12.]]

   slice_axis(x, axis=0, begin=1, end=3) = [[  5.,   6.,   7.,   8.],
   [  9.,  10.,  11.,  12.]]

   slice_axis(x, axis=1, begin=0, end=2) = [[  1.,   2.],
   [  5.,   6.],
   [  9.,  10.]]

   slice_axis(x, axis=1, begin=-3, end=-1) = [[  2.,   3.],
   [  6.,   7.],
   [ 10.,  11.]]


   Defined in src/operator/tensor/matrix_op.cc:L500

Parameters

data	Source input
axis	Axis along which to be sliced, supports negative indexes.
begin	The beginning index along the axis to be sliced, supports negative indexes.
end	The ending index along the axis to be sliced, supports negative indexes.

Returns: new symbol

Symbol mxnet::cpp::slice_like	(	const std::string &	symbol_name,
		Symbol	data,
		Symbol	shape_like,
		Shape	axes = `Shape()`
	)

inline

Slices a region of the array like the shape of another array.

   This function is similar to ``slice``, however, the `begin` are always `0`s
   and `end` of specific axes are inferred from the second input `shape_like`.

   Given the second `shape_like` input of ``shape=(d_0, d_1, ..., d_n-1)``,
   a ``slice_like`` operator with default empty `axes`, it performs the
   following operation:

   `` out = slice(input, begin=(0, 0, ..., 0), end=(d_0, d_1, ..., d_n-1))``.

   When `axes` is not empty, it is used to specify which axes are being sliced.

   Given a 4-d input data, ``slice_like`` operator with ``axes=(0, 2, -1)``
   will perform the following operation:

   `` out = slice(input, begin=(0, 0, 0, 0), end=(d_0, None, d_2, d_3))``.

   Note that it is allowed to have first and second input with different
   dimensions, however, you have to make sure the `axes` are specified and not
   exceeding the dimension limits.

   For example, given `input_1` with ``shape=(2,3,4,5)`` and `input_2` with
   ``shape=(1,2,3)``, it is not allowed to use:

   `` out = slice_like(a, b)`` because ndim of `input_1` is 4, and ndim of
   `input_2` is 3.

   The following is allowed in this situation:

   `` out = slice_like(a, b, axes=(0, 2))``

   Example::

   x = [[  1.,   2.,   3.,   4.],
   [  5.,   6.,   7.,   8.],
   [  9.,  10.,  11.,  12.]]

   y = [[  0.,   0.,   0.],
   [  0.,   0.,   0.]]

   slice_like(x, y) = [[ 1.,  2.,  3.]
   [ 5.,  6.,  7.]]
   slice_like(x, y, axes=(0, 1)) = [[ 1.,  2.,  3.]
   [ 5.,  6.,  7.]]
   slice_like(x, y, axes=(0)) = [[ 1.,  2.,  3.,  4.]
   [ 5.,  6.,  7.,  8.]]
   slice_like(x, y, axes=(-1)) = [[  1.,   2.,   3.]
   [  5.,   6.,   7.]
   [  9.,  10.,  11.]]


   Defined in src/operator/tensor/matrix_op.cc:L569

Parameters

symbol_name	name of the resulting symbol
data	Source input
shape_like	Shape like input
axes	List of axes on which input data will be sliced according to the corresponding size of the second input. By default will slice on all axes.

Returns: new symbol

Symbol mxnet::cpp::slice_like	(	Symbol	data,
		Symbol	shape_like,
		Shape	axes = `Shape()`
	)

inline

Slices a region of the array like the shape of another array.

   This function is similar to ``slice``, however, the `begin` are always `0`s
   and `end` of specific axes are inferred from the second input `shape_like`.

   Given the second `shape_like` input of ``shape=(d_0, d_1, ..., d_n-1)``,
   a ``slice_like`` operator with default empty `axes`, it performs the
   following operation:

   `` out = slice(input, begin=(0, 0, ..., 0), end=(d_0, d_1, ..., d_n-1))``.

   When `axes` is not empty, it is used to specify which axes are being sliced.

   Given a 4-d input data, ``slice_like`` operator with ``axes=(0, 2, -1)``
   will perform the following operation:

   `` out = slice(input, begin=(0, 0, 0, 0), end=(d_0, None, d_2, d_3))``.

   Note that it is allowed to have first and second input with different
   dimensions, however, you have to make sure the `axes` are specified and not
   exceeding the dimension limits.

   For example, given `input_1` with ``shape=(2,3,4,5)`` and `input_2` with
   ``shape=(1,2,3)``, it is not allowed to use:

   `` out = slice_like(a, b)`` because ndim of `input_1` is 4, and ndim of
   `input_2` is 3.

   The following is allowed in this situation:

   `` out = slice_like(a, b, axes=(0, 2))``

   Example::

   x = [[  1.,   2.,   3.,   4.],
   [  5.,   6.,   7.,   8.],
   [  9.,  10.,  11.,  12.]]

   y = [[  0.,   0.,   0.],
   [  0.,   0.,   0.]]

   slice_like(x, y) = [[ 1.,  2.,  3.]
   [ 5.,  6.,  7.]]
   slice_like(x, y, axes=(0, 1)) = [[ 1.,  2.,  3.]
   [ 5.,  6.,  7.]]
   slice_like(x, y, axes=(0)) = [[ 1.,  2.,  3.,  4.]
   [ 5.,  6.,  7.,  8.]]
   slice_like(x, y, axes=(-1)) = [[  1.,   2.,   3.]
   [  5.,   6.,   7.]
   [  9.,  10.,  11.]]


   Defined in src/operator/tensor/matrix_op.cc:L569

Parameters

data	Source input
shape_like	Shape like input
axes	List of axes on which input data will be sliced according to the corresponding size of the second input. By default will slice on all axes.

Returns: new symbol

Symbol mxnet::cpp::SliceChannel	(	const std::string &	symbol_name,
		Symbol	data,
		int	num_outputs,
		int	axis = `1`,
		bool	squeeze_axis = `false`
	)

inline

Splits an array along a particular axis into multiple sub-arrays.

   .. note:: ``SliceChannel`` is deprecated. Use ``split`` instead.

   **Note** that `num_outputs` should evenly divide the length of the axis
   along which to split the array.

   Example::

   x  = [[[ 1.]
   [ 2.]]
   [[ 3.]
   [ 4.]]
   [[ 5.]
   [ 6.]]]
   x.shape = (3, 2, 1)

   y = split(x, axis=1, num_outputs=2) // a list of 2 arrays with shape (3, 1, 1)
   y = [[[ 1.]]
   [[ 3.]]
   [[ 5.]]]

   [[[ 2.]]
   [[ 4.]]
   [[ 6.]]]

   y[0].shape = (3, 1, 1)

   z = split(x, axis=0, num_outputs=3) // a list of 3 arrays with shape (1, 2, 1)
   z = [[[ 1.]
   [ 2.]]]

   [[[ 3.]
   [ 4.]]]

   [[[ 5.]
   [ 6.]]]

   z[0].shape = (1, 2, 1)

   `squeeze_axis=1` removes the axis with length 1 from the shapes of the output arrays.
   **Note** that setting `squeeze_axis` to ``1`` removes axis with length 1 only
   along the `axis` which it is split.
   Also `squeeze_axis` can be set to true only if ``input.shape[axis] == num_outputs``.

   Example::

   z = split(x, axis=0, num_outputs=3, squeeze_axis=1) // a list of 3 arrays with shape (2, 1)
   z = [[ 1.]
   [ 2.]]

   [[ 3.]
   [ 4.]]

   [[ 5.]
   [ 6.]]
   z[0].shape = (2 ,1 )



   Defined in src/operator/slice_channel.cc:L107

Parameters

symbol_name	name of the resulting symbol
data	The input
num_outputs	Number of splits. Note that this should evenly divide the length of the `axis`.
axis	Axis along which to split.
squeeze_axis	If true, removes the axis with length 1 from the shapes of the output arrays. Note that setting `squeeze_axis` to `true` removes axis with length 1 only along the `axis` which it is split. Also `squeeze_axis` can be set to `true` only if `input.shape[axis] == num_outputs`.

Returns: new symbol

Symbol mxnet::cpp::SliceChannel	(	Symbol	data,
		int	num_outputs,
		int	axis = `1`,
		bool	squeeze_axis = `false`
	)

inline

Splits an array along a particular axis into multiple sub-arrays.

   .. note:: ``SliceChannel`` is deprecated. Use ``split`` instead.

   **Note** that `num_outputs` should evenly divide the length of the axis
   along which to split the array.

   Example::

   x  = [[[ 1.]
   [ 2.]]
   [[ 3.]
   [ 4.]]
   [[ 5.]
   [ 6.]]]
   x.shape = (3, 2, 1)

   y = split(x, axis=1, num_outputs=2) // a list of 2 arrays with shape (3, 1, 1)
   y = [[[ 1.]]
   [[ 3.]]
   [[ 5.]]]

   [[[ 2.]]
   [[ 4.]]
   [[ 6.]]]

   y[0].shape = (3, 1, 1)

   z = split(x, axis=0, num_outputs=3) // a list of 3 arrays with shape (1, 2, 1)
   z = [[[ 1.]
   [ 2.]]]

   [[[ 3.]
   [ 4.]]]

   [[[ 5.]
   [ 6.]]]

   z[0].shape = (1, 2, 1)

   `squeeze_axis=1` removes the axis with length 1 from the shapes of the output arrays.
   **Note** that setting `squeeze_axis` to ``1`` removes axis with length 1 only
   along the `axis` which it is split.
   Also `squeeze_axis` can be set to true only if ``input.shape[axis] == num_outputs``.

   Example::

   z = split(x, axis=0, num_outputs=3, squeeze_axis=1) // a list of 3 arrays with shape (2, 1)
   z = [[ 1.]
   [ 2.]]

   [[ 3.]
   [ 4.]]

   [[ 5.]
   [ 6.]]
   z[0].shape = (2 ,1 )



   Defined in src/operator/slice_channel.cc:L107

Parameters

data	The input
num_outputs	Number of splits. Note that this should evenly divide the length of
axis	Axis along which to split.
squeeze_axis	If true, Removes the axis with length 1 from the shapes of the output arrays. Note that setting `squeeze_axis` to `true` removes axis with length 1 only along the `axis` which it is split. Also `squeeze_axis` can

Returns: new symbol

Symbol mxnet::cpp::smooth_l1	(	const std::string &	symbol_name,
		Symbol	data,
		mx_float	scalar
	)

inline

Calculate Smooth L1 Loss(lhs, scalar) by summing

   .. math::

   f(x) =
   \begin{cases}
   (\sigma x)^2/2,& \text{if }|x| < 1/\sigma^2\\
   |x|-0.5/\sigma^2,& \text{otherwise}
   \end{cases}

   where :math:`x` is an element of the tensor *lhs* and :math:`\sigma` is the scalar.

   Example::

   smooth_l1([1, 2, 3, 4], scalar=1) = [0.5, 1.5, 2.5, 3.5]



   Defined in src/operator/tensor/elemwise_binary_scalar_op_extended.cc:L103

Parameters

symbol_name	name of the resulting symbol
data	source input
scalar	scalar input

Returns: new symbol

Symbol mxnet::cpp::smooth_l1	(	Symbol	data,
		mx_float	scalar
	)

inline

Calculate Smooth L1 Loss(lhs, scalar) by summing

   .. math::

   f(x) =
   \begin{cases}
   (\sigma x)^2/2,& \text{if }|x| < 1/\sigma^2\\
   |x|-0.5/\sigma^2,& \text{otherwise}
   \end{cases}

   where :math:`x` is an element of the tensor *lhs* and :math:`\sigma` is the scalar.

   Example::

   smooth_l1([1, 2, 3, 4], scalar=1) = [0.5, 1.5, 2.5, 3.5]



   Defined in src/operator/tensor/elemwise_binary_scalar_op_extended.cc:L103

Parameters

data	source input
scalar	scalar input

Returns: new symbol

Symbol mxnet::cpp::softmax	(	const std::string &	symbol_name,
		Symbol	data,
		int	axis = `-1`,
		dmlc::optional< double >	temperature = `dmlc::optional<double>()`
	)

inline

Applies the softmax function.

   The resulting array contains elements in the range (0,1) and the elements along the given axis sum up to 1.

   .. math::
   softmax(\mathbf{z/t})_j = \frac{e^{z_j/t}}{\sum_{k=1}^K e^{z_k/t}}

   for :math:`j = 1, ..., K`

   t is the temperature parameter in softmax function. By default, t equals 1.0

   Example::

   x = [[ 1.  1.  1.]
   [ 1.  1.  1.]]

   softmax(x,axis=0) = [[ 0.5  0.5  0.5]
   [ 0.5  0.5  0.5]]

   softmax(x,axis=1) = [[ 0.33333334,  0.33333334,  0.33333334],
   [ 0.33333334,  0.33333334,  0.33333334]]



   Defined in src/operator/nn/softmax.cc:L100

Parameters

symbol_name	name of the resulting symbol
data	The input array.
axis	The axis along which to compute softmax.
temperature	Temperature parameter in softmax

Returns: new symbol

Symbol mxnet::cpp::Softmax	(	const std::string &	symbol_name,
		Symbol	data,
		mx_float	grad_scale = `1`,
		mx_float	ignore_label = `-1`,
		bool	multi_output = `false`,
		bool	use_ignore = `false`,
		bool	preserve_shape = `false`,
		SoftmaxNormalization	normalization = `SoftmaxNormalization::kNull`,
		bool	out_grad = `false`,
		mx_float	smooth_alpha = `0`
	)

inline

Please use SoftmaxOutput.

   .. note::

   This operator has been renamed to `SoftmaxOutput`, which
   computes the gradient of cross-entropy loss w.r.t softmax output.
   To just compute softmax output, use the `softmax` operator.



   Defined in src/operator/softmax_output.cc:L138

Parameters

symbol_name	name of the resulting symbol
data	Input array.
grad_scale	Scales the gradient by a float factor.
ignore_label	The instances whose `labels` == `ignore_label` will be ignored during backward, if `use_ignore` is set to `true`.
multi_output	If set to `true`, the softmax function will be computed along axis `1`. This is applied when the shape of input array differs from the shape of label array.
use_ignore	If set to `true`, the `ignore_label` value will not contribute to the backward gradient.
preserve_shape	If set to `true`, the softmax function will be computed along the last axis (`-1`).
normalization	Normalizes the gradient.
out_grad	Multiplies gradient with output gradient element-wise.
smooth_alpha	Constant for computing a label smoothed version of cross-entropy for the backwards pass. This constant gets subtracted from the one-hot encoding of the gold label and distributed uniformly to all other labels.

Returns: new symbol

Symbol mxnet::cpp::softmax	(	Symbol	data,
		int	axis = `-1`,
		dmlc::optional< double >	temperature = `dmlc::optional<double>()`
	)

inline

Applies the softmax function.

   The resulting array contains elements in the range (0,1) and the elements along the given axis sum up to 1.

   .. math::
   softmax(\mathbf{z/t})_j = \frac{e^{z_j/t}}{\sum_{k=1}^K e^{z_k/t}}

   for :math:`j = 1, ..., K`

   t is the temperature parameter in softmax function. By default, t equals 1.0

   Example::

   x = [[ 1.  1.  1.]
   [ 1.  1.  1.]]

   softmax(x,axis=0) = [[ 0.5  0.5  0.5]
   [ 0.5  0.5  0.5]]

   softmax(x,axis=1) = [[ 0.33333334,  0.33333334,  0.33333334],
   [ 0.33333334,  0.33333334,  0.33333334]]



   Defined in src/operator/nn/softmax.cc:L100

Parameters

data	The input array.
axis	The axis along which to compute softmax.
temperature	Temperature parameter in softmax

Returns: new symbol

Symbol mxnet::cpp::Softmax	(	Symbol	data,
		mx_float	grad_scale = `1`,
		mx_float	ignore_label = `-1`,
		bool	multi_output = `false`,
		bool	use_ignore = `false`,
		bool	preserve_shape = `false`,
		SoftmaxNormalization	normalization = `SoftmaxNormalization::kNull`,
		bool	out_grad = `false`,
		mx_float	smooth_alpha = `0`
	)

inline

Please use SoftmaxOutput.

   .. note::

   This operator has been renamed to `SoftmaxOutput`, which
   computes the gradient of cross-entropy loss w.r.t softmax output.
   To just compute softmax output, use the `softmax` operator.



   Defined in src/operator/softmax_output.cc:L138

Parameters

data	Input array.
grad_scale	Scales the gradient by a float factor.
ignore_label	The instances whose `labels` == `ignore_label` will be ignored during backward, if `use_ignore` is set to `true`.
multi_output	If set to `true`, the softmax function will be computed along axis `1`. This is applied when the shape of input array differs from the shape of label array.
use_ignore	If set to `true`, the `ignore_label` value will not contribute to the backward gradient.
preserve_shape	If set to `true`, the softmax function will be computed along the last axis (`-1`).
normalization	Normalizes the gradient.
out_grad	Multiplies gradient with output gradient element-wise.
smooth_alpha	Constant for computing a label smoothed version of cross-entropy for the backwards pass. This constant gets subtracted from the one-hot encoding of the gold label and distributed uniformly to all other labels.

Returns: new symbol

Symbol mxnet::cpp::softmax_cross_entropy	(	const std::string &	symbol_name,
		Symbol	data,
		Symbol	label
	)

inline

Calculate cross entropy of softmax output and one-hot label.

   - This operator computes the cross entropy in two steps:
   - Applies softmax function on the input array.
   - Computes and returns the cross entropy loss between the softmax output and the labels.

   - The softmax function and cross entropy loss is given by:

   - Softmax Function:

   .. math:: \text{softmax}(x)_i = \frac{exp(x_i)}{\sum_j exp(x_j)}

   - Cross Entropy Function:

   .. math:: \text{CE(label, output)} = - \sum_i \text{label}_i \log(\text{output}_i)

   Example::

   x = [[1, 2, 3],
   [11, 7, 5]]

   label = [2, 0]

   softmax(x) = [[0.09003057, 0.24472848, 0.66524094],
   [0.97962922, 0.01794253, 0.00242826]]

   softmax_cross_entropy(data, label) = - log(0.66524094) - log(0.97962922) = 0.4281871



   Defined in src/operator/loss_binary_op.cc:L59

Parameters

symbol_name	name of the resulting symbol
data	Input data
label	Input label

Returns: new symbol

Symbol mxnet::cpp::softmax_cross_entropy	(	Symbol	data,
		Symbol	label
	)

inline

Calculate cross entropy of softmax output and one-hot label.

   - This operator computes the cross entropy in two steps:
   - Applies softmax function on the input array.
   - Computes and returns the cross entropy loss between the softmax output and the labels.

   - The softmax function and cross entropy loss is given by:

   - Softmax Function:

   .. math:: \text{softmax}(x)_i = \frac{exp(x_i)}{\sum_j exp(x_j)}

   - Cross Entropy Function:

   .. math:: \text{CE(label, output)} = - \sum_i \text{label}_i \log(\text{output}_i)

   Example::

   x = [[1, 2, 3],
   [11, 7, 5]]

   label = [2, 0]

   softmax(x) = [[0.09003057, 0.24472848, 0.66524094],
   [0.97962922, 0.01794253, 0.00242826]]

   softmax_cross_entropy(data, label) = - log(0.66524094) - log(0.97962922) = 0.4281871



   Defined in src/operator/loss_binary_op.cc:L59

Parameters

data	Input data
label	Input label

Returns: new symbol

Symbol mxnet::cpp::SoftmaxActivation	(	const std::string &	symbol_name,
		Symbol	data,
		SoftmaxActivationMode	mode = `SoftmaxActivationMode::kInstance`
	)

inline

Applies softmax activation to input. This is intended for internal layers.

   .. note::

   This operator has been deprecated, please use `softmax`.

   If `mode` = ``instance``, this operator will compute a softmax for each instance in the batch.
   This is the default mode.

   If `mode` = ``channel``, this operator will compute a k-class softmax at each position
   of each instance, where `k` = ``num_channel``. This mode can only be used when the input array
   has at least 3 dimensions.
   This can be used for `fully convolutional network`, `image segmentation`, etc.

   Example::

   >>> input_array = mx.nd.array([[3., 0.5, -0.5, 2., 7.],
   >>>                            [2., -.4, 7.,   3., 0.2]])
   >>> softmax_act = mx.nd.SoftmaxActivation(input_array)
   >>> print softmax_act.asnumpy()
   [[  1.78322066e-02   1.46375655e-03   5.38485940e-04   6.56010211e-03   9.73605454e-01]
   [  6.56221947e-03   5.95310994e-04   9.73919690e-01   1.78379621e-02   1.08472735e-03]]



   Defined in src/operator/nn/softmax_activation.cc:L59

Parameters

symbol_name	name of the resulting symbol
data	The input array.
mode	Specifies how to compute the softmax. If set to `instance`, it computes softmax for each instance. If set to `channel`, it computes cross channel softmax for each position of each instance.

Returns: new symbol

Symbol mxnet::cpp::SoftmaxActivation	(	Symbol	data,
		SoftmaxActivationMode	mode = `SoftmaxActivationMode::kInstance`
	)

inline

Applies softmax activation to input. This is intended for internal layers.

   .. note::

   This operator has been deprecated, please use `softmax`.

   If `mode` = ``instance``, this operator will compute a softmax for each instance in the batch.
   This is the default mode.

   If `mode` = ``channel``, this operator will compute a k-class softmax at each position
   of each instance, where `k` = ``num_channel``. This mode can only be used when the input array
   has at least 3 dimensions.
   This can be used for `fully convolutional network`, `image segmentation`, etc.

   Example::

   >>> input_array = mx.nd.array([[3., 0.5, -0.5, 2., 7.],
   >>>                            [2., -.4, 7.,   3., 0.2]])
   >>> softmax_act = mx.nd.SoftmaxActivation(input_array)
   >>> print softmax_act.asnumpy()
   [[  1.78322066e-02   1.46375655e-03   5.38485940e-04   6.56010211e-03   9.73605454e-01]
   [  6.56221947e-03   5.95310994e-04   9.73919690e-01   1.78379621e-02   1.08472735e-03]]



   Defined in src/operator/nn/softmax_activation.cc:L59

Parameters

data	The input array.
mode	Specifies how to compute the softmax. If set to `instance`, it computes softmax for each instance. If set to `channel`, it computes cross channel softmax for each position of each instance.

Returns: new symbol

Symbol mxnet::cpp::SoftmaxOutput	(	const std::string &	symbol_name,
		Symbol	data,
		Symbol	label,
		mx_float	grad_scale = `1`,
		mx_float	ignore_label = `-1`,
		bool	multi_output = `false`,
		bool	use_ignore = `false`,
		bool	preserve_shape = `false`,
		SoftmaxOutputNormalization	normalization = `SoftmaxOutputNormalization::kNull`,
		bool	out_grad = `false`,
		mx_float	smooth_alpha = `0`
	)

inline

Computes the gradient of cross entropy loss with respect to softmax output.

   - This operator computes the gradient in two steps.
   The cross entropy loss does not actually need to be computed.

   - Applies softmax function on the input array.
   - Computes and returns the gradient of cross entropy loss w.r.t. the softmax output.

   - The softmax function, cross entropy loss and gradient is given by:

   - Softmax Function:

   .. math:: \text{softmax}(x)_i = \frac{exp(x_i)}{\sum_j exp(x_j)}

   - Cross Entropy Function:

   .. math:: \text{CE(label, output)} = - \sum_i \text{label}_i \log(\text{output}_i)

   - The gradient of cross entropy loss w.r.t softmax output:

   .. math:: \text{gradient} = \text{output} - \text{label}

   - During forward propagation, the softmax function is computed for each instance in the input array.

   For general *N*-D input arrays with shape :math:`(d_1, d_2, ..., d_n)`. The size is
   :math:`s=d_1 \cdot d_2 \cdot \cdot \cdot d_n`. We can use the parameters `preserve_shape`
   and `multi_output` to specify the way to compute softmax:

   - By default, `preserve_shape` is ``false``. This operator will reshape the input array
   into a 2-D array with shape :math:`(d_1, \frac{s}{d_1})` and then compute the softmax function for
   each row in the reshaped array, and afterwards reshape it back to the original shape
   :math:`(d_1, d_2, ..., d_n)`.
   - If `preserve_shape` is ``true``, the softmax function will be computed along
   the last axis (`axis` = ``-1``).
   - If `multi_output` is ``true``, the softmax function will be computed along
   the second axis (`axis` = ``1``).

   - During backward propagation, the gradient of cross-entropy loss w.r.t softmax output array is computed.
   The provided label can be a one-hot label array or a probability label array.

   - If the parameter `use_ignore` is ``true``, `ignore_label` can specify input instances
   with a particular label to be ignored during backward propagation. **This has no effect when
   softmax `output` has same shape as `label`**.

   Example::

   data = [[1,2,3,4],[2,2,2,2],[3,3,3,3],[4,4,4,4]]
   label = [1,0,2,3]
   ignore_label = 1
   SoftmaxOutput(data=data, label = label,\
   multi_output=true, use_ignore=true,\
   ignore_label=ignore_label)
   ## forward softmax output
   [[ 0.0320586   0.08714432  0.23688284  0.64391428]
   [ 0.25        0.25        0.25        0.25      ]
   [ 0.25        0.25        0.25        0.25      ]
   [ 0.25        0.25        0.25        0.25      ]]
   ## backward gradient output
   [[ 0.    0.    0.    0.  ]
   [-0.75  0.25  0.25  0.25]
   [ 0.25  0.25 -0.75  0.25]
   [ 0.25  0.25  0.25 -0.75]]
   ## notice that the first row is all 0 because label[0] is 1, which is equal to ignore_label.

   - The parameter `grad_scale` can be used to rescale the gradient, which is often used to
   give each loss function different weights.

   - This operator also supports various ways to normalize the gradient by `normalization`.
   The `normalization` is applied if softmax output has different shape than the labels.
   The `normalization` mode can be set to the followings:

   - ``'null'``: do nothing.
   - ``'batch'``: divide the gradient by the batch size.
   - ``'valid'``: divide the gradient by the number of instances which are not ignored.



   Defined in src/operator/softmax_output.cc:L123

Parameters

symbol_name	name of the resulting symbol
data	Input array.
label	Ground truth label.
grad_scale	Scales the gradient by a float factor.
ignore_label	The instances whose `labels` == `ignore_label` will be ignored during backward, if `use_ignore` is set to `true`.
multi_output	If set to `true`, the softmax function will be computed along axis `1`. This is applied when the shape of input array differs from the shape of label array.
use_ignore	If set to `true`, the `ignore_label` value will not contribute to the backward gradient.
preserve_shape	If set to `true`, the softmax function will be computed along the last axis (`-1`).
normalization	Normalizes the gradient.
out_grad	Multiplies gradient with output gradient element-wise.
smooth_alpha	Constant for computing a label smoothed version of cross-entropy for the backwards pass. This constant gets subtracted from the one-hot encoding of the gold label and distributed uniformly to all other labels.

Returns: new symbol

Symbol mxnet::cpp::SoftmaxOutput	(	Symbol	data,
		Symbol	label,
		mx_float	grad_scale = `1`,
		mx_float	ignore_label = `-1`,
		bool	multi_output = `false`,
		bool	use_ignore = `false`,
		bool	preserve_shape = `false`,
		SoftmaxOutputNormalization	normalization = `SoftmaxOutputNormalization::kNull`,
		bool	out_grad = `false`,
		mx_float	smooth_alpha = `0`
	)

inline

Computes the gradient of cross entropy loss with respect to softmax output.

   - This operator computes the gradient in two steps.
   The cross entropy loss does not actually need to be computed.

   - Applies softmax function on the input array.
   - Computes and returns the gradient of cross entropy loss w.r.t. the softmax output.

   - The softmax function, cross entropy loss and gradient are given by:

   - Softmax Function:

   .. math:: \text{softmax}(x)_i = \frac{exp(x_i)}{\sum_j exp(x_j)}

   - Cross Entropy Function:

   .. math:: \text{CE(label, output)} = - \sum_i \text{label}_i \log(\text{output}_i)

   - The gradient of cross entropy loss w.r.t softmax output:

   .. math:: \text{gradient} = \text{output} - \text{label}

   - During forward propagation, the softmax function is computed for each
   instance in the input array.

   For general *N*-D input arrays with shape :math:`(d_1, d_2, ..., d_n)`, the
   size is :math:`s=d_1 \cdot d_2 \cdots d_n`. We can use the parameters
   `preserve_shape` and `multi_output` to specify the way to compute softmax:

   - By default, `preserve_shape` is ``false``. This operator will reshape the
   input array into a 2-D array with shape :math:`(d_1, \frac{s}{d_1})` and then
   compute the softmax function for each row in the reshaped array, and afterwards
   reshape it back to the original shape :math:`(d_1, d_2, ..., d_n)`.
   - If `preserve_shape` is ``true``, the softmax function will be computed along
   the last axis (`axis` = ``-1``).
   - If `multi_output` is ``true``, the softmax function will be computed along
   the second axis (`axis` = ``1``).

   - During backward propagation, the gradient of cross-entropy loss w.r.t softmax
   output array is computed. The provided label can be a one-hot label array or a
   probability label array.

   - If the parameter `use_ignore` is ``true``, `ignore_label` can specify input
   instances with a particular label to be ignored during backward propagation.
   **This has no effect when softmax `output` has same shape as `label`**.

   Example::

   data = [[1,2,3,4],[2,2,2,2],[3,3,3,3],[4,4,4,4]]
   label = [1,0,2,3]
   ignore_label = 1
   SoftmaxOutput(data=data, label = label,\
   multi_output=true, use_ignore=true,\
   ignore_label=ignore_label)
   ## forward softmax output
   [[ 0.0320586   0.08714432  0.23688284  0.64391428]
   [ 0.25        0.25        0.25        0.25      ]
   [ 0.25        0.25        0.25        0.25      ]
   [ 0.25        0.25        0.25        0.25      ]]
   ## backward gradient output
   [[ 0.    0.    0.    0.  ]
   [-0.75  0.25  0.25  0.25]
   [ 0.25  0.25 -0.75  0.25]
   [ 0.25  0.25  0.25 -0.75]]
   ## notice that the first row is all 0 because label[0] is 1, which is equal to ignore_label.

   - The parameter `grad_scale` can be used to rescale the gradient, which is
   often used to give each loss function different weights.

   - This operator also supports various ways to normalize the gradient by
   `normalization`. The `normalization` is applied if softmax output has different
   shape than the labels. The `normalization` mode can be set to the following:

   - ``'null'``: do nothing.
   - ``'batch'``: divide the gradient by the batch size.
   - ``'valid'``: divide the gradient by the number of instances which are not ignored.



   Defined in src/operator/softmax_output.cc:L123

Parameters

data	Input array.
label	Ground truth label.
grad_scale	Scales the gradient by a float factor.
ignore_label	The instances whose `labels` == `ignore_label` will be ignored during backward, if `use_ignore` is set to `true`.
multi_output	If set to `true`, the softmax function will be computed along axis `1`. This is applied when the shape of input array differs from the shape of label array.
use_ignore	If set to `true`, the `ignore_label` value will not contribute to the backward gradient.
preserve_shape	If set to `true`, the softmax function will be computed along the last axis (`-1`).
normalization	Normalizes the gradient.
out_grad	Multiplies gradient with output gradient element-wise.
smooth_alpha	Constant for computing a label smoothed version of cross-entropy for the backwards pass. This constant gets subtracted from the one-hot encoding of the gold label and distributed uniformly to all other labels.

Returns: new symbol

Symbol mxnet::cpp::softsign	(	const std::string &	symbol_name,
		Symbol	data
	)

inline

Computes softsign of x element-wise.

   .. math::
   y = x / (1 + abs(x))

   The storage type of ``softsign`` output is always dense



   Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L145

Parameters

symbol_name	name of the resulting symbol
data	The input array.

Returns: new symbol

Symbol mxnet::cpp::softsign ( Symbol data )

inline

Computes softsign of x element-wise.

   .. math::
   y = x / (1 + abs(x))

   The storage type of ``softsign`` output is always dense



   Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L145

Parameters

data	The input array.

Returns: new symbol

Symbol mxnet::cpp::sort	(	const std::string &	symbol_name,
		Symbol	data,
		dmlc::optional< int >	axis = `dmlc::optional<int>(-1)`,
		bool	is_ascend = `true`
	)

inline

Returns a sorted copy of an input array along the given axis.

   Examples::

   x = [[ 1, 4],
   [ 3, 1]]

   // sorts along the last axis
   sort(x) = [[ 1.,  4.],
   [ 1.,  3.]]

   // flattens and then sorts
   sort(x, axis=None) = [ 1.,  1.,  3.,  4.]

   // sorts along the first axis
   sort(x, axis=0) = [[ 1.,  1.],
   [ 3.,  4.]]

   // in a descend order
   sort(x, is_ascend=0) = [[ 4.,  1.],
   [ 3.,  1.]]



   Defined in src/operator/tensor/ordering_op.cc:L127

Parameters

symbol_name	name of the resulting symbol
data	The input array
axis	Axis along which to sort the input tensor. If not given, the flattened array is used.
is_ascend	Whether to sort in ascending or descending order.

Returns: new symbol

Symbol mxnet::cpp::sort	(	Symbol	data,
		dmlc::optional< int >	axis = `dmlc::optional<int>(-1)`,
		bool	is_ascend = `true`
	)

inline

Returns a sorted copy of an input array along the given axis.

   Examples::

   x = [[ 1, 4],
   [ 3, 1]]

   // sorts along the last axis
   sort(x) = [[ 1.,  4.],
   [ 1.,  3.]]

   // flattens and then sorts
   sort(x, axis=None) = [ 1.,  1.,  3.,  4.]

   // sorts along the first axis
   sort(x, axis=0) = [[ 1.,  1.],
   [ 3.,  4.]]

   // in a descend order
   sort(x, is_ascend=0) = [[ 4.,  1.],
   [ 3.,  1.]]



   Defined in src/operator/tensor/ordering_op.cc:L127

Parameters

data	The input array
axis	Axis along which to sort the input tensor. If not given, the flattened array is used.
is_ascend	Whether to sort in ascending or descending order.

Returns: new symbol

Symbol mxnet::cpp::space_to_depth	(	const std::string &	symbol_name,
		Symbol	data,
		int	block_size
	)

inline

Rearranges(permutes) blocks of spatial data into depth. Similar to ONNX SpaceToDepth operator: https://github.com/onnx/onnx/blob/master/docs/Operators.md#SpaceToDepth

The output is a new tensor where the values from height and width dimension are moved to the depth dimension. The reverse of this operation is ``depth_to_space``.

.. math::

\begin{gather*}
x' = reshape(x, [N, C, H / block\_size, block\_size, W / block\_size, block\_size]) \\
x'' = transpose(x', [0, 3, 5, 1, 2, 4]) \\
y = reshape(x'', [N, C * (block\_size ^ 2), H / block\_size, W / block\_size])
\end{gather*}

where :math:`x` is an input tensor with default layout as :math:`[N, C, H, W]`: [batch, channels, height, width] and :math:`y` is the output tensor of layout :math:`[N, C * (block\_size ^ 2), H / block\_size, W / block\_size]`.

Example::

x = [[[[0, 6, 1, 7, 2, 8], [12, 18, 13, 19, 14, 20], [3, 9, 4, 10, 5, 11], [15, 21, 16, 22, 17, 23]]]]

   space_to_depth(x, 2) = [[[[0, 1, 2],
   [3, 4, 5]],
   [[6, 7, 8],
   [9, 10, 11]],
   [[12, 13, 14],
   [15, 16, 17]],
   [[18, 19, 20],
   [21, 22, 23]]]]


   Defined in src/operator/tensor/matrix_op.cc:L999

Parameters

symbol_name	name of the resulting symbol
data	Input ndarray
block_size	Blocks of [block_size, block_size] are moved

Returns: new symbol

Symbol mxnet::cpp::space_to_depth	(	Symbol	data,
		int	block_size
	)

inline

Rearranges(permutes) blocks of spatial data into depth. Similar to ONNX SpaceToDepth operator: https://github.com/onnx/onnx/blob/master/docs/Operators.md#SpaceToDepth

The output is a new tensor where the values from height and width dimension are moved to the depth dimension. The reverse of this operation is ``depth_to_space``.

.. math::

\begin{gather*}
x' = reshape(x, [N, C, H / block\_size, block\_size, W / block\_size, block\_size]) \\
x'' = transpose(x', [0, 3, 5, 1, 2, 4]) \\
y = reshape(x'', [N, C * (block\_size ^ 2), H / block\_size, W / block\_size])
\end{gather*}

where :math:`x` is an input tensor with default layout as :math:`[N, C, H, W]`: [batch, channels, height, width] and :math:`y` is the output tensor of layout :math:`[N, C * (block\_size ^ 2), H / block\_size, W / block\_size]`.

Example::

x = [[[[0, 6, 1, 7, 2, 8], [12, 18, 13, 19, 14, 20], [3, 9, 4, 10, 5, 11], [15, 21, 16, 22, 17, 23]]]]

   space_to_depth(x, 2) = [[[[0, 1, 2],
   [3, 4, 5]],
   [[6, 7, 8],
   [9, 10, 11]],
   [[12, 13, 14],
   [15, 16, 17]],
   [[18, 19, 20],
   [21, 22, 23]]]]


   Defined in src/operator/tensor/matrix_op.cc:L999

Parameters

data	Input ndarray
block_size	Blocks of [block_size, block_size] are moved

Returns: new symbol

Symbol mxnet::cpp::SpatialTransformer	(	const std::string &	symbol_name,
		Symbol	data,
		Symbol	loc,
		SpatialTransformerTransformType	transform_type,
		SpatialTransformerSamplerType	sampler_type,
		Shape	target_shape = `Shape(0,0)`
	)

inline

Applies a spatial transformer to input feature map.

Parameters

symbol_name	name of the resulting symbol
data	Input data to the SpatialTransformerOp.
loc	localisation net, the output dim should be 6 when transform_type is affine.
transform_type	transformation type
sampler_type	sampling type
target_shape	output shape(h, w) of spatial transformer: (y, x)

Returns: new symbol

Symbol mxnet::cpp::SpatialTransformer	(	Symbol	data,
		Symbol	loc,
		SpatialTransformerTransformType	transform_type,
		SpatialTransformerSamplerType	sampler_type,
		Shape	target_shape = `Shape(0,0)`
	)

inline

Applies a spatial transformer to input feature map.

Parameters

data	Input data to the SpatialTransformerOp.
loc	localisation net, the output dim should be 6 when transform_type is affine.
transform_type	transformation type
sampler_type	sampling type
target_shape	output shape(h, w) of spatial transformer: (y, x)

Returns: new symbol

Symbol mxnet::cpp::sqrt	(	const std::string &	symbol_name,
		Symbol	data
	)

inline

Returns element-wise square-root value of the input.

   .. math::
   \textrm{sqrt}(x) = \sqrt{x}

   Example::

   sqrt([4, 9, 16]) = [2, 3, 4]

   The storage type of ``sqrt`` output depends upon the input storage type:

   - sqrt(default) = default
   - sqrt(row_sparse) = row_sparse
   - sqrt(csr) = csr



   Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L846

Parameters

symbol_name	name of the resulting symbol
data	The input array.

Returns: new symbol

Symbol mxnet::cpp::sqrt ( Symbol data )

inline

Returns element-wise square-root value of the input.

   .. math::
   \textrm{sqrt}(x) = \sqrt{x}

   Example::

   sqrt([4, 9, 16]) = [2, 3, 4]

   The storage type of ``sqrt`` output depends upon the input storage type:

   - sqrt(default) = default
   - sqrt(row_sparse) = row_sparse
   - sqrt(csr) = csr



   Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L846

Parameters

data	The input array.

Returns: new symbol

Symbol mxnet::cpp::square	(	const std::string &	symbol_name,
		Symbol	data
	)

inline

Returns element-wise squared value of the input.

   .. math::
   square(x) = x^2

   Example::

   square([2, 3, 4]) = [4, 9, 16]

   The storage type of ``square`` output depends upon the input storage type:

   - square(default) = default
   - square(row_sparse) = row_sparse
   - square(csr) = csr



   Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L823

Parameters

symbol_name	name of the resulting symbol
data	The input array.

Returns: new symbol

Symbol mxnet::cpp::square ( Symbol data )

inline

Returns element-wise squared value of the input.

   .. math::
   square(x) = x^2

   Example::

   square([2, 3, 4]) = [4, 9, 16]

   The storage type of ``square`` output depends upon the input storage type:

   - square(default) = default
   - square(row_sparse) = row_sparse
   - square(csr) = csr



   Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L823

Parameters

data	The input array.

Returns: new symbol

Symbol mxnet::cpp::squeeze	(	const std::string &	symbol_name,
		const std::vector< Symbol > &	data,
		dmlc::optional< Shape >	axis = `dmlc::optional<Shape>()`
	)

inline

Remove single-dimensional entries from the shape of an array. The output tensor shape is defined the same way as in numpy.squeeze in most cases; see the following note for the exception.

Examples::

   data = [[[0], [1], [2]]]
   squeeze(data) = [0, 1, 2]
   squeeze(data, axis=0) = [[0], [1], [2]]
   squeeze(data, axis=2) = [[0, 1, 2]]
   squeeze(data, axis=(0, 2)) = [0, 1, 2]

.. Note:: The output of this operator will keep at least one dimension not removed. For example, squeeze([[[4]]]) = [4], while in numpy.squeeze, the output will become a scalar.

Parameters

symbol_name	name of the resulting symbol
data	data to squeeze
axis	Selects a subset of the single-dimensional entries in the shape. If an axis is selected with shape entry greater than one, an error is raised.

Returns: new symbol

Symbol mxnet::cpp::squeeze	(	const std::vector< Symbol > &	data,
		dmlc::optional< Shape >	axis = `dmlc::optional<Shape>()`
	)

inline

Remove single-dimensional entries from the shape of an array. The output tensor shape is defined the same way as in numpy.squeeze in most cases; see the following note for the exception.

Examples::

   data = [[[0], [1], [2]]]
   squeeze(data) = [0, 1, 2]
   squeeze(data, axis=0) = [[0], [1], [2]]
   squeeze(data, axis=2) = [[0, 1, 2]]
   squeeze(data, axis=(0, 2)) = [0, 1, 2]

.. Note:: The output of this operator will keep at least one dimension not removed. For example, squeeze([[[4]]]) = [4], while in numpy.squeeze, the output will become a scalar.

Parameters

data	data to squeeze
axis	Selects a subset of the single-dimensional entries in the shape. If an axis is selected with shape entry greater than one, an error is raised.

Returns: new symbol

Symbol mxnet::cpp::stack	(	const std::string &	symbol_name,
		const std::vector< Symbol > &	data,
		int	num_args,
		int	axis = `0`
	)

inline

Join a sequence of arrays along a new axis.

   The axis parameter specifies the index of the new axis in the dimensions of the
   result. For example, if axis=0 it will be the first dimension and if axis=-1 it
   will be the last dimension.

   Examples::

   x = [1, 2]
   y = [3, 4]

   stack(x, y) = [[1, 2],
   [3, 4]]
   stack(x, y, axis=1) = [[1, 3],
   [2, 4]]

Parameters

symbol_name	name of the resulting symbol
data	List of arrays to stack
num_args	Number of inputs to be stacked.
axis	The axis in the result array along which the input arrays are stacked.

Returns: new symbol

Symbol mxnet::cpp::stack	(	const std::vector< Symbol > &	data,
		int	num_args,
		int	axis = `0`
	)

inline

Join a sequence of arrays along a new axis.

   The axis parameter specifies the index of the new axis in the dimensions of the
   result. For example, if axis=0 it will be the first dimension and if axis=-1 it
   will be the last dimension.

   Examples::

   x = [1, 2]
   y = [3, 4]

   stack(x, y) = [[1, 2],
   [3, 4]]
   stack(x, y, axis=1) = [[1, 3],
   [2, 4]]

Parameters

data	List of arrays to stack
num_args	Number of inputs to be stacked.
axis	The axis in the result array along which the input arrays are stacked.

Returns: new symbol

Symbol mxnet::cpp::sum	(	const std::string &	symbol_name,
		Symbol	data,
		dmlc::optional< Shape >	axis = `dmlc::optional<Shape>()`,
		bool	keepdims = `false`,
		bool	exclude = `false`
	)

inline

Computes the sum of array elements over given axes.

   .. Note::

   `sum` and `sum_axis` are equivalent.
   For ndarray of csr storage type summation along axis 0 and axis 1 is supported.
   Setting keepdims or exclude to True will cause a fallback to dense operator.

   Example::

   data = [[[1, 2], [2, 3], [1, 3]],
   [[1, 4], [4, 3], [5, 2]],
   [[7, 1], [7, 2], [7, 3]]]

   sum(data, axis=1)
   [[  4.   8.]
   [ 10.   9.]
   [ 21.   6.]]

   sum(data, axis=[1,2])
   [ 12.  19.  27.]

   data = [[1, 2, 0],
   [3, 0, 1],
   [4, 1, 0]]

   csr = cast_storage(data, 'csr')

   sum(csr, axis=0)
   [ 8.  3.  1.]

   sum(csr, axis=1)
   [ 3.  4.  5.]



   Defined in src/operator/tensor/broadcast_reduce_op_value.cc:L115

Parameters

symbol_name	name of the resulting symbol
data	The input
axis	The axis or axes along which to perform the reduction. The default, `axis=()`, will compute over all elements into a scalar array with shape `(1,)`. If `axis` is int, a reduction is performed on a particular axis. If `axis` is a tuple of ints, a reduction is performed on all the axes specified in the tuple. If `exclude` is true, reduction will be performed on the axes that are NOT in axis instead. Negative values mean indexing from right to left.
keepdims	If this is set to `True`, the reduced axes are left in the result as dimensions with size one.
exclude	Whether to perform reduction on axis that are NOT in axis instead.

Returns: new symbol

Symbol mxnet::cpp::sum	(	Symbol	data,
		dmlc::optional< Shape >	axis = `dmlc::optional<Shape>()`,
		bool	keepdims = `false`,
		bool	exclude = `false`
	)

inline

Computes the sum of array elements over given axes.

   .. Note::

   `sum` and `sum_axis` are equivalent.
   For ndarray of csr storage type summation along axis 0 and axis 1 is supported.
   Setting keepdims or exclude to True will cause a fallback to dense operator.

   Example::

   data = [[[1, 2], [2, 3], [1, 3]],
   [[1, 4], [4, 3], [5, 2]],
   [[7, 1], [7, 2], [7, 3]]]

   sum(data, axis=1)
   [[  4.   8.]
   [ 10.   9.]
   [ 21.   6.]]

   sum(data, axis=[1,2])
   [ 12.  19.  27.]

   data = [[1, 2, 0],
   [3, 0, 1],
   [4, 1, 0]]

   csr = cast_storage(data, 'csr')

   sum(csr, axis=0)
   [ 8.  3.  1.]

   sum(csr, axis=1)
   [ 3.  4.  5.]



   Defined in src/operator/tensor/broadcast_reduce_op_value.cc:L115

Parameters

data	The input
axis	The axis or axes along which to perform the reduction. The default, `axis=()`, will compute over all elements into a scalar array with shape `(1,)`. If `axis` is int, a reduction is performed on a particular axis. If `axis` is a tuple of ints, a reduction is performed on all the axes specified in the tuple. If `exclude` is true, reduction will be performed on the axes that are NOT in axis instead. Negative values mean indexing from right to left.
keepdims	If this is set to `True`, the reduced axes are left in the result as dimensions with size one.
exclude	Whether to perform reduction on axis that are NOT in axis instead.

Returns: new symbol

Symbol mxnet::cpp::SVMOutput	(	const std::string &	symbol_name,
		Symbol	data,
		Symbol	label,
		mx_float	margin = `1`,
		mx_float	regularization_coefficient = `1`,
		bool	use_linear = `false`
	)

inline

Computes support vector machine based transformation of the input.

   This tutorial demonstrates using SVM as output layer for classification instead
   of softmax: https://github.com/dmlc/mxnet/tree/master/example/svm_mnist.

Parameters

symbol_name	name of the resulting symbol
data	Input data for SVM transformation.
label	Class label for the input data.
margin	The loss function penalizes outputs that lie outside this margin.
regularization_coefficient	Regularization parameter for the SVM. This balances the tradeoff between coefficient size and error.
use_linear	Whether to use L1-SVM objective. L2-SVM objective is used by default.

Returns: new symbol

Symbol mxnet::cpp::SVMOutput	(	Symbol	data,
		Symbol	label,
		mx_float	margin = `1`,
		mx_float	regularization_coefficient = `1`,
		bool	use_linear = `false`
	)

inline

Computes support vector machine based transformation of the input.

   This tutorial demonstrates using SVM as output layer for classification instead
   of softmax: https://github.com/dmlc/mxnet/tree/master/example/svm_mnist.

Parameters

data	Input data for SVM transformation.
label	Class label for the input data.
margin	The loss function penalizes outputs that lie outside this margin.
regularization_coefficient	Regularization parameter for the SVM. This balances the tradeoff between coefficient size and error.
use_linear	Whether to use L1-SVM objective. L2-SVM objective is used by default.

Returns: new symbol

Symbol mxnet::cpp::SwapAxis	(	const std::string &	symbol_name,
		Symbol	data,
		uint32_t	dim1 = `0`,
		uint32_t	dim2 = `0`
	)

inline

Interchanges two axes of an array.

   Examples::

   x = [[1, 2, 3]]
   swapaxes(x, 0, 1) = [[ 1],
   [ 2],
   [ 3]]

   x = [[[ 0, 1],
   [ 2, 3]],
   [[ 4, 5],
   [ 6, 7]]]  // (2,2,2) array

   swapaxes(x, 0, 2) = [[[ 0, 4],
   [ 2, 6]],
   [[ 1, 5],
   [ 3, 7]]]


   Defined in src/operator/swapaxis.cc:L70

Parameters

symbol_name	name of the resulting symbol
data	Input array.
dim1	the first axis to be swapped.
dim2	the second axis to be swapped.

Returns: new symbol

Symbol mxnet::cpp::SwapAxis	(	Symbol	data,
		uint32_t	dim1 = `0`,
		uint32_t	dim2 = `0`
	)

inline

Interchanges two axes of an array.

   Examples::

   x = [[1, 2, 3]]
   swapaxes(x, 0, 1) = [[ 1],
   [ 2],
   [ 3]]

   x = [[[ 0, 1],
   [ 2, 3]],
   [[ 4, 5],
   [ 6, 7]]]  // (2,2,2) array

   swapaxes(x, 0, 2) = [[[ 0, 4],
   [ 2, 6]],
   [[ 1, 5],
   [ 3, 7]]]


   Defined in src/operator/swapaxis.cc:L70

Parameters

data	Input array.
dim1	the first axis to be swapped.
dim2	the second axis to be swapped.

Returns: new symbol

Symbol mxnet::cpp::take	(	const std::string &	symbol_name,
		Symbol	a,
		Symbol	indices,
		int	axis = `0`,
		TakeMode	mode = `TakeMode::kClip`
	)

inline

Takes elements from an input array along the given axis.

   This function slices the input array along a particular axis with the provided
   indices.

   Given data tensor of rank r >= 1, and indices tensor of rank q, gather entries
   of the axis dimension of data (by default outer-most one as axis=0) indexed by
   indices, and concatenates them in an output tensor of rank q + (r - 1).

   Examples::
   x = [4.  5.  6.]

   // Trivial case, take the second element along the first axis.

   take(x, [1]) = [ 5. ]

   // The other trivial case, axis=-1, take the third element along the first axis

   take(x, [3], axis=-1, mode='clip') = [ 6. ]

   x = [[ 1.,  2.],
   [ 3.,  4.],
   [ 5.,  6.]]

   // In this case we will get rows 0 and 1, then 1 and 2. Along axis 0

   take(x, [[0,1],[1,2]]) = [[[ 1.,  2.],
   [ 3.,  4.]],

   [[ 3.,  4.],
   [ 5.,  6.]]]

   // In this case we will get rows 0 and 1, then 1 and 2 (calculated by wrapping
   // around).
   // Along axis 1

   take(x, [[0, 3], [-1, -2]], axis=1, mode='wrap') = [[[ 1.,  2.],
   [ 3.,  4.]],

   [[ 3.,  4.],
   [ 5.,  6.]]]



   Defined in src/operator/tensor/indexing_op.cc:L434

Parameters

symbol_name	name of the resulting symbol
a	The input array.
indices	The indices of the values to be extracted.
axis	The axis of input array to be taken. For input tensor of rank r, it could be in the range of [-r, r-1].
mode	Specify how out-of-bound indices behave. Default is "clip". "clip" means clip to the range. So, if all indices mentioned are too large, they are replaced by the index that addresses the last element along an axis. "wrap" means to wrap around.

Returns: new symbol

Symbol mxnet::cpp::take	(	Symbol	a,
		Symbol	indices,
		int	axis = `0`,
		TakeMode	mode = `TakeMode::kClip`
	)

inline

Takes elements from an input array along the given axis.

   This function slices the input array along a particular axis with the provided
   indices.

   Given data tensor of rank r >= 1, and indices tensor of rank q, gather entries
   of the axis dimension of data (by default outer-most one as axis=0) indexed by
   indices, and concatenates them in an output tensor of rank q + (r - 1).

   Examples::
   x = [4.  5.  6.]

   // Trivial case, take the second element along the first axis.

   take(x, [1]) = [ 5. ]

   // The other trivial case, axis=-1, take the third element along the first axis

   take(x, [3], axis=-1, mode='clip') = [ 6. ]

   x = [[ 1.,  2.],
   [ 3.,  4.],
   [ 5.,  6.]]

   // In this case we will get rows 0 and 1, then 1 and 2. Along axis 0

   take(x, [[0,1],[1,2]]) = [[[ 1.,  2.],
   [ 3.,  4.]],

   [[ 3.,  4.],
   [ 5.,  6.]]]

   // In this case we will get rows 0 and 1, then 1 and 2 (calculated by wrapping
   // around).
   // Along axis 1

   take(x, [[0, 3], [-1, -2]], axis=1, mode='wrap') = [[[ 1.,  2.],
   [ 3.,  4.]],

   [[ 3.,  4.],
   [ 5.,  6.]]]



   Defined in src/operator/tensor/indexing_op.cc:L434

Parameters

a	The input array.
indices	The indices of the values to be extracted.
axis	The axis of input array to be taken. For input tensor of rank r, it could be in the range of [-r, r-1].
mode	Specify how out-of-bound indices behave. Default is "clip". "clip" means clip to the range. So, if all indices mentioned are too large, they are replaced by the index that addresses the last element along an axis. "wrap" means to wrap around.

Returns: new symbol

Symbol mxnet::cpp::tan	(	const std::string &	symbol_name,
		Symbol	data
	)

inline

Computes the element-wise tangent of the input array.

   The input should be in radians (:math:`2\pi` rad equals 360 degrees).

   .. math::
   tan([0, \pi/4, \pi/2]) = [0, 1, -inf]

   The storage type of ``tan`` output depends upon the input storage type:

   - tan(default) = default
   - tan(row_sparse) = row_sparse
   - tan(csr) = csr



   Defined in src/operator/tensor/elemwise_unary_op_trig.cc:L83

Parameters

symbol_name	name of the resulting symbol
data	The input array.

Returns: new symbol

Symbol mxnet::cpp::tan ( Symbol data )

inline

Computes the element-wise tangent of the input array.

   The input should be in radians (:math:`2\pi` rad equals 360 degrees).

   .. math::
   tan([0, \pi/4, \pi/2]) = [0, 1, -inf]

   The storage type of ``tan`` output depends upon the input storage type:

   - tan(default) = default
   - tan(row_sparse) = row_sparse
   - tan(csr) = csr



   Defined in src/operator/tensor/elemwise_unary_op_trig.cc:L83

Parameters

data	The input array.

Returns: new symbol

Symbol mxnet::cpp::tanh	(	const std::string &	symbol_name,
		Symbol	data
	)

inline

Returns the hyperbolic tangent of the input array, computed element-wise.

   .. math::
   tanh(x) = sinh(x) / cosh(x)

   The storage type of ``tanh`` output depends upon the input storage type:

   - tanh(default) = default
   - tanh(row_sparse) = row_sparse
   - tanh(csr) = csr



   Defined in src/operator/tensor/elemwise_unary_op_trig.cc:L234

Parameters

symbol_name	name of the resulting symbol
data	The input array.

Returns: new symbol

Symbol mxnet::cpp::tanh ( Symbol data )

inline

Returns the hyperbolic tangent of the input array, computed element-wise.

   .. math::
   tanh(x) = sinh(x) / cosh(x)

   The storage type of ``tanh`` output depends upon the input storage type:

   - tanh(default) = default
   - tanh(row_sparse) = row_sparse
   - tanh(csr) = csr



   Defined in src/operator/tensor/elemwise_unary_op_trig.cc:L234

Parameters

data	The input array.

Returns: new symbol

Symbol mxnet::cpp::tile	(	const std::string &	symbol_name,
		Symbol	data,
		Shape	reps
	)

inline

Repeats the whole array multiple times.

   If ``reps`` has length *d*, and input array has dimension of *n*. There are
   three cases:

   - **n=d**. Repeat *i*-th dimension of the input by ``reps[i]`` times::

   x = [[1, 2],
   [3, 4]]

   tile(x, reps=(2,3)) = [[ 1.,  2.,  1.,  2.,  1.,  2.],
   [ 3.,  4.,  3.,  4.,  3.,  4.],
   [ 1.,  2.,  1.,  2.,  1.,  2.],
   [ 3.,  4.,  3.,  4.,  3.,  4.]]

   - **n>d**. ``reps`` is promoted to length *n* by pre-pending 1's to it. Thus for
   an input shape ``(2,3)``, ``reps=(2,)`` is treated as ``(1,2)``::


   tile(x, reps=(2,)) = [[ 1.,  2.,  1.,  2.],
   [ 3.,  4.,  3.,  4.]]

   - **n<d**. The input is promoted to be d-dimensional by prepending new axes. So
   shape ``(2,2)`` array is promoted to ``(1,2,2)`` for 3-D replication::

   tile(x, reps=(2,2,3)) = [[[ 1.,  2.,  1.,  2.,  1.,  2.],
   [ 3.,  4.,  3.,  4.,  3.,  4.],
   [ 1.,  2.,  1.,  2.,  1.,  2.],
   [ 3.,  4.,  3.,  4.,  3.,  4.]],

   [[ 1.,  2.,  1.,  2.,  1.,  2.],
   [ 3.,  4.,  3.,  4.,  3.,  4.],
   [ 1.,  2.,  1.,  2.,  1.,  2.],
   [ 3.,  4.,  3.,  4.,  3.,  4.]]]


   Defined in src/operator/tensor/matrix_op.cc:L752

Parameters

symbol_name	name of the resulting symbol
data	Input data array
reps	The number of times for repeating the tensor a. Each dim size of reps must be a positive integer. If reps has length d, the result will have dimension of max(d, a.ndim); If a.ndim < d, a is promoted to be d-dimensional by prepending new axes. If a.ndim > d, reps is promoted to a.ndim by pre-pending 1's to it.

Returns: new symbol

Symbol mxnet::cpp::tile	(	Symbol	data,
		Shape	reps
	)

inline

Repeats the whole array multiple times.

   If ``reps`` has length *d*, and input array has dimension of *n*. There are
   three cases:

   - **n=d**. Repeat *i*-th dimension of the input by ``reps[i]`` times::

   x = [[1, 2],
   [3, 4]]

   tile(x, reps=(2,3)) = [[ 1.,  2.,  1.,  2.,  1.,  2.],
   [ 3.,  4.,  3.,  4.,  3.,  4.],
   [ 1.,  2.,  1.,  2.,  1.,  2.],
   [ 3.,  4.,  3.,  4.,  3.,  4.]]

   - **n>d**. ``reps`` is promoted to length *n* by pre-pending 1's to it. Thus for
   an input shape ``(2,3)``, ``reps=(2,)`` is treated as ``(1,2)``::


   tile(x, reps=(2,)) = [[ 1.,  2.,  1.,  2.],
   [ 3.,  4.,  3.,  4.]]

   - **n<d**. The input is promoted to be d-dimensional by prepending new axes. So
   shape ``(2,2)`` array is promoted to ``(1,2,2)`` for 3-D replication::

   tile(x, reps=(2,2,3)) = [[[ 1.,  2.,  1.,  2.,  1.,  2.],
   [ 3.,  4.,  3.,  4.,  3.,  4.],
   [ 1.,  2.,  1.,  2.,  1.,  2.],
   [ 3.,  4.,  3.,  4.,  3.,  4.]],

   [[ 1.,  2.,  1.,  2.,  1.,  2.],
   [ 3.,  4.,  3.,  4.,  3.,  4.],
   [ 1.,  2.,  1.,  2.,  1.,  2.],
   [ 3.,  4.,  3.,  4.,  3.,  4.]]]


   Defined in src/operator/tensor/matrix_op.cc:L752

Parameters

data	Input data array
reps	The number of times for repeating the tensor a. Each dim size of reps must be a positive integer. If reps has length d, the result will have dimension of max(d, a.ndim); If a.ndim < d, a is promoted to be d-dimensional by prepending new axes. If a.ndim > d, reps is promoted to a.ndim by pre-pending 1's to it.

Returns: new symbol

Symbol mxnet::cpp::topk	(	const std::string &	symbol_name,
		Symbol	data,
		dmlc::optional< int >	axis = `dmlc::optional<int>(-1)`,
		int	k = `1`,
		TopkRetTyp	ret_typ = `TopkRetTyp::kIndices`,
		bool	is_ascend = `false`,
		TopkDtype	dtype = `TopkDtype::kFloat32`
	)

inline

Returns the top k elements in an input array along the given axis. The returned elements will be sorted.

Examples::

   x = [[ 0.3, 0.2, 0.4], [ 0.1, 0.3, 0.2]]

   // returns an index of the largest element on last axis
   topk(x) = [[ 2.], [ 1.]]

   // returns the value of top-2 largest elements on last axis
   topk(x, ret_typ='value', k=2) = [[ 0.4, 0.3], [ 0.3, 0.2]]

   // returns the value of top-2 smallest elements on last axis
   topk(x, ret_typ='value', k=2, is_ascend=1) = [[ 0.2 , 0.3], [ 0.1 , 0.2]]

   // returns the value of top-2 largest elements on axis 0
   topk(x, axis=0, ret_typ='value', k=2) = [[ 0.3, 0.3, 0.4], [ 0.1, 0.2, 0.2]]

   // flattens and then returns list of both values and indices
   topk(x, ret_typ='both', k=2) = [[[ 0.4, 0.3], [ 0.3, 0.2]] , [[ 2., 0.], [ 1., 2.]]]

   Defined in src/operator/tensor/ordering_op.cc:L64

Parameters

symbol_name	name of the resulting symbol
data	The input array
axis	Axis along which to choose the top k indices. If not given, the flattened array is used.
k	Number of top elements to select, should be always smaller than or equal to the element number in the given axis.
ret_typ	The return type. "value" means to return the top k values, "indices" means to return the indices of the top k values, "mask" means to return a mask array containing 0 and 1. 1 means the top k values. "both" means to return a list of both values and indices of top k elements.
is_ascend	Whether to choose k largest or k smallest elements. Top K largest elements will be chosen if set to false.
dtype	DType of the output indices when ret_typ is "indices" or "both". An error will be raised if the selected data type cannot precisely represent the indices.

Returns: new symbol

Symbol mxnet::cpp::topk	(	Symbol	data,
		dmlc::optional< int >	axis = `dmlc::optional<int>(-1)`,
		int	k = `1`,
		TopkRetTyp	ret_typ = `TopkRetTyp::kIndices`,
		bool	is_ascend = `false`,
		TopkDtype	dtype = `TopkDtype::kFloat32`
	)

inline

Returns the top k elements in an input array along the given axis. The returned elements will be sorted.

Examples::

x = [[ 0.3, 0.2, 0.4], [ 0.1, 0.3, 0.2]]

// returns an index of the largest element on last axis topk(x) = [[ 2.], [ 1.]]

// returns the value of top-2 largest elements on last axis topk(x, ret_typ='value', k=2) = [[ 0.4, 0.3], [ 0.3, 0.2]]

// returns the value of top-2 smallest elements on last axis topk(x, ret_typ='value', k=2, is_ascend=1) = [[ 0.2 , 0.3], [ 0.1 , 0.2]]

// returns the value of top-2 largest elements on axis 0 topk(x, axis=0, ret_typ='value', k=2) = [[ 0.3, 0.3, 0.4], [ 0.1, 0.2, 0.2]]

// flattens and then returns list of both values and indices topk(x, ret_typ='both', k=2) = [[[ 0.4, 0.3], [ 0.3, 0.2]] , [[ 2., 0.], [ 1., 2.]]]

   Defined in src/operator/tensor/ordering_op.cc:L64

Parameters

data	The input array
axis	Axis along which to choose the top k indices. If not given, the flattened array is used. Default is -1.
k	Number of top elements to select, should be always smaller than or equal to the element number in the given axis. A global sort is performed if set k < 1.
ret_typ	The return type. "value" means to return the top k values, "indices" means to return the indices of the top k values, "mask" means to return a mask array containing 0 and 1. 1 means the top k values. "both" means to return a list of both values and indices of top k elements.
is_ascend	Whether to choose k largest or k smallest elements. Top K largest elements will be chosen if set to false.
dtype	DType of the output indices when ret_typ is "indices" or "both". An error will be raised if the selected data type cannot precisely represent the indices.

Returns: new symbol

Symbol mxnet::cpp::transpose	(	const std::string &	symbol_name,
		Symbol	data,
		Shape	axes = `Shape()`
	)

inline

Permutes the dimensions of an array.

   Examples::

   x = [[ 1, 2],
   [ 3, 4]]

   transpose(x) = [[ 1.,  3.],
   [ 2.,  4.]]

   x = [[[ 1.,  2.],
   [ 3.,  4.]],

   [[ 5.,  6.],
   [ 7.,  8.]]]

   transpose(x) = [[[ 1.,  5.],
   [ 3.,  7.]],

   [[ 2.,  6.],
   [ 4.,  8.]]]

   transpose(x, axes=(1,0,2)) = [[[ 1.,  2.],
   [ 5.,  6.]],

   [[ 3.,  4.],
   [ 7.,  8.]]]


   Defined in src/operator/tensor/matrix_op.cc:L311

Parameters

symbol_name	name of the resulting symbol
data	Source input
axes	Target axis order. By default the axes will be inverted.

Returns: new symbol

Symbol mxnet::cpp::transpose	(	Symbol	data,
		Shape	axes = `Shape()`
	)

inline

Permutes the dimensions of an array.

   Examples::

   x = [[ 1, 2],
   [ 3, 4]]

   transpose(x) = [[ 1.,  3.],
   [ 2.,  4.]]

   x = [[[ 1.,  2.],
   [ 3.,  4.]],

   [[ 5.,  6.],
   [ 7.,  8.]]]

   transpose(x) = [[[ 1.,  5.],
   [ 3.,  7.]],

   [[ 2.,  6.],
   [ 4.,  8.]]]

   transpose(x, axes=(1,0,2)) = [[[ 1.,  2.],
   [ 5.,  6.]],

   [[ 3.,  4.],
   [ 7.,  8.]]]


   Defined in src/operator/tensor/matrix_op.cc:L311

Parameters

data	Source input
axes	Target axis order. By default the axes will be inverted.

Returns: new symbol

Symbol mxnet::cpp::trunc	(	const std::string &	symbol_name,
		Symbol	data
	)

inline

Return the element-wise truncated value of the input.

   The truncated value of the scalar x is the nearest integer i which is closer to
   zero than x is. In short, the fractional part of the signed number x is discarded.

   Example::

   trunc([-2.1, -1.9, 1.5, 1.9, 2.1]) = [-2., -1.,  1.,  1.,  2.]

   The storage type of ``trunc`` output depends upon the input storage type:

   - trunc(default) = default
   - trunc(row_sparse) = row_sparse
   - trunc(csr) = csr



   Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L785

Parameters

symbol_name	name of the resulting symbol
data	The input array.

Returns: new symbol

Symbol mxnet::cpp::trunc ( Symbol data )

inline

Return the element-wise truncated value of the input.

   The truncated value of the scalar x is the nearest integer i which is closer to
   zero than x is. In short, the fractional part of the signed number x is discarded.

   Example::

   trunc([-2.1, -1.9, 1.5, 1.9, 2.1]) = [-2., -1.,  1.,  1.,  2.]

   The storage type of ``trunc`` output depends upon the input storage type:

   - trunc(default) = default
   - trunc(row_sparse) = row_sparse
   - trunc(csr) = csr



   Defined in src/operator/tensor/elemwise_unary_op_basic.cc:L785

Parameters

data	The input array.

Returns: new symbol

Symbol mxnet::cpp::UpSampling	(	const std::string &	symbol_name,
		const std::vector< Symbol > &	data,
		uint32_t	scale,
		UpSamplingSampleType	sample_type,
		int	num_args,
		uint32_t	num_filter = `0`,
		UpSamplingMultiInputMode	multi_input_mode = `UpSamplingMultiInputMode::kConcat`,
		uint64_t	workspace = `512`
	)

inline

Performs nearest neighbor/bilinear up sampling to inputs.

Parameters

symbol_name	name of the resulting symbol
data	Array of tensors to upsample
scale	Up sampling scale
sample_type	upsampling method
num_args	Number of inputs to be upsampled. For nearest neighbor upsampling, this can be 1-N; the size of output will be (scale*h_0, scale*w_0) and all other inputs will be upsampled to the same size. For bilinear upsampling this must be 2; 1 input and 1 weight.
num_filter	Input filter. Only used by bilinear sample_type.
multi_input_mode	How to handle multiple input. concat means concatenate upsampled images along the channel dimension. sum means add all images together, only available for nearest neighbor upsampling.
workspace	Tmp workspace for deconvolution (MB)

Returns: new symbol

Symbol mxnet::cpp::UpSampling	(	const std::vector< Symbol > &	data,
		uint32_t	scale,
		UpSamplingSampleType	sample_type,
		int	num_args,
		uint32_t	num_filter = `0`,
		UpSamplingMultiInputMode	multi_input_mode = `UpSamplingMultiInputMode::kConcat`,
		uint64_t	workspace = `512`
	)

inline

Performs nearest neighbor/bilinear up sampling to inputs.

Parameters

data	Array of tensors to upsample
scale	Up sampling scale
sample_type	upsampling method
num_args	Number of inputs to be upsampled. For nearest neighbor upsampling, this can be 1-N; the size of output will be (scale*h_0, scale*w_0) and all other inputs will be upsampled to the same size. For bilinear upsampling this must be 2; 1 input and 1 weight.
num_filter	Input filter. Only used by bilinear sample_type.
multi_input_mode	How to handle multiple input. concat means concatenate upsampled images along the channel dimension. sum means add all images together, only available for nearest neighbor upsampling.
workspace	Tmp workspace for deconvolution (MB)

Returns: new symbol

Symbol mxnet::cpp::where	(	const std::string &	symbol_name,
		Symbol	condition,
		Symbol	x,
		Symbol	y
	)

inline

Return the elements, either from x or y, depending on the condition.

   Given three ndarrays, condition, x, and y, return an ndarray with the elements
   from x or y, depending on whether the elements from condition are true or false.
   x and y must have the same shape. If condition has the same shape as x, each
   element in the output array is from x if the corresponding element in the
   condition is true, and from y if false.

   If condition does not have the same shape as x, it must be a 1D array whose size is
   the same as x's first dimension size. Each row of the output array is from x's row
   if the corresponding element from condition is true, and from y's row if false.

   Note that all non-zero values are interpreted as ``True`` in condition.

   Examples::

   x = [[1, 2], [3, 4]]
   y = [[5, 6], [7, 8]]
   cond = [[0, 1], [-1, 0]]

   where(cond, x, y) = [[5, 2], [3, 8]]

   csr_cond = cast_storage(cond, 'csr')

   where(csr_cond, x, y) = [[5, 2], [3, 8]]



   Defined in src/operator/tensor/control_flow_op.cc:L57

Parameters

symbol_name	name of the resulting symbol
condition	condition array
x
y

Returns: new symbol

Symbol mxnet::cpp::where	(	Symbol	condition,
		Symbol	x,
		Symbol	y
	)

inline

Return the elements, either from x or y, depending on the condition.

   Given three ndarrays, condition, x, and y, return an ndarray with the elements
   from x or y, depending on whether the elements from condition are true or false.
   x and y must have the same shape. If condition has the same shape as x, each
   element in the output array is from x if the corresponding element in the
   condition is true, and from y if false.

   If condition does not have the same shape as x, it must be a 1D array whose size is
   the same as x's first dimension size. Each row of the output array is from x's row
   if the corresponding element from condition is true, and from y's row if false.

   Note that all non-zero values are interpreted as ``True`` in condition.

   Examples::

   x = [[1, 2], [3, 4]]
   y = [[5, 6], [7, 8]]
   cond = [[0, 1], [-1, 0]]

   where(cond, x, y) = [[5, 2], [3, 8]]

   csr_cond = cast_storage(cond, 'csr')

   where(csr_cond, x, y) = [[5, 2], [3, 8]]



   Defined in src/operator/tensor/control_flow_op.cc:L57

Parameters

condition	condition array
x
y

Returns: new symbol

Symbol mxnet::cpp::zeros_like	(	const std::string &	symbol_name,
		Symbol	data
	)

inline

Return an array of zeros with the same shape, type and storage type as the input array.

The storage type of zeros_like output depends on the storage type of the input:

zeros_like(row_sparse) = row_sparse
zeros_like(csr) = csr
zeros_like(default) = default

Examples::

x = [[ 1., 1., 1.], [ 1., 1., 1.]]

zeros_like(x) = [[ 0., 0., 0.], [ 0., 0., 0.]]

Parameters

symbol_name	name of the resulting symbol
data	The input

Returns: new symbol

Symbol mxnet::cpp::zeros_like ( Symbol data )

inline

Return an array of zeros with the same shape, type and storage type as the input array.

The storage type of zeros_like output depends on the storage type of the input:

zeros_like(row_sparse) = row_sparse
zeros_like(csr) = csr
zeros_like(default) = default

Examples::

x = [[ 1., 1., 1.], [ 1., 1., 1.]]

zeros_like(x) = [[ 0., 0., 0.], [ 0., 0., 0.]]

Parameters

data The input

Returns: new symbol

Classes

Typedefs

Enumerations

Functions

Typedef Documentation

Enumeration Type Documentation

Function Documentation