30 #ifndef MSHADOW_TENSOR_H_ 31 #define MSHADOW_TENSOR_H_ 62 inline std::ostream &operator<<(std::ostream &os, const Shape<ndim> &shape);
68 template<
int dimension>
71 static const int kDimension = dimension;
73 static const int kSubdim = dimension - 1;
81 for (
int i = 0; i < kDimension; ++i) {
82 this->shape_[i] = s[i];
107 for (
int i = 0; i < kDimension; ++i) {
108 if (s.
shape_[i] != this->shape_[i])
return false;
117 return !(*
this == s);
134 s.
shape_[1] = this->shape_[kDimension - 1];
137 for (
int i = 0; i < kDimension - 1; ++i) {
138 ymax *= this->shape_[i];
145 index_t size = this->shape_[0];
147 for (
int i = 1; i < kDimension; ++i) {
148 size *= this->shape_[i];
160 for (
int i = dimstart; i < dimend; ++i) {
161 num *= this->shape_[i];
173 for (
int i = 0; i < kSubdim; ++i) {
174 s.
shape_[i] = this->shape_[i + 1];
184 template<
int dimstart,
int dimend>
186 Shape<dimend - dimstart> s;
188 for (
int i = dimstart; i < dimend; ++i) {
189 s[i - dimstart] = this->shape_[i];
195 friend std::ostream &operator<<(std::ostream &os, const Shape<dim> &shape);
229 s[0] = s0; s[1] = s1; s[2] = s2;
243 s[0] = s0; s[1] = s1; s[2] = s2; s[3] = s3;
258 s[0] = s0; s[1] = s1; s[2] = s2; s[3] = s3; s[4] = s4;
271 switch (src_layout) {
281 LOG(FATAL) <<
"Invalid layout for 3d shape " << src_layout;
283 switch (dst_layout) {
294 LOG(FATAL) <<
"Invalid layout for 3d shape " << src_layout;
308 switch (src_layout) {
319 LOG(FATAL) <<
"Invalid layout for 4d shape " << src_layout;
323 switch (dst_layout) {
333 LOG(FATAL) <<
"Invalid layout for 4d shape " << src_layout;
348 switch (src_layout) {
360 LOG(FATAL) <<
"Invalid layout for 5d shape " << src_layout;
363 switch (dst_layout) {
374 LOG(FATAL) <<
"Invalid layout for 5d shape " << src_layout;
382 template<
typename Device>
408 template<
typename Container,
typename Device,
int dimension,
typename DType>
418 template<
typename Device,
int dimension,
421 Device, dimension, DType> {
429 static const int kSubdim = dimension - 1;
434 DType *dptr_ =
nullptr;
454 : shape_(shape), stream_(NULL) {}
457 : dptr_(dptr), shape_(shape), stride_(shape[kSubdim]), stream_(NULL) {}
461 : dptr_(dptr), shape_(shape), stride_(shape[kSubdim]), stream_(stream) {}
466 : dptr_(dptr), shape_(shape), stride_(stride), stream_(stream) {}
472 this->stream_ = stream;
478 template<
int startdim>
482 for (
int i = startdim; i < kSubdim; ++i) {
483 memsz *= this->shape_[i];
492 return this->shape_[dimension - 1] == stride_;
498 return this->MemSize<0>();
529 shape_.
SubShape(), stride_, stream_);
542 s, stride_, stream_);
554 template<
typename E,
int etype>
557 return this->__assign(exp);
561 return this->__assign(exp);
567 template<
typename Device,
typename DType>
569 public TRValue<Tensor<Device, 1, DType>, Device, 1, DType> {
578 : shape_(shape), stream_(NULL) {}
580 : dptr_(dptr), shape_(shape), stride_(shape[0]), stream_(NULL) {}
582 : dptr_(dptr), shape_(shape), stride_(shape[0]), stream_(stream) {}
585 : dptr_(dptr), shape_(shape), stride_(stride), stream_(stream) {}
587 this->stream_ = stream;
624 template<
typename E,
int etype>
627 return this->__assign(exp);
630 return this->__assign(exp);
643 template<
typename Device>
651 template<
typename Device>
658 template<
typename Device>
668 template<
typename Device>
670 bool create_dnn_handle,
676 template<
typename Device>
678 return NewStream<Device>(
true,
false, dev_id);
684 template<
typename Device>
697 template<
int dim,
typename DType>
711 template<
int dim,
typename DType>
720 template<
int dim,
typename DType>
728 template<
int dim,
typename DType>
742 template<
typename Device,
typename DType,
int dim>
755 template<
int dim,
typename DType>
767 template<
int dim,
typename DType>
779 template<
int dim,
typename DType>
791 template<
int dim,
typename DType>
800 template<
typename DType>
807 template<
typename DType>
816 template<
typename DType>
826 template<
typename DType>
838 template<
bool clip = true,
typename IndexType,
typename DType>
850 template<
bool clip = true,
typename IndexType,
typename DType>
863 template<
typename IndexType,
typename DType>
877 template<
typename IndexType,
typename DType>
890 template<
typename IndexType,
typename DType>
902 template<
typename IndexType,
typename DType>
912 template<
typename KDType,
typename VDType>
914 bool is_ascend =
true);
921 template<
typename KDType,
typename VDType>
923 bool is_ascend =
true);
932 template<
typename Device,
typename VDType,
typename SDType>
949 template<
typename Saver,
typename R,
int dim,
950 typename DType,
typename E,
int etype>
965 template<
typename Saver,
typename R,
int dim,
966 typename DType,
typename E,
int etype>
982 template<
typename Saver,
typename Reducer,
983 typename R,
typename DType,
typename E,
int etype>
1000 template<
typename Saver,
typename Reducer,
typename R,
1001 typename DType,
typename E,
int etype>
1019 template<
typename Saver,
typename Reducer,
int dimkeep,
1020 typename R,
typename DType,
typename E,
int etype>
1038 template<
typename Saver,
typename Reducer,
int dimkeep,
1039 typename R,
typename DType,
typename E,
int etype>
1049 template<
typename Device,
typename DType>
1062 template<
bool transpose_left,
bool transpose_right,
typename Device,
typename DType>
1080 #ifdef MSHADOW_SCALAR_ 1081 #error "MSHADOW_SCALAR_ must not be defined" 1084 #define MSHADOW_SCALAR_ float 1086 #undef MSHADOW_SCALAR_ 1087 #define MSHADOW_SCALAR_ double 1089 #undef MSHADOW_SCALAR_ 1090 #define MSHADOW_SCALAR_ int32_t 1092 #undef MSHADOW_SCALAR_ 1093 #define MSHADOW_SCALAR_ int64_t 1095 #undef MSHADOW_SCALAR_ 1096 #define MSHADOW_SCALAR_ mshadow::half::half_t 1098 #undef MSHADOW_SCALAR_ 1099 #endif // MSHADOW_TENSOR_H_ MSHADOW_XINLINE bool operator==(const Shape< kDimension > &s) const
Definition: tensor.h:105
void VectorDot(Tensor< Device, 1, DType > dst, const Tensor< Device, 1, DType > &lhs, const Tensor< Device, 1, DType > &rhs)
CPU/GPU: 1 dimension vector dot.
Definition: tensor_cpu-inl.h:597
void FreeSpace(Tensor< cpu, dim, DType > *obj)
CPU/GPU: free the space of tensor, will set obj.dptr to NULL.
Definition: tensor_cpu-inl.h:140
Stream< Device > * stream_
Definition: tensor.h:574
MSHADOW_XINLINE index_t & operator[](int idx)
get corresponding index
Definition: tensor.h:90
void IndexFill(Tensor< cpu, 2, DType > dst, const Tensor< cpu, 1, IndexType > &index, const Tensor< cpu, 2, DType > &src)
CPU/GPU: Fill the values of the destination matrix to specific rows in the source matrix...
Definition: tensor_cpu-inl.h:547
void SoftmaxGrad(Tensor< cpu, 2, DType > dst, const Tensor< cpu, 2, DType > &src, const Tensor< cpu, 1, DType > &label)
CPU/GPU: softmax gradient.
Definition: tensor_cpu-inl.h:306
PaddingExp< SrcExp, DType, ExpInfo< SrcExp >::kDim > pad(const Exp< SrcExp, DType, etype > &src, index_t pad)
padding expression, pad an image with zeros on boundaries; padding affects shape[0] and shape[1]
Definition: pad.h:71
void BatchGEMM(Tensor< Device, 3, DType > dst, const Tensor< Device, 3, DType > &lhs, const Tensor< Device, 3, DType > &rhs, DType alpha, DType beta, Tensor< Device, 1, DType *> workspace)
CPU/GPU: dst = alpha * op(lhs) op(rhs) + beta * dst.
Definition: tensor_cpu-inl.h:610
DType * dptr_
pointer to the data
Definition: tensor.h:434
Tensor RValue, this is the super type of all kinds of possible tensors.
Definition: tensor.h:409
Stream< Device > * NewStream(bool create_blas_handle, bool create_dnn_handle, int dev_id=-1)
create a new stream from system
void Copy(Tensor< cpu, dim, DType > dst, const Tensor< cpu, dim, DType > &src, Stream< cpu > *stream=NULL)
copy data from one tensor to another, with same shape
Definition: tensor_cpu-inl.h:145
void ShutdownTensorEngine(void)
Shutdown tensor engine on current device this function should be called after all GPU tensor operatio...
shape of a tensor
Definition: tensor.h:53
MSHADOW_XINLINE index_t MSize(void) const
Definition: tensor.h:497
MSHADOW_XINLINE Tensor(DType *dptr, Shape< 1 > shape, Stream< Device > *stream)
Definition: tensor.h:581
void MapExp(TRValue< R, cpu, dim, DType > *dst, const expr::Exp< E, DType, etype > &exp)
CPU/GPU: map an expression to a tensor; this function calls MapPlan.
Definition: tensor_cpu-inl.h:207
Definition: stream_gpu-inl.h:37
MSHADOW_XINLINE Tensor(DType *dptr, const Shape< dimension > &shape)
constructor from data pointer and shape, without stride
Definition: tensor.h:456
Shape< dimension > shape_
shape of the tensor
Definition: tensor.h:436
MSHADOW_XINLINE Shape< 4 > Shape4(index_t s0, index_t s1, index_t s2, index_t s3)
construct a four dimension shape, stride will equal s0
Definition: tensor.h:240
void SortByKey(Tensor< cpu, 1, KDType > keys, Tensor< cpu, 1, VDType > values, bool is_ascend=true)
CPU/GPU: Sort key-value pairs stored in separate places. (Stable sort is performed!) ...
Definition: tensor_cpu-inl.h:558
Tensor< Device, dimension, DType > & operator=(const expr::Exp< E, DType, etype > &exp)
functions to fit expression template
Definition: tensor.h:556
void Softmax(Tensor< cpu, 2, DType > dst, const Tensor< cpu, 2, DType > &energy)
CPU/GPU: normalize softmax: dst[i][j] = exp(energy[i][j]) /(sum_j exp(energy[i][j])) ...
Definition: tensor_cpu-inl.h:483
MSHADOW_XINLINE Shape< kSubdim > SubShape(void) const
get subshape that takes off largest dimension
Definition: tensor.h:169
void VectorizedSort(Tensor< Device, 1, VDType > values, Tensor< Device, 1, SDType > segments)
CPU/GPU: Sort the keys within each segment. (Stable sort is performed!) Segments is defined as an asc...
Definition: tensor_cpu-inl.h:589
void set_stream(Stream< Device > *stream)
set the stream to do computation of current tensor
Definition: tensor.h:471
base class of all rvalues
Definition: expression.h:148
MSHADOW_XINLINE index_t MSize(void) const
Definition: tensor.h:603
MSHADOW_XINLINE Tensor< Device, 1, DType > Slice(index_t begin, index_t end) const
Definition: tensor.h:595
static const bool kDevCPU
whether this device is CPU or not
Definition: tensor.h:41
void DeleteStream(Stream< Device > *stream)
delete the computing stream
void MapReduceKeepLowest(TRValue< R, cpu, 1, DType > *dst, const expr::Exp< E, DType, etype > &exp, DType scale=1)
CPU/GPU: map an expression, do reduction to 1D Tensor in lowest dimension (dimension 0) ...
Definition: tensor_cpu-inl.h:223
MSHADOW_XINLINE Tensor(DType *dptr, Shape< 1 > shape)
Definition: tensor.h:579
MSHADOW_XINLINE Shape< dimend - dimstart > Slice(void) const
slice the shape from start to end
Definition: tensor.h:185
MSHADOW_XINLINE const DType & operator[](index_t idx) const
Definition: tensor.h:612
#define MSHADOW_ALLOC_PAD
whether do padding during allocation
Definition: base.h:72
device name CPU
Definition: tensor.h:39
MSHADOW_XINLINE Shape< 1 > FlatTo1D(void) const
Definition: tensor.h:123
device name GPU
Definition: tensor.h:46
bool CheckIdle(void)
query whether the stream is idle
Definition: tensor.h:395
#define MSHADOW_XINLINE
Definition: base.h:230
Tensor< Device, 1, DType > & operator=(const Tensor< Device, 1, DType > &exp)
implement the assignment of same type
Definition: tensor.h:617
MSHADOW_XINLINE Tensor(void)
default constructor
Definition: tensor.h:451
definitions of abstract expressions and expressions template
MSHADOW_XINLINE index_t size(index_t i) const
Definition: tensor.h:606
Tensor< Device, dimension, DType > & operator=(const Tensor< Device, dimension, DType > &exp)
implement the assignment of same type
Definition: tensor.h:546
Shape< 3 > ConvertLayout(const Shape< 3 > &src, int src_layout, int dst_layout)
Convert shape in src_layout to shape in dst_layout.
Definition: tensor.h:269
void CreateBlasHandle()
create a blas handle
Definition: tensor.h:399
int32_t index_t
type that will be used for index
Definition: base.h:343
MSHADOW_XINLINE bool operator!=(const Shape< kDimension > &s) const
Definition: tensor.h:116
void AllocSpace(Tensor< cpu, dim, DType > *obj, bool pad=MSHADOW_ALLOC_PAD)
CPU/CPU: allocate space for CTensor, according to the shape in the obj this function is responsible t...
Definition: tensor_cpu-inl.h:116
MSHADOW_XINLINE Shape< 2 > FlatTo2D(void) const
Definition: tensor.h:132
DType * dptr_
Definition: tensor.h:571
MSHADOW_XINLINE Tensor< Device, 2, DType > FlatTo2D(void) const
flatten the tensor to 2 dimension, collapse the higher dimensions together
Definition: tensor.h:519
definitions of how expressions should be evaluated
definitions of operators in expression with respect to scalar; this file will be included several time...
MSHADOW_XINLINE index_t size(int idx) const
return size of i-th dimension, start counting from highest dimension
Definition: tensor.h:505
void AddTakeGradLargeBatch(Tensor< cpu, 2, DType > dst, const Tensor< cpu, 1, IndexType > &sorted, const Tensor< cpu, 1, IndexType > &index, const Tensor< cpu, 2, DType > &src)
CPU/GPU: Gradient accumulate of embedding matrix. dst[sorted[i]] += src[index[i]] Called when the bat...
Definition: tensor_cpu-inl.h:537
MSHADOW_XINLINE Shape< 5 > Shape5(index_t s0, index_t s1, index_t s2, index_t s3, index_t s4)
construct a five dimension shape, stride will equal s0
Definition: tensor.h:255
MSHADOW_XINLINE bool CheckContiguous(void) const
Definition: tensor.h:491
void SetDevice(int devid)
set the device of current thread to work on
MSHADOW_XINLINE Shape(const Shape< kDimension > &s)
constructor
Definition: tensor.h:79
some extension of expressions, used to support something beyond elementwise op
MSHADOW_XINLINE Shape< 1 > Shape1(index_t s0)
construct a one dimension shape, stride will equal s0
Definition: tensor.h:206
index_t shape_[kDimension]
storing the dimension information
Definition: tensor.h:75
MSHADOW_XINLINE Tensor(const Shape< 1 > &shape)
Definition: tensor.h:577
MSHADOW_XINLINE Shape(void)
default constructor, do nothing
Definition: tensor.h:77
void InitTensorEngine(int device_id=0)
initialize tensor engine, used to call initialization functions of dependent libs; this function should...
MSHADOW_XINLINE Shape< 2 > Shape2(index_t s0, index_t s1)
construct a two dimension shape, stride will equal s0
Definition: tensor.h:216
implementation of GPU code
MSHADOW_XINLINE Tensor< Device, 1, DType > FlatTo1D(void) const
flatten the tensor to 1 dimension
Definition: tensor.h:512
static const int kDevMask
device flag number, identifies this device
Definition: tensor.h:43
MSHADOW_XINLINE index_t ProdShape(int dimstart, int dimend) const
Definition: tensor.h:157
void Wait(void)
wait for all the computations associated with this stream to complete
Definition: tensor.h:390
MSHADOW_XINLINE Tensor(DType *dptr, const Shape< dimension > &shape, index_t stride, Stream< Device > *stream)
constructor from data pointer and shape
Definition: tensor.h:463
MSHADOW_XINLINE index_t MemSize(void) const
Definition: tensor.h:479
void MapReduceKeepHighDim(TRValue< R, cpu, 1, DType > *dst, const expr::Exp< E, DType, etype > &exp, DType scale=1)
CPU/GPU: map an expression, do reduction to 1D Tensor in third dimension (dimension 2) ...
Definition: tensor_cpu-inl.h:250
MSHADOW_XINLINE Tensor(const Shape< dimension > &shape)
constructor from shape
Definition: tensor.h:453
index_t stride_
Definition: tensor.h:573
Tensor< Device, dim, DType > NewTensor(const Shape< dim > &shape, DType initv, bool pad=MSHADOW_ALLOC_PAD, Stream< Device > *stream=NULL)
CPU/GPU: short cut to allocate and initialize a Tensor.
Definition: tensor_cpu-inl.h:132
defines how expression exp can be evaluated and stored into dst
Definition: expression.h:79
MSHADOW_XINLINE Tensor(DType *dptr, const Shape< dimension > &shape, Stream< Device > *stream)
constructor from data pointer and shape, without stride
Definition: tensor.h:459
Tensor< Device, 1, DType > & operator=(const expr::Exp< E, DType, etype > &exp)
Definition: tensor.h:626
MSHADOW_XINLINE bool CheckContiguous(void) const
Definition: tensor.h:600
implementation of GPU host code
tensor container that does memory allocation and resize like STL
void AddTakeGrad(Tensor< cpu, 2, DType > dst, const Tensor< cpu, 1, IndexType > &index, const Tensor< cpu, 2, DType > &src)
CPU/GPU: Gradient accumulate of embedding matrix. dst[index[i]] += src[i] Called when the featuredim ...
Definition: tensor_cpu-inl.h:516
MSHADOW_XINLINE const index_t & operator[](int idx) const
get corresponding index
Definition: tensor.h:98
MSHADOW_XINLINE Shape< 3 > Shape3(index_t s0, index_t s1, index_t s2)
construct a three dimension shape, stride will equal s0
Definition: tensor.h:227
overloaded + operator between half_t and bf16_t
Definition: base.h:334
void set_stream(Stream< Device > *stream)
Definition: tensor.h:586
Random inline functions for tensor.
MSHADOW_XINLINE DType & operator[](index_t idx)
Definition: tensor.h:609
MSHADOW_XINLINE Tensor(DType *dptr, Shape< 1 > shape, index_t stride, Stream< Device > *stream)
Definition: tensor.h:583
MSHADOW_XINLINE Tensor< Device, 1, DType > FlatTo1D(void) const
Definition: tensor.h:589
index_t stride_
storing the stride information in x dimension; this is used to deal with pitch allocation in gpu or ss...
Definition: tensor.h:441
MSHADOW_XINLINE Tensor< Device, 2, DType > FlatTo2D(void) const
Definition: tensor.h:592
Tensor< Device, 1, DType > & operator=(const DType &exp)
Definition: tensor.h:629
MSHADOW_XINLINE Tensor< Device, dimension, DType > Slice(index_t begin, index_t end) const
slice the tensor in highest dimension [begin,end)
Definition: tensor.h:538
general tensor
Definition: tensor.h:420
implementation of CPU host code
#define MSHADOW_DEFAULT_DTYPE
default data type for tensor string in code release, change it to default_real_t during development...
Definition: base.h:249
MSHADOW_XINLINE Tensor(void)
Definition: tensor.h:576
Stream< Device > * stream_
stream where the computation lies; stream is a device dependency concept where each computation ...
Definition: tensor.h:446
Shape< 1 > shape_
Definition: tensor.h:572
MSHADOW_XINLINE index_t Size(void) const
Definition: tensor.h:144
MSHADOW_XINLINE Tensor< Device, kSubdim, DType > operator[](index_t idx) const
get an element of dimension - 1
Definition: tensor.h:527
Tensor< Device, dimension, DType > & operator=(const DType &exp)
functions to fit expression template
Definition: tensor.h:560
computation stream structure, used for asynchronous computations
Definition: tensor.h:383