tensor.h
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one
3  * or more contributor license agreements. See the NOTICE file
4  * distributed with this work for additional information
5  * regarding copyright ownership. The ASF licenses this file
6  * to you under the Apache License, Version 2.0 (the
7  * "License"); you may not use this file except in compliance
8  * with the License. You may obtain a copy of the License at
9  *
10  * http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing,
13  * software distributed under the License is distributed on an
14  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15  * KIND, either express or implied. See the License for the
16  * specific language governing permissions and limitations
17  * under the License.
18  */
19 
30 #ifndef MSHADOW_TENSOR_H_
31 #define MSHADOW_TENSOR_H_
32 #include <string>
33 #include <iostream>
34 #include "./base.h"
35 #include "./expression.h"
36 
37 namespace mshadow {
39 struct cpu {
41  static const bool kDevCPU = true;
43  static const int kDevMask = 1 << 0;
44 };
46 struct gpu {
48  static const bool kDevCPU = false;
50  static const int kDevMask = 1 << 1;
51 };
52 template<int ndim>
53 struct Shape;
54 
61 template<int ndim>
62 inline std::ostream &operator<<(std::ostream &os, const Shape<ndim> &shape); // NOLINT(*)
63 
68 template<int dimension>
69 struct Shape {
71  static const int kDimension = dimension;
73  static const int kSubdim = dimension - 1;
75  index_t shape_[kDimension];
77  MSHADOW_XINLINE Shape(void) {}
79  MSHADOW_XINLINE Shape(const Shape<kDimension> &s) {
80  #pragma unroll
81  for (int i = 0; i < kDimension; ++i) {
82  this->shape_[i] = s[i];
83  }
84  }
90  MSHADOW_XINLINE index_t &operator[](int idx) {
91  return shape_[idx];
92  }
98  MSHADOW_XINLINE const index_t &operator[](int idx) const {
99  return shape_[idx];
100  }
105  MSHADOW_XINLINE bool operator==(const Shape<kDimension> &s) const {
106  #pragma unroll
107  for (int i = 0; i < kDimension; ++i) {
108  if (s.shape_[i] != this->shape_[i]) return false;
109  }
110  return true;
111  }
116  MSHADOW_XINLINE bool operator!=(const Shape<kDimension> &s) const {
117  return !(*this == s);
118  }
123  MSHADOW_XINLINE Shape<1> FlatTo1D(void) const {
124  Shape<1> s;
125  s[0] = this->Size();
126  return s;
127  }
132  MSHADOW_XINLINE Shape<2> FlatTo2D(void) const {
133  Shape<2> s;
134  s.shape_[1] = this->shape_[kDimension - 1];
135  index_t ymax = 1;
136  #pragma unroll
137  for (int i = 0; i < kDimension - 1; ++i) {
138  ymax *= this->shape_[i];
139  }
140  s.shape_[0] = ymax;
141  return s;
142  }
144  MSHADOW_XINLINE index_t Size(void) const {
145  index_t size = this->shape_[0];
146  #pragma unroll
147  for (int i = 1; i < kDimension; ++i) {
148  size *= this->shape_[i];
149  }
150  return size;
151  }
157  MSHADOW_XINLINE index_t ProdShape(int dimstart, int dimend) const {
158  index_t num = 1;
159  #pragma unroll
160  for (int i = dimstart; i < dimend; ++i) {
161  num *= this->shape_[i];
162  }
163  return num;
164  }
169  MSHADOW_XINLINE Shape<kSubdim> SubShape(void) const {
170  Shape<kSubdim> s;
171  // for cuda
172  #pragma unroll
173  for (int i = 0; i < kSubdim; ++i) {
174  s.shape_[i] = this->shape_[i + 1];
175  }
176  return s;
177  }
184  template<int dimstart, int dimend>
185  MSHADOW_XINLINE Shape<dimend - dimstart> Slice(void) const {
186  Shape<dimend - dimstart> s;
187  #pragma unroll
188  for (int i = dimstart; i < dimend; ++i) {
189  s[i - dimstart] = this->shape_[i];
190  }
191  return s;
192  }
194  template<int dim>
195  friend std::ostream &operator<<(std::ostream &os, const Shape<dim> &shape); // NOLINT(*)
197 }; // Shape
198 //------------------------------------------------
199 // useful construction functions to generate shape
200 //-------------------------------------------------
206 MSHADOW_XINLINE Shape<1> Shape1(index_t s0) {
207  Shape<1> s; s[0] = s0;
208  return s;
209 }
216 MSHADOW_XINLINE Shape<2> Shape2(index_t s0, index_t s1) {
217  Shape<2> s; s[0] = s0; s[1] = s1;
218  return s;
219 }
227 MSHADOW_XINLINE Shape<3> Shape3(index_t s0, index_t s1, index_t s2) {
228  Shape<3> s;
229  s[0] = s0; s[1] = s1; s[2] = s2;
230  return s;
231 }
240 MSHADOW_XINLINE Shape<4> Shape4(index_t s0, index_t s1,
241  index_t s2, index_t s3) {
242  Shape<4> s;
243  s[0] = s0; s[1] = s1; s[2] = s2; s[3] = s3;
244  return s;
245 }
255 MSHADOW_XINLINE Shape<5> Shape5(index_t s0, index_t s1, index_t s2,
256  index_t s3, index_t s4) {
257  Shape<5> s;
258  s[0] = s0; s[1] = s1; s[2] = s2; s[3] = s3; s[4] = s4;
259  return s;
260 }
261 
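For orientation, here is a minimal usage sketch of the Shape class and the Shape1..Shape5 helpers above. It is illustrative only and not part of tensor.h; it assumes mshadow/tensor.h is on the include path and the variable names are arbitrary.

#include "mshadow/tensor.h"
using namespace mshadow;

void shape_demo() {
  Shape<3> s = Shape3(2, 3, 4);    // a 2 x 3 x 4 shape
  index_t total = s.Size();        // 24 elements in total
  Shape<2> flat = s.FlatTo2D();    // collapse leading dims: (2*3, 4) = (6, 4)
  Shape<2> sub  = s.SubShape();    // drop the leading dim: (3, 4)
  (void)total; (void)flat; (void)sub;
}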
269 inline Shape<3> ConvertLayout(const Shape<3>& src, int src_layout, int dst_layout) {
270  Shape<3> dst;
271  switch (src_layout) {
272  case kNCW:
273  dst = src;
274  break;
275  case kNWC:
276  dst[0] = src[0];
277  dst[1] = src[2];
278  dst[2] = src[1];
279  break;
280  default:
281  LOG(FATAL) << "Invalid layout for 3d shape " << src_layout;
282  }
283  switch (dst_layout) {
284  case kNCW:
285  return dst;
286  case kNWC:
287  {
288  index_t tmp = dst[1];
289  dst[1] = dst[2];
290  dst[2] = tmp;
291  }
292  break;
293  default:
294  LOG(FATAL) << "Invalid layout for 3d shape " << dst_layout;
295  }
296  return dst;
297 }
298 
306 inline Shape<4> ConvertLayout(const Shape<4>& src, int src_layout, int dst_layout) {
307  Shape<4> dst;
308  switch (src_layout) {
309  case kNCHW:
310  dst = src;
311  break;
312  case kNHWC:
313  dst[0] = src[0];
314  dst[2] = src[1];
315  dst[3] = src[2];
316  dst[1] = src[3];
317  break;
318  default:
319  LOG(FATAL) << "Invalid layout for 4d shape " << src_layout;
320  dst = src; // fixes compiler warning
321  }
322  Shape<4> dst2;
323  switch (dst_layout) {
324  case kNCHW:
325  return dst;
326  case kNHWC:
327  dst2[0] = dst[0];
328  dst2[1] = dst[2];
329  dst2[2] = dst[3];
330  dst2[3] = dst[1];
331  break;
332  default:
333  LOG(FATAL) << "Invalid layout for 4d shape " << dst_layout;
334  dst2 = src; // fixes compiler warning
335  }
336  return dst2;
337 }
338 
346 inline Shape<5> ConvertLayout(const Shape<5>& src, int src_layout, int dst_layout) {
347  Shape<5> dst;
348  switch (src_layout) {
349  case kNCDHW:
350  dst = src;
351  break;
352  case kNDHWC:
353  dst[0] = src[0];
354  dst[2] = src[1];
355  dst[3] = src[2];
356  dst[4] = src[3];
357  dst[1] = src[4];
358  break;
359  default:
360  LOG(FATAL) << "Invalid layout for 5d shape " << src_layout;
361  }
362  Shape<5> dst2;
363  switch (dst_layout) {
364  case kNCDHW:
365  return dst;
366  case kNDHWC:
367  dst2[0] = dst[0];
368  dst2[1] = dst[2];
369  dst2[2] = dst[3];
370  dst2[3] = dst[4];
371  dst2[4] = dst[1];
372  break;
373  default:
374  LOG(FATAL) << "Invalid layout for 5d shape " << dst_layout;
375  }
376  return dst2;
377 }
378 
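A short sketch of how ConvertLayout is typically called (illustrative, continuing the include and using-declaration of the sketch above; the layout flags kNCHW and kNHWC come from base.h):

void layout_demo() {
  Shape<4> nchw = Shape4(2, 3, 5, 7);                  // batch 2, 3 channels, 5 x 7 image
  Shape<4> nhwc = ConvertLayout(nchw, kNCHW, kNHWC);   // reordered to (2, 5, 7, 3)
  (void)nhwc;
}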
382 template<typename Device>
383 struct Stream {
384  // this is only a dummy implementation for CPU
385  // for GPU, the actual implementation will be specialized in tensor_gpu-inl.h
390  inline void Wait(void) {}
395  inline bool CheckIdle(void) {
396  return true;
397  }
399  inline void CreateBlasHandle() {}
400 };
408 template<typename Container, typename Device, int dimension, typename DType>
409 struct TRValue: public expr::RValueExp<Container, DType> {
410 };
411 // more compact template
418 template<typename Device, int dimension,
419  typename DType MSHADOW_DEFAULT_DTYPE>
420 struct Tensor: public TRValue<Tensor<Device, dimension, DType>,
421  Device, dimension, DType> {
422  public:
423  //--------------------------------
424  // struct members
425  //--------------------------------
427  static const bool kDevCPU = Device::kDevCPU;
429  static const int kSubdim = dimension - 1;
430  //--------------------------------
431  // struct members
432  //--------------------------------
434  DType *dptr_ = nullptr;
436  Shape<dimension> shape_;
441  index_t stride_;
446  Stream<Device> *stream_;
447  //--------------------------------
448  // functions
449  //--------------------------------
451  MSHADOW_XINLINE Tensor(void) : stream_(NULL) {}
453  MSHADOW_XINLINE Tensor(const Shape<dimension> &shape)
454  : shape_(shape), stream_(NULL) {}
456  MSHADOW_XINLINE Tensor(DType *dptr, const Shape<dimension> &shape)
457  : dptr_(dptr), shape_(shape), stride_(shape[kSubdim]), stream_(NULL) {}
459  MSHADOW_XINLINE Tensor(DType *dptr, const Shape<dimension> &shape,
460  Stream<Device> *stream)
461  : dptr_(dptr), shape_(shape), stride_(shape[kSubdim]), stream_(stream) {}
463  MSHADOW_XINLINE Tensor(DType *dptr,
464  const Shape<dimension> &shape,
465  index_t stride, Stream<Device> *stream)
466  : dptr_(dptr), shape_(shape), stride_(stride), stream_(stream) {}
471  inline void set_stream(Stream<Device> *stream) {
472  this->stream_ = stream;
473  }
478  template<int startdim>
479  MSHADOW_XINLINE index_t MemSize(void) const {
480  index_t memsz = this->stride_;
481  #pragma unroll
482  for (int i = startdim; i < kSubdim; ++i) {
483  memsz *= this->shape_[i];
484  }
485  return memsz;
486  }
491  MSHADOW_XINLINE bool CheckContiguous(void) const {
492  return this->shape_[dimension - 1] == stride_;
493  }
497  MSHADOW_XINLINE index_t MSize(void) const {
498  return this->MemSize<0>();
499  }
505  MSHADOW_XINLINE index_t size(int idx) const {
506  return shape_[idx];
507  }
512  MSHADOW_XINLINE Tensor<Device, 1, DType> FlatTo1D(void) const {
513  return Tensor<Device, 1, DType>(dptr_, shape_.FlatTo1D(), stride_, stream_);
514  }
519  MSHADOW_XINLINE Tensor<Device, 2, DType> FlatTo2D(void) const {
520  return Tensor<Device, 2, DType>(dptr_, shape_.FlatTo2D(), stride_, stream_);
521  }
527  MSHADOW_XINLINE Tensor<Device, kSubdim, DType> operator[](index_t idx) const {
528  return Tensor<Device, kSubdim, DType>(dptr_ + this->MemSize<1>() * idx,
529  shape_.SubShape(), stride_, stream_);
530  }
537  MSHADOW_XINLINE Tensor<Device, dimension, DType>
538  Slice(index_t begin, index_t end) const {
539  Shape<dimension> s = this->shape_;
540  s[0] = end - begin;
541  return Tensor<Device, dimension, DType>(dptr_ + this->MemSize<1>() * begin,
542  s, stride_, stream_);
543  }
545  inline Tensor<Device, dimension, DType> &
546  operator=(const Tensor<Device, dimension, DType> &exp) {
547  dptr_ = exp.dptr_;
548  shape_ = exp.shape_;
549  stride_ = exp.stride_;
550  stream_ = exp.stream_;
551  return *this;
552  }
554  template<typename E, int etype>
555  inline Tensor<Device, dimension, DType> &
556  operator=(const expr::Exp<E, DType, etype> &exp) {
557  return this->__assign(exp);
558  }
560  inline Tensor<Device, dimension, DType> &operator=(const DType &exp) {
561  return this->__assign(exp);
562  }
563 };
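The general Tensor above is a non-owning view: it holds a data pointer, a shape, a stride, and a stream, and indexing or slicing only adjusts the pointer and shape. A minimal sketch (illustrative, not part of tensor.h; same include and using-declaration as the earlier sketches):

void tensor_view_demo() {
  float data[6] = {0, 1, 2, 3, 4, 5};
  Tensor<cpu, 2, float> mat(data, Shape2(2, 3));  // wrap existing memory, no allocation
  Tensor<cpu, 1, float> row  = mat[1];            // view of the second row: {3, 4, 5}
  Tensor<cpu, 2, float> top  = mat.Slice(0, 1);   // view of the first row as a 1 x 3 tensor
  Tensor<cpu, 1, float> flat = mat.FlatTo1D();    // view of all 6 elements
  (void)row; (void)top; (void)flat;
}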
564 /*
565  * re-specialized class Tensor1D; this is due to a different implementation of operator[]
566  */
567 template<typename Device, typename DType>
568 struct Tensor<Device, 1, DType>:
569  public TRValue<Tensor<Device, 1, DType>, Device, 1, DType> {
570  public:
571  DType *dptr_;
572  Shape<1> shape_;
573  index_t stride_;
574  Stream<Device> *stream_;
575  // constructor
576  MSHADOW_XINLINE Tensor(void) : stream_(NULL) {}
577  MSHADOW_XINLINE Tensor(const Shape<1> &shape)
578  : shape_(shape), stream_(NULL) {}
579  MSHADOW_XINLINE Tensor(DType *dptr, Shape<1> shape)
580  : dptr_(dptr), shape_(shape), stride_(shape[0]), stream_(NULL) {}
581  MSHADOW_XINLINE Tensor(DType *dptr, Shape<1> shape, Stream<Device> *stream)
582  : dptr_(dptr), shape_(shape), stride_(shape[0]), stream_(stream) {}
583  MSHADOW_XINLINE Tensor(DType *dptr, Shape<1> shape,
584  index_t stride, Stream<Device> *stream)
585  : dptr_(dptr), shape_(shape), stride_(stride), stream_(stream) {}
586  inline void set_stream(Stream<Device> *stream) {
587  this->stream_ = stream;
588  }
589  MSHADOW_XINLINE Tensor<Device, 1, DType> FlatTo1D(void) const {
590  return *this;
591  }
592  MSHADOW_XINLINE Tensor<Device, 2, DType> FlatTo2D(void) const {
593  return Tensor<Device, 2, DType>(dptr_, shape_.FlatTo2D(), stride_, stream_);
594  }
595  MSHADOW_XINLINE Tensor<Device, 1, DType> Slice(index_t begin, index_t end) const {
596  Shape<1> s;
597  s[0] = end - begin;
598  return Tensor<Device, 1, DType>(dptr_ + begin, s, s[0], stream_);
599  }
600  MSHADOW_XINLINE bool CheckContiguous(void) const {
601  return true;
602  }
603  MSHADOW_XINLINE index_t MSize(void) const {
604  return shape_[0];
605  }
606  MSHADOW_XINLINE index_t size(index_t i) const {
607  return shape_[0];
608  }
609  MSHADOW_XINLINE DType &operator[](index_t idx) {
610  return dptr_[idx];
611  }
612  MSHADOW_XINLINE const DType &operator[](index_t idx) const {
613  return dptr_[idx];
614  }
616  inline Tensor<Device, 1, DType> &
617  operator=(const Tensor<Device, 1, DType> &exp) {
618  dptr_ = exp.dptr_;
619  shape_ = exp.shape_;
620  stride_ = exp.stride_;
621  stream_ = exp.stream_;
622  return *this;
623  }
624  template<typename E, int etype>
625  inline Tensor<Device, 1, DType> &
626  operator=(const expr::Exp<E, DType, etype> &exp) {
627  return this->__assign(exp);
628  }
629  inline Tensor<Device, 1, DType> &operator=(const DType &exp) {
630  return this->__assign(exp);
631  }
632 };
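The 1-D specialization differs mainly in operator[], which returns an element reference rather than a lower-dimensional view. A small sketch under the same assumptions as the earlier examples:

void vector_demo() {
  float buf[4] = {1.f, 2.f, 3.f, 4.f};
  Tensor<cpu, 1, float> vec(buf, Shape1(4));
  float second = vec[1];                        // element access: 2.0f
  Tensor<cpu, 1, float> mid = vec.Slice(1, 3);  // view of {2, 3}
  (void)second; (void)mid;
}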
633 //------------------------
634 // Function Declarations
635 //-----------------------
643 template<typename Device>
644 inline void InitTensorEngine(int device_id = 0);
651 template<typename Device>
652 inline void ShutdownTensorEngine(void);
658 template<typename Device>
659 inline void SetDevice(int devid);
668 template<typename Device>
669 inline Stream<Device> *NewStream(bool create_blas_handle,
670  bool create_dnn_handle,
671  int dev_id = -1);
676 template<typename Device>
677 inline Stream<Device> *NewStream(int dev_id) {
678  return NewStream<Device>(true, false, dev_id);
679 }
684 template<typename Device>
685 inline void DeleteStream(Stream<Device> *stream);
697 template<int dim, typename DType>
698 inline void AllocSpace(Tensor<cpu, dim, DType> *obj,
699  bool pad = MSHADOW_ALLOC_PAD);
711 template<int dim, typename DType>
712 inline void AllocSpace(Tensor<gpu, dim, DType> *obj,
713  bool pad = MSHADOW_ALLOC_PAD);
720 template<int dim, typename DType>
721 inline void FreeSpace(Tensor<cpu, dim, DType> *obj);
728 template<int dim, typename DType>
729 inline void FreeSpace(Tensor<gpu, dim, DType> *obj);
742 template<typename Device, typename DType, int dim>
743 inline Tensor<Device, dim, DType> NewTensor(const Shape<dim> &shape,
744  DType initv,
745  bool pad = MSHADOW_ALLOC_PAD,
746  Stream<Device> *stream = NULL);
755 template<int dim, typename DType>
756 inline void Copy(Tensor<cpu, dim, DType> dst,
757  const Tensor<cpu, dim, DType> &src,
758  Stream<cpu> *stream = NULL);
767 template<int dim, typename DType>
768 inline void Copy(Tensor<cpu, dim, DType> dst,
769  const Tensor<gpu, dim, DType> &src,
770  Stream<gpu> *stream = NULL);
779 template<int dim, typename DType>
780 inline void Copy(Tensor<gpu, dim, DType> dst,
781  const Tensor<cpu, dim, DType> &src,
782  Stream<gpu> *stream = NULL);
791 template<int dim, typename DType>
792 inline void Copy(Tensor<gpu, dim, DType> dst,
793  const Tensor<gpu, dim, DType> &src,
794  Stream<gpu> *stream = NULL);
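A typical allocate / copy / free sequence on CPU using the declarations above (illustrative sketch, not part of tensor.h; same include and using-declaration as the earlier sketches):

void alloc_copy_demo() {
  InitTensorEngine<cpu>();
  Tensor<cpu, 2, float> a = NewTensor<cpu>(Shape2(2, 3), 0.0f);  // allocate and fill with 0
  Tensor<cpu, 2, float> b(Shape2(2, 3));
  AllocSpace(&b);
  Copy(b, a);          // same-shape CPU-to-CPU copy
  FreeSpace(&a);
  FreeSpace(&b);
  ShutdownTensorEngine<cpu>();
}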
800 template<typename DType>
801 inline void Softmax(Tensor<cpu, 2, DType> dst, const Tensor<cpu, 2, DType> &energy);
807 template<typename DType>
808 inline void Softmax(Tensor<gpu, 2, DType> dst, const Tensor<gpu, 2, DType> &energy);
809 
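A sketch of calling Softmax on a CPU tensor (illustrative; continues the setup of the previous sketches):

void softmax_demo() {
  Tensor<cpu, 2, float> energy = NewTensor<cpu>(Shape2(4, 10), 0.0f);
  Tensor<cpu, 2, float> prob   = NewTensor<cpu>(Shape2(4, 10), 0.0f);
  Softmax(prob, energy);  // row-wise: prob[i][j] = exp(energy[i][j]) / sum_j exp(energy[i][j])
  FreeSpace(&energy);
  FreeSpace(&prob);
}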
816 template<typename DType>
817 inline void SoftmaxGrad(Tensor<cpu, 2, DType> dst,
818  const Tensor<cpu, 2, DType> &src,
819  const Tensor<cpu, 1, DType> &label);
826 template<typename DType>
827 inline void SoftmaxGrad(const Tensor<gpu, 2, DType> &dst,
828  const Tensor<gpu, 2, DType> &src,
829  const Tensor<gpu, 1, DType> &label);
838 template<bool clip = true, typename IndexType, typename DType>
839 inline void AddTakeGrad(Tensor<cpu, 2, DType> dst,
840  const Tensor<cpu, 1, IndexType>& index,
841  const Tensor<cpu, 2, DType> &src);
850 template<bool clip = true, typename IndexType, typename DType>
851 inline void AddTakeGrad(Tensor<gpu, 2, DType> dst,
852  const Tensor<gpu, 1, IndexType>& index,
853  const Tensor<gpu, 2, DType> &src);
863 template<typename IndexType, typename DType>
864 inline void AddTakeGradLargeBatch(Tensor<cpu, 2, DType> dst,
865  const Tensor<cpu, 1, IndexType>& sorted,
866  const Tensor<cpu, 1, IndexType>& index,
867  const Tensor<cpu, 2, DType> &src);
877 template<typename IndexType, typename DType>
878 inline void AddTakeGradLargeBatch(Tensor<gpu, 2, DType> dst,
879  const Tensor<gpu, 1, IndexType>& sorted,
880  const Tensor<gpu, 1, IndexType>& index,
881  const Tensor<gpu, 2, DType> &src);
890 template<typename IndexType, typename DType>
891 inline void IndexFill(Tensor<cpu, 2, DType> dst,
892  const Tensor<cpu, 1, IndexType>& index,
893  const Tensor<cpu, 2, DType> &src);
902 template<typename IndexType, typename DType>
903 inline void IndexFill(Tensor<gpu, 2, DType> dst,
904  const Tensor<gpu, 1, IndexType>& index,
905  const Tensor<gpu, 2, DType> &src);
912 template<typename KDType, typename VDType>
913 inline void SortByKey(Tensor<cpu, 1, KDType> keys, Tensor<cpu, 1, VDType> values,
914  bool is_ascend = true);
921 template<typename KDType, typename VDType>
922 inline void SortByKey(Tensor<gpu, 1, KDType> keys, Tensor<gpu, 1, VDType> values,
923  bool is_ascend = true);
932 template<typename Device, typename VDType, typename SDType>
933 inline void VectorizedSort(Tensor<Device, 1, VDType> values, Tensor<Device, 1, SDType> segments);
934 
935 // function declarations to support expression, no need to understand them
936 // these functions do not need to be directly used
949 template<typename Saver, typename R, int dim,
950  typename DType, typename E, int etype>
951 inline void MapExp(TRValue<R, cpu, dim, DType> *dst,
952  const expr::Exp<E, DType, etype> &exp);
965 template<typename Saver, typename R, int dim,
966  typename DType, typename E, int etype>
967 inline void MapExp(TRValue<R, gpu, dim, DType> *dst,
968  const expr::Exp<E, DType, etype> &exp);
982 template<typename Saver, typename Reducer,
983  typename R, typename DType, typename E, int etype>
984 inline void MapReduceKeepLowest(TRValue<R, cpu, 1, DType> *dst,
985  const expr::Exp<E, DType, etype> &exp,
986  DType scale = 1);
1000 template<typename Saver, typename Reducer, typename R,
1001  typename DType, typename E, int etype>
1002 inline void MapReduceKeepLowest(TRValue<R, gpu, 1, DType> *dst,
1003  const expr::Exp<E, DType, etype> &exp,
1004  DType scale = 1);
1019 template<typename Saver, typename Reducer, int dimkeep,
1020  typename R, typename DType, typename E, int etype>
1021 inline void MapReduceKeepHighDim(TRValue<R, cpu, 1, DType> *dst,
1022  const expr::Exp<E, DType, etype> &exp,
1023  DType scale = 1);
1038 template<typename Saver, typename Reducer, int dimkeep,
1039  typename R, typename DType, typename E, int etype>
1040 inline void MapReduceKeepHighDim(TRValue<R, gpu, 1, DType> *dst,
1041  const expr::Exp<E, DType, etype> &exp,
1042  DType scale = 1);
1049 template<typename Device, typename DType>
1050 inline void VectorDot(Tensor<Device, 1, DType> dst,
1051  const Tensor<Device, 1, DType> &lhs,
1052  const Tensor<Device, 1, DType> &rhs);
1062 template<bool transpose_left, bool transpose_right, typename Device, typename DType>
1063 inline void BatchGEMM(Tensor<Device, 3, DType> dst,
1064  const Tensor<Device, 3, DType> &lhs,
1065  const Tensor<Device, 3, DType> &rhs,
1066  DType alpha,
1067  DType beta,
1068  Tensor<Device, 1, DType*> workspace);
1069 } // namespace mshadow
1070 // include headers
1071 #include "./stream_gpu-inl.h"
1072 #include "./extension.h"
1073 #include "./expr_engine-inl.h"
1074 #include "./tensor_cpu-inl.h"
1075 #include "./tensor_gpu-inl.h"
1076 #include "./io.h"
1077 #include "./tensor_container.h"
1078 #include "./random.h"
1079 // add definition of scalar related operators
1080 #ifdef MSHADOW_SCALAR_
1081  #error "MSHADOW_SCALAR_ must not be defined"
1082 #endif
1083 // enumerate all the scalar data type we aim to be good at
1084 #define MSHADOW_SCALAR_ float
1085 #include "./expr_scalar-inl.h"
1086 #undef MSHADOW_SCALAR_
1087 #define MSHADOW_SCALAR_ double
1088 #include "./expr_scalar-inl.h"
1089 #undef MSHADOW_SCALAR_
1090 #define MSHADOW_SCALAR_ int32_t
1091 #include "./expr_scalar-inl.h"
1092 #undef MSHADOW_SCALAR_
1093 #define MSHADOW_SCALAR_ int64_t
1094 #include "./expr_scalar-inl.h"
1095 #undef MSHADOW_SCALAR_
1096 #define MSHADOW_SCALAR_ mshadow::half::half_t
1097 #include "./expr_scalar-inl.h"
1098 #undef MSHADOW_SCALAR_
1099 #endif // MSHADOW_TENSOR_H_