tensor.h
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one
3  * or more contributor license agreements. See the NOTICE file
4  * distributed with this work for additional information
5  * regarding copyright ownership. The ASF licenses this file
6  * to you under the Apache License, Version 2.0 (the
7  * "License"); you may not use this file except in compliance
8  * with the License. You may obtain a copy of the License at
9  *
10  * http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing,
13  * software distributed under the License is distributed on an
14  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15  * KIND, either express or implied. See the License for the
16  * specific language governing permissions and limitations
17  * under the License.
18  */
19 
30 #ifndef MSHADOW_TENSOR_H_
31 #define MSHADOW_TENSOR_H_
32 #include <string>
33 #include <iostream>
34 #include "./base.h"
35 #include "./expression.h"
36 
37 namespace mshadow {
39 struct cpu {
41  static const bool kDevCPU = true;
43  static const int kDevMask = 1 << 0;
44 };
46 struct gpu {
48  static const bool kDevCPU = false;
50  static const int kDevMask = 1 << 1;
51 };
52 template<int ndim>
53 struct Shape;
54 
61 template<int ndim>
62 inline std::ostream &operator<<(std::ostream &os, const Shape<ndim> &shape); // NOLINT(*)
63 
68 template<int dimension>
69 struct Shape {
71  static const int kDimension = dimension;
73  static const int kSubdim = dimension - 1;
75  index_t shape_[kDimension];
77  MSHADOW_XINLINE Shape(void) {}
79  MSHADOW_XINLINE Shape(const Shape<kDimension> &s) {
80  #pragma unroll
81  for (int i = 0; i < kDimension; ++i) {
82  this->shape_[i] = s[i];
83  }
84  }
90  MSHADOW_XINLINE index_t &operator[](int idx) {
91  return shape_[idx];
92  }
98  MSHADOW_XINLINE const index_t &operator[](int idx) const {
99  return shape_[idx];
100  }
105  MSHADOW_XINLINE bool operator==(const Shape<kDimension> &s) const {
106  #pragma unroll
107  for (int i = 0; i < kDimension; ++i) {
108  if (s.shape_[i] != this->shape_[i]) return false;
109  }
110  return true;
111  }
116  MSHADOW_XINLINE bool operator!=(const Shape<kDimension> &s) const {
117  return !(*this == s);
118  }
123  MSHADOW_XINLINE Shape<1> FlatTo1D(void) const {
124  Shape<1> s;
125  s[0] = this->Size();
126  return s;
127  }
132  MSHADOW_XINLINE Shape<2> FlatTo2D(void) const {
133  Shape<2> s;
134  s.shape_[1] = this->shape_[kDimension - 1];
135  index_t ymax = 1;
136  #pragma unroll
137  for (int i = 0; i < kDimension - 1; ++i) {
138  ymax *= this->shape_[i];
139  }
140  s.shape_[0] = ymax;
141  return s;
142  }
144  MSHADOW_XINLINE index_t Size(void) const {
145  index_t size = this->shape_[0];
146  #pragma unroll
147  for (int i = 1; i < kDimension; ++i) {
148  size *= this->shape_[i];
149  }
150  return size;
151  }
157  MSHADOW_XINLINE index_t ProdShape(int dimstart, int dimend) const {
158  index_t num = 1;
159  #pragma unroll
160  for (int i = dimstart; i < dimend; ++i) {
161  num *= this->shape_[i];
162  }
163  return num;
164  }
169  MSHADOW_XINLINE Shape<kSubdim> SubShape(void) const {
170  Shape<kSubdim> s;
171  // for cuda
172  #pragma unroll
173  for (int i = 0; i < kSubdim; ++i) {
174  s.shape_[i] = this->shape_[i + 1];
175  }
176  return s;
177  }
184  template<int dimstart, int dimend>
185  MSHADOW_XINLINE Shape<dimend - dimstart> Slice(void) const {
186  Shape<dimend - dimstart> s;
187  #pragma unroll
188  for (int i = dimstart; i < dimend; ++i) {
189  s[i - dimstart] = this->shape_[i];
190  }
191  return s;
192  }
194  template<int dim>
195  friend std::ostream &operator<<(std::ostream &os, const Shape<dim> &shape); // NOLINT(*)
197 }; // Shape
198 //------------------------------------------------
199 // useful construction functions to generate shape
200 //-------------------------------------------------
206 MSHADOW_XINLINE Shape<1> Shape1(index_t s0) {
207  Shape<1> s; s[0] = s0;
208  return s;
209 }
216 MSHADOW_XINLINE Shape<2> Shape2(index_t s0, index_t s1) {
217  Shape<2> s; s[0] = s0; s[1] = s1;
218  return s;
219 }
227 MSHADOW_XINLINE Shape<3> Shape3(index_t s0, index_t s1, index_t s2) {
228  Shape<3> s;
229  s[0] = s0; s[1] = s1; s[2] = s2;
230  return s;
231 }
240 MSHADOW_XINLINE Shape<4> Shape4(index_t s0, index_t s1,
241  index_t s2, index_t s3) {
242  Shape<4> s;
243  s[0] = s0; s[1] = s1; s[2] = s2; s[3] = s3;
244  return s;
245 }
255 MSHADOW_XINLINE Shape<5> Shape5(index_t s0, index_t s1, index_t s2,
256  index_t s3, index_t s4) {
257  Shape<5> s;
258  s[0] = s0; s[1] = s1; s[2] = s2; s[3] = s3; s[4] = s4;
259  return s;
260 }
261 
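For orientation, here is a minimal usage sketch of the Shape class and the Shape1..Shape5 helpers above. It is illustrative only and not part of tensor.h; it assumes mshadow/tensor.h is on the include path and the variable names are arbitrary.

#include "mshadow/tensor.h"
using namespace mshadow;

void shape_demo() {
  Shape<3> s = Shape3(2, 3, 4);    // a 2 x 3 x 4 shape
  index_t total = s.Size();        // 24 elements in total
  Shape<2> flat = s.FlatTo2D();    // collapse leading dims: (2*3, 4) = (6, 4)
  Shape<2> sub  = s.SubShape();    // drop the leading dim: (3, 4)
  (void)total; (void)flat; (void)sub;
}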
269 inline Shape<3> ConvertLayout(const Shape<3>& src, int src_layout, int dst_layout) {
270  Shape<3> dst;
271  switch (src_layout) {
272  case kNCW:
273  dst = src;
274  break;
275  case kNWC:
276  dst[0] = src[0];
277  dst[1] = src[2];
278  dst[2] = src[1];
279  break;
280  default:
281  LOG(FATAL) << "Invalid layout for 3d shape " << src_layout;
282  }
283  switch (dst_layout) {
284  case kNCW:
285  return dst;
286  case kNWC:
287  {
288  index_t tmp = dst[1];
289  dst[1] = dst[2];
290  dst[2] = tmp;
291  }
292  break;
293  default:
294  LOG(FATAL) << "Invalid layout for 3d shape " << dst_layout;
295  }
296  return dst;
297 }
298 
306 inline Shape<4> ConvertLayout(const Shape<4>& src, int src_layout, int dst_layout) {
307  Shape<4> dst;
308  switch (src_layout) {
309  case kNCHW:
310  dst = src;
311  break;
312  case kNHWC:
313  dst[0] = src[0];
314  dst[2] = src[1];
315  dst[3] = src[2];
316  dst[1] = src[3];
317  break;
318  default:
319  LOG(FATAL) << "Invalid layout for 4d shape " << src_layout;
320  dst = src; // fixes compiler warning
321  }
322  Shape<4> dst2;
323  switch (dst_layout) {
324  case kNCHW:
325  return dst;
326  case kNHWC:
327  dst2[0] = dst[0];
328  dst2[1] = dst[2];
329  dst2[2] = dst[3];
330  dst2[3] = dst[1];
331  break;
332  default:
333  LOG(FATAL) << "Invalid layout for 4d shape " << dst_layout;
334  dst2 = src; // fixes compiler warning
335  }
336  return dst2;
337 }
338 
346 inline Shape<5> ConvertLayout(const Shape<5>& src, int src_layout, int dst_layout) {
347  Shape<5> dst;
348  switch (src_layout) {
349  case kNCDHW:
350  dst = src;
351  break;
352  case kNDHWC:
353  dst[0] = src[0];
354  dst[2] = src[1];
355  dst[3] = src[2];
356  dst[4] = src[3];
357  dst[1] = src[4];
358  break;
359  default:
360  LOG(FATAL) << "Invalid layout for 5d shape " << src_layout;
361  }
362  Shape<5> dst2;
363  switch (dst_layout) {
364  case kNCDHW:
365  return dst;
366  case kNDHWC:
367  dst2[0] = dst[0];
368  dst2[1] = dst[2];
369  dst2[2] = dst[3];
370  dst2[3] = dst[4];
371  dst2[4] = dst[1];
372  break;
373  default:
374  LOG(FATAL) << "Invalid layout for 5d shape " << dst_layout;
375  }
376  return dst2;
377 }
378 
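A short sketch of how ConvertLayout is typically called (illustrative, continuing the include and using-declaration of the sketch above; the layout flags kNCHW and kNHWC come from base.h):

void layout_demo() {
  Shape<4> nchw = Shape4(2, 3, 5, 7);                  // batch 2, 3 channels, 5 x 7 image
  Shape<4> nhwc = ConvertLayout(nchw, kNCHW, kNHWC);   // reordered to (2, 5, 7, 3)
  (void)nhwc;
}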
382 template<typename Device>
383 struct Stream {
384  // this is only a dummy implementation for CPU
385  // for GPU, the actual implementation will be specialized in tensor_gpu-inl.h
390  inline void Wait(void) {}
395  inline bool CheckIdle(void) {
396  return true;
397  }
399  inline void CreateBlasHandle() {}
400 };
408 template<typename Container, typename Device, int dimension, typename DType>
409 struct TRValue: public expr::RValueExp<Container, DType> {
410 };
411 // more compact template
418 template<typename Device, int dimension,
419  typename DType MSHADOW_DEFAULT_DTYPE>
420 struct Tensor: public TRValue<Tensor<Device, dimension, DType>,
421  Device, dimension, DType> {
422  public:
423  //--------------------------------
424  // struct members
425  //--------------------------------
427  static const bool kDevCPU = Device::kDevCPU;
429  static const int kSubdim = dimension - 1;
430  //--------------------------------
431  // struct members
432  //--------------------------------
434  DType *dptr_ = nullptr;
436  Shape<dimension> shape_;
441  index_t stride_;
446  Stream<Device> *stream_;
447  //--------------------------------
448  // functions
449  //--------------------------------
451  MSHADOW_XINLINE Tensor(void) : stream_(NULL) {}
453  MSHADOW_XINLINE Tensor(const Shape<dimension> &shape)
454  : shape_(shape), stream_(NULL) {}
456  MSHADOW_XINLINE Tensor(DType *dptr, const Shape<dimension> &shape)
457  : dptr_(dptr), shape_(shape), stride_(shape[kSubdim]), stream_(NULL) {}
459  MSHADOW_XINLINE Tensor(DType *dptr, const Shape<dimension> &shape,
460  Stream<Device> *stream)
461  : dptr_(dptr), shape_(shape), stride_(shape[kSubdim]), stream_(stream) {}
463  MSHADOW_XINLINE Tensor(DType *dptr,
464  const Shape<dimension> &shape,
465  index_t stride, Stream<Device> *stream)
466  : dptr_(dptr), shape_(shape), stride_(stride), stream_(stream) {}
471  inline void set_stream(Stream<Device> *stream) {
472  this->stream_ = stream;
473  }
478  template<int startdim>
479  MSHADOW_XINLINE index_t MemSize(void) const {
480  index_t memsz = this->stride_;
481  #pragma unroll
482  for (int i = startdim; i < kSubdim; ++i) {
483  memsz *= this->shape_[i];
484  }
485  return memsz;
486  }
491  MSHADOW_XINLINE bool CheckContiguous(void) const {
492  return this->shape_[dimension - 1] == stride_;
493  }
497  MSHADOW_XINLINE index_t MSize(void) const {
498  return this->MemSize<0>();
499  }
505  MSHADOW_XINLINE index_t size(int idx) const {
506  return shape_[idx];
507  }
512  MSHADOW_XINLINE Tensor<Device, 1, DType> FlatTo1D(void) const {
513  return Tensor<Device, 1, DType>(dptr_, shape_.FlatTo1D(), stride_, stream_);
514  }
519  MSHADOW_XINLINE Tensor<Device, 2, DType> FlatTo2D(void) const {
520  return Tensor<Device, 2, DType>(dptr_, shape_.FlatTo2D(), stride_, stream_);
521  }
527  MSHADOW_XINLINE Tensor<Device, kSubdim, DType> operator[](index_t idx) const {
528  return Tensor<Device, kSubdim, DType>(dptr_ + this->MemSize<1>() * idx,
529  shape_.SubShape(), stride_, stream_);
530  }
537  MSHADOW_XINLINE Tensor<Device, dimension, DType>
538  Slice(index_t begin, index_t end) const {
539  Shape<dimension> s = this->shape_;
540  s[0] = end - begin;
541  return Tensor<Device, dimension, DType>(dptr_ + this->MemSize<1>() * begin,
542  s, stride_, stream_);
543  }
545  inline Tensor<Device, dimension, DType> &
546  operator=(const Tensor<Device, dimension, DType> &exp) {
547  dptr_ = exp.dptr_;
548  shape_ = exp.shape_;
549  stride_ = exp.stride_;
550  stream_ = exp.stream_;
551  return *this;
552  }
554  template<typename E, int etype>
555  inline Tensor<Device, dimension, DType> &
556  operator=(const expr::Exp<E, DType, etype> &exp) {
557  return this->__assign(exp);
558  }
560  inline Tensor<Device, dimension, DType> &operator=(const DType &exp) {
561  return this->__assign(exp);
562  }
563 };
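The general Tensor above is a non-owning view: it holds a data pointer, a shape, a stride, and a stream, and indexing or slicing only adjusts the pointer and shape. A minimal sketch (illustrative, not part of tensor.h; same include and using-declaration as the earlier sketches):

void tensor_view_demo() {
  float data[6] = {0, 1, 2, 3, 4, 5};
  Tensor<cpu, 2, float> mat(data, Shape2(2, 3));  // wrap existing memory, no allocation
  Tensor<cpu, 1, float> row  = mat[1];            // view of the second row: {3, 4, 5}
  Tensor<cpu, 2, float> top  = mat.Slice(0, 1);   // view of the first row as a 1 x 3 tensor
  Tensor<cpu, 1, float> flat = mat.FlatTo1D();    // view of all 6 elements
  (void)row; (void)top; (void)flat;
}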
564 /*
565  * re-specialized class Tensor1D; this is due to a different implementation of operator[]
566  */
567 template<typename Device, typename DType>
568 struct Tensor<Device, 1, DType>:
569  public TRValue<Tensor<Device, 1, DType>, Device, 1, DType> {
570  public:
571  DType *dptr_;
572  Shape<1> shape_;
573  index_t stride_;
574  Stream<Device> *stream_;
575  // constructor
576  MSHADOW_XINLINE Tensor(void) : stream_(NULL) {}
577  MSHADOW_XINLINE Tensor(const Shape<1> &shape)
578  : shape_(shape), stream_(NULL) {}
579  MSHADOW_XINLINE Tensor(DType *dptr, Shape<1> shape)
580  : dptr_(dptr), shape_(shape), stride_(shape[0]), stream_(NULL) {}
581  MSHADOW_XINLINE Tensor(DType *dptr, Shape<1> shape, Stream<Device> *stream)
582  : dptr_(dptr), shape_(shape), stride_(shape[0]), stream_(stream) {}
583  MSHADOW_XINLINE Tensor(DType *dptr, Shape<1> shape,
584  index_t stride, Stream<Device> *stream)
585  : dptr_(dptr), shape_(shape), stride_(stride), stream_(stream) {}
586  inline void set_stream(Stream<Device> *stream) {
587  this->stream_ = stream;
588  }
589  MSHADOW_XINLINE Tensor<Device, 1, DType> FlatTo1D(void) const {
590  return *this;
591  }
592  MSHADOW_XINLINE Tensor<Device, 2, DType> FlatTo2D(void) const {
593  return Tensor<Device, 2, DType>(dptr_, shape_.FlatTo2D(), stride_, stream_);
594  }
595  MSHADOW_XINLINE Tensor<Device, 1, DType> Slice(index_t begin, index_t end) const {
596  Shape<1> s;
597  s[0] = end - begin;
598  return Tensor<Device, 1, DType>(dptr_ + begin, s, s[0], stream_);
599  }
600  MSHADOW_XINLINE bool CheckContiguous(void) const {
601  return true;
602  }
603  MSHADOW_XINLINE index_t MSize(void) const {
604  return shape_[0];
605  }
606  MSHADOW_XINLINE index_t size(index_t i) const {
607  return shape_[0];
608  }
609  MSHADOW_XINLINE DType &operator[](index_t idx) {
610  return dptr_[idx];
611  }
612  MSHADOW_XINLINE const DType &operator[](index_t idx) const {
613  return dptr_[idx];
614  }
616  inline Tensor<Device, 1, DType> &
617  operator=(const Tensor<Device, 1, DType> &exp) {
618  dptr_ = exp.dptr_;
619  shape_ = exp.shape_;
620  stride_ = exp.stride_;
621  stream_ = exp.stream_;
622  return *this;
623  }
624  template<typename E, int etype>
625  inline Tensor<Device, 1, DType> &
626  operator=(const expr::Exp<E, DType, etype> &exp) {
627  return this->__assign(exp);
628  }
629  inline Tensor<Device, 1, DType> &operator=(const DType &exp) {
630  return this->__assign(exp);
631  }
632 };
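The 1-D specialization differs mainly in operator[], which returns an element reference rather than a lower-dimensional view. A small sketch under the same assumptions as the earlier examples:

void vector_demo() {
  float buf[4] = {1.f, 2.f, 3.f, 4.f};
  Tensor<cpu, 1, float> vec(buf, Shape1(4));
  float second = vec[1];                        // element access: 2.0f
  Tensor<cpu, 1, float> mid = vec.Slice(1, 3);  // view of {2, 3}
  (void)second; (void)mid;
}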
633 //------------------------
634 // Function Declarations
635 //-----------------------
643 template<typename Device>
644 inline void InitTensorEngine(int device_id = 0);
651 template<typename Device>
652 inline void ShutdownTensorEngine(void);
658 template<typename Device>
659 inline void SetDevice(int devid);
668 template<typename Device>
669 inline Stream<Device> *NewStream(bool create_blas_handle,
670  bool create_dnn_handle,
671  int dev_id = -1);
676 template<typename Device>
677 inline Stream<Device> *NewStream(int dev_id) {
678  return NewStream<Device>(true, false, dev_id);
679 }
684 template<typename Device>
685 inline void DeleteStream(Stream<Device> *stream);
697 template<int dim, typename DType>
698 inline void AllocSpace(Tensor<cpu, dim, DType> *obj,
699  bool pad = MSHADOW_ALLOC_PAD);
711 template<int dim, typename DType>
712 inline void AllocSpace(Tensor<gpu, dim, DType> *obj,
713  bool pad = MSHADOW_ALLOC_PAD);
720 template<int dim, typename DType>
721 inline void FreeSpace(Tensor<cpu, dim, DType> *obj);
728 template<int dim, typename DType>
729 inline void FreeSpace(Tensor<gpu, dim, DType> *obj);
742 template<typename Device, typename DType, int dim>
743 inline Tensor<Device, dim, DType> NewTensor(const Shape<dim> &shape,
744  DType initv,
745  bool pad = MSHADOW_ALLOC_PAD,
746  Stream<Device> *stream = NULL);
755 template<int dim, typename DType>
756 inline void Copy(Tensor<cpu, dim, DType> dst,
757  const Tensor<cpu, dim, DType> &src,
758  Stream<cpu> *stream = NULL);
767 template<int dim, typename DType>
768 inline void Copy(Tensor<cpu, dim, DType> dst,
769  const Tensor<gpu, dim, DType> &src,
770  Stream<gpu> *stream = NULL);
779 template<int dim, typename DType>
780 inline void Copy(Tensor<gpu, dim, DType> dst,
781  const Tensor<cpu, dim, DType> &src,
782  Stream<gpu> *stream = NULL);
791 template<int dim, typename DType>
792 inline void Copy(Tensor<gpu, dim, DType> dst,
793  const Tensor<gpu, dim, DType> &src,
794  Stream<gpu> *stream = NULL);
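A typical allocate / copy / free sequence on CPU using the declarations above (illustrative sketch, not part of tensor.h; same include and using-declaration as the earlier sketches):

void alloc_copy_demo() {
  InitTensorEngine<cpu>();
  Tensor<cpu, 2, float> a = NewTensor<cpu>(Shape2(2, 3), 0.0f);  // allocate and fill with 0
  Tensor<cpu, 2, float> b(Shape2(2, 3));
  AllocSpace(&b);
  Copy(b, a);          // same-shape CPU-to-CPU copy
  FreeSpace(&a);
  FreeSpace(&b);
  ShutdownTensorEngine<cpu>();
}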
800 template<typename DType>
801 inline void Softmax(Tensor<cpu, 2, DType> dst, const Tensor<cpu, 2, DType> &energy);
807 template<typename DType>
808 inline void Softmax(Tensor<gpu, 2, DType> dst, const Tensor<gpu, 2, DType> &energy);
809 
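A sketch of calling Softmax on a CPU tensor (illustrative; continues the setup of the previous sketches):

void softmax_demo() {
  Tensor<cpu, 2, float> energy = NewTensor<cpu>(Shape2(4, 10), 0.0f);
  Tensor<cpu, 2, float> prob   = NewTensor<cpu>(Shape2(4, 10), 0.0f);
  Softmax(prob, energy);  // row-wise: prob[i][j] = exp(energy[i][j]) / sum_j exp(energy[i][j])
  FreeSpace(&energy);
  FreeSpace(&prob);
}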
816 template<typename DType>
817 inline void SoftmaxGrad(Tensor<cpu, 2, DType> dst,
818  const Tensor<cpu, 2, DType> &src,
819  const Tensor<cpu, 1, DType> &label);
826 template<typename DType>
827 inline void SoftmaxGrad(const Tensor<gpu, 2, DType> &dst,
828  const Tensor<gpu, 2, DType> &src,
829  const Tensor<gpu, 1, DType> &label);
838 template<bool clip = true, typename IndexType, typename DType>
839 inline void AddTakeGrad(Tensor<cpu, 2, DType> dst,
840  const Tensor<cpu, 1, IndexType>& index,
841  const Tensor<cpu, 2, DType> &src);
850 template<bool clip = true, typename IndexType, typename DType>
851 inline void AddTakeGrad(Tensor<gpu, 2, DType> dst,
852  const Tensor<gpu, 1, IndexType>& index,
853  const Tensor<gpu, 2, DType> &src);
863 template<typename IndexType, typename DType>
864 inline void AddTakeGradLargeBatch(Tensor<cpu, 2, DType> dst,
865  const Tensor<cpu, 1, IndexType>& sorted,
866  const Tensor<cpu, 1, IndexType>& index,
867  const Tensor<cpu, 2, DType> &src);
877 template<typename IndexType, typename DType>
878 inline void AddTakeGradLargeBatch(Tensor<gpu, 2, DType> dst,
879  const Tensor<gpu, 1, IndexType>& sorted,
880  const Tensor<gpu, 1, IndexType>& index,
881  const Tensor<gpu, 2, DType> &src);
890 template<typename IndexType, typename DType>
891 inline void IndexFill(Tensor<cpu, 2, DType> dst,
892  const Tensor<cpu, 1, IndexType>& index,
893  const Tensor<cpu, 2, DType> &src);
902 template<typename IndexType, typename DType>
903 inline void IndexFill(Tensor<gpu, 2, DType> dst,
904  const Tensor<gpu, 1, IndexType>& index,
905  const Tensor<gpu, 2, DType> &src);
912 template<typename KDType, typename VDType>
913 inline void SortByKey(Tensor<cpu, 1, KDType> keys, Tensor<cpu, 1, VDType> values,
914  bool is_ascend = true);
921 template<typename KDType, typename VDType>
922 inline void SortByKey(Tensor<gpu, 1, KDType> keys, Tensor<gpu, 1, VDType> values,
923  bool is_ascend = true);
932 template<typename Device, typename VDType, typename SDType>
933 inline void VectorizedSort(Tensor<Device, 1, VDType> values, Tensor<Device, 1, SDType> segments);
934 
935 // function declarations to support expression, no need to understand them
936 // these functions do not need to be directly used
949 template<typename Saver, typename R, int dim,
950  typename DType, typename E, int etype>
951 inline void MapExp(TRValue<R, cpu, dim, DType> *dst,
952  const expr::Exp<E, DType, etype> &exp);
965 template<typename Saver, typename R, int dim,
966  typename DType, typename E, int etype>
967 inline void MapExp(TRValue<R, gpu, dim, DType> *dst,
968  const expr::Exp<E, DType, etype> &exp);
982 template<typename Saver, typename Reducer,
983  typename R, typename DType, typename E, int etype>
984 inline void MapReduceKeepLowest(TRValue<R, cpu, 1, DType> *dst,
985  const expr::Exp<E, DType, etype> &exp,
986  DType scale = 1);
1000 template<typename Saver, typename Reducer, typename R,
1001  typename DType, typename E, int etype>
1002 inline void MapReduceKeepLowest(TRValue<R, gpu, 1, DType> *dst,
1003  const expr::Exp<E, DType, etype> &exp,
1004  DType scale = 1);
1019 template<typename Saver, typename Reducer, int dimkeep,
1020  typename R, typename DType, typename E, int etype>
1021 inline void MapReduceKeepHighDim(TRValue<R, cpu, 1, DType> *dst,
1022  const expr::Exp<E, DType, etype> &exp,
1023  DType scale = 1);
1038 template<typename Saver, typename Reducer, int dimkeep,
1039  typename R, typename DType, typename E, int etype>
1040 inline void MapReduceKeepHighDim(TRValue<R, gpu, 1, DType> *dst,
1041  const expr::Exp<E, DType, etype> &exp,
1042  DType scale = 1);
1049 template<typename Device, typename DType>
1050 inline void VectorDot(Tensor<Device, 1, DType> dst,
1051  const Tensor<Device, 1, DType> &lhs,
1052  const Tensor<Device, 1, DType> &rhs);
1062 template<bool transpose_left, bool transpose_right, typename Device, typename DType>
1063 inline void BatchGEMM(Tensor<Device, 3, DType> dst,
1064  const Tensor<Device, 3, DType> &lhs,
1065  const Tensor<Device, 3, DType> &rhs,
1066  DType alpha,
1067  DType beta,
1068  Tensor<Device, 1, DType*> workspace);
1069 } // namespace mshadow
1070 // include headers
1071 #include "./stream_gpu-inl.h"
1072 #include "./extension.h"
1073 #include "./expr_engine-inl.h"
1074 #include "./tensor_cpu-inl.h"
1075 #include "./tensor_gpu-inl.h"
1076 #include "./io.h"
1077 #include "./tensor_container.h"
1078 #include "./random.h"
1079 // add definition of scalar related operators
1080 #ifdef MSHADOW_SCALAR_
1081  #error "MSHADOW_SCALAR_ must not be defined"
1082 #endif
1083 // enumerate all the scalar data type we aim to be good at
1084 #define MSHADOW_SCALAR_ float
1085 #include "./expr_scalar-inl.h"
1086 #undef MSHADOW_SCALAR_
1087 #define MSHADOW_SCALAR_ double
1088 #include "./expr_scalar-inl.h"
1089 #undef MSHADOW_SCALAR_
1090 #define MSHADOW_SCALAR_ int32_t
1091 #include "./expr_scalar-inl.h"
1092 #undef MSHADOW_SCALAR_
1093 #define MSHADOW_SCALAR_ int64_t
1094 #include "./expr_scalar-inl.h"
1095 #undef MSHADOW_SCALAR_
1096 #define MSHADOW_SCALAR_ mshadow::half::half_t
1097 #include "./expr_scalar-inl.h"
1098 #undef MSHADOW_SCALAR_
1099 #endif // MSHADOW_TENSOR_H_