25 #ifndef MXNET_COMMON_UTILS_H_    26 #define MXNET_COMMON_UTILS_H_    28 #include <dmlc/logging.h>    30 #include <nnvm/graph.h>    35 #include <nnvm/graph_attr_types.h>    39 #include <type_traits>    47 #include "../operator/mxnet_op.h"    57   template<
typename DType, 
typename IType>
    58   MSHADOW_XINLINE 
static void Map(
int i, DType* out, 
const IType* indptr,
    59                                   const nnvm::dim_t end, 
const nnvm::dim_t idx_size) {
    60     if (indptr[i+1] < 0 || indptr[i+1] < indptr[i] ||
    61         (i == 0 && indptr[i] != 0) ||
    62         (i == end - 1 && indptr[end] != idx_size))
    72   template<
typename DType, 
typename IType, 
typename RType>
    73   MSHADOW_XINLINE 
static void Map(
int i, DType* out, 
const IType* idx,
    74                                   const RType* indptr, 
const nnvm::dim_t ncols) {
    75     for (RType j = indptr[i]; j < indptr[i+1]; j++) {
    76       if (idx[j] >= ncols || idx[j] < 0 ||
    77           (j < indptr[i+1] - 1 && idx[j] >= idx[j+1])) {
    90   template<
typename DType, 
typename IType>
    91   MSHADOW_XINLINE 
static void Map(
int i, DType* out, 
const IType* idx,
    92                                   const nnvm::dim_t end, 
const nnvm::dim_t nrows) {
    93     if ((i < end && idx[i+1] <= idx[i])
    94         || idx[i] < 0 || idx[i] >= nrows)
    99 template<
typename xpu>
   101                         const TBlob &err_cpu, 
const bool full_check);
   111 template<
typename xpu>
   113                         const TBlob &err_cpu, 
const bool full_check) {
   114   using namespace op::mxnet_op;
   116           << 
"CheckFormatCSRImpl is for CSRNDArray";
   121   if ((shape.ndim() != 2) ||
   122       (idx_shape.ndim() != 1 || indptr_shape.ndim() != 1 || storage_shape.ndim() != 1) ||
   123       (indptr_shape[0] != shape[0] + 1) ||
   124       (idx_shape[0] != storage_shape[0])) {
   125      MSHADOW_TYPE_SWITCH(err_cpu.
type_flag_, DType, {
   126        DType* err = err_cpu.dptr<DType>();
   132     MSHADOW_TYPE_SWITCH(err_cpu.
type_flag_, DType, {
   133       MSHADOW_IDX_TYPE_SWITCH(input.aux_type(csr::kIndPtr), RType, {
   134         MSHADOW_IDX_TYPE_SWITCH(input.aux_type(csr::kIdx), IType, {
   135           mshadow::Stream<xpu> *s = rctx.get_stream<xpu>();
   136           NDArray ret_xpu = NDArray(mshadow::Shape1(1),
   137                                     rctx.get_ctx(), false, err_cpu.type_flag_);
   138           TBlob val_xpu = ret_xpu.data();
   139           Kernel<set_to_int<kNormalErr>, xpu>::Launch(s, val_xpu.Size(), val_xpu.dptr<DType>());
   140           Kernel<csr_indptr_check, xpu>::Launch(s, indptr_shape[0] - 1, val_xpu.dptr<DType>(),
   141             input.aux_data(csr::kIndPtr).dptr<RType>(),
   142             indptr_shape[0] - 1, idx_shape[0]);
   144           if (idx_shape[0] != 0) {
   145             Kernel<csr_idx_check, xpu>::Launch(s, indptr_shape[0] - 1, val_xpu.dptr<DType>(),
   146               input.aux_data(csr::kIdx).dptr<IType>(),
   147               input.aux_data(csr::kIndPtr).dptr<RType>(), shape[1]);
   149           mshadow::Copy(err_cpu.get<cpu, 1, DType>(),
   150                         val_xpu.get<xpu, 1, DType>(s), s);
   165 template<
typename xpu>
   167                         const TBlob &err_cpu, 
const bool full_check) {
   168   using namespace op::mxnet_op;
   170           << 
"CheckFormatRSPImpl is for RSPNDArray";
   173     MSHADOW_TYPE_SWITCH(err_cpu.
type_flag_, DType, {
   174       DType* err = err_cpu.dptr<DType>();
   179   if (idx_shape[0] == 0) {
   183     MSHADOW_TYPE_SWITCH(err_cpu.
type_flag_, DType, {
   184       MSHADOW_IDX_TYPE_SWITCH(input.aux_type(rowsparse::kIdx), IType, {
   185         mshadow::Stream<xpu> *s = rctx.get_stream<xpu>();
   186         NDArray ret_xpu = NDArray(mshadow::Shape1(1),
   187                                   rctx.get_ctx(), false, err_cpu.type_flag_);
   188         TBlob val_xpu = ret_xpu.data();
   189         Kernel<set_to_int<kNormalErr>, xpu>::Launch(s, val_xpu.Size(), val_xpu.dptr<DType>());
   191         Kernel<rsp_idx_check, xpu>::Launch(s, idx_shape[0],
   192           val_xpu.dptr<DType>(), input.aux_data(rowsparse::kIdx).dptr<IType>(),
   193           idx_shape[0] - 1, input.shape()[0]);
   194         mshadow::Copy(err_cpu.get<cpu, 1, DType>(),
   195                       val_xpu.get<xpu, 1, DType>(s), s);
   201 template<
typename xpu>
   203                      const TBlob &err_cpu, 
const bool full_check) {
   206     CheckFormatCSRImpl<xpu>(rctx, input, err_cpu, full_check);
   208     CheckFormatRSPImpl<xpu>(rctx, input, err_cpu, full_check);
   212     LOG(FATAL) << 
"Unknown storage type " << stype;
   217 template<
typename xpu>
   225   if (!vstorage.empty()) {
   226     for (
const auto& i : vstorage) {
   227       if (i != stype) 
return false;
   245   if (!vstorage.empty()) {
   247     for (
const auto i : vstorage) {
   250       } 
else if (i == stype2) {
   257       *has_both = has == 3;
   269   if (!ndarrays.empty()) {
   270     for (
const auto& nd : ndarrays) {
   271       if (nd.storage_type() != stype) {
   290   if (!ndarrays.empty()) {
   292     for (
const auto& nd : ndarrays) {
   294       if (stype == stype1) {
   296       } 
else if (stype == stype2) {
   303       *has_both = has == 3;
   316       return "fcompute_ex";
   318       return "fcompute_fallback";
   344   return dmlc::GetEnv(
"MXNET_GPU_WORKER_NTHREADS", 2);
   351   int num_match_color = dmlc::GetEnv(
"MXNET_EXEC_NUM_TEMP", 1);
   355 template<
typename T, 
typename V>
   358 #pragma omp parallel for reduction(+:sum)   359   for (
int i = 0; i < n; ++i) {
   372 template<
typename RandomIt, 
typename Compare>
   374                         size_t grainsize, 
const Compare& comp) {
   375   if (len < grainsize) {
   378     std::thread thr(ParallelSortHelper<RandomIt, Compare>, first, len/2, grainsize, comp);
   381     std::inplace_merge(first, first+len/2, first+len, comp);
   394 template<
typename RandomIt, 
typename Compare>
   395 void ParallelSort(RandomIt first, RandomIt last, 
size_t num_threads, Compare comp) {
   396   const auto num = std::distance(first, last);
   397   size_t grainsize = 
std::max(num / num_threads + 5, static_cast<size_t>(1024*16));
   410 template<
typename RandomIt>
   413                std::less<
typename std::iterator_traits<RandomIt>::value_type>());
   451 template <
class T, 
size_t kSize>
   472 template <
class T, 
class... Args>
   474   return std::unique_ptr<T>(
new T(std::forward<Args>(args)...));
   488   using U = 
typename std::remove_extent<T>::type;
   489   return std::unique_ptr<T>(
new U[n]{});
   500 template <
class T, 
class... Args>
   503 template<
typename FCompType>
   506   static auto& fcompute_cpu = nnvm::Op::GetAttr<FCompType>(name + 
"<cpu>");
   507   static auto& fcompute_gpu = nnvm::Op::GetAttr<FCompType>(name + 
"<gpu>");
   509   if (ctx.
dev_mask() == cpu::kDevMask) {
   510     return fcompute_cpu.get(op, 
nullptr);
   511   } 
else if (ctx.
dev_mask() == gpu::kDevMask) {
   512     return fcompute_gpu.get(op, 
nullptr);
   514     LOG(FATAL) << 
"Unknown device mask";
   521 #endif  // MXNET_COMMON_UTILS_H_ Symbol min(const std::string &symbol_name, Symbol data, Shape axis=Shape(), bool keepdims=false, bool exclude=false)
Definition: op.h:2219
 
static MSHADOW_XINLINE void Map(int i, DType *out, const IType *idx, const RType *indptr, const nnvm::dim_t ncols)
Definition: utils.h:73
 
NDArrayStorageType
Definition: ndarray.h:59
 
void CheckFormatCSRImpl(const RunContext &rctx, const NDArray &input, const TBlob &err_cpu, const bool full_check)
Check the validity of CSRNDArray. 
Definition: utils.h:112
 
DeviceType dev_mask() const 
Get corresponding device mask. 
Definition: base.h:160
 
NDArrayStorageType storage_type() const 
Definition: ndarray.h:297
 
Engine that schedules all the operations according to dependency. 
 
void CheckFormatImpl(const RunContext &rctx, const NDArray &input, const TBlob &err_cpu, const bool full_check)
Definition: utils.h:202
 
const TShape & storage_shape() const 
Definition: ndarray.h:204
 
namespace of mxnet 
Definition: base.h:127
 
Additional operator attributes beside the ones provided by NNVM. 
 
void KnownBound
Type of T. 
Definition: utils.h:456
 
void ParallelSortHelper(RandomIt first, size_t len, size_t grainsize, const Compare &comp)
Helper function for ParallelSort. DO NOT call this function directly. Use the interface ParallelSort instead.
Definition: utils.h:373
 
int type_flag_
type flag of the tensor blob 
Definition: tensor_blob.h:67
 
FCompType GetFCompute(const nnvm::Op *op, const std::string &name, const Context &ctx)
Definition: utils.h:504
 
V ParallelAccumulate(const T *a, const int n, V start)
Definition: utils.h:356
 
nnvm::TShape TShape
Shape data structure used to record shape information. 
Definition: base.h:137
 
int GetNumThreadPerGPU()
Definition: utils.h:342
 
execution time context. The information needed in runtime for actual execution. 
Definition: base.h:253
 
DispatchMode
the dispatch mode of the operator 
Definition: op_attr_types.h:105
 
Symbol max(const std::string &symbol_name, Symbol data, Shape axis=Shape(), bool keepdims=false, bool exclude=false)
Definition: op.h:2182
 
std::string stype_string(const int x)
get string representation of storage_type 
Definition: utils.h:329
 
void CastStorageDispatch(const OpContext &ctx, const NDArray &input, const NDArray &output)
 
void CheckFormatWrapper(const RunContext &rctx, const NDArray &input, const TBlob &err_cpu, const bool full_check)
 
void ParallelSort(RandomIt first, RandomIt last, size_t num_threads, Compare comp)
Sort the elements in the range [first, last) into the ascending order defined by the comparator comp...
Definition: utils.h:395
 
All the possible information needed by Operator.Forward and Backward. This is the superset of RunContext.
Definition: op_attr_types.h:66
 
bool ContainsOnlyStorage(const StorageTypeVector &vstorage, const NDArrayStorageType stype)
returns true if all storage types in vstorage are the same as target stype. false is returned for empty inputs.
Definition: utils.h:223
 
std::mt19937 RANDOM_ENGINE
Random Engine. 
Definition: utils.h:419
 
Indices of RSPNDArray should be non-negative, less than the size of first dimension and in ascending order.
Definition: utils.h:89
 
const TShape & shape() const 
Definition: ndarray.h:196
 
std::string dispatch_mode_string(const DispatchMode x)
get string representation of dispatch_mode 
Definition: utils.h:311
 
Helper for non-array type T. 
Definition: utils.h:430
 
Data structures that can appear in graph attributes. 
 
IndPtr should be non-negative, in non-decreasing order, start with 0 and end with value equal with size of indices.
Definition: utils.h:56
 
std::unique_ptr< T[]> UnknownBound
Type of T. 
Definition: utils.h:445
 
nnvm::Op Op
operator structure from NNVM 
Definition: base.h:139
 
Symbol sort(const std::string &symbol_name, Symbol data, dmlc::optional< int > axis=dmlc::optional< int >(-1), bool is_ascend=true)
Definition: op.h:2439
 
std::unique_ptr< T > SingleObject
Type of T. 
Definition: utils.h:434
 
void CheckFormatRSPImpl(const RunContext &rctx, const NDArray &input, const TBlob &err_cpu, const bool full_check)
Check the validity of RowSparseNDArray. 
Definition: utils.h:166
 
int GetExecNumMatchColor()
Definition: utils.h:349
 
static MSHADOW_XINLINE void Map(int i, DType *out, const IType *idx, const nnvm::dim_t end, const nnvm::dim_t nrows)
Definition: utils.h:91
 
helper::UniqueIf< T >::SingleObject MakeUnique(Args &&...args)
Constructs an object of type T and wraps it in a std::unique_ptr. 
Definition: utils.h:473
 
Context information about the execution environment. 
Definition: base.h:142
 
Indices should be non-negative, less than the number of columns and in ascending order per row.
Definition: utils.h:71
 
const TShape & aux_shape(size_t index) const 
get the shape of aux_data(index) 
Definition: ndarray.h:216
 
ndarray interface 
Definition: ndarray.h:79
 
static MSHADOW_XINLINE void Map(int i, DType *out, const IType *indptr, const nnvm::dim_t end, const nnvm::dim_t idx_size)
Definition: utils.h:58
 
std::vector< int > StorageTypeVector
The result holder of storage type of each NodeEntry in the graph. 
Definition: graph_attr_types.h:45
 
tensor blob class that can be used to hold tensor of any dimension, any device and any data type.
Definition: tensor_blob.h:59
 
Symbol sum(const std::string &symbol_name, Symbol data, Shape axis=Shape(), bool keepdims=false, bool exclude=false)
Definition: op.h:1993