#ifndef MXNET_COMMON_UTILS_H_
#define MXNET_COMMON_UTILS_H_

#include <dmlc/logging.h>
#include <dmlc/omp.h>
#include <dmlc/thread_local.h>
#include <mxnet/engine.h>
#include <mxnet/graph_attr_types.h>
#include <mxnet/ndarray.h>
#include <mxnet/op_attr_types.h>
#include <nnvm/graph.h>
#include <nnvm/graph_attr_types.h>

#include <algorithm>
#include <iterator>
#include <memory>
#include <random>
#include <sstream>
#include <string>
#include <thread>
#include <type_traits>
#include <unordered_set>
#include <utility>
#include <vector>

#include "../operator/mxnet_op.h"

namespace mxnet {
namespace common {

/*!
 * \brief IndPtr should be non-negative, in non-decreasing order, start with 0
 *        and end with a value equal to the size of the indices array.
 */
struct csr_indptr_check {
  template<typename DType, typename IType>
  MSHADOW_XINLINE static void Map(int i, DType* out, const IType* indptr,
                                  const nnvm::dim_t end, const nnvm::dim_t idx_size) {
    if (indptr[i+1] < 0 || indptr[i+1] < indptr[i] ||
        (i == 0 && indptr[i] != 0) ||
        (i == end - 1 && indptr[end] != idx_size))
      *out = kCSRIndPtrErr;
  }
};
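// Worked example (editor's illustration, not upstream documentation): for a
// 3x4 CSR matrix with 5 stored values, indptr = {0, 2, 2, 5} is valid -- it
// starts at 0, never decreases, and its last entry equals the number of
// stored indices. An indptr such as {0, 3, 2, 5} trips the
// indptr[i+1] < indptr[i] branch above.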
/*!
 * \brief Indices should be non-negative, less than the number of columns
 *        and in ascending order per row.
 */
struct csr_idx_check {
  template<typename DType, typename IType, typename RType>
  MSHADOW_XINLINE static void Map(int i, DType* out, const IType* idx,
                                  const RType* indptr, const nnvm::dim_t ncols) {
    for (RType j = indptr[i]; j < indptr[i+1]; j++) {
      if (idx[j] >= ncols || idx[j] < 0 ||
          (j < indptr[i+1] - 1 && idx[j] >= idx[j+1])) {
        *out = kCSRIdxErr;
        break;
      }
    }
  }
};
/*!
 * \brief Indices of RSPNDArray should be non-negative, less than the size
 *        of the first dimension and in ascending order.
 */
struct rsp_idx_check {
  template<typename DType, typename IType>
  MSHADOW_XINLINE static void Map(int i, DType* out, const IType* idx,
                                  const nnvm::dim_t end, const nnvm::dim_t nrows) {
    if ((i < end && idx[i+1] <= idx[i])
        || idx[i] < 0 || idx[i] >= nrows)
      *out = kRSPIdxErr;
  }
};
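// Worked example (editor's illustration): for a row-sparse array with 10 rows
// and 3 stored rows, idx = {0, 4, 7} is valid (non-negative, strictly
// ascending, all < 10), while {4, 4, 7} fails the idx[i+1] <= idx[i] test.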
template<typename xpu>
void CheckFormatWrapper(const RunContext &rctx, const NDArray &input,
                        const TBlob &err_cpu, const bool full_check);
/*!
 * \brief Check the validity of CSRNDArray.
 * \param rctx Execution context.
 * \param input Input NDArray of CSRStorage.
 * \param err_cpu Error number on cpu.
 * \param full_check If true, rigorous check, O(N) operations;
 *        otherwise basic check, O(1) operations.
 */
template<typename xpu>
void CheckFormatCSRImpl(const RunContext &rctx, const NDArray &input,
                        const TBlob &err_cpu, const bool full_check) {
  using namespace op::mxnet_op;
  CHECK_EQ(input.storage_type(), kCSRStorage)
          << "CheckFormatCSRImpl is for CSRNDArray";
  const TShape shape = input.shape();
  const TShape idx_shape = input.aux_shape(csr::kIdx);
  const TShape indptr_shape = input.aux_shape(csr::kIndPtr);
  const TShape storage_shape = input.storage_shape();
  if ((shape.ndim() != 2) ||
      (idx_shape.ndim() != 1 || indptr_shape.ndim() != 1 || storage_shape.ndim() != 1) ||
      (indptr_shape[0] != shape[0] + 1) ||
      (idx_shape[0] != storage_shape[0])) {
    MSHADOW_TYPE_SWITCH(err_cpu.type_flag_, DType, {
      DType* err = err_cpu.dptr<DType>();
      *err = kCSRShapeErr;
    });
    return;
  }
  if (full_check) {
    MSHADOW_TYPE_SWITCH(err_cpu.type_flag_, DType, {
      MSHADOW_IDX_TYPE_SWITCH(input.aux_type(csr::kIndPtr), RType, {
        MSHADOW_IDX_TYPE_SWITCH(input.aux_type(csr::kIdx), IType, {
          mshadow::Stream<xpu> *s = rctx.get_stream<xpu>();
          NDArray ret_xpu = NDArray(mshadow::Shape1(1),
                                    rctx.get_ctx(), false, err_cpu.type_flag_);
          TBlob val_xpu = ret_xpu.data();
          Kernel<set_to_int<kNormalErr>, xpu>::Launch(s, val_xpu.Size(), val_xpu.dptr<DType>());
          Kernel<csr_indptr_check, xpu>::Launch(s, indptr_shape[0] - 1, val_xpu.dptr<DType>(),
            input.aux_data(csr::kIndPtr).dptr<RType>(),
            indptr_shape[0] - 1, idx_shape[0]);
          // no need to check the indices if the indices array is empty
          if (idx_shape[0] != 0) {
            Kernel<csr_idx_check, xpu>::Launch(s, indptr_shape[0] - 1, val_xpu.dptr<DType>(),
              input.aux_data(csr::kIdx).dptr<IType>(),
              input.aux_data(csr::kIndPtr).dptr<RType>(), shape[1]);
          }
          mshadow::Copy(err_cpu.get<cpu, 1, DType>(),
                        val_xpu.get<xpu, 1, DType>(s), s);
        });
      });
    });
  }
}
/*!
 * \brief Check the validity of RowSparseNDArray.
 * \param rctx Execution context.
 * \param input Input NDArray of RowSparseStorage.
 * \param err_cpu Error number on cpu.
 * \param full_check If true, rigorous check, O(N) operations;
 *        otherwise basic check, O(1) operations.
 */
template<typename xpu>
void CheckFormatRSPImpl(const RunContext &rctx, const NDArray &input,
                        const TBlob &err_cpu, const bool full_check) {
  using namespace op::mxnet_op;
  CHECK_EQ(input.storage_type(), kRowSparseStorage)
          << "CheckFormatRSPImpl is for RSPNDArray";
  const TShape idx_shape = input.aux_shape(rowsparse::kIdx);
  if (idx_shape[0] != input.storage_shape()[0]) {
    MSHADOW_TYPE_SWITCH(err_cpu.type_flag_, DType, {
      DType* err = err_cpu.dptr<DType>();
      *err = kRSPShapeErr;
    });
    return;
  }
  if (idx_shape[0] == 0) {
    return;
  }
  if (full_check) {
    MSHADOW_TYPE_SWITCH(err_cpu.type_flag_, DType, {
      MSHADOW_IDX_TYPE_SWITCH(input.aux_type(rowsparse::kIdx), IType, {
        mshadow::Stream<xpu> *s = rctx.get_stream<xpu>();
        NDArray ret_xpu = NDArray(mshadow::Shape1(1),
                                  rctx.get_ctx(), false, err_cpu.type_flag_);
        TBlob val_xpu = ret_xpu.data();
        Kernel<set_to_int<kNormalErr>, xpu>::Launch(s, val_xpu.Size(), val_xpu.dptr<DType>());
        Kernel<rsp_idx_check, xpu>::Launch(s, idx_shape[0],
          val_xpu.dptr<DType>(), input.aux_data(rowsparse::kIdx).dptr<IType>(),
          idx_shape[0] - 1, input.shape()[0]);
        mshadow::Copy(err_cpu.get<cpu, 1, DType>(),
                      val_xpu.get<xpu, 1, DType>(s), s);
      });
    });
  }
}
template<typename xpu>
void CheckFormatImpl(const RunContext &rctx, const NDArray &input,
                     const TBlob &err_cpu, const bool full_check) {
  int stype = input.storage_type();
  if (stype == kCSRStorage) {
    CheckFormatCSRImpl<xpu>(rctx, input, err_cpu, full_check);
  } else if (stype == kRowSparseStorage) {
    CheckFormatRSPImpl<xpu>(rctx, input, err_cpu, full_check);
  } else if (stype == kDefaultStorage) {
    // default storage is always valid; nothing to check
  } else {
    LOG(FATAL) << "Unknown storage type " << stype;
  }
}
/*!
 * \brief Pick rows specified by user input index array from a row sparse
 *        ndarray and save them in the output sparse ndarray.
 */
template<typename xpu>
void SparseRetainOpForwardRspWrapper(mshadow::Stream<xpu> *s,
                                     const NDArray& input_nd,
                                     const TBlob& idx_data,
                                     const OpReqType req,
                                     NDArray* output_nd);
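/*
 * Usage sketch (editor's example, not part of the upstream header): running a
 * full format check on a CSR ndarray from host code. `rctx` and `csr_arr` are
 * assumed to be supplied by the caller.
 */
inline bool ExampleCheckCSRFormat(const RunContext& rctx, const NDArray& csr_arr) {
  // Host-side scalar that receives the error code; kNormalErr means valid.
  float err = kNormalErr;
  TBlob err_cpu(&err, mshadow::Shape1(1), cpu::kDevMask);
  CheckFormatImpl<cpu>(rctx, csr_arr, err_cpu, true);  // full O(N) check
  return err == static_cast<float>(kNormalErr);
}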
/*! \brief Casts tensor storage type to the new type. */
template<typename xpu>
void CastStorageDispatch(const OpContext& ctx, const NDArray& input, const NDArray& output);
/*!
 * \brief Returns true if all storage types in vstorage are the same as the
 *        target stype; false is returned for empty inputs.
 */
inline bool ContainsOnlyStorage(const StorageTypeVector& vstorage,
                                const NDArrayStorageType stype) {
  if (!vstorage.empty()) {
    for (const auto& i : vstorage) {
      if (i != stype) return false;
    }
    return true;
  }
  return false;
}
/*!
 * \brief Returns true if all storage types in vstorage are the same as
 *        stype1 or stype2; sets has_both if both target stypes are present.
 *        false is returned for empty inputs.
 */
inline bool ContainsOnlyStorage(const StorageTypeVector& vstorage,
                                const NDArrayStorageType stype1,
                                const NDArrayStorageType stype2,
                                bool *has_both) {
  if (!vstorage.empty()) {
    uint8_t has = 0;
    for (const auto i : vstorage) {
      if (i == stype1) {
        has |= 1;
      } else if (i == stype2) {
        has |= 2;
      } else {
        return false;
      }
    }
    if (has_both) {
      *has_both = has == 3;
    }
    return true;
  }
  return false;
}
/*!
 * \brief Returns true if the storage types of all arrays in ndarrays are the
 *        same as the target stype; false is returned for empty inputs.
 */
inline bool ContainsOnlyStorage(const std::vector<NDArray>& ndarrays,
                                const NDArrayStorageType stype) {
  if (!ndarrays.empty()) {
    for (const auto& nd : ndarrays) {
      if (nd.storage_type() != stype) {
        return false;
      }
    }
    return true;
  }
  return false;
}
/*!
 * \brief Returns true if the storage types of all arrays in ndarrays are the
 *        same as stype1 or stype2; sets has_both if both target stypes are
 *        present. false is returned for empty inputs.
 */
inline bool ContainsOnlyStorage(const std::vector<NDArray>& ndarrays,
                                const NDArrayStorageType stype1,
                                const NDArrayStorageType stype2,
                                bool *has_both) {
  if (!ndarrays.empty()) {
    uint8_t has = 0;
    for (const auto& nd : ndarrays) {
      const NDArrayStorageType stype = nd.storage_type();
      if (stype == stype1) {
        has |= 1;
      } else if (stype == stype2) {
        has |= 2;
      } else {
        return false;
      }
    }
    if (has_both) {
      *has_both = has == 3;
    }
    return true;
  }
  return false;
}
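/*
 * Usage sketch (editor's example): querying a storage-type vector. All names
 * here are local to the example.
 */
inline bool ExampleContainsOnlyStorage() {
  const StorageTypeVector stypes = {kCSRStorage, kRowSparseStorage};
  bool has_both = false;
  // false: the vector mixes csr and row_sparse entries.
  const bool only_csr = ContainsOnlyStorage(stypes, kCSRStorage);
  // true, and has_both is set because both target stypes occur.
  const bool sparse_only =
      ContainsOnlyStorage(stypes, kCSRStorage, kRowSparseStorage, &has_both);
  return !only_csr && sparse_only && has_both;
}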
/*! \brief Get string representation of dispatch_mode. */
inline std::string dispatch_mode_string(const DispatchMode x) {
  switch (x) {
    case DispatchMode::kFCompute:
      return "fcompute";
    case DispatchMode::kFComputeEx:
      return "fcompute_ex";
    case DispatchMode::kFComputeFallback:
      return "fcompute_fallback";
    case DispatchMode::kVariable:
      return "variable";
    case DispatchMode::kUndefined:
      return "undefined";
  }
  return "unknown";
}

/*! \brief Get string representation of storage_type. */
inline std::string stype_string(const int x) {
  switch (x) {
    case kDefaultStorage:
      return "default";
    case kCSRStorage:
      return "csr";
    case kRowSparseStorage:
      return "row_sparse";
  }
  return "unknown";
}

/*! \brief Get string representation of device type. */
inline std::string dev_type_string(const int dev_type) {
  switch (dev_type) {
    case Context::kCPU:
      return "cpu";
    case Context::kGPU:
      return "gpu";
    case Context::kCPUPinned:
      return "cpu_pinned";
  }
  return "unknown";
}
/*! \brief Get string representation of the operator stypes. */
inline std::string operator_stype_string(const nnvm::NodeAttrs& attrs,
                                         const int dev_mask,
                                         const std::vector<int>& in_attrs,
                                         const std::vector<int>& out_attrs) {
  std::ostringstream os;
  os << "operator = " << attrs.op->name
     << "\ninput storage types = [";
  for (const int attr : in_attrs) {
    os << stype_string(attr) << ", ";
  }
  os << "]\n"
     << "output storage types = [";
  for (const int attr : out_attrs) {
    os << stype_string(attr) << ", ";
  }
  os << "]\n"
     << "params = {";
  for (auto kv : attrs.dict) {
    os << "\"" << kv.first << "\" : " << kv.second << ", ";
  }
  os << "}\n"
     << "context.dev_mask = " << dev_type_string(dev_mask);
  return os.str();
}
/*! \brief Get string representation of the operator. */
inline std::string operator_string(const nnvm::NodeAttrs& attrs,
                                   const OpContext& ctx,
                                   const std::vector<NDArray>& inputs,
                                   const std::vector<OpReqType>& req,
                                   const std::vector<NDArray>& outputs) {
  std::string result = "";
  std::vector<int> in_stypes;
  std::vector<int> out_stypes;
  in_stypes.reserve(inputs.size());
  out_stypes.reserve(outputs.size());
  auto xform = [](const NDArray& arr) -> int { return arr.storage_type(); };
  std::transform(inputs.begin(), inputs.end(), std::back_inserter(in_stypes), xform);
  std::transform(outputs.begin(), outputs.end(), std::back_inserter(out_stypes), xform);
  result += operator_stype_string(attrs, ctx.run_ctx.ctx.dev_mask(), in_stypes, out_stypes);
  return result;
}
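// For illustration (editor's note): for a hypothetical operator "dot" with a
// csr and a dense input dispatched on cpu, operator_stype_string produces a
// multi-line report of the form
//   operator = dot
//   input storage types = [csr, default, ]
//   output storage types = [default, ]
//   params = {"transpose_a" : false, }
//   context.dev_mask = cpu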
/*! \brief Log a message once. Intended for storage fallback warning messages. */
inline void LogOnce(const std::string& message) {
  typedef dmlc::ThreadLocalStore<std::unordered_set<std::string>> LogStore;
  auto log_store = LogStore::Get();
  if (log_store->find(message) == log_store->end()) {
    LOG(INFO) << message;
    log_store->insert(message);
  }
}
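/*
 * Usage sketch (editor's example): repeated calls with the same message log
 * only once per thread, since the store behind LogOnce is thread-local.
 */
inline void ExampleLogOnce() {
  for (int i = 0; i < 3; ++i) {
    LogOnce("sparse fallback engaged");  // printed a single time
  }
}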
/*! \brief Log a storage fallback event. */
inline void LogStorageFallback(const nnvm::NodeAttrs& attrs,
                               const int dev_mask,
                               const std::vector<int>* in_attrs,
                               const std::vector<int>* out_attrs) {
  static bool log = dmlc::GetEnv("MXNET_STORAGE_FALLBACK_LOG_VERBOSE", true);
  if (!log) return;
  const std::string op_str = operator_stype_string(attrs, dev_mask, *in_attrs, *out_attrs);
  std::ostringstream os;
  const char* warning = "\nThe operator with default storage type will be dispatched "
    "for execution. You're seeing this warning message because the operator above is unable "
    "to process the given ndarrays with specified storage types, context and parameter. "
    "Temporary dense ndarrays are generated in order to execute the operator. "
    "You can set environment variable MXNET_STORAGE_FALLBACK_LOG_VERBOSE to "
    "0 to suppress this warning.";
  os << "\nStorage type fallback detected:\n" << op_str << warning;
  LogOnce(os.str());
}
/*! \brief Heuristic to determine the number of threads per GPU. */
inline int GetNumThreadPerGPU() {
  // This is the resource-efficient option.
  return dmlc::GetEnv("MXNET_GPU_WORKER_NTHREADS", 2);
}

/*!
 * \brief Heuristic to get the number of matching colors.
 *        This decides how much parallelism we can get in each GPU.
 */
inline int GetExecNumMatchColor() {
  int num_match_color = dmlc::GetEnv("MXNET_EXEC_NUM_TEMP", 1);
  return std::min(num_match_color, GetNumThreadPerGPU());
}
template<typename T, typename V>
V ParallelAccumulate(const T* a, const int n, V start) {
  V sum = start;
#pragma omp parallel for reduction(+:sum)
  for (int i = 0; i < n; ++i) {
    sum += a[i];
  }
  return sum;
}
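/*
 * Usage sketch (editor's example): accumulating a float buffer into a double
 * sum; the accumulator type is chosen by the `start` argument.
 */
inline double ExampleParallelAccumulate() {
  const float vals[] = {1.0f, 2.0f, 3.0f, 4.0f};
  return ParallelAccumulate(vals, 4, 0.0);  // 10.0
}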
/*!
 * \brief Helper function for ParallelSort.
 *        DO NOT call this function directly.
 *        Use the interface ParallelSort instead.
 */
template<typename RandomIt, typename Compare>
void ParallelSortHelper(RandomIt first, size_t len,
                        size_t grainsize, const Compare& comp) {
  if (len < grainsize) {
    std::sort(first, first+len, comp);
  } else {
    std::thread thr(ParallelSortHelper<RandomIt, Compare>, first, len/2, grainsize, comp);
    ParallelSortHelper(first+len/2, len - len/2, grainsize, comp);
    thr.join();
    std::inplace_merge(first, first+len/2, first+len, comp);
  }
}
/*!
 * \brief Sort the elements in the range [first, last) into the ascending
 *        order defined by the comparator comp. If the range is longer than
 *        the grain size, it is recursively split in half, with one half
 *        sorted on a newly spawned thread.
 */
template<typename RandomIt, typename Compare>
void ParallelSort(RandomIt first, RandomIt last, size_t num_threads, Compare comp) {
  const auto num = std::distance(first, last);
  size_t grainsize = std::max(num / num_threads + 5, static_cast<size_t>(1024*16));
  ParallelSortHelper(first, num, grainsize, comp);
}
/*!
 * \brief Sort the elements in the range [first, last) into ascending order.
 */
template<typename RandomIt>
void ParallelSort(RandomIt first, RandomIt last, size_t num_threads) {
  ParallelSort(first, last, num_threads,
               std::less<typename std::iterator_traits<RandomIt>::value_type>());
}
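/*
 * Usage sketch (editor's example): sorting a vector with four threads. The
 * grain size keeps small inputs on a single thread, so this is safe for any
 * input length.
 */
inline void ExampleParallelSort(std::vector<float>* data) {
  // Ascending order via the std::less default of the comparator-less overload.
  ParallelSort(data->begin(), data->end(), 4);
}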
/*! \brief Random Engine. */
typedef std::mt19937 RANDOM_ENGINE;

/*! \brief Helper functions. */
namespace helper {

/*! \brief Helper for non-array type T. */
template <class T>
struct UniqueIf {
  /*! \brief Type of T. */
  using SingleObject = std::unique_ptr<T>;
};

/*! \brief Helper for an array of unknown bound T. */
template <class T>
struct UniqueIf<T[]> {
  /*! \brief Type of T. */
  using UnknownBound = std::unique_ptr<T[]>;
};

/*! \brief Helper for an array of known bound T. */
template <class T, size_t kSize>
struct UniqueIf<T[kSize]> {
  /*! \brief Type of T. */
  using KnownBound = void;
};

}  // namespace helper
/*!
 * \brief Constructs an object of type T and wraps it in a std::unique_ptr.
 * \param args List of arguments with which an instance of T will be constructed.
 */
template <class T, class... Args>
typename helper::UniqueIf<T>::SingleObject MakeUnique(Args&&... args) {
  return std::unique_ptr<T>(new T(std::forward<Args>(args)...));
}
/*!
 * \brief Constructs an array of type T[n] and wraps it in a std::unique_ptr.
 * \param n The size of the array to construct; elements are value-initialized.
 */
template <class T>
typename helper::UniqueIf<T>::UnknownBound MakeUnique(size_t n) {
  using U = typename std::remove_extent<T>::type;
  return std::unique_ptr<T>(new U[n]{});
}
/*! \brief Constructing arrays of known bound is disallowed. */
template <class T, class... Args>
typename helper::UniqueIf<T>::KnownBound MakeUnique(Args&&... args) = delete;
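/*
 * Usage sketch (editor's example): the two usable MakeUnique flavours.
 * Known-bound arrays such as MakeUnique<int[8]>() are rejected at compile
 * time by the deleted overload above.
 */
inline void ExampleMakeUnique() {
  auto s = MakeUnique<std::string>(3, 'x');  // single object: "xxx"
  auto a = MakeUnique<int[]>(8);             // unknown-bound array, zeroed
  a[0] = static_cast<int>(s->size());
}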
template<typename FCompType>
FCompType GetFCompute(const nnvm::Op* op, const std::string& name,
                      const Context& ctx) {
  static auto& fcompute_cpu = nnvm::Op::GetAttr<FCompType>(name + "<cpu>");
  static auto& fcompute_gpu = nnvm::Op::GetAttr<FCompType>(name + "<gpu>");

  if (ctx.dev_mask() == cpu::kDevMask) {
    return fcompute_cpu.get(op, nullptr);
  } else if (ctx.dev_mask() == gpu::kDevMask) {
    return fcompute_gpu.get(op, nullptr);
  } else {
    LOG(FATAL) << "Unknown device mask";
    return nullptr;
  }
}
}  // namespace common
}  // namespace mxnet

#endif  // MXNET_COMMON_UTILS_H_