25 #ifndef MXNET_COMMON_UTILS_H_    26 #define MXNET_COMMON_UTILS_H_    28 #include <dmlc/logging.h>    30 #include <nnvm/graph.h>    35 #include <nnvm/graph_attr_types.h>    39 #include <type_traits>    48 #include "../operator/mxnet_op.h"    49 #if MXNET_USE_MKLDNN == 1    50 #include "../operator/nn/mkldnn/mkldnn_base-inl.h"    61   template<
typename DType, 
typename IType>
    62   MSHADOW_XINLINE 
static void Map(
int i, DType* out, 
const IType* indptr,
    64     if (indptr[i+1] < 0 || indptr[i+1] < indptr[i] ||
    65         (i == 0 && indptr[i] != 0) ||
    66         (i == end - 1 && indptr[end] != idx_size))
    76   template<
typename DType, 
typename IType, 
typename RType>
    77   MSHADOW_XINLINE 
static void Map(
int i, DType* out, 
const IType* idx,
    79     for (RType j = indptr[i]; j < indptr[i+1]; j++) {
    80       if (idx[j] >= ncols || idx[j] < 0 ||
    81           (j < indptr[i+1] - 1 && idx[j] >= idx[j+1])) {
    94   template<
typename DType, 
typename IType>
    95   MSHADOW_XINLINE 
static void Map(
int i, DType* out, 
const IType* idx,
    97     if ((i < end && idx[i+1] <= idx[i])
    98         || idx[i] < 0 || idx[i] >= nrows)
   103 template<
typename xpu>
   105                         const TBlob &err_cpu, 
const bool full_check);
   115 template<
typename xpu>
   117                         const TBlob &err_cpu, 
const bool full_check) {
   118   using namespace op::mxnet_op;
   120           << 
"CheckFormatCSRImpl is for CSRNDArray";
   125   if ((shape.ndim() != 2) ||
   126       (idx_shape.ndim() != 1 || indptr_shape.ndim() != 1 || storage_shape.ndim() != 1) ||
   127       (indptr_shape[0] != shape[0] + 1) ||
   128       (idx_shape[0] != storage_shape[0])) {
   129      MSHADOW_TYPE_SWITCH(err_cpu.
type_flag_, DType, {
   130        DType* err = err_cpu.dptr<DType>();
   136     MSHADOW_TYPE_SWITCH(err_cpu.
type_flag_, DType, {
   137       MSHADOW_IDX_TYPE_SWITCH(input.aux_type(csr::kIndPtr), RType, {
   138         MSHADOW_IDX_TYPE_SWITCH(input.aux_type(csr::kIdx), IType, {
   139           mshadow::Stream<xpu> *s = rctx.get_stream<xpu>();
   140           NDArray ret_xpu = NDArray(mshadow::Shape1(1),
   141                                     rctx.get_ctx(), false, err_cpu.type_flag_);
   142           TBlob val_xpu = ret_xpu.data();
   143           Kernel<set_to_int<kNormalErr>, xpu>::Launch(s, val_xpu.Size(), val_xpu.dptr<DType>());
   144           Kernel<csr_indptr_check, xpu>::Launch(s, indptr_shape[0] - 1, val_xpu.dptr<DType>(),
   145             input.aux_data(csr::kIndPtr).dptr<RType>(),
   146             indptr_shape[0] - 1, idx_shape[0]);
   148           if (idx_shape[0] != 0) {
   149             Kernel<csr_idx_check, xpu>::Launch(s, indptr_shape[0] - 1, val_xpu.dptr<DType>(),
   150               input.aux_data(csr::kIdx).dptr<IType>(),
   151               input.aux_data(csr::kIndPtr).dptr<RType>(), shape[1]);
   153           mshadow::Copy(err_cpu.get<cpu, 1, DType>(),
   154                         val_xpu.get<xpu, 1, DType>(s), s);
   169 template<
typename xpu>
   171                         const TBlob &err_cpu, 
const bool full_check) {
   172   using namespace op::mxnet_op;
   174           << 
"CheckFormatRSPImpl is for RSPNDArray";
   177     MSHADOW_TYPE_SWITCH(err_cpu.
type_flag_, DType, {
   178       DType* err = err_cpu.dptr<DType>();
   183   if (idx_shape[0] == 0) {
   187     MSHADOW_TYPE_SWITCH(err_cpu.
type_flag_, DType, {
   188       MSHADOW_IDX_TYPE_SWITCH(input.aux_type(rowsparse::kIdx), IType, {
   189         mshadow::Stream<xpu> *s = rctx.get_stream<xpu>();
   190         NDArray ret_xpu = NDArray(mshadow::Shape1(1),
   191                                   rctx.get_ctx(), false, err_cpu.type_flag_);
   192         TBlob val_xpu = ret_xpu.data();
   193         Kernel<set_to_int<kNormalErr>, xpu>::Launch(s, val_xpu.Size(), val_xpu.dptr<DType>());
   195         Kernel<rsp_idx_check, xpu>::Launch(s, idx_shape[0],
   196           val_xpu.dptr<DType>(), input.aux_data(rowsparse::kIdx).dptr<IType>(),
   197           idx_shape[0] - 1, input.shape()[0]);
   198         mshadow::Copy(err_cpu.get<cpu, 1, DType>(),
   199                       val_xpu.get<xpu, 1, DType>(s), s);
   205 template<
typename xpu>
   207                      const TBlob &err_cpu, 
const bool full_check) {
   210     CheckFormatCSRImpl<xpu>(rctx, input, err_cpu, full_check);
   212     CheckFormatRSPImpl<xpu>(rctx, input, err_cpu, full_check);
   216     LOG(FATAL) << 
"Unknown storage type " << stype;
   223 template<
typename xpu>
   226                                      const TBlob& idx_data,
   232 template<
typename xpu>
   240   if (!vstorage.empty()) {
   241     for (
const auto& i : vstorage) {
   242       if (i != stype) 
return false;
   260   if (!vstorage.empty()) {
   262     for (
const auto i : vstorage) {
   265       } 
else if (i == stype2) {
   272       *has_both = has == 3;
   284   if (!ndarrays.empty()) {
   285     for (
const auto& nd : ndarrays) {
   286       if (nd.storage_type() != stype) {
   305   if (!ndarrays.empty()) {
   307     for (
const auto& nd : ndarrays) {
   309       if (stype == stype1) {
   311       } 
else if (stype == stype2) {
   318       *has_both = has == 3;
   330   if (!ndarrays.empty()) {
   331     for (
const auto& nd : ndarrays) {
   332       if (nd.storage_type() == stype) {
   345   if (!ndstypes.empty()) {
   346     for (
const auto& ndstype : ndstypes) {
   347       if (ndstype == stype) {
   361       return "fcompute_ex";
   363       return "fcompute_fallback";
   404                                          const std::vector<int>& in_attrs,
   405                                          const std::vector<int>& out_attrs) {
   406   std::ostringstream os;
   407   os << 
"operator = " << attrs.op->name
   408      << 
"\ninput storage types = [";
   409   for (
const int attr : in_attrs) {
   413      << 
"output storage types = [";
   414   for (
const int attr : out_attrs) {
   419   for (
auto kv : attrs.dict) {
   420     os << 
"\"" << kv.first << 
"\" : " << kv.second << 
", ";
   430                                   const std::vector<NDArray>& inputs,
   431                                   const std::vector<OpReqType>& req,
   432                                   const std::vector<NDArray>& outputs) {
   433   std::string result = 
"";
   434   std::vector<int> in_stypes;
   435   std::vector<int> out_stypes;
   436   in_stypes.reserve(inputs.size());
   437   out_stypes.reserve(outputs.size());
   438   auto xform = [](
const NDArray arr) -> 
int { 
return arr.storage_type(); };
   439   std::transform(inputs.begin(), inputs.end(), std::back_inserter(in_stypes), xform);
   440   std::transform(outputs.begin(), outputs.end(), std::back_inserter(out_stypes), xform);
   446 inline void LogOnce(
const std::string& message) {
   447   typedef dmlc::ThreadLocalStore<std::unordered_set<std::string>> LogStore;
   448   auto log_store = LogStore::Get();
   449   if (log_store->find(message) == log_store->end()) {
   450     LOG(INFO) << message;
   451     log_store->insert(message);
   459                                const std::vector<int>* in_attrs,
   460                                const std::vector<int>* out_attrs) {
   461   static bool log = dmlc::GetEnv(
"MXNET_STORAGE_FALLBACK_LOG_VERBOSE", 
true);
   464   std::ostringstream os;
   465   const char* warning = 
"\nThe operator with default storage type will be dispatched "   466     "for execution. You're seeing this warning message because the operator above is unable "   467     "to process the given ndarrays with specified storage types, context and parameter. "   468     "Temporary dense ndarrays are generated in order to execute the operator. "   469     "This does not affect the correctness of the programme. "   470     "You can set environment variable MXNET_STORAGE_FALLBACK_LOG_VERBOSE to "   471     "0 to suppress this warning.";
   472   os << 
"\nStorage type fallback detected:\n" << op_str << warning;
   474 #if MXNET_USE_MKLDNN == 1   475   if (!MKLDNNEnvSet()) 
common::LogOnce(
"MXNET_MKLDNN_ENABLED flag is off. "   476                                        "You can re-enable by setting MXNET_MKLDNN_ENABLED=1");
   477   if (GetMKLDNNCacheSize() != -1) 
common::LogOnce(
"MXNET_MKLDNN_CACHE_NUM is set."   478                                        "Should only be set if "   479                                        "your model has variable input shapes, "   480                                        "as cache size may grow unbounded");
   487   return dmlc::GetEnv(
"MXNET_GPU_WORKER_NTHREADS", 2);
   494   int num_match_color = dmlc::GetEnv(
"MXNET_EXEC_NUM_TEMP", 1);
   498 template<
typename T, 
typename V>
   501 #pragma omp parallel for reduction(+:sum)   502   for (
int i = 0; i < n; ++i) {
   515 template<
typename RandomIt, 
typename Compare>
   517                         size_t grainsize, 
const Compare& comp) {
   518   if (len < grainsize) {
   521     std::thread thr(ParallelSortHelper<RandomIt, Compare>, first, len/2, grainsize, comp);
   524     std::inplace_merge(first, first+len/2, first+len, comp);
   537 template<
typename RandomIt, 
typename Compare>
   538 void ParallelSort(RandomIt first, RandomIt last, 
size_t num_threads, Compare comp) {
   539   const auto num = std::distance(first, last);
   540   size_t grainsize = 
std::max(num / num_threads + 5, static_cast<size_t>(1024*16));
   553 template<
typename RandomIt>
   556                std::less<
typename std::iterator_traits<RandomIt>::value_type>());
   594 template <
class T, 
size_t kSize>
   615 template <
class T, 
class... Args>
   617   return std::unique_ptr<T>(
new T(std::forward<Args>(args)...));
   631   using U = 
typename std::remove_extent<T>::type;
   632   return std::unique_ptr<T>(
new U[n]{});
   643 template <
class T, 
class... Args>
   646 template<
typename FCompType>
   649   static auto& fcompute_cpu = nnvm::Op::GetAttr<FCompType>(name + 
"<cpu>");
   650   static auto& fcompute_gpu = nnvm::Op::GetAttr<FCompType>(name + 
"<gpu>");
   652   if (ctx.
dev_mask() == cpu::kDevMask) {
   653     return fcompute_cpu.get(op, 
nullptr);
   654   } 
else if (ctx.
dev_mask() == gpu::kDevMask) {
   655     return fcompute_gpu.get(op, 
nullptr);
   657     LOG(FATAL) << 
"Unknown device mask";
   665 template <
typename T>
   667   return std::is_integral<T>::value ?
   669     size_t(2) << (std::numeric_limits<T>::digits - 1);
   673 constexpr 
size_t MaxIntegerValue<mshadow::half::half_t>() {
   674   return size_t(2) << 10;
   693                          const Context &ctx, 
const int dtype) {
   696     NDArray ret(shape, ctx, 
false, dtype);
   701   return NDArray(stype, shape, ctx, 
true, dtype);
   708                              const Context &ctx, 
const int dtype,
   709                              std::vector<NDArray> *vec) {
   712     vec->emplace_back(shape, ctx, 
false, dtype);
   716     vec->emplace_back(stype, shape, ctx, 
true, dtype);
   724 template<
typename DType>
   726   static index_t copy_block_size = dmlc::GetEnv(
"MXNET_CPU_PARALLEL_COPY_SIZE", 200000);
   727   if (size >= copy_block_size) {
   728     #pragma omp parallel for num_threads(engine::OpenMP::Get()->GetRecommendedOMPThreadCount())   729     for (
index_t i = 0; i < size; ++i) {
   733     std::memcpy(dst, src, 
sizeof(DType) * size);
   739 #endif  // MXNET_COMMON_UTILS_H_ 
static MSHADOW_XINLINE void Map(int i, DType *out, const IType *idx, const RType *indptr, const nnvm::dim_t ncols)
Definition: utils.h:77
 
NDArrayStorageType
Definition: ndarray.h:61
 
void CheckFormatCSRImpl(const RunContext &rctx, const NDArray &input, const TBlob &err_cpu, const bool full_check)
Check the validity of CSRNDArray. 
Definition: utils.h:116
 
DeviceType dev_mask() const 
Get corresponding device mask. 
Definition: base.h:151
 
NDArrayStorageType storage_type() const 
Definition: ndarray.h:313
 
Engine that schedules all the operations according to dependency. 
 
void CheckFormatImpl(const RunContext &rctx, const NDArray &input, const TBlob &err_cpu, const bool full_check)
Definition: utils.h:206
 
int GetNumThreadsPerGPU()
Definition: utils.h:485
 
void SparseRetainOpForwardRspWrapper(mshadow::Stream< xpu > *s, const NDArray &input_nd, const TBlob &idx_data, const OpReqType req, NDArray *output_nd)
Pick rows specified by user input index array from a row sparse ndarray and save them in the output s...
 
const TShape & storage_shape() const 
Definition: ndarray.h:221
 
std::string operator_stype_string(const nnvm::NodeAttrs &attrs, const int dev_mask, const std::vector< int > &in_attrs, const std::vector< int > &out_attrs)
get string representation of the operator stypes 
Definition: utils.h:402
 
namespace of mxnet 
Definition: base.h:118
 
Additional operator attributes beside the ones provided by NNVM. 
 
void KnownBound
Type of T. 
Definition: utils.h:599
 
void ParallelSortHelper(RandomIt first, size_t len, size_t grainsize, const Compare &comp)
Helper function for ParallelSort. DO NOT call this function directly. Use the interface ParallelSort ...
Definition: utils.h:516
 
int type_flag_
type flag of the tensor blob 
Definition: tensor_blob.h:74
 
FCompType GetFCompute(const nnvm::Op *op, const std::string &name, const Context &ctx)
Definition: utils.h:647
 
V ParallelAccumulate(const T *a, const int n, V start)
Definition: utils.h:499
 
void LogOnce(const std::string &message)
log message once. Intended for storage fallback warning messages. 
Definition: utils.h:446
 
nnvm::TShape TShape
Shape data structure used to record shape information. 
Definition: base.h:128
 
Context ctx
base Context 
Definition: base.h:259
 
execution time context. The information needed in runtime for actual execution. 
Definition: base.h:257
 
DispatchMode
the dispatch mode of the operator 
Definition: op_attr_types.h:113
 
NDArray InitZeros(const NDArrayStorageType stype, const TShape &shape, const Context &ctx, const int dtype)
Return an NDArray of all zeros. 
Definition: utils.h:692
 
std::string stype_string(const int x)
get string representation of storage_type 
Definition: utils.h:374
 
void CastStorageDispatch(const OpContext &ctx, const NDArray &input, const NDArray &output)
 
void CheckFormatWrapper(const RunContext &rctx, const NDArray &input, const TBlob &err_cpu, const bool full_check)
 
void ParallelSort(RandomIt first, RandomIt last, size_t num_threads, Compare comp)
Sort the elements in the range [first, last) into the ascending order defined by the comparator comp...
Definition: utils.h:538
 
All the possible information needed by Operator.Forward and Backward. This is the superset of RunContext.
Definition: op_attr_types.h:66
 
bool ContainsOnlyStorage(const StorageTypeVector &vstorage, const NDArrayStorageType stype)
returns true if all storage types in vstorage are the same as target stype. false is returned for empty inputs.
Definition: utils.h:238
 
std::string operator_string(const nnvm::NodeAttrs &attrs, const OpContext &ctx, const std::vector< NDArray > &inputs, const std::vector< OpReqType > &req, const std::vector< NDArray > &outputs)
get string representation of the operator 
Definition: utils.h:428
 
Symbol max(const std::string &symbol_name, Symbol data, dmlc::optional< Shape > axis=dmlc::optional< Shape >(), bool keepdims=false, bool exclude=false)
Definition: op.h:2756
 
std::mt19937 RANDOM_ENGINE
Random Engine. 
Definition: utils.h:562
 
void EmplaceBackZeros(const NDArrayStorageType stype, const TShape &shape, const Context &ctx, const int dtype, std::vector< NDArray > *vec)
Helper to add an NDArray of zeros to a std::vector.
Definition: utils.h:707
 
Indices of RSPNDArray should be non-negative, less than the size of the first dimension and in ascending order.
Definition: utils.h:93
 
const TShape & shape() const 
Definition: ndarray.h:213
 
std::string dispatch_mode_string(const DispatchMode x)
get string representation of dispatch_mode 
Definition: utils.h:356
 
std::string dev_type_string(const int dev_type)
get string representation of device type 
Definition: utils.h:387
 
Symbol log(const std::string &symbol_name, Symbol data)
Definition: op.h:2355
 
Helper for non-array type T. 
Definition: utils.h:573
 
Data structures that can appear in graph attributes. 
 
IndPtr should be non-negative, in non-decreasing order, start with 0 and end with a value equal to the size of the indices.
Definition: utils.h:60
 
std::unique_ptr< T[]> UnknownBound
Type of T. 
Definition: utils.h:588
 
OpReqType
operation request type to Forward and Backward 
Definition: op_attr_types.h:45
 
nnvm::Op Op
operator structure from NNVM 
Definition: base.h:130
 
bool ContainsStorageType(const std::vector< NDArray > &ndarrays, const NDArrayStorageType stype)
returns true if storage type of any array in ndarrays is the same as the target stype. false is returned for empty inputs. 
Definition: utils.h:328
 
constexpr size_t MaxIntegerValue()
Return the max integer value representable in the type T without loss of precision. 
Definition: utils.h:666
 
RunContext run_ctx
RunContext related resources. 
Definition: op_attr_types.h:72
 
int64_t dim_t
data type to store dim size 
Definition: c_api.h:62
 
Symbol sort(const std::string &symbol_name, Symbol data, dmlc::optional< int > axis=dmlc::optional< int >(-1), bool is_ascend=true)
Definition: op.h:3107
 
std::unique_ptr< T > SingleObject
Type of T. 
Definition: utils.h:577
 
void CheckFormatRSPImpl(const RunContext &rctx, const NDArray &input, const TBlob &err_cpu, const bool full_check)
Check the validity of RowSparseNDArray. 
Definition: utils.h:170
 
int GetExecNumMatchColor()
Definition: utils.h:492
 
static MSHADOW_XINLINE void Map(int i, DType *out, const IType *idx, const nnvm::dim_t end, const nnvm::dim_t nrows)
Definition: utils.h:95
 
Symbol min(const std::string &symbol_name, Symbol data, dmlc::optional< Shape > axis=dmlc::optional< Shape >(), bool keepdims=false, bool exclude=false)
Definition: op.h:2793
 
mshadow::index_t index_t
index type, usually unsigned
Definition: base.h:124
 
MSHADOW_XINLINE int ilog2ul(size_t a)
Definition: utils.h:677
 
void LogStorageFallback(const nnvm::NodeAttrs &attrs, const int dev_mask, const std::vector< int > *in_attrs, const std::vector< int > *out_attrs)
log storage fallback event 
Definition: utils.h:457
 
helper::UniqueIf< T >::SingleObject MakeUnique(Args &&...args)
Constructs an object of type T and wraps it in a std::unique_ptr. 
Definition: utils.h:616
 
Context information about the execution environment. 
Definition: base.h:133
 
Indices should be non-negative, less than the number of columns and in ascending order per row.
Definition: utils.h:75
 
const TShape & aux_shape(size_t index) const 
get the shape of aux_data(index) 
Definition: ndarray.h:233
 
ndarray interface 
Definition: ndarray.h:82
 
void ParallelCopy(DType *dst, const DType *src, index_t size)
parallelize copy by OpenMP. 
Definition: utils.h:725
 
MSHADOW_XINLINE int ilog2ui(unsigned int a)
Definition: utils.h:683
 
static MSHADOW_XINLINE void Map(int i, DType *out, const IType *indptr, const nnvm::dim_t end, const nnvm::dim_t idx_size)
Definition: utils.h:62
 
std::vector< int > StorageTypeVector
The result holder of storage type of each NodeEntry in the graph. 
Definition: graph_attr_types.h:45
 
Symbol sum(const std::string &symbol_name, Symbol data, dmlc::optional< Shape > axis=dmlc::optional< Shape >(), bool keepdims=false, bool exclude=false)
Definition: op.h:2567
 
tensor blob class that can be used to hold tensor of any dimension, any device and any data type...
Definition: tensor_blob.h:66