24 #ifndef MXNET_COMMON_UTILS_H_ 25 #define MXNET_COMMON_UTILS_H_ 27 #include <dmlc/logging.h> 40 #include <type_traits> 49 #include "../operator/mxnet_op.h" 50 #if MXNET_USE_MKLDNN == 1 51 #include "../operator/nn/mkldnn/mkldnn_base-inl.h" 54 #if defined(_WIN32) || defined(_WIN64) || defined(__WINDOWS__) 64 #if defined(_WIN32) || defined(_WIN64) || defined(__WINDOWS__) 74 template<
typename DType,
typename IType>
77 if (indptr[i+1] < 0 || indptr[i+1] < indptr[i] ||
78 (i == 0 && indptr[i] != 0) ||
79 (i == end - 1 && indptr[end] != idx_size))
89 template<
typename DType,
typename IType,
typename RType>
92 for (RType j = indptr[i]; j < indptr[i+1]; j++) {
93 if (idx[j] >= ncols || idx[j] < 0 ||
94 (j < indptr[i+1] - 1 && idx[j] >= idx[j+1])) {
107 template<
typename DType,
typename IType>
110 if ((i < end && idx[i+1] <= idx[i])
111 || idx[i] < 0 || idx[i] >= nrows)
116 template<
typename xpu>
118 const TBlob &err_cpu,
const bool full_check);
128 template<
typename xpu>
130 const TBlob &err_cpu,
const bool full_check) {
131 using namespace op::mxnet_op;
133 <<
"CheckFormatCSRImpl is for CSRNDArray";
138 if ((shape.ndim() != 2) ||
139 (idx_shape.
ndim() != 1 || indptr_shape.
ndim() != 1 || storage_shape.
ndim() != 1) ||
140 (indptr_shape[0] != shape[0] + 1) ||
141 (idx_shape[0] != storage_shape[0])) {
143 DType* err = err_cpu.dptr<DType>();
150 MSHADOW_IDX_TYPE_SWITCH(input.aux_type(csr::kIndPtr), RType, {
151 MSHADOW_IDX_TYPE_SWITCH(input.aux_type(csr::kIdx), IType, {
152 mshadow::Stream<xpu> *s = rctx.get_stream<xpu>();
153 NDArray ret_xpu = NDArray(mshadow::Shape1(1),
154 rctx.get_ctx(), false, err_cpu.type_flag_);
155 TBlob val_xpu = ret_xpu.data();
156 Kernel<set_to_int<kNormalErr>, xpu>::Launch(s, val_xpu.Size(), val_xpu.dptr<DType>());
157 Kernel<csr_indptr_check, xpu>::Launch(s, indptr_shape[0] - 1, val_xpu.dptr<DType>(),
158 input.aux_data(csr::kIndPtr).dptr<RType>(),
159 indptr_shape[0] - 1, idx_shape[0]);
161 if (idx_shape[0] != 0) {
162 Kernel<csr_idx_check, xpu>::Launch(s, indptr_shape[0] - 1, val_xpu.dptr<DType>(),
163 input.aux_data(csr::kIdx).dptr<IType>(),
164 input.aux_data(csr::kIndPtr).dptr<RType>(), shape[1]);
166 mshadow::Copy(err_cpu.get<cpu, 1, DType>(),
167 val_xpu.get<xpu, 1, DType>(s), s);
182 template<
typename xpu>
184 const TBlob &err_cpu,
const bool full_check) {
185 using namespace op::mxnet_op;
187 <<
"CheckFormatRSPImpl is for RSPNDArray";
191 DType* err = err_cpu.dptr<DType>();
196 if (idx_shape[0] == 0) {
201 MSHADOW_IDX_TYPE_SWITCH(input.aux_type(rowsparse::kIdx), IType, {
202 mshadow::Stream<xpu> *s = rctx.get_stream<xpu>();
203 NDArray ret_xpu = NDArray(mshadow::Shape1(1),
204 rctx.get_ctx(), false, err_cpu.type_flag_);
205 TBlob val_xpu = ret_xpu.data();
206 Kernel<set_to_int<kNormalErr>, xpu>::Launch(s, val_xpu.Size(), val_xpu.dptr<DType>());
208 Kernel<rsp_idx_check, xpu>::Launch(s, idx_shape[0],
209 val_xpu.dptr<DType>(), input.aux_data(rowsparse::kIdx).dptr<IType>(),
210 idx_shape[0] - 1, input.shape()[0]);
211 mshadow::Copy(err_cpu.get<cpu, 1, DType>(),
212 val_xpu.get<xpu, 1, DType>(s), s);
218 template<
typename xpu>
220 const TBlob &err_cpu,
const bool full_check) {
223 CheckFormatCSRImpl<xpu>(rctx, input, err_cpu, full_check);
225 CheckFormatRSPImpl<xpu>(rctx, input, err_cpu, full_check);
229 LOG(FATAL) <<
"Unknown storage type " << stype;
236 template<
typename xpu>
239 const TBlob& idx_data,
245 template<
typename xpu>
253 if (!vstorage.empty()) {
254 for (
const auto& i : vstorage) {
255 if (i != stype)
return false;
273 if (!vstorage.empty()) {
275 for (
const auto i : vstorage) {
278 }
else if (i == stype2) {
285 *has_both = has == 3;
297 if (!ndarrays.empty()) {
298 for (
const auto& nd : ndarrays) {
299 if (nd.storage_type() != stype) {
318 if (!ndarrays.empty()) {
320 for (
const auto& nd : ndarrays) {
322 if (stype == stype1) {
324 }
else if (stype == stype2) {
331 *has_both = has == 3;
343 if (!ndarrays.empty()) {
344 for (
const auto& nd : ndarrays) {
345 if (nd.storage_type() == stype) {
358 if (!ndstypes.empty()) {
359 for (
const auto& ndstype : ndstypes) {
360 if (ndstype == stype) {
374 return "fcompute_ex";
376 return "fcompute_fallback";
415 const std::string& attr_name,
416 std::string default_val =
"") {
417 if (attrs.
dict.find(attr_name) == attrs.
dict.end()) {
420 return attrs.
dict.at(attr_name);
426 const std::vector<int>& in_attrs,
427 const std::vector<int>& out_attrs) {
428 std::ostringstream os;
429 os <<
"operator = " << attrs.
op->
name 430 <<
"\ninput storage types = [";
431 for (
const int attr : in_attrs) {
435 <<
"output storage types = [";
436 for (
const int attr : out_attrs) {
441 for (
auto kv : attrs.
dict) {
442 os <<
"\"" << kv.first <<
"\" : " << kv.second <<
", ";
452 const std::vector<NDArray>& inputs,
453 const std::vector<OpReqType>& req,
454 const std::vector<NDArray>& outputs) {
455 std::string result =
"";
456 std::vector<int> in_stypes;
457 std::vector<int> out_stypes;
458 in_stypes.reserve(inputs.size());
459 out_stypes.reserve(outputs.size());
460 auto xform = [](
const NDArray arr) ->
int {
return arr.storage_type(); };
461 std::transform(inputs.begin(), inputs.end(), std::back_inserter(in_stypes), xform);
462 std::transform(outputs.begin(), outputs.end(), std::back_inserter(out_stypes), xform);
468 inline void LogOnce(
const std::string& message) {
470 auto log_store = LogStore::Get();
471 if (log_store->find(message) == log_store->end()) {
472 LOG(INFO) << message;
473 log_store->insert(message);
481 const std::vector<int>* in_attrs,
482 const std::vector<int>* out_attrs) {
483 static bool log = dmlc::GetEnv(
"MXNET_STORAGE_FALLBACK_LOG_VERBOSE",
true);
486 std::ostringstream os;
487 const char* warning =
"\nThe operator with default storage type will be dispatched " 488 "for execution. You're seeing this warning message because the operator above is unable " 489 "to process the given ndarrays with specified storage types, context and parameter. " 490 "Temporary dense ndarrays are generated in order to execute the operator. " 491 "This does not affect the correctness of the programme. " 492 "You can set environment variable MXNET_STORAGE_FALLBACK_LOG_VERBOSE to " 493 "0 to suppress this warning.";
494 os <<
"\nStorage type fallback detected:\n" << op_str << warning;
496 #if MXNET_USE_MKLDNN == 1 497 if (!MKLDNNEnvSet())
common::LogOnce(
"MXNET_MKLDNN_ENABLED flag is off. " 498 "You can re-enable by setting MXNET_MKLDNN_ENABLED=1");
499 if (GetMKLDNNCacheSize() != -1)
common::LogOnce(
"MXNET_MKLDNN_CACHE_NUM is set." 500 "Should only be set if " 501 "your model has variable input shapes, " 502 "as cache size may grow unbounded");
509 return dmlc::GetEnv(
"MXNET_GPU_WORKER_NTHREADS", 2);
516 int num_match_color = dmlc::GetEnv(
"MXNET_EXEC_NUM_TEMP", 1);
520 template<
typename T,
typename V>
523 #pragma omp parallel for reduction(+:sum) 524 for (
int i = 0; i < n; ++i) {
537 template<
typename RandomIt,
typename Compare>
539 size_t grainsize,
const Compare& comp) {
540 if (len < grainsize) {
541 std::sort(first, first+len, comp);
543 std::thread thr(ParallelSortHelper<RandomIt, Compare>, first, len/2, grainsize, comp);
546 std::inplace_merge(first, first+len/2, first+len, comp);
559 template<
typename RandomIt,
typename Compare>
560 void ParallelSort(RandomIt first, RandomIt last,
size_t num_threads, Compare comp) {
561 const auto num = std::distance(first, last);
562 size_t grainsize = std::max(num / num_threads + 5, static_cast<size_t>(1024*16));
575 template<
typename RandomIt>
578 std::less<
typename std::iterator_traits<RandomIt>::value_type>());
616 template <
class T,
size_t kSize>
637 template <
class T,
class... Args>
639 return std::unique_ptr<T>(
new T(std::forward<Args>(args)...));
653 using U =
typename std::remove_extent<T>::type;
654 return std::unique_ptr<T>(
new U[n]{});
665 template <
class T,
class... Args>
668 template<
typename FCompType>
671 static auto& fcompute_cpu = nnvm::Op::GetAttr<FCompType>(name +
"<cpu>");
672 static auto& fcompute_gpu = nnvm::Op::GetAttr<FCompType>(name +
"<gpu>");
675 return fcompute_cpu.get(op,
nullptr);
677 return fcompute_gpu.get(op,
nullptr);
679 LOG(FATAL) <<
"Unknown device mask " << ctx.
dev_mask();
687 template <
typename T>
689 return std::is_integral<T>::value ?
690 std::numeric_limits<T>::max():
691 size_t(2) << (std::numeric_limits<T>::digits - 1);
695 constexpr
size_t MaxIntegerValue<mshadow::half::half_t>() {
696 return size_t(2) << 10;
700 constexpr
size_t MaxIntegerValue<mshadow::bfloat::bf16_t>() {
701 return size_t(2) << 14;
720 const Context &ctx,
const int dtype) {
723 NDArray ret(shape, ctx,
false, dtype);
728 return NDArray(stype, shape, ctx,
true, dtype);
735 const Context &ctx,
const int dtype,
736 std::vector<NDArray> *vec) {
739 vec->emplace_back(shape, ctx,
false, dtype);
743 vec->emplace_back(stype, shape, ctx,
true, dtype);
751 template<
typename DType>
753 static index_t copy_block_size = dmlc::GetEnv(
"MXNET_CPU_PARALLEL_SIZE", 200000);
754 if (size >= copy_block_size) {
755 #pragma omp parallel for num_threads(engine::OpenMP::Get()->GetRecommendedOMPThreadCount()) 756 for (
index_t i = 0; i < size; ++i) {
760 std::memcpy(dst, src,
sizeof(DType) * size);
767 template<
typename DType>
769 static index_t add_block_size = dmlc::GetEnv(
"MXNET_CPU_PARALLEL_SIZE", 200000);
770 if (size >= add_block_size) {
771 #pragma omp parallel for num_threads(engine::OpenMP::Get()->GetRecommendedOMPThreadCount()) 772 for (
index_t i = 0; i < size; ++i) {
776 for (
index_t i = 0; i < size; ++i) {
801 if (shape->
ndim() == 0) {
804 for (
int j = 0; j < shape->
ndim(); ++j) {
805 if ((*shape)[j] == 0) {
813 for (
size_t i = 0; i < shapes->size(); ++i) {
826 for (
int j = 0; j < shape->
ndim(); ++j) {
835 for (
size_t i = 0; i < shapes->size(); ++i) {
841 size_t nid,
const std::function<
void(
const char *,
const char *,
void *)>
846 size_t nid,
const std::function<
void(
const char *,
const char *,
void *)>
860 const int ndim = src.
ndim();
862 for (
int i = 0; i < ndim; ++i) {
866 CHECK(axes[i] >= 0 && axes[i] < ndim) <<
"axes[" << i <<
"]=" 867 << axes[i] <<
" exceeds the range [" 868 << 0 <<
", " << ndim <<
")";
883 if (type1 == type2)
return type1;
893 return is_float(type1) ? type1 : type2;
903 <<
"1 is UInt8 and 1 is Int8 should not get here";
925 if (dtype != -1)
return dtype;
933 #endif // MXNET_COMMON_UTILS_H_
std::vector< std::string > ListOutputNames() const
List the names of outputs for this symbol.
static MSHADOW_XINLINE void Map(int i, DType *out, const IType *idx, const RType *indptr, const nnvm::dim_t ncols)
Definition: utils.h:90
NDArrayStorageType
Definition: ndarray.h:60
void CheckFormatCSRImpl(const RunContext &rctx, const NDArray &input, const TBlob &err_cpu, const bool full_check)
Check the validity of CSRNDArray.
Definition: utils.h:129
Engine that schedules all the operations according to dependency.
void CheckFormatImpl(const RunContext &rctx, const NDArray &input, const TBlob &err_cpu, const bool full_check)
Definition: utils.h:219
int GetNumThreadsPerGPU()
Definition: utils.h:507
void SparseRetainOpForwardRspWrapper(mshadow::Stream< xpu > *s, const NDArray &input_nd, const TBlob &idx_data, const OpReqType req, NDArray *output_nd)
Pick rows specified by user input index array from a row sparse ndarray and save them in the output s...
const mxnet::TShape & aux_shape(size_t index) const
get the shape of aux_data(index)
Definition: ndarray.h:241
The attributes of the current operation node. Usually are additional parameters like axis...
Definition: node.h:119
std::vector< NodeEntry > outputs
output entries contained in the symbol
Definition: symbolic.h:73
std::string operator_stype_string(const nnvm::NodeAttrs &attrs, const int dev_mask, const std::vector< int > &in_attrs, const std::vector< int > &out_attrs)
get string representation of the operator stypes
Definition: utils.h:424
namespace of mxnet
Definition: api_registry.h:33
DeviceType dev_mask() const
Get corresponding device mask.
Definition: base.h:119
void KnownBound
Type of T.
Definition: utils.h:621
void ParallelSortHelper(RandomIt first, size_t len, size_t grainsize, const Compare &comp)
Helper function for ParallelSort. DO NOT call this function directly. Use the interface ParallelSort ...
Definition: utils.h:538
int64_t dim_t
data type to store dim size
Definition: tuple.h:38
int type_flag_
type flag of the tensor blob
Definition: tensor_blob.h:73
FCompType GetFCompute(const nnvm::Op *op, const std::string &name, const Context &ctx)
Definition: utils.h:669
V ParallelAccumulate(const T *a, const int n, V start)
Definition: utils.h:521
void LogOnce(const std::string &message)
log message once. Intended for storage fallback warning messages.
Definition: utils.h:468
int GetDefaultDtype()
Definition: utils.h:918
Context ctx
base Context
Definition: base.h:351
A threadlocal store to store threadlocal variables. Will return a thread local singleton of type T...
Definition: thread_local.h:35
execution time context. The information needed in runtime for actual execution.
Definition: base.h:349
DispatchMode
the dispatch mode of the operator
Definition: op_attr_types.h:122
void ExecuteMonInputCallback(const nnvm::IndexedGraph &idx, const std::vector< NDArray *> &state_arrays, size_t nid, const std::function< void(const char *, const char *, void *)> &monitor_callback)
std::string stype_string(const int x)
get string representation of storage_type
Definition: utils.h:387
Graph node data structure.
Data structures that can appear in graph attributes.
void CastStorageDispatch(const OpContext &ctx, const NDArray &input, const NDArray &output)
void CheckFormatWrapper(const RunContext &rctx, const NDArray &input, const TBlob &err_cpu, const bool full_check)
void ParallelSort(RandomIt first, RandomIt last, size_t num_threads, Compare comp)
Sort the elements in the range [first, last) into the ascending order defined by the comparator comp...
Definition: utils.h:560
All the possible information needed by Operator.Forward and Backward This is the superset of RunConte...
Definition: op_attr_types.h:66
bool ContainsOnlyStorage(const StorageTypeVector &vstorage, const NDArrayStorageType stype)
returns true if all storage types in vstorage are the same as target stype. false is returned for emp...
Definition: utils.h:251
std::string operator_string(const nnvm::NodeAttrs &attrs, const OpContext &ctx, const std::vector< NDArray > &inputs, const std::vector< OpReqType > &req, const std::vector< NDArray > &outputs)
get string representation of the operator
Definition: utils.h:450
static const int kDevMask
device flag number, identifies this device
Definition: tensor.h:50
std::mt19937 RANDOM_ENGINE
Random Engine.
Definition: utils.h:584
#define MSHADOW_XINLINE
Definition: base.h:230
std::vector< mxnet::TShape > ShapeVector
The result holder of shape of each NodeEntry in the graph.
Definition: tuple.h:819
Indices of RSPNDArray should be non-negative, less than the size of first dimension and in ascending ...
Definition: utils.h:106
const Op * op
The operator this node uses. For place holder variable, op == nullptr.
Definition: node.h:124
Auxiliary data structure to index a graph. It maps Nodes in the graph to consecutive integers node_id...
Definition: graph.h:107
NDArrayStorageType storage_type() const
Definition: ndarray.h:321
std::string dispatch_mode_string(const DispatchMode x)
get string representation of dispatch_mode
Definition: utils.h:369
std::string dev_type_string(const int dev_type)
get string representation of device type
Definition: utils.h:400
bool is_np_default_dtype() const
return current numpy default dtype compatibility status.
Definition: imperative.h:123
Helper for non-array type T.
Definition: utils.h:595
bool dim_size_is_known(const dim_t dim_size)
Definition: tuple.h:419
void ParallelAdd(DType *dst, const DType *src, index_t size)
Definition: utils.h:768
helper::UniqueIf< T >::SingleObject MakeUnique(Args &&... args)
Constructs an object of type T and wraps it in a std::unique_ptr.
Definition: utils.h:638
std::string name
name of the operator
Definition: op.h:106
bool is_int(const int dtype)
Definition: utils.h:877
an entry that represents output data from a node
Definition: node.h:51
int np_binary_out_infer_type(const int type1, const int type2)
Definition: utils.h:910
IndPtr should be non-negative, in non-decreasing order, start with 0 and end with value equal with si...
Definition: utils.h:73
std::unique_ptr< T[]> UnknownBound
Type of T.
Definition: utils.h:610
Configuration of nnvm as well as basic data structure.
OpReqType
operation request type to Forward and Backward
Definition: op_attr_types.h:45
static const int kDevMask
device flag number, identifies this device
Definition: tensor.h:43
bool ContainsStorageType(const std::vector< NDArray > &ndarrays, const NDArrayStorageType stype)
returns true if storage type of any array in ndarrays is the same as the target stype. false is returned for empty inputs.
Definition: utils.h:341
constexpr size_t MaxIntegerValue()
Return the max integer value representable in the type T without loss of precision.
Definition: utils.h:688
void ExecuteMonOutputCallback(const nnvm::IndexedGraph &idx, const std::vector< NDArray *> &state_arrays, size_t nid, const std::function< void(const char *, const char *, void *)> &monitor_callback)
RunContext run_ctx
RunContext related resources.
Definition: op_attr_types.h:72
static Imperative * Get()
std::unordered_map< std::string, std::string > dict
The dictionary representation of attributes.
Definition: node.h:128
size_t current_process_id()
Definition: utils.h:67
std::unique_ptr< T > SingleObject
Type of T.
Definition: utils.h:599
A Shape class that is used to represent shape of each tensor.
Definition: tuple.h:437
void CheckFormatRSPImpl(const RunContext &rctx, const NDArray &input, const TBlob &err_cpu, const bool full_check)
Check the validity of RowSparseNDArray.
Definition: utils.h:183
const mxnet::TShape & storage_shape() const
Definition: ndarray.h:229
int GetExecNumMatchColor()
Definition: utils.h:514
header to handle OpenMP compatibility issues
#define MSHADOW_TYPE_SWITCH(type, DType,...)
Definition: base.h:1074
std::string attr_value_string(const nnvm::NodeAttrs &attrs, const std::string &attr_name, std::string default_val="")
Definition: utils.h:414
static MSHADOW_XINLINE void Map(int i, DType *out, const IType *idx, const nnvm::dim_t end, const nnvm::dim_t nrows)
Definition: utils.h:108
mxnet::TShape CanonicalizeAxes(const mxnet::TShape &src)
Definition: utils.h:858
bool ndim_is_known(const int ndim)
Definition: tuple.h:413
int get_more_precise_type(const int type1, const int type2)
Definition: utils.h:882
const mxnet::TShape & shape() const
Definition: ndarray.h:221
mshadow::index_t index_t
index type, usually unsigned
Definition: base.h:94
MSHADOW_XINLINE int ilog2ul(size_t a)
Definition: utils.h:704
void LogStorageFallback(const nnvm::NodeAttrs &attrs, const int dev_mask, const std::vector< int > *in_attrs, const std::vector< int > *out_attrs)
log storage fallback event
Definition: utils.h:479
Context information about the execution environment.
Definition: base.h:101
Indices should be non-negative, less than the number of columns and in ascending order per row...
Definition: utils.h:88
bool is_float(const int dtype)
Definition: utils.h:873
int ndim() const
Definition: tuple.h:217
ndarray interface
Definition: ndarray.h:81
void ParallelCopy(DType *dst, const DType *src, index_t size)
parallelize copy by OpenMP.
Definition: utils.h:752
NDArray InitZeros(const NDArrayStorageType stype, const mxnet::TShape &shape, const Context &ctx, const int dtype)
Return an NDArray of all zeros.
Definition: utils.h:719
void EmplaceBackZeros(const NDArrayStorageType stype, const mxnet::TShape &shape, const Context &ctx, const int dtype, std::vector< NDArray > *vec)
Helper to add a NDArray of zeros to a std::vector.
Definition: utils.h:734
MSHADOW_XINLINE int ilog2ui(unsigned int a)
Definition: utils.h:710
static MSHADOW_XINLINE void Map(int i, DType *out, const IType *indptr, const nnvm::dim_t end, const nnvm::dim_t idx_size)
Definition: utils.h:75
Symbol is a helper class used to represent the operator node in Graph.
Definition: symbolic.h:50
void ConvertToLegacyShape(mxnet::TShape *shape)
This function is used to convert shapes returned by the infer shape functions/pass to the legacy s...
Definition: utils.h:822
std::vector< int > StorageTypeVector
The result holder of storage type of each NodeEntry in the graph.
Definition: graph_attr_types.h:45
Operator structure.
Definition: op.h:103
tensor blob class that can be used to hold tensor of any dimension, any device and any data type...
Definition: tensor_blob.h:65
void ConvertToNumpyShape(mxnet::TShape *shape)
If numpy compatibility is turned off (default), the shapes passed in by users follow the legacy shape...
Definition: utils.h:800
computation stream structure, used for asynchronous computations
Definition: tensor.h:383