25 #ifndef MXNET_COMMON_UTILS_H_ 26 #define MXNET_COMMON_UTILS_H_ 28 #include <dmlc/logging.h> 30 #include <nnvm/graph.h> 35 #include <nnvm/graph_attr_types.h> 39 #include <type_traits> 47 #include "../operator/mxnet_op.h" 57 template<
typename DType,
typename IType>
58 MSHADOW_XINLINE
static void Map(
int i, DType* out,
const IType* indptr,
59 const nnvm::dim_t end,
const nnvm::dim_t idx_size) {
60 if (indptr[i+1] < 0 || indptr[i+1] < indptr[i] ||
61 (i == 0 && indptr[i] != 0) ||
62 (i == end - 1 && indptr[end] != idx_size))
72 template<
typename DType,
typename IType,
typename RType>
73 MSHADOW_XINLINE
static void Map(
int i, DType* out,
const IType* idx,
74 const RType* indptr,
const nnvm::dim_t ncols) {
75 for (RType j = indptr[i]; j < indptr[i+1]; j++) {
76 if (idx[j] >= ncols || idx[j] < 0 ||
77 (j < indptr[i+1] - 1 && idx[j] >= idx[j+1])) {
90 template<
typename DType,
typename IType>
91 MSHADOW_XINLINE
static void Map(
int i, DType* out,
const IType* idx,
92 const nnvm::dim_t end,
const nnvm::dim_t nrows) {
93 if ((i < end && idx[i+1] <= idx[i])
94 || idx[i] < 0 || idx[i] >= nrows)
99 template<
typename xpu>
101 const TBlob &err_cpu,
const bool full_check);
111 template<
typename xpu>
113 const TBlob &err_cpu,
const bool full_check) {
114 using namespace op::mxnet_op;
116 <<
"CheckFormatCSRImpl is for CSRNDArray";
121 if ((shape.ndim() != 2) ||
122 (idx_shape.ndim() != 1 || indptr_shape.ndim() != 1 || storage_shape.ndim() != 1) ||
123 (indptr_shape[0] != shape[0] + 1) ||
124 (idx_shape[0] != storage_shape[0])) {
125 MSHADOW_TYPE_SWITCH(err_cpu.
type_flag_, DType, {
126 DType* err = err_cpu.dptr<DType>();
132 MSHADOW_TYPE_SWITCH(err_cpu.
type_flag_, DType, {
133 MSHADOW_IDX_TYPE_SWITCH(input.aux_type(csr::kIndPtr), RType, {
134 MSHADOW_IDX_TYPE_SWITCH(input.aux_type(csr::kIdx), IType, {
135 mshadow::Stream<xpu> *s = rctx.get_stream<xpu>();
136 NDArray ret_xpu = NDArray(mshadow::Shape1(1),
137 rctx.get_ctx(), false, err_cpu.type_flag_);
138 TBlob val_xpu = ret_xpu.data();
139 Kernel<set_to_int<kNormalErr>, xpu>::Launch(s, val_xpu.Size(), val_xpu.dptr<DType>());
140 Kernel<csr_indptr_check, xpu>::Launch(s, indptr_shape[0] - 1, val_xpu.dptr<DType>(),
141 input.aux_data(csr::kIndPtr).dptr<RType>(),
142 indptr_shape[0] - 1, idx_shape[0]);
144 if (idx_shape[0] != 0) {
145 Kernel<csr_idx_check, xpu>::Launch(s, indptr_shape[0] - 1, val_xpu.dptr<DType>(),
146 input.aux_data(csr::kIdx).dptr<IType>(),
147 input.aux_data(csr::kIndPtr).dptr<RType>(), shape[1]);
149 mshadow::Copy(err_cpu.get<cpu, 1, DType>(),
150 val_xpu.get<xpu, 1, DType>(s), s);
165 template<
typename xpu>
167 const TBlob &err_cpu,
const bool full_check) {
168 using namespace op::mxnet_op;
170 <<
"CheckFormatRSPImpl is for RSPNDArray";
173 MSHADOW_TYPE_SWITCH(err_cpu.
type_flag_, DType, {
174 DType* err = err_cpu.dptr<DType>();
179 if (idx_shape[0] == 0) {
183 MSHADOW_TYPE_SWITCH(err_cpu.
type_flag_, DType, {
184 MSHADOW_IDX_TYPE_SWITCH(input.aux_type(rowsparse::kIdx), IType, {
185 mshadow::Stream<xpu> *s = rctx.get_stream<xpu>();
186 NDArray ret_xpu = NDArray(mshadow::Shape1(1),
187 rctx.get_ctx(), false, err_cpu.type_flag_);
188 TBlob val_xpu = ret_xpu.data();
189 Kernel<set_to_int<kNormalErr>, xpu>::Launch(s, val_xpu.Size(), val_xpu.dptr<DType>());
191 Kernel<rsp_idx_check, xpu>::Launch(s, idx_shape[0],
192 val_xpu.dptr<DType>(), input.aux_data(rowsparse::kIdx).dptr<IType>(),
193 idx_shape[0] - 1, input.shape()[0]);
194 mshadow::Copy(err_cpu.get<cpu, 1, DType>(),
195 val_xpu.get<xpu, 1, DType>(s), s);
201 template<
typename xpu>
203 const TBlob &err_cpu,
const bool full_check) {
206 CheckFormatCSRImpl<xpu>(rctx, input, err_cpu, full_check);
208 CheckFormatRSPImpl<xpu>(rctx, input, err_cpu, full_check);
212 LOG(FATAL) <<
"Unknown storage type " << stype;
217 template<
typename xpu>
225 if (!vstorage.empty()) {
226 for (
const auto& i : vstorage) {
227 if (i != stype)
return false;
245 if (!vstorage.empty()) {
247 for (
const auto i : vstorage) {
250 }
else if (i == stype2) {
257 *has_both = has == 3;
269 if (!ndarrays.empty()) {
270 for (
const auto& nd : ndarrays) {
271 if (nd.storage_type() != stype) {
290 if (!ndarrays.empty()) {
292 for (
const auto& nd : ndarrays) {
294 if (stype == stype1) {
296 }
else if (stype == stype2) {
303 *has_both = has == 3;
316 return "fcompute_ex";
318 return "fcompute_fallback";
344 return dmlc::GetEnv(
"MXNET_GPU_WORKER_NTHREADS", 2);
351 int num_match_color = dmlc::GetEnv(
"MXNET_EXEC_NUM_TEMP", 1);
355 template<
typename T,
typename V>
358 #pragma omp parallel for reduction(+:sum) 359 for (
int i = 0; i < n; ++i) {
372 template<
typename RandomIt,
typename Compare>
374 size_t grainsize,
const Compare& comp) {
375 if (len < grainsize) {
378 std::thread thr(ParallelSortHelper<RandomIt, Compare>, first, len/2, grainsize, comp);
381 std::inplace_merge(first, first+len/2, first+len, comp);
394 template<
typename RandomIt,
typename Compare>
395 void ParallelSort(RandomIt first, RandomIt last,
size_t num_threads, Compare comp) {
396 const auto num = std::distance(first, last);
397 size_t grainsize =
std::max(num / num_threads + 5, static_cast<size_t>(1024*16));
410 template<
typename RandomIt>
413 std::less<
typename std::iterator_traits<RandomIt>::value_type>());
451 template <
class T,
size_t kSize>
472 template <
class T,
class... Args>
474 return std::unique_ptr<T>(
new T(std::forward<Args>(args)...));
488 using U =
typename std::remove_extent<T>::type;
489 return std::unique_ptr<T>(
new U[n]{});
500 template <
class T,
class... Args>
503 template<
typename FCompType>
506 static auto& fcompute_cpu = nnvm::Op::GetAttr<FCompType>(name +
"<cpu>");
507 static auto& fcompute_gpu = nnvm::Op::GetAttr<FCompType>(name +
"<gpu>");
509 if (ctx.
dev_mask() == cpu::kDevMask) {
510 return fcompute_cpu.get(op,
nullptr);
511 }
else if (ctx.
dev_mask() == gpu::kDevMask) {
512 return fcompute_gpu.get(op,
nullptr);
514 LOG(FATAL) <<
"Unknown device mask";
521 #endif // MXNET_COMMON_UTILS_H_ Symbol min(const std::string &symbol_name, Symbol data, Shape axis=Shape(), bool keepdims=false, bool exclude=false)
Definition: op.h:2219
static MSHADOW_XINLINE void Map(int i, DType *out, const IType *idx, const RType *indptr, const nnvm::dim_t ncols)
Definition: utils.h:73
NDArrayStorageType
Definition: ndarray.h:59
void CheckFormatCSRImpl(const RunContext &rctx, const NDArray &input, const TBlob &err_cpu, const bool full_check)
Check the validity of CSRNDArray.
Definition: utils.h:112
DeviceType dev_mask() const
Get corresponding device mask.
Definition: base.h:160
NDArrayStorageType storage_type() const
Definition: ndarray.h:297
Engine that schedules all the operations according to dependency.
void CheckFormatImpl(const RunContext &rctx, const NDArray &input, const TBlob &err_cpu, const bool full_check)
Definition: utils.h:202
const TShape & storage_shape() const
Definition: ndarray.h:204
namespace of mxnet
Definition: base.h:127
Additional operator attributes beside the ones provided by NNVM.
void KnownBound
Type of T.
Definition: utils.h:456
void ParallelSortHelper(RandomIt first, size_t len, size_t grainsize, const Compare &comp)
Helper function for ParallelSort. DO NOT call this function directly. Use the interface ParallelSort instead.
Definition: utils.h:373
int type_flag_
type flag of the tensor blob
Definition: tensor_blob.h:67
FCompType GetFCompute(const nnvm::Op *op, const std::string &name, const Context &ctx)
Definition: utils.h:504
V ParallelAccumulate(const T *a, const int n, V start)
Definition: utils.h:356
nnvm::TShape TShape
Shape data structure used to record shape information.
Definition: base.h:137
int GetNumThreadPerGPU()
Definition: utils.h:342
execution time context. The information needed in runtime for actual execution.
Definition: base.h:253
DispatchMode
the dispatch mode of the operator
Definition: op_attr_types.h:105
Symbol max(const std::string &symbol_name, Symbol data, Shape axis=Shape(), bool keepdims=false, bool exclude=false)
Definition: op.h:2182
std::string stype_string(const int x)
get string representation of storage_type
Definition: utils.h:329
void CastStorageDispatch(const OpContext &ctx, const NDArray &input, const NDArray &output)
void CheckFormatWrapper(const RunContext &rctx, const NDArray &input, const TBlob &err_cpu, const bool full_check)
void ParallelSort(RandomIt first, RandomIt last, size_t num_threads, Compare comp)
Sort the elements in the range [first, last) into the ascending order defined by the comparator comp.
Definition: utils.h:395
All the possible information needed by Operator.Forward and Backward. This is the superset of RunContext.
Definition: op_attr_types.h:66
bool ContainsOnlyStorage(const StorageTypeVector &vstorage, const NDArrayStorageType stype)
returns true if all storage types in vstorage are the same as target stype. false is returned for empty inputs.
Definition: utils.h:223
std::mt19937 RANDOM_ENGINE
Random Engine.
Definition: utils.h:419
Indices of RSPNDArray should be non-negative, less than the size of the first dimension and in ascending order.
Definition: utils.h:89
const TShape & shape() const
Definition: ndarray.h:196
std::string dispatch_mode_string(const DispatchMode x)
get string representation of dispatch_mode
Definition: utils.h:311
Helper for non-array type T.
Definition: utils.h:430
Data structures that can appear in graph attributes.
IndPtr should be non-negative, in non-decreasing order, start with 0 and end with a value equal to the size of the indices array.
Definition: utils.h:56
std::unique_ptr< T[]> UnknownBound
Type of T.
Definition: utils.h:445
nnvm::Op Op
operator structure from NNVM
Definition: base.h:139
Symbol sort(const std::string &symbol_name, Symbol data, dmlc::optional< int > axis=dmlc::optional< int >(-1), bool is_ascend=true)
Definition: op.h:2439
std::unique_ptr< T > SingleObject
Type of T.
Definition: utils.h:434
void CheckFormatRSPImpl(const RunContext &rctx, const NDArray &input, const TBlob &err_cpu, const bool full_check)
Check the validity of RowSparseNDArray.
Definition: utils.h:166
int GetExecNumMatchColor()
Definition: utils.h:349
static MSHADOW_XINLINE void Map(int i, DType *out, const IType *idx, const nnvm::dim_t end, const nnvm::dim_t nrows)
Definition: utils.h:91
helper::UniqueIf< T >::SingleObject MakeUnique(Args &&...args)
Constructs an object of type T and wraps it in a std::unique_ptr.
Definition: utils.h:473
Context information about the execution environment.
Definition: base.h:142
Indices should be non-negative, less than the number of columns and in ascending order within each row.
Definition: utils.h:71
const TShape & aux_shape(size_t index) const
get the shape of aux_data(index)
Definition: ndarray.h:216
ndarray interface
Definition: ndarray.h:79
static MSHADOW_XINLINE void Map(int i, DType *out, const IType *indptr, const nnvm::dim_t end, const nnvm::dim_t idx_size)
Definition: utils.h:58
std::vector< int > StorageTypeVector
The result holder of storage type of each NodeEntry in the graph.
Definition: graph_attr_types.h:45
tensor blob class that can be used to hold tensor of any dimension, any device and any data type...
Definition: tensor_blob.h:59
Symbol sum(const std::string &symbol_name, Symbol data, Shape axis=Shape(), bool keepdims=false, bool exclude=false)
Definition: op.h:1993