mxnet
utils.h
Go to the documentation of this file.
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one
3  * or more contributor license agreements. See the NOTICE file
4  * distributed with this work for additional information
5  * regarding copyright ownership. The ASF licenses this file
6  * to you under the Apache License, Version 2.0 (the
7  * "License"); you may not use this file except in compliance
8  * with the License. You may obtain a copy of the License at
9  *
10  * http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing,
13  * software distributed under the License is distributed on an
14  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15  * KIND, either express or implied. See the License for the
16  * specific language governing permissions and limitations
17  * under the License.
18  */
19 
25 #ifndef MXNET_COMMON_UTILS_H_
26 #define MXNET_COMMON_UTILS_H_
27 
28 #include <dmlc/logging.h>
29 #include <dmlc/omp.h>
30 #include <nnvm/graph.h>
31 #include <mxnet/engine.h>
32 #include <mxnet/ndarray.h>
33 #include <mxnet/op_attr_types.h>
34 #include <mxnet/graph_attr_types.h>
35 #include <nnvm/graph_attr_types.h>
36 
37 #include <memory>
38 #include <vector>
39 #include <type_traits>
40 #include <utility>
41 #include <random>
42 #include <string>
43 #include <thread>
44 #include <algorithm>
45 #include <functional>
46 #include <limits>
47 
48 #include "../operator/mxnet_op.h"
49 #if MXNET_USE_MKLDNN == 1
50 #include "../operator/nn/mkldnn/mkldnn_base-inl.h"
51 #endif
52 
53 namespace mxnet {
54 namespace common {
55 
61  template<typename DType, typename IType>
62  MSHADOW_XINLINE static void Map(int i, DType* out, const IType* indptr,
63  const nnvm::dim_t end, const nnvm::dim_t idx_size) {
64  if (indptr[i+1] < 0 || indptr[i+1] < indptr[i] ||
65  (i == 0 && indptr[i] != 0) ||
66  (i == end - 1 && indptr[end] != idx_size))
67  *out = kCSRIndPtrErr;
68  }
69 };
70 
75 struct csr_idx_check {
76  template<typename DType, typename IType, typename RType>
77  MSHADOW_XINLINE static void Map(int i, DType* out, const IType* idx,
78  const RType* indptr, const nnvm::dim_t ncols) {
79  for (RType j = indptr[i]; j < indptr[i+1]; j++) {
80  if (idx[j] >= ncols || idx[j] < 0 ||
81  (j < indptr[i+1] - 1 && idx[j] >= idx[j+1])) {
82  *out = kCSRIdxErr;
83  break;
84  }
85  }
86  }
87 };
88 
93 struct rsp_idx_check {
94  template<typename DType, typename IType>
95  MSHADOW_XINLINE static void Map(int i, DType* out, const IType* idx,
96  const nnvm::dim_t end, const nnvm::dim_t nrows) {
97  if ((i < end && idx[i+1] <= idx[i])
98  || idx[i] < 0 || idx[i] >= nrows)
99  *out = kRSPIdxErr;
100  }
101 };
102 
/*!
 * \brief Format-check entry point templated on the device type xpu.
 *
 * Only the declaration is visible here; the definition lives elsewhere.
 * The parameters mirror those of CheckFormatImpl.
 * NOTE(review): presumably a per-device forwarder to CheckFormatImpl - confirm
 * against the definition.
 */
103 template<typename xpu>
104 void CheckFormatWrapper(const RunContext &rctx, const NDArray &input,
105  const TBlob &err_cpu, const bool full_check);
106 
/*!
 * \brief Check the validity of CSRNDArray.
 *
 * Shape consistency is always checked; on failure kCSRShapeErr is written
 * through err_cpu and the function returns. When full_check is set, the
 * indptr and idx contents are additionally checked by the csr_indptr_check
 * and csr_idx_check kernels and the resulting flag is copied into err_cpu.
 * If the shapes are fine and full_check is false, err_cpu is not written.
 *
 * \param rctx       run context providing the stream and device context
 * \param input      array to validate; must have kCSRStorage
 * \param err_cpu    blob that receives the error code
 * \param full_check whether to run the content checks in addition to the
 *                   shape checks
 */
115 template<typename xpu>
116 void CheckFormatCSRImpl(const RunContext &rctx, const NDArray &input,
117  const TBlob &err_cpu, const bool full_check) {
118  using namespace op::mxnet_op;
119  CHECK_EQ(input.storage_type(), kCSRStorage)
120  << "CheckFormatCSRImpl is for CSRNDArray";
121  const TShape shape = input.shape();
122  const TShape idx_shape = input.aux_shape(csr::kIdx);
123  const TShape indptr_shape = input.aux_shape(csr::kIndPtr);
124  const TShape storage_shape = input.storage_shape();
// Shape sanity: the array is 2-D, all auxiliary/storage shapes are 1-D,
// indptr holds rows + 1 entries, and idx has one entry per stored value.
125  if ((shape.ndim() != 2) ||
126  (idx_shape.ndim() != 1 || indptr_shape.ndim() != 1 || storage_shape.ndim() != 1) ||
127  (indptr_shape[0] != shape[0] + 1) ||
128  (idx_shape[0] != storage_shape[0])) {
// err_cpu is written through a plain pointer here, so it is presumably
// host-accessible memory - TODO confirm with callers.
129  MSHADOW_TYPE_SWITCH(err_cpu.type_flag_, DType, {
130  DType* err = err_cpu.dptr<DType>();
131  *err = kCSRShapeErr;
132  });
133  return;
134  }
135  if (full_check) {
136  MSHADOW_TYPE_SWITCH(err_cpu.type_flag_, DType, {
137  MSHADOW_IDX_TYPE_SWITCH(input.aux_type(csr::kIndPtr), RType, {
138  MSHADOW_IDX_TYPE_SWITCH(input.aux_type(csr::kIdx), IType, {
139  mshadow::Stream<xpu> *s = rctx.get_stream<xpu>();
// One-element scratch array on the run context's device; the kernels
// below write the error flag into it.
140  NDArray ret_xpu = NDArray(mshadow::Shape1(1),
141  rctx.get_ctx(), false, err_cpu.type_flag_);
142  TBlob val_xpu = ret_xpu.data();
// Presumably initialises the flag to kNormalErr before any check runs.
143  Kernel<set_to_int<kNormalErr>, xpu>::Launch(s, val_xpu.Size(), val_xpu.dptr<DType>());
// One kernel instance per row (indptr_shape[0] - 1 of them).
144  Kernel<csr_indptr_check, xpu>::Launch(s, indptr_shape[0] - 1, val_xpu.dptr<DType>(),
145  input.aux_data(csr::kIndPtr).dptr<RType>(),
146  indptr_shape[0] - 1, idx_shape[0]);
147  // no need to check indices if indices are empty
148  if (idx_shape[0] != 0) {
149  Kernel<csr_idx_check, xpu>::Launch(s, indptr_shape[0] - 1, val_xpu.dptr<DType>(),
150  input.aux_data(csr::kIdx).dptr<IType>(),
151  input.aux_data(csr::kIndPtr).dptr<RType>(), shape[1]);
152  }
// Transfer the resulting flag from the scratch array into err_cpu.
153  mshadow::Copy(err_cpu.get<cpu, 1, DType>(),
154  val_xpu.get<xpu, 1, DType>(s), s);
155  });
156  });
157  });
158  }
159 }
160 
/*!
 * \brief Check the validity of RowSparseNDArray.
 *
 * If the number of indices differs from the first dimension of the storage
 * shape, kRSPShapeErr is written through err_cpu and the function returns.
 * An array with no indices is accepted without writing err_cpu. When
 * full_check is set, every index is checked by the rsp_idx_check kernel and
 * the resulting flag is copied into err_cpu.
 *
 * \param rctx       run context providing the stream and device context
 * \param input      array to validate; must have kRowSparseStorage
 * \param err_cpu    blob that receives the error code
 * \param full_check whether to run the index content check
 */
169 template<typename xpu>
170 void CheckFormatRSPImpl(const RunContext &rctx, const NDArray &input,
171  const TBlob &err_cpu, const bool full_check) {
172  using namespace op::mxnet_op;
173  CHECK_EQ(input.storage_type(), kRowSparseStorage)
174  << "CheckFormatRSPImpl is for RSPNDArray";
175  const TShape idx_shape = input.aux_shape(rowsparse::kIdx);
// There must be exactly one row index per stored row.
176  if (idx_shape[0] != input.storage_shape()[0]) {
// err_cpu is written through a plain pointer here, so it is presumably
// host-accessible memory - TODO confirm with callers.
177  MSHADOW_TYPE_SWITCH(err_cpu.type_flag_, DType, {
178  DType* err = err_cpu.dptr<DType>();
179  *err = kRSPShapeErr;
180  });
181  return;
182  }
// Nothing to validate when no rows are stored.
183  if (idx_shape[0] == 0) {
184  return;
185  }
186  if (full_check) {
187  MSHADOW_TYPE_SWITCH(err_cpu.type_flag_, DType, {
188  MSHADOW_IDX_TYPE_SWITCH(input.aux_type(rowsparse::kIdx), IType, {
189  mshadow::Stream<xpu> *s = rctx.get_stream<xpu>();
// One-element scratch array on the run context's device for the flag.
190  NDArray ret_xpu = NDArray(mshadow::Shape1(1),
191  rctx.get_ctx(), false, err_cpu.type_flag_);
192  TBlob val_xpu = ret_xpu.data();
// Presumably initialises the flag to kNormalErr before the check runs.
193  Kernel<set_to_int<kNormalErr>, xpu>::Launch(s, val_xpu.Size(), val_xpu.dptr<DType>());
194 
// One kernel instance per index; idx_shape[0] - 1 is passed as the last
// valid position so the kernel does not read past the final entry.
195  Kernel<rsp_idx_check, xpu>::Launch(s, idx_shape[0],
196  val_xpu.dptr<DType>(), input.aux_data(rowsparse::kIdx).dptr<IType>(),
197  idx_shape[0] - 1, input.shape()[0]);
// Transfer the resulting flag from the scratch array into err_cpu.
198  mshadow::Copy(err_cpu.get<cpu, 1, DType>(),
199  val_xpu.get<xpu, 1, DType>(s), s);
200  });
201  });
202  }
203 }
204 
205 template<typename xpu>
206 void CheckFormatImpl(const RunContext &rctx, const NDArray &input,
207  const TBlob &err_cpu, const bool full_check) {
208  int stype = input.storage_type();
209  if (stype == kCSRStorage) {
210  CheckFormatCSRImpl<xpu>(rctx, input, err_cpu, full_check);
211  } else if (stype == kRowSparseStorage) {
212  CheckFormatRSPImpl<xpu>(rctx, input, err_cpu, full_check);
213  } else if (stype == kDefaultStorage) {
214  // no-op for default storage
215  } else {
216  LOG(FATAL) << "Unknown storage type " << stype;
217  }
218 }
219 
/*!
 * \brief Pick rows specified by user input index array from a row sparse
 *        ndarray and save them in the output.
 *
 * Only the declaration is visible here; the definition lives elsewhere.
 *
 * \param s         stream for the device type xpu
 * \param input_nd  source array (presumably row-sparse - confirm at the
 *                  definition)
 * \param idx_data  blob holding the row indices to retain
 * \param req       write request type for the output
 * \param output_nd destination array
 */
223 template<typename xpu>
224 void SparseRetainOpForwardRspWrapper(mshadow::Stream<xpu> *s,
225  const NDArray& input_nd,
226  const TBlob& idx_data,
227  const OpReqType req,
228  NDArray* output_nd);
229 
230 /* \brief Casts tensor storage type to the new type.
 *
 * Only the declaration is visible here; the definition lives elsewhere.
 * NOTE(review): presumably the target storage type is the one `output`
 * already carries - confirm against the definition.
 *
 * \param ctx    operator context
 * \param input  source array
 * \param output destination array
231  */
232 template<typename xpu>
233 void CastStorageDispatch(const OpContext& ctx, const NDArray& input, const NDArray& output);
234 
238 inline bool ContainsOnlyStorage(const StorageTypeVector& vstorage,
239  const NDArrayStorageType stype) {
240  if (!vstorage.empty()) {
241  for (const auto& i : vstorage) {
242  if (i != stype) return false;
243  }
244  return true;
245  }
246  return false;
247 }
248 
253 inline bool ContainsOnlyStorage(const StorageTypeVector& vstorage,
254  const NDArrayStorageType stype1,
255  const NDArrayStorageType stype2,
256  bool *has_both) {
257  if (has_both) {
258  *has_both = false;
259  }
260  if (!vstorage.empty()) {
261  uint8_t has = 0;
262  for (const auto i : vstorage) {
263  if (i == stype1) {
264  has |= 1;
265  } else if (i == stype2) {
266  has |= 2;
267  } else {
268  return false;
269  }
270  }
271  if (has_both) {
272  *has_both = has == 3;
273  }
274  return true;
275  }
276  return false;
277 }
278 
282 inline bool ContainsOnlyStorage(const std::vector<NDArray>& ndarrays,
283  const NDArrayStorageType stype) {
284  if (!ndarrays.empty()) {
285  for (const auto& nd : ndarrays) {
286  if (nd.storage_type() != stype) {
287  return false;
288  }
289  }
290  return true;
291  }
292  return false;
293 }
294 
298 inline bool ContainsOnlyStorage(const std::vector<NDArray>& ndarrays,
299  const NDArrayStorageType stype1,
300  const NDArrayStorageType stype2,
301  bool *has_both) {
302  if (has_both) {
303  *has_both = false;
304  }
305  if (!ndarrays.empty()) {
306  uint8_t has = 0;
307  for (const auto& nd : ndarrays) {
308  const NDArrayStorageType stype = nd.storage_type();
309  if (stype == stype1) {
310  has |= 1;
311  } else if (stype == stype2) {
312  has |= 2;
313  } else {
314  return false;
315  }
316  }
317  if (has_both) {
318  *has_both = has == 3;
319  }
320  return true;
321  }
322  return false;
323 }
324 
328 inline bool ContainsStorageType(const std::vector<NDArray>& ndarrays,
329  const NDArrayStorageType stype) {
330  if (!ndarrays.empty()) {
331  for (const auto& nd : ndarrays) {
332  if (nd.storage_type() == stype) {
333  return true;
334  }
335  }
336  }
337  return false;
338 }
339 
343 inline bool ContainsStorageType(const std::vector<int>& ndstypes,
344  const NDArrayStorageType stype) {
345  if (!ndstypes.empty()) {
346  for (const auto& ndstype : ndstypes) {
347  if (ndstype == stype) {
348  return true;
349  }
350  }
351  }
352  return false;
353 }
354 
356 inline std::string dispatch_mode_string(const DispatchMode x) {
357  switch (x) {
359  return "fcompute";
361  return "fcompute_ex";
363  return "fcompute_fallback";
365  return "variable";
367  return "undefined";
368  }
369  return "unknown";
370 }
371 
372 
374 inline std::string stype_string(const int x) {
375  switch (x) {
376  case kDefaultStorage:
377  return "default";
378  case kCSRStorage:
379  return "csr";
380  case kRowSparseStorage:
381  return "row_sparse";
382  }
383  return "unknown";
384 }
385 
387 inline std::string dev_type_string(const int dev_type) {
388  switch (dev_type) {
389  case Context::kCPU:
390  return "cpu";
391  case Context::kGPU:
392  return "gpu";
393  case Context::kCPUPinned:
394  return "cpu_pinned";
395  case Context::kCPUShared:
396  return "cpu_shared";
397  }
398  return "unknown";
399 }
400 
402 inline std::string operator_stype_string(const nnvm::NodeAttrs& attrs,
403  const int dev_mask,
404  const std::vector<int>& in_attrs,
405  const std::vector<int>& out_attrs) {
406  std::ostringstream os;
407  os << "operator = " << attrs.op->name
408  << "\ninput storage types = [";
409  for (const int attr : in_attrs) {
410  os << stype_string(attr) << ", ";
411  }
412  os << "]\n"
413  << "output storage types = [";
414  for (const int attr : out_attrs) {
415  os << stype_string(attr) << ", ";
416  }
417  os << "]\n"
418  << "params = {";
419  for (auto kv : attrs.dict) {
420  os << "\"" << kv.first << "\" : " << kv.second << ", ";
421  }
422  os << "}\n"
423  << "context.dev_mask = " << dev_type_string(dev_mask);
424  return os.str();
425 }
426 
428 inline std::string operator_string(const nnvm::NodeAttrs& attrs,
429  const OpContext& ctx,
430  const std::vector<NDArray>& inputs,
431  const std::vector<OpReqType>& req,
432  const std::vector<NDArray>& outputs) {
433  std::string result = "";
434  std::vector<int> in_stypes;
435  std::vector<int> out_stypes;
436  in_stypes.reserve(inputs.size());
437  out_stypes.reserve(outputs.size());
438  auto xform = [](const NDArray arr) -> int { return arr.storage_type(); };
439  std::transform(inputs.begin(), inputs.end(), std::back_inserter(in_stypes), xform);
440  std::transform(outputs.begin(), outputs.end(), std::back_inserter(out_stypes), xform);
441  result += operator_stype_string(attrs, ctx.run_ctx.ctx.dev_mask(), in_stypes, out_stypes);
442  return result;
443 }
444 
446 inline void LogOnce(const std::string& message) {
447  typedef dmlc::ThreadLocalStore<std::unordered_set<std::string>> LogStore;
448  auto log_store = LogStore::Get();
449  if (log_store->find(message) == log_store->end()) {
450  LOG(INFO) << message;
451  log_store->insert(message);
452  }
453 }
454 
457 inline void LogStorageFallback(const nnvm::NodeAttrs& attrs,
458  const int dev_mask,
459  const std::vector<int>* in_attrs,
460  const std::vector<int>* out_attrs) {
461  static bool log = dmlc::GetEnv("MXNET_STORAGE_FALLBACK_LOG_VERBOSE", true);
462  if (!log) return;
463  const std::string op_str = operator_stype_string(attrs, dev_mask, *in_attrs, *out_attrs);
464  std::ostringstream os;
465  const char* warning = "\nThe operator with default storage type will be dispatched "
466  "for execution. You're seeing this warning message because the operator above is unable "
467  "to process the given ndarrays with specified storage types, context and parameter. "
468  "Temporary dense ndarrays are generated in order to execute the operator. "
469  "This does not affect the correctness of the programme. "
470  "You can set environment variable MXNET_STORAGE_FALLBACK_LOG_VERBOSE to "
471  "0 to suppress this warning.";
472  os << "\nStorage type fallback detected:\n" << op_str << warning;
473  LogOnce(os.str());
474 #if MXNET_USE_MKLDNN == 1
475  if (!MKLDNNEnvSet()) common::LogOnce("MXNET_MKLDNN_ENABLED flag is off. "
476  "You can re-enable by setting MXNET_MKLDNN_ENABLED=1");
477  if (GetMKLDNNCacheSize() != -1) common::LogOnce("MXNET_MKLDNN_CACHE_NUM is set."
478  "Should only be set if "
479  "your model has variable input shapes, "
480  "as cache size may grow unbounded");
481 #endif
482 }
483 
484 // heuristic to dermine number of threads per GPU
485 inline int GetNumThreadsPerGPU() {
486  // This is resource efficient option.
487  return dmlc::GetEnv("MXNET_GPU_WORKER_NTHREADS", 2);
488 }
489 
490 // heuristic to get number of matching colors.
491 // this decides how much parallelism we can get in each GPU.
492 inline int GetExecNumMatchColor() {
493  // This is resource efficient option.
494  int num_match_color = dmlc::GetEnv("MXNET_EXEC_NUM_TEMP", 1);
495  return std::min(num_match_color, GetNumThreadsPerGPU());
496 }
497 
/*!
 * \brief Sum the first n elements of an array on top of an initial value,
 *        using an OpenMP reduction when OpenMP is enabled.
 * \param a     pointer to the elements to add up
 * \param n     number of elements to read from a
 * \param start initial value of the accumulator
 * \return start plus the sum of a[0] .. a[n-1]
 */
template<typename T, typename V>
V ParallelAccumulate(const T* a, const int n, V start) {
  V total = start;
#pragma omp parallel for reduction(+:total)
  for (int idx = 0; idx < n; ++idx) {
    total += a[idx];
  }
  return total;
}
507 
/*!
 * \brief Helper function for ParallelSort.
 *        DO NOT call this function directly. Use the interface ParallelSort.
 *
 * Ranges shorter than grainsize are sorted with std::sort. Longer ranges are
 * split in half: the first half is sorted on a newly started thread, the
 * second half on the calling thread, and the halves are merged afterwards.
 *
 * \param first     iterator to the start of the range
 * \param len       number of elements in the range
 * \param grainsize ranges shorter than this are sorted sequentially
 * \param comp      comparison functor
 */
template<typename RandomIt, typename Compare>
void ParallelSortHelper(RandomIt first, size_t len,
                        size_t grainsize, const Compare& comp) {
  if (len < grainsize) {
    std::sort(first, first+len, comp);
  } else {
    std::thread thr(ParallelSortHelper<RandomIt, Compare>, first, len/2, grainsize, comp);
    ParallelSortHelper(first+len/2, len - len/2, grainsize, comp);
    thr.join();
    std::inplace_merge(first, first+len/2, first+len, comp);
  }
}

/*!
 * \brief Sort the elements in the range [first, last) into the ascending
 *        order defined by the comparator comp.
 *
 * \param first       start of the range
 * \param last        end of the range
 * \param num_threads requested degree of parallelism; 0 is treated as 1
 * \param comp        comparison functor
 */
template<typename RandomIt, typename Compare>
void ParallelSort(RandomIt first, RandomIt last, size_t num_threads, Compare comp) {
  const size_t num = static_cast<size_t>(std::distance(first, last));
  // Guard the division below: a thread count of 0 would divide by zero.
  const size_t workers = std::max(num_threads, static_cast<size_t>(1));
  // A floor on the grain size keeps small inputs on the sequential path.
  const size_t grainsize = std::max(num / workers + 5, static_cast<size_t>(1024*16));
  ParallelSortHelper(first, num, grainsize, comp);
}

/*!
 * \brief Sort the elements in the range [first, last) into ascending order
 *        using std::less on the iterator's value type.
 *
 * \param first       start of the range
 * \param last        end of the range
 * \param num_threads requested degree of parallelism; 0 is treated as 1
 */
template<typename RandomIt>
void ParallelSort(RandomIt first, RandomIt last, size_t num_threads) {
  ParallelSort(first, last, num_threads,
               std::less<typename std::iterator_traits<RandomIt>::value_type>());
}
558 
/*! \brief Random Engine. */
using RANDOM_ENGINE = std::mt19937;
563 
/*!
 * \brief Helper traits used to select the right MakeUnique overload.
 */
namespace helper {

/*!
 * \brief Helper for non-array type `T`.
 */
template <class T>
struct UniqueIf {
  /*! \brief Type of `T`. */
  using SingleObject = std::unique_ptr<T>;
};

/*!
 * \brief Helper for an array of unknown bound `T`.
 */
template <class T>
struct UniqueIf<T[]> {
  /*! \brief Type of `T`. */
  using UnknownBound = std::unique_ptr<T[]>;
};

/*!
 * \brief Helper for an array of known bound `T`.
 */
template <class T, size_t kSize>
struct UniqueIf<T[kSize]> {
  /*! \brief Type of `T`. */
  using KnownBound = void;
};

}  // namespace helper

/*!
 * \brief Constructs an object of type `T` and wraps it in a
 *        `std::unique_ptr`.
 * \param args list of arguments forwarded to the constructor of `T`
 * \return `std::unique_ptr` owning the new instance of `T`
 *
 * Participates in overload resolution only when `T` is not an array type.
 */
template <class T, class... Args>
typename helper::UniqueIf<T>::SingleObject MakeUnique(Args&&... args) {
  return std::unique_ptr<T>(new T(std::forward<Args>(args)...));
}

/*!
 * \brief Constructs a value-initialized array of unknown bound and wraps it
 *        in a `std::unique_ptr`.
 * \param n number of elements to allocate
 * \return `std::unique_ptr` owning the new array
 *
 * Participates in overload resolution only when `T` is an array of unknown
 * bound.
 */
template <class T>
typename helper::UniqueIf<T>::UnknownBound MakeUnique(size_t n) {
  using U = typename std::remove_extent<T>::type;
  return std::unique_ptr<T>(new U[n]{});
}

/*!
 * \brief Constructing arrays of known bound is disallowed.
 */
template <class T, class... Args>
typename helper::UniqueIf<T>::KnownBound MakeUnique(Args&&... args) = delete;
645 
646 template<typename FCompType>
647 FCompType GetFCompute(const nnvm::Op* op, const std::string& name,
648  const Context& ctx) {
649  static auto& fcompute_cpu = nnvm::Op::GetAttr<FCompType>(name + "<cpu>");
650  static auto& fcompute_gpu = nnvm::Op::GetAttr<FCompType>(name + "<gpu>");
651 
652  if (ctx.dev_mask() == cpu::kDevMask) {
653  return fcompute_cpu.get(op, nullptr);
654  } else if (ctx.dev_mask() == gpu::kDevMask) {
655  return fcompute_gpu.get(op, nullptr);
656  } else {
657  LOG(FATAL) << "Unknown device mask";
658  return nullptr;
659  }
660 }
661 
/*!
 * \brief Return the max integer value representable in the type `T` without
 *        loss of precision.
 *
 * For integral types this is the type's maximum value. For other types it is
 * 2 to the power of the number of significand digits, i.e. the largest value
 * up to which every integer is exactly representable.
 */
template <typename T>
constexpr size_t MaxIntegerValue() {
  return std::is_integral<T>::value ?
    static_cast<size_t>(std::numeric_limits<T>::max()) :
    size_t(2) << (std::numeric_limits<T>::digits - 1);
}
671 
672 template <>
673 constexpr size_t MaxIntegerValue<mshadow::half::half_t>() {
674  return size_t(2) << 10;
675 }
676 
/*!
 * \brief Count how many times `a` can be shifted right by one bit before it
 *        becomes zero, plus one. Returns 1 for both 0 and 1.
 */
MSHADOW_XINLINE int ilog2ul(size_t a) {
  int bits = 1;
  for (a >>= 1; a != 0; a >>= 1) {
    ++bits;
  }
  return bits;
}
682 
/*!
 * \brief Count how many times `a` can be shifted right by one bit before it
 *        becomes zero, plus one. Returns 1 for both 0 and 1.
 */
MSHADOW_XINLINE int ilog2ui(unsigned int a) {
  int bits = 1;
  for (a >>= 1; a != 0; a >>= 1) {
    ++bits;
  }
  return bits;
}
688 
692 inline NDArray InitZeros(const NDArrayStorageType stype, const TShape &shape,
693  const Context &ctx, const int dtype) {
694  // NDArray with default storage
695  if (stype == kDefaultStorage) {
696  NDArray ret(shape, ctx, false, dtype);
697  ret = 0;
698  return ret;
699  }
700  // NDArray with non-default storage. Storage allocation is always delayed.
701  return NDArray(stype, shape, ctx, true, dtype);
702 }
703 
707 inline void EmplaceBackZeros(const NDArrayStorageType stype, const TShape &shape,
708  const Context &ctx, const int dtype,
709  std::vector<NDArray> *vec) {
710  // NDArray with default storage
711  if (stype == kDefaultStorage) {
712  vec->emplace_back(shape, ctx, false, dtype);
713  vec->back() = 0;
714  } else {
715  // NDArray with non-default storage. Storage allocation is always delayed.
716  vec->emplace_back(stype, shape, ctx, true, dtype);
717  }
718 }
719 
720 
724 template<typename DType>
725 inline void ParallelCopy(DType* dst, const DType* src, index_t size) {
726  static index_t copy_block_size = dmlc::GetEnv("MXNET_CPU_PARALLEL_COPY_SIZE", 200000);
727  if (size >= copy_block_size) {
728  #pragma omp parallel for num_threads(engine::OpenMP::Get()->GetRecommendedOMPThreadCount())
729  for (index_t i = 0; i < size; ++i) {
730  dst[i] = src[i];
731  }
732  } else {
733  std::memcpy(dst, src, sizeof(DType) * size);
734  }
735 }
736 
737 } // namespace common
738 } // namespace mxnet
739 #endif // MXNET_COMMON_UTILS_H_
Definition: ndarray.h:74
static MSHADOW_XINLINE void Map(int i, DType *out, const IType *idx, const RType *indptr, const nnvm::dim_t ncols)
Definition: utils.h:77
Definition: ndarray.h:63
NDArrayStorageType
Definition: ndarray.h:61
void CheckFormatCSRImpl(const RunContext &rctx, const NDArray &input, const TBlob &err_cpu, const bool full_check)
Check the validity of CSRNDArray.
Definition: utils.h:116
DeviceType dev_mask() const
Get corresponding device mask.
Definition: base.h:151
Definition: ndarray.h:54
NDArrayStorageType storage_type() const
Definition: ndarray.h:313
Engine that schedules all the operations according to dependency.
void CheckFormatImpl(const RunContext &rctx, const NDArray &input, const TBlob &err_cpu, const bool full_check)
Definition: utils.h:206
int GetNumThreadsPerGPU()
Definition: utils.h:485
void SparseRetainOpForwardRspWrapper(mshadow::Stream< xpu > *s, const NDArray &input_nd, const TBlob &idx_data, const OpReqType req, NDArray *output_nd)
Pick rows specified by user input index array from a row sparse ndarray and save them in the output s...
const TShape & storage_shape() const
Definition: ndarray.h:221
std::string operator_stype_string(const nnvm::NodeAttrs &attrs, const int dev_mask, const std::vector< int > &in_attrs, const std::vector< int > &out_attrs)
get string representation of the operator stypes
Definition: utils.h:402
namespace of mxnet
Definition: base.h:118
Additional operator attributes beside the ones provided by NNVM.
void KnownBound
Type of T.
Definition: utils.h:599
void ParallelSortHelper(RandomIt first, size_t len, size_t grainsize, const Compare &comp)
Helper function for ParallelSort. DO NOT call this function directly. Use the interface ParallelSort ...
Definition: utils.h:516
int type_flag_
type flag of the tensor blob
Definition: tensor_blob.h:74
FCompType GetFCompute(const nnvm::Op *op, const std::string &name, const Context &ctx)
Definition: utils.h:647
V ParallelAccumulate(const T *a, const int n, V start)
Definition: utils.h:499
void LogOnce(const std::string &message)
log message once. Intended for storage fallback warning messages.
Definition: utils.h:446
nnvm::TShape TShape
Shape data structure used to record shape information.
Definition: base.h:128
Context ctx
base Context
Definition: base.h:259
Definition: ndarray.h:72
execution time context. The information needed in runtime for actual execution.
Definition: base.h:257
DispatchMode
the dispatch mode of the operator
Definition: op_attr_types.h:113
NDArray InitZeros(const NDArrayStorageType stype, const TShape &shape, const Context &ctx, const int dtype)
Return an NDArray of all zeros.
Definition: utils.h:692
std::string stype_string(const int x)
get string representation of storage_type
Definition: utils.h:374
Definition: ndarray.h:65
void CastStorageDispatch(const OpContext &ctx, const NDArray &input, const NDArray &output)
void CheckFormatWrapper(const RunContext &rctx, const NDArray &input, const TBlob &err_cpu, const bool full_check)
Definition: base.h:136
void ParallelSort(RandomIt first, RandomIt last, size_t num_threads, Compare comp)
Sort the elements in the range [first, last) into the ascending order defined by the comparator comp...
Definition: utils.h:538
All the possible information needed by Operator.Forward and Backward This is the superset of RunConte...
Definition: op_attr_types.h:66
bool ContainsOnlyStorage(const StorageTypeVector &vstorage, const NDArrayStorageType stype)
returns true if all storage types in vstorage are the same as target stype. false is returned for emp...
Definition: utils.h:238
std::string operator_string(const nnvm::NodeAttrs &attrs, const OpContext &ctx, const std::vector< NDArray > &inputs, const std::vector< OpReqType > &req, const std::vector< NDArray > &outputs)
get string representation of the operator
Definition: utils.h:428
Symbol max(const std::string &symbol_name, Symbol data, dmlc::optional< Shape > axis=dmlc::optional< Shape >(), bool keepdims=false, bool exclude=false)
Definition: op.h:2756
std::mt19937 RANDOM_ENGINE
Random Engine.
Definition: utils.h:562
void EmplaceBackZeros(const NDArrayStorageType stype, const TShape &shape, const Context &ctx, const int dtype, std::vector< NDArray > *vec)
Helper to add a NDArray of zeros to a std::vector.
Definition: utils.h:707
Indices of RSPNDArray should be non-negative, less than the size of first dimension and in ascending ...
Definition: utils.h:93
Definition: ndarray.h:58
Definition: base.h:138
const TShape & shape() const
Definition: ndarray.h:213
std::string dispatch_mode_string(const DispatchMode x)
get string representation of dispatch_mode
Definition: utils.h:356
std::string dev_type_string(const int dev_type)
get string representation of device type
Definition: utils.h:387
Symbol log(const std::string &symbol_name, Symbol data)
Definition: op.h:2355
Helper for non-array type T.
Definition: utils.h:573
Definition: base.h:137
Definition: ndarray.h:54
Data structures that can appear in graph attributes.
Definition: ndarray.h:64
IndPtr should be non-negative, in non-decreasing order, start with 0 and end with value equal with si...
Definition: utils.h:60
Definition: ndarray.h:71
std::unique_ptr< T[]> UnknownBound
Type of T.
Definition: utils.h:588
OpReqType
operation request type to Forward and Backward
Definition: op_attr_types.h:45
nnvm::Op Op
operator structure from NNVM
Definition: base.h:130
bool ContainsStorageType(const std::vector< NDArray > &ndarrays, const NDArrayStorageType stype)
returns true if storage type of any array in ndarrays is the same as the target stype. false is returned for empty inputs.
Definition: utils.h:328
constexpr size_t MaxIntegerValue()
Return the max integer value representable in the type T without loss of precision.
Definition: utils.h:666
RunContext run_ctx
RunContext related resources.
Definition: op_attr_types.h:72
int64_t dim_t
data type to store dim size
Definition: c_api.h:62
Symbol sort(const std::string &symbol_name, Symbol data, dmlc::optional< int > axis=dmlc::optional< int >(-1), bool is_ascend=true)
Definition: op.h:3107
std::unique_ptr< T > SingleObject
Type of T.
Definition: utils.h:577
void CheckFormatRSPImpl(const RunContext &rctx, const NDArray &input, const TBlob &err_cpu, const bool full_check)
Check the validity of RowSparseNDArray.
Definition: utils.h:170
int GetExecNumMatchColor()
Definition: utils.h:492
Definition: base.h:139
static MSHADOW_XINLINE void Map(int i, DType *out, const IType *idx, const nnvm::dim_t end, const nnvm::dim_t nrows)
Definition: utils.h:95
Symbol min(const std::string &symbol_name, Symbol data, dmlc::optional< Shape > axis=dmlc::optional< Shape >(), bool keepdims=false, bool exclude=false)
Definition: op.h:2793
mshadow::index_t index_t
index type usually use unsigned
Definition: base.h:124
MSHADOW_XINLINE int ilog2ul(size_t a)
Definition: utils.h:677
void LogStorageFallback(const nnvm::NodeAttrs &attrs, const int dev_mask, const std::vector< int > *in_attrs, const std::vector< int > *out_attrs)
log storage fallback event
Definition: utils.h:457
helper::UniqueIf< T >::SingleObject MakeUnique(Args &&...args)
Constructs an object of type T and wraps it in a std::unique_ptr.
Definition: utils.h:616
Context information about the execution environment.
Definition: base.h:133
Indices should be non-negative, less than the number of columns and in ascending order per row...
Definition: utils.h:75
const TShape & aux_shape(size_t index) const
get the shape of aux_data(index)
Definition: ndarray.h:233
ndarray interface
Definition: ndarray.h:82
void ParallelCopy(DType *dst, const DType *src, index_t size)
parallelize copy by OpenMP.
Definition: utils.h:725
MSHADOW_XINLINE int ilog2ui(unsigned int a)
Definition: utils.h:683
static MSHADOW_XINLINE void Map(int i, DType *out, const IType *indptr, const nnvm::dim_t end, const nnvm::dim_t idx_size)
Definition: utils.h:62
std::vector< int > StorageTypeVector
The result holder of storage type of each NodeEntry in the graph.
Definition: graph_attr_types.h:45
Symbol sum(const std::string &symbol_name, Symbol data, dmlc::optional< Shape > axis=dmlc::optional< Shape >(), bool keepdims=false, bool exclude=false)
Definition: op.h:2567
tensor blob class that can be used to hold tensor of any dimension, any device and any data type...
Definition: tensor_blob.h:66