/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

/*!
 * \file utils.h
 * \brief Basic utility functions.
 */
#ifndef MXNET_COMMON_UTILS_H_
#define MXNET_COMMON_UTILS_H_

#include <dmlc/logging.h>
#include <dmlc/omp.h>
#include <dmlc/thread_local.h>  // dmlc::ThreadLocalStore (used by LogOnce below)
#include <nnvm/graph.h>
#include <nnvm/node.h>
#include <mxnet/engine.h>
#include <mxnet/ndarray.h>
#include <mxnet/imperative.h>
#include <mxnet/op_attr_types.h>
#include <mxnet/graph_attr_types.h>
#include <nnvm/graph_attr_types.h>

#include <memory>
#include <vector>
#include <type_traits>
#include <utility>
#include <random>
#include <string>
#include <sstream>        // std::ostringstream (used by operator_stype_string)
#include <cstring>        // std::memcpy (used by ParallelCopy)
#include <unordered_set>  // used by LogOnce
#include <thread>
#include <algorithm>
#include <functional>
#include <limits>

#include "../operator/mxnet_op.h"
#if MXNET_USE_MKLDNN == 1
#include "../operator/nn/mkldnn/mkldnn_base-inl.h"
#endif

#if defined(_WIN32) || defined(_WIN64) || defined(__WINDOWS__)
#include <windows.h>
#else
#include <unistd.h>
#endif


namespace mxnet {
namespace common {

#if defined(_WIN32) || defined(_WIN64) || defined(__WINDOWS__)
inline size_t current_process_id() { return ::GetCurrentProcessId(); }
#else
inline size_t current_process_id() { return getpid(); }
#endif

/*!
 * \brief IndPtr should be non-negative, in non-decreasing order, start with 0
 *        and end with a value equal to the size of the indices array.
 */
struct csr_indptr_check {
  template<typename DType, typename IType>
  MSHADOW_XINLINE static void Map(int i, DType* out, const IType* indptr,
                                  const nnvm::dim_t end, const nnvm::dim_t idx_size) {
    if (indptr[i+1] < 0 || indptr[i+1] < indptr[i] ||
        (i == 0 && indptr[i] != 0) ||
        (i == end - 1 && indptr[end] != idx_size))
      *out = kCSRIndPtrErr;
  }
};

/*!
 * \brief Indices should be non-negative, less than the number of columns
 *        and in ascending order per row.
 */
struct csr_idx_check {
  template<typename DType, typename IType, typename RType>
  MSHADOW_XINLINE static void Map(int i, DType* out, const IType* idx,
                                  const RType* indptr, const nnvm::dim_t ncols) {
    for (RType j = indptr[i]; j < indptr[i+1]; j++) {
      if (idx[j] >= ncols || idx[j] < 0 ||
          (j < indptr[i+1] - 1 && idx[j] >= idx[j+1])) {
        *out = kCSRIdxErr;
        break;
      }
    }
  }
};

/*!
 * \brief Indices of RSPNDArray should be non-negative, less than the size of
 *        the first dimension and in ascending order.
 */
struct rsp_idx_check {
  template<typename DType, typename IType>
  MSHADOW_XINLINE static void Map(int i, DType* out, const IType* idx,
                                  const nnvm::dim_t end, const nnvm::dim_t nrows) {
    if ((i < end && idx[i+1] <= idx[i])
        || idx[i] < 0 || idx[i] >= nrows)
      *out = kRSPIdxErr;
  }
};

template<typename xpu>
void CheckFormatWrapper(const RunContext &rctx, const NDArray &input,
                        const TBlob &err_cpu, const bool full_check);

/*!
 * \brief Check the validity of CSRNDArray.
 * \param rctx Execution context.
 * \param input Input NDArray of CSR storage.
 * \param err_cpu Error number on cpu.
 * \param full_check If true, rigorous check, O(N) operations;
 *        otherwise basic check, O(1) operations.
 */
template<typename xpu>
void CheckFormatCSRImpl(const RunContext &rctx, const NDArray &input,
                        const TBlob &err_cpu, const bool full_check) {
  using namespace op::mxnet_op;
  CHECK_EQ(input.storage_type(), kCSRStorage)
      << "CheckFormatCSRImpl is for CSRNDArray";
  const mxnet::TShape shape = input.shape();
  const mxnet::TShape idx_shape = input.aux_shape(csr::kIdx);
  const mxnet::TShape indptr_shape = input.aux_shape(csr::kIndPtr);
  const mxnet::TShape storage_shape = input.storage_shape();
  if ((shape.ndim() != 2) ||
      (idx_shape.ndim() != 1 || indptr_shape.ndim() != 1 || storage_shape.ndim() != 1) ||
      (indptr_shape[0] != shape[0] + 1) ||
      (idx_shape[0] != storage_shape[0])) {
    MSHADOW_TYPE_SWITCH(err_cpu.type_flag_, DType, {
      DType* err = err_cpu.dptr<DType>();
      *err = kCSRShapeErr;
    });
    return;
  }
  if (full_check) {
    MSHADOW_TYPE_SWITCH(err_cpu.type_flag_, DType, {
      MSHADOW_IDX_TYPE_SWITCH(input.aux_type(csr::kIndPtr), RType, {
        MSHADOW_IDX_TYPE_SWITCH(input.aux_type(csr::kIdx), IType, {
          mshadow::Stream<xpu> *s = rctx.get_stream<xpu>();
          NDArray ret_xpu = NDArray(mshadow::Shape1(1),
                                    rctx.get_ctx(), false, err_cpu.type_flag_);
          TBlob val_xpu = ret_xpu.data();
          Kernel<set_to_int<kNormalErr>, xpu>::Launch(s, val_xpu.Size(), val_xpu.dptr<DType>());
          Kernel<csr_indptr_check, xpu>::Launch(s, indptr_shape[0] - 1, val_xpu.dptr<DType>(),
            input.aux_data(csr::kIndPtr).dptr<RType>(),
            indptr_shape[0] - 1, idx_shape[0]);
          // no need to check indices if indices are empty
          if (idx_shape[0] != 0) {
            Kernel<csr_idx_check, xpu>::Launch(s, indptr_shape[0] - 1, val_xpu.dptr<DType>(),
              input.aux_data(csr::kIdx).dptr<IType>(),
              input.aux_data(csr::kIndPtr).dptr<RType>(), shape[1]);
          }
          mshadow::Copy(err_cpu.get<cpu, 1, DType>(),
                        val_xpu.get<xpu, 1, DType>(s), s);
        });
      });
    });
  }
}

/*!
 * \brief Check the validity of RowSparseNDArray.
 * \param rctx Execution context.
 * \param input Input NDArray of row-sparse storage.
 * \param err_cpu Error number on cpu.
 * \param full_check If true, rigorous check, O(N) operations;
 *        otherwise basic check, O(1) operations.
 */
template<typename xpu>
void CheckFormatRSPImpl(const RunContext &rctx, const NDArray &input,
                        const TBlob &err_cpu, const bool full_check) {
  using namespace op::mxnet_op;
  CHECK_EQ(input.storage_type(), kRowSparseStorage)
      << "CheckFormatRSPImpl is for RSPNDArray";
  const mxnet::TShape idx_shape = input.aux_shape(rowsparse::kIdx);
  if (idx_shape[0] != input.storage_shape()[0]) {
    MSHADOW_TYPE_SWITCH(err_cpu.type_flag_, DType, {
      DType* err = err_cpu.dptr<DType>();
      *err = kRSPShapeErr;
    });
    return;
  }
  if (idx_shape[0] == 0) {
    return;
  }
  if (full_check) {
    MSHADOW_TYPE_SWITCH(err_cpu.type_flag_, DType, {
      MSHADOW_IDX_TYPE_SWITCH(input.aux_type(rowsparse::kIdx), IType, {
        mshadow::Stream<xpu> *s = rctx.get_stream<xpu>();
        NDArray ret_xpu = NDArray(mshadow::Shape1(1),
                                  rctx.get_ctx(), false, err_cpu.type_flag_);
        TBlob val_xpu = ret_xpu.data();
        Kernel<set_to_int<kNormalErr>, xpu>::Launch(s, val_xpu.Size(), val_xpu.dptr<DType>());

        Kernel<rsp_idx_check, xpu>::Launch(s, idx_shape[0],
          val_xpu.dptr<DType>(), input.aux_data(rowsparse::kIdx).dptr<IType>(),
          idx_shape[0] - 1, input.shape()[0]);
        mshadow::Copy(err_cpu.get<cpu, 1, DType>(),
                      val_xpu.get<xpu, 1, DType>(s), s);
      });
    });
  }
}

template<typename xpu>
void CheckFormatImpl(const RunContext &rctx, const NDArray &input,
                     const TBlob &err_cpu, const bool full_check) {
  int stype = input.storage_type();
  if (stype == kCSRStorage) {
    CheckFormatCSRImpl<xpu>(rctx, input, err_cpu, full_check);
  } else if (stype == kRowSparseStorage) {
    CheckFormatRSPImpl<xpu>(rctx, input, err_cpu, full_check);
  } else if (stype == kDefaultStorage) {
    // no-op for default storage
  } else {
    LOG(FATAL) << "Unknown storage type " << stype;
  }
}
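
/*!
 * Illustrative usage sketch (editor's addition, not part of the original
 * header): running a full format check of a sparse array on CPU. Assumes a
 * RunContext `rctx` supplied by the engine and a CSR or row-sparse NDArray
 * `arr`.
 * \code
 * float err = kNormalErr;
 * TBlob err_cpu(&err, mshadow::Shape1(1), cpu::kDevMask, 0);
 * CheckFormatImpl<cpu>(rctx, arr, err_cpu, true);  // full O(N) check
 * CHECK_EQ(err, static_cast<float>(kNormalErr)) << "sparse format is invalid";
 * \endcode
 */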

/*!
 * \brief Pick rows specified by user input index array from a row sparse
 *        ndarray and save them in the output sparse ndarray.
 */
template<typename xpu>
void SparseRetainOpForwardRspWrapper(mshadow::Stream<xpu> *s,
                                     const NDArray& input_nd,
                                     const TBlob& idx_data,
                                     const OpReqType req,
                                     NDArray* output_nd);

/*!
 * \brief Casts tensor storage type to the new type.
 */
template<typename xpu>
void CastStorageDispatch(const OpContext& ctx, const NDArray& input, const NDArray& output);

/*!
 * \brief Returns true if all storage types in vstorage are the same as the
 *        target stype; false is returned for empty inputs.
 */
inline bool ContainsOnlyStorage(const StorageTypeVector& vstorage,
                                const NDArrayStorageType stype) {
  if (!vstorage.empty()) {
    for (const auto& i : vstorage) {
      if (i != stype) return false;
    }
    return true;
  }
  return false;
}

/*!
 * \brief Returns true if all storage types in vstorage are the same as one of
 *        the two target stypes; has_both is set if both target stypes occur.
 */
inline bool ContainsOnlyStorage(const StorageTypeVector& vstorage,
                                const NDArrayStorageType stype1,
                                const NDArrayStorageType stype2,
                                bool *has_both) {
  if (has_both) {
    *has_both = false;
  }
  if (!vstorage.empty()) {
    uint8_t has = 0;
    for (const auto i : vstorage) {
      if (i == stype1) {
        has |= 1;
      } else if (i == stype2) {
        has |= 2;
      } else {
        return false;
      }
    }
    if (has_both) {
      *has_both = has == 3;
    }
    return true;
  }
  return false;
}
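
/*!
 * Illustrative usage sketch (editor's addition, not part of the original
 * header): classifying a mixed storage-type vector.
 * \code
 * StorageTypeVector stypes = {kDefaultStorage, kCSRStorage};
 * bool has_both = false;
 * bool only = ContainsOnlyStorage(stypes, kDefaultStorage, kCSRStorage, &has_both);
 * // only == true, has_both == true; adding kRowSparseStorage would make only == false
 * \endcode
 */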

/*!
 * \brief Returns true if the storage types of all arrays in ndarrays are the
 *        same as the target stype; false is returned for empty inputs.
 */
inline bool ContainsOnlyStorage(const std::vector<NDArray>& ndarrays,
                                const NDArrayStorageType stype) {
  if (!ndarrays.empty()) {
    for (const auto& nd : ndarrays) {
      if (nd.storage_type() != stype) {
        return false;
      }
    }
    return true;
  }
  return false;
}

/*!
 * \brief Returns true if the storage types of all arrays in ndarrays are the
 *        same as one of the two target stypes; has_both is set if both occur.
 */
inline bool ContainsOnlyStorage(const std::vector<NDArray>& ndarrays,
                                const NDArrayStorageType stype1,
                                const NDArrayStorageType stype2,
                                bool *has_both) {
  if (has_both) {
    *has_both = false;
  }
  if (!ndarrays.empty()) {
    uint8_t has = 0;
    for (const auto& nd : ndarrays) {
      const NDArrayStorageType stype = nd.storage_type();
      if (stype == stype1) {
        has |= 1;
      } else if (stype == stype2) {
        has |= 2;
      } else {
        return false;
      }
    }
    if (has_both) {
      *has_both = has == 3;
    }
    return true;
  }
  return false;
}

/*!
 * \brief Returns true if the storage type of any array in ndarrays is the
 *        same as the target stype; false is returned for empty inputs.
 */
inline bool ContainsStorageType(const std::vector<NDArray>& ndarrays,
                                const NDArrayStorageType stype) {
  if (!ndarrays.empty()) {
    for (const auto& nd : ndarrays) {
      if (nd.storage_type() == stype) {
        return true;
      }
    }
  }
  return false;
}

/*!
 * \brief Returns true if any storage type in ndstypes is the same as the
 *        target stype; false is returned for empty inputs.
 */
inline bool ContainsStorageType(const std::vector<int>& ndstypes,
                                const NDArrayStorageType stype) {
  if (!ndstypes.empty()) {
    for (const auto& ndstype : ndstypes) {
      if (ndstype == stype) {
        return true;
      }
    }
  }
  return false;
}

/*! \brief get string representation of dispatch_mode */
inline std::string dispatch_mode_string(const DispatchMode x) {
  switch (x) {
    case DispatchMode::kFCompute:
      return "fcompute";
    case DispatchMode::kFComputeEx:
      return "fcompute_ex";
    case DispatchMode::kFComputeFallback:
      return "fcompute_fallback";
    case DispatchMode::kVariable:
      return "variable";
    case DispatchMode::kUndefined:
      return "undefined";
  }
  return "unknown";
}

/*! \brief get string representation of storage_type */
inline std::string stype_string(const int x) {
  switch (x) {
    case kDefaultStorage:
      return "default";
    case kCSRStorage:
      return "csr";
    case kRowSparseStorage:
      return "row_sparse";
  }
  return "unknown";
}

/*! \brief get string representation of device type */
inline std::string dev_type_string(const int dev_type) {
  switch (dev_type) {
    case Context::kCPU:
      return "cpu";
    case Context::kGPU:
      return "gpu";
    case Context::kCPUPinned:
      return "cpu_pinned";
    case Context::kCPUShared:
      return "cpu_shared";
  }
  return "unknown";
}

inline std::string attr_value_string(const nnvm::NodeAttrs& attrs,
                                     const std::string& attr_name,
                                     std::string default_val = "") {
  if (attrs.dict.find(attr_name) == attrs.dict.end()) {
    return default_val;
  }
  return attrs.dict.at(attr_name);
}

/*! \brief get string representation of the operator stypes */
inline std::string operator_stype_string(const nnvm::NodeAttrs& attrs,
                                         const int dev_mask,
                                         const std::vector<int>& in_attrs,
                                         const std::vector<int>& out_attrs) {
  std::ostringstream os;
  os << "operator = " << attrs.op->name
     << "\ninput storage types = [";
  for (const int attr : in_attrs) {
    os << stype_string(attr) << ", ";
  }
  os << "]\n"
     << "output storage types = [";
  for (const int attr : out_attrs) {
    os << stype_string(attr) << ", ";
  }
  os << "]\n"
     << "params = {";
  for (auto kv : attrs.dict) {
    os << "\"" << kv.first << "\" : " << kv.second << ", ";
  }
  os << "}\n"
     << "context.dev_mask = " << dev_type_string(dev_mask);
  return os.str();
}

/*! \brief get string representation of the operator */
inline std::string operator_string(const nnvm::NodeAttrs& attrs,
                                   const OpContext& ctx,
                                   const std::vector<NDArray>& inputs,
                                   const std::vector<OpReqType>& req,
                                   const std::vector<NDArray>& outputs) {
  std::string result = "";
  std::vector<int> in_stypes;
  std::vector<int> out_stypes;
  in_stypes.reserve(inputs.size());
  out_stypes.reserve(outputs.size());
  auto xform = [](const NDArray arr) -> int { return arr.storage_type(); };
  std::transform(inputs.begin(), inputs.end(), std::back_inserter(in_stypes), xform);
  std::transform(outputs.begin(), outputs.end(), std::back_inserter(out_stypes), xform);
  result += operator_stype_string(attrs, ctx.run_ctx.ctx.dev_mask(), in_stypes, out_stypes);
  return result;
}

/*! \brief log message once. Intended for storage fallback warning messages. */
inline void LogOnce(const std::string& message) {
  typedef dmlc::ThreadLocalStore<std::unordered_set<std::string>> LogStore;
  auto log_store = LogStore::Get();
  if (log_store->find(message) == log_store->end()) {
    LOG(INFO) << message;
    log_store->insert(message);
  }
}

/*! \brief log storage fallback event */
inline void LogStorageFallback(const nnvm::NodeAttrs& attrs,
                               const int dev_mask,
                               const std::vector<int>* in_attrs,
                               const std::vector<int>* out_attrs) {
  static bool log = dmlc::GetEnv("MXNET_STORAGE_FALLBACK_LOG_VERBOSE", true);
  if (!log) return;
  const std::string op_str = operator_stype_string(attrs, dev_mask, *in_attrs, *out_attrs);
  std::ostringstream os;
  const char* warning = "\nThe operator with default storage type will be dispatched "
      "for execution. You're seeing this warning message because the operator above is unable "
      "to process the given ndarrays with the specified storage types, context, and parameters. "
      "Temporary dense ndarrays are generated in order to execute the operator. "
      "This does not affect the correctness of the program. "
      "You can set the environment variable MXNET_STORAGE_FALLBACK_LOG_VERBOSE to "
      "0 to suppress this warning.";
  os << "\nStorage type fallback detected:\n" << op_str << warning;
  LogOnce(os.str());
#if MXNET_USE_MKLDNN == 1
  if (!MKLDNNEnvSet()) common::LogOnce("MXNET_MKLDNN_ENABLED flag is off. "
      "You can re-enable it by setting MXNET_MKLDNN_ENABLED=1");
  if (GetMKLDNNCacheSize() != -1) common::LogOnce("MXNET_MKLDNN_CACHE_NUM is set. "
      "It should only be set if your model has variable input shapes, "
      "as the cache size may grow unbounded.");
#endif
}

// heuristic to determine the number of threads per GPU
inline int GetNumThreadsPerGPU() {
  // This is the resource-efficient option.
  return dmlc::GetEnv("MXNET_GPU_WORKER_NTHREADS", 2);
}

// heuristic to get the number of matching colors;
// this decides how much parallelism we can get in each GPU.
inline int GetExecNumMatchColor() {
  // This is the resource-efficient option.
  int num_match_color = dmlc::GetEnv("MXNET_EXEC_NUM_TEMP", 1);
  return std::min(num_match_color, GetNumThreadsPerGPU());
}

template<typename T, typename V>
V ParallelAccumulate(const T* a, const int n, V start) {
  V sum = start;
#pragma omp parallel for reduction(+:sum)
  for (int i = 0; i < n; ++i) {
    sum += a[i];
  }
  return sum;
}
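
/*!
 * Illustrative usage sketch (editor's addition, not part of the original
 * header): OpenMP-reduced summation over a raw array.
 * \code
 * std::vector<double> vals(1000, 0.5);
 * double total = ParallelAccumulate(vals.data(), static_cast<int>(vals.size()), 0.0);
 * // total == 500.0
 * \endcode
 */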

/*!
 * \brief Helper function for ParallelSort.
 *        DO NOT call this function directly.
 *        Use the interface ParallelSort instead.
 */
template<typename RandomIt, typename Compare>
void ParallelSortHelper(RandomIt first, size_t len,
                        size_t grainsize, const Compare& comp) {
  if (len < grainsize) {
    std::sort(first, first+len, comp);
  } else {
    std::thread thr(ParallelSortHelper<RandomIt, Compare>, first, len/2, grainsize, comp);
    ParallelSortHelper(first+len/2, len - len/2, grainsize, comp);
    thr.join();
    std::inplace_merge(first, first+len/2, first+len, comp);
  }
}

/*!
 * \brief Sort the elements in the range [first, last) into the ascending
 *        order defined by the comparator comp. If the length of the range is
 *        above a grain-size threshold, the range is recursively halved and
 *        sorted in parallel threads.
 */
template<typename RandomIt, typename Compare>
void ParallelSort(RandomIt first, RandomIt last, size_t num_threads, Compare comp) {
  const auto num = std::distance(first, last);
  size_t grainsize = std::max(num / num_threads + 5, static_cast<size_t>(1024*16));
  ParallelSortHelper(first, num, grainsize, comp);
}

/*!
 * \brief Sort the elements in the range [first, last) into ascending order.
 *        Elements are compared using operator<.
 */
template<typename RandomIt>
void ParallelSort(RandomIt first, RandomIt last, size_t num_threads) {
  ParallelSort(first, last, num_threads,
               std::less<typename std::iterator_traits<RandomIt>::value_type>());
}
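
/*!
 * Illustrative usage sketch (editor's addition, not part of the original
 * header): multi-threaded sorting with and without a custom comparator.
 * \code
 * std::vector<int> v = {5, 2, 8, 1};
 * ParallelSort(v.begin(), v.end(), 4, std::greater<int>());  // 8 5 2 1
 * ParallelSort(v.begin(), v.end(), 4);                       // 1 2 5 8
 * \endcode
 */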

/*! \brief Random Engine */
typedef std::mt19937 RANDOM_ENGINE;

/*! \brief Helper functions for MakeUnique. */
namespace helper {

/*! \brief Helper for non-array type T. */
template <class T>
struct UniqueIf {
  /*! \brief Type of T. */
  using SingleObject = std::unique_ptr<T>;
};

/*! \brief Helper for an array of unknown bound T. */
template <class T>
struct UniqueIf<T[]> {
  /*! \brief Type of T. */
  using UnknownBound = std::unique_ptr<T[]>;
};

/*! \brief Helper for an array of known bound T. */
template <class T, size_t kSize>
struct UniqueIf<T[kSize]> {
  /*! \brief Type of T. */
  using KnownBound = void;
};

}  // namespace helper

/*!
 * \brief Constructs an object of type T and wraps it in a std::unique_ptr.
 * \param args List of arguments with which an instance of T will be constructed.
 * \return std::unique_ptr of an instance of type T.
 */
template <class T, class... Args>
typename helper::UniqueIf<T>::SingleObject MakeUnique(Args&&... args) {
  return std::unique_ptr<T>(new T(std::forward<Args>(args)...));
}

/*!
 * \brief Constructs an array of unknown bound T and wraps it in a std::unique_ptr.
 * \param n The size of the array to construct.
 */
template <class T>
typename helper::UniqueIf<T>::UnknownBound MakeUnique(size_t n) {
  using U = typename std::remove_extent<T>::type;
  return std::unique_ptr<T>(new U[n]{});
}

/*! \brief Construction of arrays of known bound is disallowed. */
template <class T, class... Args>
typename helper::UniqueIf<T>::KnownBound MakeUnique(Args&&... args) = delete;

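/*!
 * Illustrative usage sketch (editor's addition, not part of the original
 * header): MakeUnique mirrors C++14 std::make_unique.
 * \code
 * auto s = MakeUnique<std::string>(3, 'x');  // points to "xxx"
 * auto a = MakeUnique<int[]>(16);            // zero-initialized int[16]
 * // MakeUnique<int[16]>() is ill-formed: the known-bound overload is deleted.
 * \endcode
 */
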
template<typename FCompType>
FCompType GetFCompute(const nnvm::Op* op, const std::string& name,
                      const Context& ctx) {
  static auto& fcompute_cpu = nnvm::Op::GetAttr<FCompType>(name + "<cpu>");
  static auto& fcompute_gpu = nnvm::Op::GetAttr<FCompType>(name + "<gpu>");

  if (ctx.dev_mask() == cpu::kDevMask) {
    return fcompute_cpu.get(op, nullptr);
  } else if (ctx.dev_mask() == gpu::kDevMask) {
    return fcompute_gpu.get(op, nullptr);
  } else {
    LOG(FATAL) << "Unknown device mask " << ctx.dev_mask();
    return nullptr;
  }
}
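
/*!
 * Illustrative usage sketch (editor's addition, not part of the original
 * header): looking up the FCompute function registered for an operator on a
 * given context. Assumes an operator named "dot" is registered.
 * \code
 * const nnvm::Op* op = nnvm::Op::Get("dot");
 * FCompute fn = GetFCompute<FCompute>(op, "FCompute", Context::CPU());
 * if (fn == nullptr) LOG(INFO) << "no CPU FCompute registered for dot";
 * \endcode
 */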

/*!
 * \brief Return the max integer value representable in the type T without
 *        loss of precision.
 */
template <typename T>
constexpr size_t MaxIntegerValue() {
  return std::is_integral<T>::value ?
         std::numeric_limits<T>::max() :
         size_t(2) << (std::numeric_limits<T>::digits - 1);
}

template <>
constexpr size_t MaxIntegerValue<mshadow::half::half_t>() {
  return size_t(2) << 10;
}

template <>
constexpr size_t MaxIntegerValue<mshadow::bfloat::bf16_t>() {
  return size_t(2) << 14;
}
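
/*!
 * Illustrative note (editor's addition, not part of the original header):
 * for a floating-point type the generic template yields 2^(mantissa bits),
 * the largest range of consecutive integers it can represent exactly.
 * \code
 * static_assert(MaxIntegerValue<float>() == (size_t(1) << 24), "2^24 for fp32");
 * static_assert(MaxIntegerValue<mshadow::half::half_t>() == 2048, "2^11 for fp16");
 * \endcode
 */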

MSHADOW_XINLINE int ilog2ul(size_t a) {
  int k = 1;
  while (a >>= 1) ++k;
  return k;
}

MSHADOW_XINLINE int ilog2ui(unsigned int a) {
  int k = 1;
  while (a >>= 1) ++k;
  return k;
}
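
/*!
 * Illustrative note (editor's addition, not part of the original header):
 * both helpers return the bit width of `a`, i.e. floor(log2(a)) + 1 for a > 0.
 * \code
 * // ilog2ui(1) == 1, ilog2ui(2) == 2, ilog2ui(7) == 3, ilog2ui(8) == 4
 * \endcode
 */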

/*! \brief Return an NDArray of all zeros. */
inline NDArray InitZeros(const NDArrayStorageType stype, const mxnet::TShape &shape,
                         const Context &ctx, const int dtype) {
  // NDArray with default storage
  if (stype == kDefaultStorage) {
    NDArray ret(shape, ctx, false, dtype);
    ret = 0;
    return ret;
  }
  // NDArray with non-default storage. Storage allocation is always delayed.
  return NDArray(stype, shape, ctx, true, dtype);
}

/*! \brief Helper to add a NDArray of zeros to a std::vector. */
inline void EmplaceBackZeros(const NDArrayStorageType stype, const mxnet::TShape &shape,
                             const Context &ctx, const int dtype,
                             std::vector<NDArray> *vec) {
  // NDArray with default storage
  if (stype == kDefaultStorage) {
    vec->emplace_back(shape, ctx, false, dtype);
    vec->back() = 0;
  } else {
    // NDArray with non-default storage. Storage allocation is always delayed.
    vec->emplace_back(stype, shape, ctx, true, dtype);
  }
}

/*! \brief parallelize copy by OpenMP. */
template<typename DType>
inline void ParallelCopy(DType* dst, const DType* src, index_t size) {
  static index_t copy_block_size = dmlc::GetEnv("MXNET_CPU_PARALLEL_SIZE", 200000);
  if (size >= copy_block_size) {
    #pragma omp parallel for num_threads(engine::OpenMP::Get()->GetRecommendedOMPThreadCount())
    for (index_t i = 0; i < size; ++i) {
      dst[i] = src[i];
    }
  } else {
    std::memcpy(dst, src, sizeof(DType) * size);
  }
}

/*! \brief parallelize add by OpenMP. */
template<typename DType>
inline void ParallelAdd(DType* dst, const DType* src, index_t size) {
  static index_t add_block_size = dmlc::GetEnv("MXNET_CPU_PARALLEL_SIZE", 200000);
  if (size >= add_block_size) {
    #pragma omp parallel for num_threads(engine::OpenMP::Get()->GetRecommendedOMPThreadCount())
    for (index_t i = 0; i < size; ++i) {
      dst[i] += src[i];
    }
  } else {
    for (index_t i = 0; i < size; ++i) {
      dst[i] += src[i];
    }
  }
}
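
/*!
 * Illustrative usage sketch (editor's addition, not part of the original
 * header): both helpers switch to OpenMP only at or above
 * MXNET_CPU_PARALLEL_SIZE elements (default 200000).
 * \code
 * std::vector<float> src(1 << 20, 1.f), dst(1 << 20, 0.f);
 * ParallelCopy(dst.data(), src.data(), static_cast<index_t>(src.size()));  // dst == src
 * ParallelAdd(dst.data(), src.data(), static_cast<index_t>(src.size()));   // dst[i] == 2.f
 * \endcode
 */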

/*!
 * \brief If numpy compatibility is turned off (default), the shapes passed in
 *        by users follow the legacy shape definition, where ndim = 0 and
 *        dim size = 0 both mean unknown. This function converts such a shape
 *        to the numpy definition, where unknown is expressed as -1.
 */
inline void ConvertToNumpyShape(mxnet::TShape* shape) {
  if (shape->ndim() == 0) {  // legacy shape ndim = 0 means unknown
    *shape = mxnet::TShape();  // unknown shape ndim = -1
  } else {
    for (int j = 0; j < shape->ndim(); ++j) {
      if ((*shape)[j] == 0) {  // legacy shape dim_size = 0 means unknown
        (*shape)[j] = -1;  // unknown dim size = -1
      }
    }
  }
}

inline void ConvertToNumpyShape(mxnet::ShapeVector* shapes) {
  for (size_t i = 0; i < shapes->size(); ++i) {
    ConvertToNumpyShape(&(shapes->at(i)));
  }
}

/*!
 * \brief This function is used to convert shapes returned by the infer shape
 *        functions/pass to the legacy shape definition.
 */
inline void ConvertToLegacyShape(mxnet::TShape* shape) {
  if (!mxnet::ndim_is_known(*shape)) {
    *shape = mxnet::TShape(0, -1);
  } else {
    for (int j = 0; j < shape->ndim(); ++j) {
      if (!mxnet::dim_size_is_known(*shape, j)) {
        (*shape)[j] = 0;
      }
    }
  }
}

inline void ConvertToLegacyShape(mxnet::ShapeVector* shapes) {
  for (size_t i = 0; i < shapes->size(); ++i) {
    ConvertToLegacyShape(&(shapes->at(i)));
  }
}
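
/*!
 * Illustrative usage sketch (editor's addition, not part of the original
 * header): round-tripping a legacy shape through the numpy convention.
 * \code
 * mxnet::TShape s(2, 0);     // legacy: two dims of unknown size
 * ConvertToNumpyShape(&s);   // s == [-1, -1]
 * ConvertToLegacyShape(&s);  // s == [0, 0]
 * \endcode
 */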

void ExecuteMonInputCallback(
    const nnvm::IndexedGraph &idx, const std::vector<NDArray *> &state_arrays,
    size_t nid, const std::function<void(const char *, const char *, void *)>
                    &monitor_callback);

void ExecuteMonOutputCallback(
    const nnvm::IndexedGraph &idx, const std::vector<NDArray *> &state_arrays,
    size_t nid, const std::function<void(const char *, const char *, void *)>
                    &monitor_callback);

static inline std::string GetOutputName(const nnvm::NodeEntry& e) {
  nnvm::Symbol sym;
  sym.outputs.push_back(e);
  return sym.ListOutputNames()[0];
}

inline mxnet::TShape CanonicalizeAxes(const mxnet::TShape& src) {
  // convert negative axes to positive values
  const int ndim = src.ndim();
  mxnet::TShape axes = src;
  for (int i = 0; i < ndim; ++i) {
    if (axes[i] < 0) {
      axes[i] += ndim;
    }
    CHECK(axes[i] >= 0 && axes[i] < ndim) << "axes[" << i << "]="
                                          << axes[i] << " exceeds the range ["
                                          << 0 << ", " << ndim << ")";
  }
  return axes;
}
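
/*!
 * Illustrative usage sketch (editor's addition, not part of the original
 * header): negative axes count from the end, as in numpy.
 * \code
 * mxnet::TShape axes(3, 0);
 * axes[0] = -1; axes[1] = 0; axes[2] = 1;
 * // CanonicalizeAxes(axes) == [2, 0, 1]
 * \endcode
 */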

inline bool is_float(const int dtype) {
  return dtype == mshadow::kFloat32 || dtype == mshadow::kFloat64 || dtype == mshadow::kFloat16;
}

inline bool is_int(const int dtype) {
  return dtype == mshadow::kUint8 || dtype == mshadow::kInt8 ||
         dtype == mshadow::kInt32 || dtype == mshadow::kInt64;
}

inline int get_more_precise_type(const int type1, const int type2) {
  if (type1 == type2) return type1;
  if (is_float(type1) && is_float(type2)) {
    if (type1 == mshadow::kFloat64 || type2 == mshadow::kFloat64) {
      return mshadow::kFloat64;
    }
    if (type1 == mshadow::kFloat32 || type2 == mshadow::kFloat32) {
      return mshadow::kFloat32;
    }
    return mshadow::kFloat16;
  } else if (is_float(type1) || is_float(type2)) {
    return is_float(type1) ? type1 : type2;
  }
  if (type1 == mshadow::kInt64 || type2 == mshadow::kInt64) {
    return mshadow::kInt64;
  }
  if (type1 == mshadow::kInt32 || type2 == mshadow::kInt32) {
    return mshadow::kInt32;
  }
  CHECK(!((type1 == mshadow::kUint8 && type2 == mshadow::kInt8) ||
          (type1 == mshadow::kInt8 && type2 == mshadow::kUint8)))
      << "The UInt8/Int8 combination has no common 8-bit type and should not reach here";
  if (type1 == mshadow::kUint8 || type2 == mshadow::kUint8) {
    return mshadow::kUint8;
  }
  return mshadow::kInt8;
}

inline int np_binary_out_infer_type(const int type1, const int type2) {
  if ((type1 == mshadow::kUint8 && type2 == mshadow::kInt8) ||
      (type1 == mshadow::kInt8 && type2 == mshadow::kUint8)) {
    return mshadow::kInt32;
  }
  return get_more_precise_type(type1, type2);
}
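
/*!
 * Illustrative note (editor's addition, not part of the original header):
 * numpy-style promotion for mixed operands.
 * \code
 * // uint8 and int8 share no common 8-bit type, so the result widens:
 * // np_binary_out_infer_type(mshadow::kUint8, mshadow::kInt8) == mshadow::kInt32
 * // any float beats any int; the wider float wins among floats:
 * // np_binary_out_infer_type(mshadow::kInt64, mshadow::kFloat16) == mshadow::kFloat16
 * \endcode
 */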

inline int GetDefaultDtype() {
  return Imperative::Get()->is_np_default_dtype() ?
         mshadow::kFloat64 :
         mshadow::kFloat32;
}

inline int GetDefaultDtype(int dtype) {
  if (dtype != -1) return dtype;
  return Imperative::Get()->is_np_default_dtype() ?
         mshadow::kFloat64 :
         mshadow::kFloat32;
}

}  // namespace common
}  // namespace mxnet
#endif  // MXNET_COMMON_UTILS_H_