mxnet
utils.h
Go to the documentation of this file.
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one
3  * or more contributor license agreements. See the NOTICE file
4  * distributed with this work for additional information
5  * regarding copyright ownership. The ASF licenses this file
6  * to you under the Apache License, Version 2.0 (the
7  * "License"); you may not use this file except in compliance
8  * with the License. You may obtain a copy of the License at
9  *
10  * http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing,
13  * software distributed under the License is distributed on an
14  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15  * KIND, either express or implied. See the License for the
16  * specific language governing permissions and limitations
17  * under the License.
18  */
19 
24 #ifndef MXNET_COMMON_UTILS_H_
25 #define MXNET_COMMON_UTILS_H_
26 
27 #include <dmlc/logging.h>
28 #include <dmlc/omp.h>
29 #include <nnvm/graph.h>
30 #include <mxnet/engine.h>
31 #include <mxnet/ndarray.h>
32 #include <mxnet/op_attr_types.h>
33 #include <mxnet/graph_attr_types.h>
34 #include <nnvm/graph_attr_types.h>
35 
36 #include <memory>
37 #include <vector>
38 #include <type_traits>
39 #include <utility>
40 #include <random>
41 #include <string>
42 #include <thread>
43 #include <algorithm>
44 #include <functional>
45 
46 namespace mxnet {
47 namespace common {
48 
// Declaration only; no definition is visible in this header listing.
// NOTE(review): judging by the name and parameters, this presumably casts
// `input` into the storage type of `output` for device type `xpu` --
// confirm against the definition.
49 template<typename xpu>
50 void CastStorageDispatch(const OpContext& ctx, const NDArray& input, const NDArray& output);
51 
55 inline bool ContainsOnlyStorage(const StorageTypeVector& vstorage,
56  const NDArrayStorageType stype) {
57  if (!vstorage.empty()) {
58  for (const auto& i : vstorage) {
59  if (i != stype) return false;
60  }
61  return true;
62  }
63  return false;
64 }
65 
70 inline bool ContainsOnlyStorage(const StorageTypeVector& vstorage,
71  const NDArrayStorageType stype1,
72  const NDArrayStorageType stype2,
73  bool *has_both) {
74  if (has_both) {
75  *has_both = false;
76  }
77  if (!vstorage.empty()) {
78  uint8_t has = 0;
79  for (const auto i : vstorage) {
80  if (i == stype1) {
81  has |= 1;
82  } else if (i == stype2) {
83  has |= 2;
84  } else {
85  return false;
86  }
87  }
88  if (has_both) {
89  *has_both = has == 3;
90  }
91  return true;
92  }
93  return false;
94 }
95 
99 inline bool ContainsOnlyStorage(const std::vector<NDArray>& ndarrays,
100  const NDArrayStorageType stype) {
101  if (!ndarrays.empty()) {
102  for (const auto& nd : ndarrays) {
103  if (nd.storage_type() != stype) {
104  return false;
105  }
106  }
107  return true;
108  }
109  return false;
110 }
111 
115 inline bool ContainsOnlyStorage(const std::vector<NDArray>& ndarrays,
116  const NDArrayStorageType stype1,
117  const NDArrayStorageType stype2,
118  bool *has_both) {
119  if (has_both) {
120  *has_both = false;
121  }
122  if (!ndarrays.empty()) {
123  uint8_t has = 0;
124  for (const auto& nd : ndarrays) {
125  const NDArrayStorageType stype = nd.storage_type();
126  if (stype == stype1) {
127  has |= 1;
128  } else if (stype == stype2) {
129  has |= 2;
130  } else {
131  return false;
132  }
133  }
134  if (has_both) {
135  *has_both = has == 3;
136  }
137  return true;
138  }
139  return false;
140 }
141 
143 inline std::string dispatch_mode_string(const DispatchMode x) {
144  switch (x) {
146  return "fcompute";
148  return "fcompute_ex";
150  return "fcompute_fallback";
152  return "variable";
154  return "undefined";
155  }
156  return "unknown";
157 }
158 
159 
161 inline std::string stype_string(const int x) {
162  switch (x) {
163  case kDefaultStorage:
164  return "default";
165  case kCSRStorage:
166  return "csr";
167  case kRowSparseStorage:
168  return "row_sparse";
169  }
170  return "unknown";
171 }
172 
173 // heuristic to dermine number of threads per GPU
174 inline int GetNumThreadPerGPU() {
175  // This is resource efficient option.
176  return dmlc::GetEnv("MXNET_GPU_WORKER_NTHREADS", 2);
177 }
178 
179 // heuristic to get number of matching colors.
180 // this decides how much parallelism we can get in each GPU.
181 inline int GetExecNumMatchColor() {
182  // This is resource efficient option.
183  int num_match_color = dmlc::GetEnv("MXNET_EXEC_NUM_TEMP", 1);
184  return std::min(num_match_color, GetNumThreadPerGPU());
185 }
186 
/*!
 * \brief Sum the first `n` elements of `a` on top of an initial value.
 * \param a pointer to the elements to add up
 * \param n number of elements to read from `a`; nothing is read if n <= 0
 * \param start initial value of the running total
 * \return `start` plus a[0] ... a[n-1], accumulated in type V
 * \tparam T element type
 * \tparam V accumulator and result type
 */
template<typename T, typename V>
V ParallelAccumulate(const T* a, const int n, V start) {
  V total = start;
  // The loop carries an OpenMP "+" reduction on `total`.
#pragma omp parallel for reduction(+:total)
  for (int idx = 0; idx < n; ++idx) {
    total += a[idx];
  }
  return total;
}
196 
/*!
 * \brief Helper function for ParallelSort. DO NOT call this function
 *        directly; use the ParallelSort interface instead.
 *
 * Ranges shorter than `grainsize` (or shorter than two elements) are sorted
 * with std::sort. Longer ranges are split in half: the first half is sorted
 * in a newly started std::thread, the second half in the calling thread, and
 * the two sorted halves are combined with std::inplace_merge.
 *
 * \param first iterator to the first element of the range
 * \param len number of elements in the range
 * \param grainsize ranges shorter than this are sorted without splitting
 * \param comp comparison functor
 */
template<typename RandomIt, typename Compare>
void ParallelSortHelper(RandomIt first, size_t len,
                        size_t grainsize, const Compare& comp) {
  // A range of fewer than two elements cannot be split any further. Sorting
  // it here also terminates the recursion when grainsize is 0 or 1, which
  // would otherwise keep splitting a one-element range forever.
  if (len < grainsize || len < 2) {
    std::sort(first, first + len, comp);
  } else {
    const size_t half = len / 2;
    std::thread thr(ParallelSortHelper<RandomIt, Compare>, first, half, grainsize, comp);
    ParallelSortHelper(first + half, len - half, grainsize, comp);
    thr.join();
    std::inplace_merge(first, first + half, first + len, comp);
  }
}

/*!
 * \brief Sort the elements in the range [first, last) into the order defined
 *        by the comparator `comp`.
 *
 * The grain size handed to the helper is max(num / num_threads + 5, 16384),
 * so ranges below 16384 elements are sorted in the calling thread only.
 *
 * \param first iterator to the first element of the range
 * \param last iterator one past the last element of the range
 * \param num_threads divisor used to derive the grain size; a value of 0 is
 *        treated as 1
 * \param comp comparison functor
 */
template<typename RandomIt, typename Compare>
void ParallelSort(RandomIt first, RandomIt last, size_t num_threads, Compare comp) {
  const size_t num = static_cast<size_t>(std::distance(first, last));
  // Guard against num_threads == 0, which would be a division by zero below.
  const size_t threads = std::max(num_threads, static_cast<size_t>(1));
  const size_t grainsize = std::max(num / threads + 5, static_cast<size_t>(1024 * 16));
  ParallelSortHelper(first, num, grainsize, comp);
}
232 
/*!
 * \brief Sort the elements in the range [first, last) into ascending order.
 *
 * Forwards to the comparator-taking ParallelSort overload with std::less of
 * the iterator's value type.
 *
 * \param first iterator to the first element of the range
 * \param last iterator one past the last element of the range
 * \param num_threads passed through unchanged to the comparator overload
 */
template<typename RandomIt>
void ParallelSort(RandomIt first, RandomIt last, size_t num_threads) {
  typedef typename std::iterator_traits<RandomIt>::value_type ValueType;
  ParallelSort(first, last, num_threads, std::less<ValueType>());
}
247 
/*! \brief Random Engine: alias for the standard std::mt19937 generator. */
using RANDOM_ENGINE = std::mt19937;
252 
namespace helper {

/*!
 * \brief Helper for non-array type T.
 *
 * Each UniqueIf variant exposes exactly one member type, so a return type
 * spelled through it names a valid type for only one kind of T.
 */
template <typename T>
struct UniqueIf {
  /*! \brief Type of T: owning pointer to a single object. */
  typedef std::unique_ptr<T> SingleObject;
};

/*!
 * \brief Helper for an array of unknown bound T[].
 */
template <typename T>
struct UniqueIf<T[]> {
  /*! \brief Type of T: owning pointer to an array of unknown bound. */
  typedef std::unique_ptr<T[]> UnknownBound;
};

/*!
 * \brief Helper for an array of known bound T[kSize].
 */
template <typename T, size_t kSize>
struct UniqueIf<T[kSize]> {
  /*! \brief Type of T: void, no unique_ptr type is provided for this case. */
  typedef void KnownBound;
};

}  // namespace helper
292 
304 template <class T, class... Args>
306  return std::unique_ptr<T>(new T(std::forward<Args>(args)...));
307 }
308 
318 template <class T>
320  using U = typename std::remove_extent<T>::type;
321  return std::unique_ptr<T>(new U[n]{});
322 }
323 
332 template <class T, class... Args>
333 typename helper::UniqueIf<T>::KnownBound MakeUnique(Args&&... args) = delete;
334 
335 template<typename FCompType>
336 FCompType GetFCompute(const nnvm::Op* op, const std::string& name,
337  const Context& ctx) {
338  static auto& fcompute_cpu = nnvm::Op::GetAttr<FCompType>(name + "<cpu>");
339  static auto& fcompute_gpu = nnvm::Op::GetAttr<FCompType>(name + "<gpu>");
340 
341  if (ctx.dev_mask() == cpu::kDevMask) {
342  return fcompute_cpu.get(op, nullptr);
343  } else if (ctx.dev_mask() == gpu::kDevMask) {
344  return fcompute_gpu.get(op, nullptr);
345  } else {
346  LOG(FATAL) << "Unknown device mask";
347  return nullptr;
348  }
349 }
350 
351 } // namespace common
352 } // namespace mxnet
353 #endif // MXNET_COMMON_UTILS_H_
Definition: ndarray.h:60
NDArrayStorageType
Definition: ndarray.h:58
Engine that schedules all the operations according to dependency.
Symbol sort(const std::string &symbol_name, Symbol data, dmlc::optional< int > axis=dmlc::optional< int >(-1), bool is_ascend=1)
Definition: op.h:2487
namespace of mxnet
Definition: base.h:126
Additional operator attributes beside the ones provided by NNVM.
void KnownBound
Type of T.
Definition: utils.h:288
void ParallelSortHelper(RandomIt first, size_t len, size_t grainsize, const Compare &comp)
Helper function for ParallelSort. DO NOT call this function directly. Use the interface ParallelSort instead.
Definition: utils.h:205
FCompType GetFCompute(const nnvm::Op *op, const std::string &name, const Context &ctx)
Definition: utils.h:336
V ParallelAccumulate(const T *a, const int n, V start)
Definition: utils.h:188
int GetNumThreadPerGPU()
Definition: utils.h:174
int dev_mask() const
Get corresponding device mask.
Definition: base.h:158
DispatchMode
the dispatch mode of the operator
Definition: op_attr_types.h:106
std::string stype_string(const int x)
get string representation of storage_type
Definition: utils.h:161
Symbol max(const std::string &symbol_name, Symbol data, Shape axis=Shape(), bool keepdims=0, bool exclude=0)
Definition: op.h:2230
Definition: ndarray.h:62
void CastStorageDispatch(const OpContext &ctx, const NDArray &input, const NDArray &output)
void ParallelSort(RandomIt first, RandomIt last, size_t num_threads, Compare comp)
Sort the elements in the range [first, last) into the ascending order defined by the comparator comp.
Definition: utils.h:227
bool ContainsOnlyStorage(const StorageTypeVector &vstorage, const NDArrayStorageType stype)
returns true if all storage types in vstorage are the same as target stype. false is returned for empty inputs.
Definition: utils.h:55
std::mt19937 RANDOM_ENGINE
Random Engine.
Definition: utils.h:251
Symbol sum(const std::string &symbol_name, Symbol data, Shape axis=Shape(), bool keepdims=0, bool exclude=0)
Definition: op.h:2041
std::string dispatch_mode_string(const DispatchMode x)
get string representation of dispatch_mode
Definition: utils.h:143
Helper for non-array type T.
Definition: utils.h:262
Data structures that can appear in graph attributes.
Definition: ndarray.h:61
std::unique_ptr< T[]> UnknownBound
Type of T.
Definition: utils.h:277
nnvm::Op Op
operator structure from NNVM
Definition: base.h:138
std::unique_ptr< T > SingleObject
Type of T.
Definition: utils.h:266
int GetExecNumMatchColor()
Definition: utils.h:181
Symbol min(const std::string &symbol_name, Symbol data, Shape axis=Shape(), bool keepdims=0, bool exclude=0)
Definition: op.h:2267
helper::UniqueIf< T >::SingleObject MakeUnique(Args &&...args)
Constructs an object of type T and wraps it in a std::unique_ptr.
Definition: utils.h:305
Context information about the execution environment.
Definition: base.h:141
std::vector< int > StorageTypeVector
The result holder of storage type of each NodeEntry in the graph.
Definition: graph_attr_types.h:45