exec_utils.h
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one
3  * or more contributor license agreements. See the NOTICE file
4  * distributed with this work for additional information
5  * regarding copyright ownership. The ASF licenses this file
6  * to you under the Apache License, Version 2.0 (the
7  * "License"); you may not use this file except in compliance
8  * with the License. You may obtain a copy of the License at
9  *
10  * http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing,
13  * software distributed under the License is distributed on an
14  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15  * KIND, either express or implied. See the License for the
16  * specific language governing permissions and limitations
17  * under the License.
18  */
19 
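20 /*!
21  * \file exec_utils.h
22  * \brief Common utility functions for executors.
23  */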
24 #ifndef MXNET_COMMON_EXEC_UTILS_H_
25 #define MXNET_COMMON_EXEC_UTILS_H_
26 
27 #include <nnvm/graph.h>
28 #include <nnvm/pass_functions.h>
29 #include <map>
30 #include <vector>
31 #include <string>
32 #include <utility>
33 #include "../common/utils.h"
34 #include "../executor/exec_pass.h"
35 
36 namespace mxnet {
37 namespace common {
38 
39 /*
40  * \brief Set up default-storage tblobs from source NDArrays. If any source NDArray has
41  * non-default storage, a temporary NDArray with default storage is created and its tblob
42  * is used instead. The function also records the indices of the non-default source NDArrays
43  * and the indices of their corresponding temporary NDArrays in temp_dst.
44  * \param src list of source NDArrays
45  * \param bufs optional pre-allocated NDArrays to reuse as the temporary default-storage arrays
46  * \param blobs list of tblobs to return
47  * \param temp_src list of source NDArrays which require a temporary default-storage representation
48  * \param temp_dst list of temporary destination NDArrays holding the default-storage representation
49  * \param idx_map mapping from indices in src to the indices of their temporaries in temp_dst
50  * \return true if any source NDArray required a storage cast
51  */
52 inline bool SetupDefaultBlobsIn(const std::vector<NDArray>& src,
53  const std::vector<NDArray> *bufs,
54  std::vector<TBlob> *blobs,
55  std::vector<NDArray> *temp_src,
56  std::vector<NDArray> *temp_dst,
57  std::unordered_map<uint32_t, uint32_t> *idx_map) {
58  bool require_cast = false;
59  for (size_t i = 0; i < src.size(); i++) {
60  auto& nd = src[i];
61  bool is_default = nd.storage_type() == kDefaultStorage;
62 #if MXNET_USE_MKLDNN == 1
63  // We have to make sure it's default storage and default layout.
64  is_default = nd.IsDefaultData();
65 #endif
66  if (!is_default) {
67  (*idx_map)[i] = temp_dst->size();
68  NDArray temp = bufs != nullptr ? bufs->at(i) : NDArray(nd.shape(), nd.ctx(),
69  true, nd.dtype());
70 #if MXNET_USE_MKLDNN == 1
71  CHECK(temp.IsDefaultData());
72 #endif
73  temp_src->emplace_back(nd);
74  temp_dst->emplace_back(temp);
75  blobs->emplace_back(temp.data());
76  require_cast = true;
77  } else {
78  blobs->push_back(nd.data());
79  }
80  }
81  return require_cast;
82 }
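// A minimal usage sketch for SetupDefaultBlobsIn (illustrative only; the variable names
// below are hypothetical and not part of this header):
//
//   std::vector<TBlob> in_blobs;
//   std::vector<NDArray> pre_src, pre_dst;
//   std::unordered_map<uint32_t, uint32_t> idx_map;
//   // passing nullptr for bufs means temporaries are freshly allocated with default storage
//   bool needs_cast = SetupDefaultBlobsIn(ndinputs, nullptr, &in_blobs,
//                                         &pre_src, &pre_dst, &idx_map);
//   // if needs_cast is true, the entries of pre_src still have to be cast into pre_dst
//   // (see CastNonDefaultStorage below) before in_blobs can be consumed by an FCompute.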
83 
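/*
 * \brief Set up default-storage tblobs from output NDArrays, mirroring SetupDefaultBlobsIn.
 * Non-default outputs are replaced by temporary default-storage NDArrays; when MKLDNN is
 * enabled, a kWriteInplace request whose output holds an MKLDNN layout is downgraded to
 * kWriteTo, since the temporary output is no longer the same array as the input.
 */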
84 inline bool SetupDefaultBlobsOut(const std::vector<NDArray>& src,
85  const std::vector<NDArray> *bufs,
86  std::vector<OpReqType> *req,
87  std::vector<TBlob> *blobs,
88  std::vector<NDArray> *temp_src,
89  std::vector<NDArray> *temp_dst) {
90  bool require_cast = false;
91  for (size_t i = 0; i < src.size(); i++) {
92  auto& nd = src[i];
93  bool is_default = nd.storage_type() == kDefaultStorage;
94 #if MXNET_USE_MKLDNN == 1
95  if (req->at(i) == kWriteInplace && nd.IsMKLDNNData())
96  // If it's write inplace and the output array doesn't use the default
97  // layout, we'll generate a temporary output array below, which means
98  // the input array and the output array are no longer the same array.
99  // we should change the request type.
100  req->at(i) = kWriteTo;
101  // We have to make sure it's default storage and default layout.
102  is_default = nd.IsDefaultData();
103 #endif
104  if (!is_default) {
105 #if MXNET_USE_MKLDNN == 1
106  NDArray temp;
107  if (bufs != nullptr) {
108  temp = bufs->at(i);
109  } else if (kAddTo == req->at(i) && nd.IsMKLDNNData()) {
110  temp = nd.Reorder2Default();
111  } else if (kAddTo == req->at(i)) {
112  temp = nd;
113  } else {
114  temp = NDArray(nd.shape(), nd.ctx(), true, nd.dtype());
115  }
116  CHECK(temp.IsDefaultData());
117 #else
118  NDArray temp = bufs != nullptr ? bufs->at(i) : NDArray(nd.shape(), nd.ctx(),
119  true, nd.dtype());
120 #endif
121  temp_src->emplace_back(nd);
122  temp_dst->emplace_back(temp);
123  blobs->emplace_back(temp.data());
124  require_cast = true;
125  } else {
126  blobs->push_back(nd.data());
127  }
128  }
129  return require_cast;
130 }
131 
132 /*
133  * \brief Set up default-storage tblobs for both input and output NDArrays.
134  * If any NDArray has non-default storage, a temporary NDArray with default storage
135  * is created and its tblob is used instead. The function also records the indices of
136  * the non-default NDArrays and the indices of their corresponding temporaries in the
137  * pre/post temp lists, so that they can be cast before and after the operator runs.
138  */
139 inline void SetupDefaultBlobsInOut(const std::vector<NDArray> &ndinputs,
140  const std::vector<NDArray> &ndoutputs,
141  const std::vector<NDArray> *in_bufs,
142  const std::vector<NDArray> *out_bufs,
143  std::vector<OpReqType> *req,
144  std::vector<TBlob> *input_blobs,
145  std::vector<TBlob> *output_blobs,
146  std::vector<NDArray> *pre_temp_src,
147  std::vector<NDArray> *pre_temp_dst,
148  std::vector<NDArray> *post_temp_src,
149  std::vector<NDArray> *post_temp_dst,
150  std::unordered_map<uint32_t, uint32_t> *in_temp_idx_map,
151  const std::vector<uint32_t> &mutate_idx) {
152  // populate input blobs
153  SetupDefaultBlobsIn(ndinputs, in_bufs, input_blobs, pre_temp_src, pre_temp_dst,
154  in_temp_idx_map);
155  // populate output blobs; post_temp_dst/post_temp_src are passed swapped so the temporaries can later be cast back into ndoutputs
156  SetupDefaultBlobsOut(ndoutputs, out_bufs, req, output_blobs, post_temp_dst,
157  post_temp_src);
158  // add mutable inputs to post temp list
159  for (const auto idx : mutate_idx) {
160  auto map_iter = in_temp_idx_map->find(idx);
161  if (map_iter != in_temp_idx_map->end()) {
162  post_temp_src->push_back(pre_temp_dst->at(map_iter->second));
163  post_temp_dst->push_back(ndinputs[idx]);
164  }
165  }
166 }
167 
168 /*
169  * \brief Cast the NDArrays in `src` and store the results in the NDArrays in `dst`.
170  * This is only used for storage fallback in the executor.
171  * \param src list of source NDArrays to cast
172  * \param dst list of destination NDArrays which hold the results of the cast_storage operation
173  * \param ctx operator context for the cast_storage operation; is_gpu selects the GPU or CPU path
174  */
175 inline void CastNonDefaultStorage(const std::vector<NDArray>& src,
176  const std::vector<NDArray>& dst,
177  const OpContext& ctx,
178  const bool is_gpu) {
179  CHECK_EQ(dst.size(), src.size());
180  for (size_t i = 0; i < src.size(); i++) {
181  if (is_gpu) {
182 #if MXNET_USE_CUDA
183  CastStorageDispatch<gpu>(ctx, src[i], dst[i]);
184 #else
185  LOG(FATAL) << MXNET_GPU_NOT_ENABLED_ERROR;
186 #endif
187  } else {
188  CastStorageDispatch<cpu>(ctx, src[i], dst[i]);
189  }
190  }
191 }
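// A sketch of the storage-fallback flow these helpers support (illustrative only; the
// surrounding executor/imperative code is simplified and the variable names are hypothetical):
//
//   std::vector<TBlob> in_blobs, out_blobs;
//   std::vector<NDArray> pre_src, pre_dst, post_src, post_dst;
//   std::unordered_map<uint32_t, uint32_t> in_idx_map;
//   SetupDefaultBlobsInOut(ndinputs, ndoutputs, nullptr, nullptr, &req,
//                          &in_blobs, &out_blobs,
//                          &pre_src, &pre_dst, &post_src, &post_dst,
//                          &in_idx_map, mutate_idx);
//   CastNonDefaultStorage(pre_src, pre_dst, op_ctx, is_gpu);   // inputs -> default-storage temps
//   fcompute(attrs, op_ctx, in_blobs, req, out_blobs);         // run the dense kernel
//   CastNonDefaultStorage(post_src, post_dst, op_ctx, is_gpu); // temps -> original outputs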
192 
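193 /* \brief The default type inference function, which assigns all undefined
194  * types to the same dtype as one of the already-defined inputs or outputs.
195  */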
196 inline bool SameType(const nnvm::NodeAttrs& attrs,
197  std::vector<int> *iattr,
198  std::vector<int> *oattr) {
199  int def_v = -1;
200  for (int v : *oattr) {
201  if (v != -1) {
202  def_v = v; break;
203  }
204  }
205  if (def_v == -1) {
206  for (int v : *iattr) {
207  if (v != -1) {
208  def_v = v; break;
209  }
210  }
211  }
212  if (def_v == -1) return false;
213  for (int& v : *oattr) {
214  v = def_v;
215  }
216  for (int& v : *iattr) {
217  v = def_v;
218  }
219  return true;
220 }
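// For example (a sketch; `attrs` stands for any nnvm::NodeAttrs instance):
//
//   std::vector<int> in_types = {mshadow::kFloat32, -1};
//   std::vector<int> out_types = {-1};
//   SameType(attrs, &in_types, &out_types);  // every -1 entry becomes kFloat32, returns true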
221 
222 
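223 /*
224  * \brief The default storage type inference function, which assigns all undefined storage
225  * types to kDefaultStorage. The dispatch mode is set to DispatchMode::kFCompute when every
226  * input and output uses kDefaultStorage, and to DispatchMode::kFComputeFallback otherwise.
227  */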
228 inline bool DefaultStorageType(const nnvm::NodeAttrs& attrs,
229  const int dev_mask,
230  DispatchMode* dispatch_mode,
231  std::vector<int> *iattr,
232  std::vector<int> *oattr) {
233  bool fallback = false;
234  for (int& v : *oattr) {
235  if (v == -1) v = kDefaultStorage;
236  if (v != kDefaultStorage) fallback = true;
237  }
238  for (int& v : *iattr) {
239  if (v == -1) v = kDefaultStorage;
240  if (v != kDefaultStorage) fallback = true;
241  }
242  if (*dispatch_mode == DispatchMode::kUndefined) {
243  if (fallback) {
244  *dispatch_mode = DispatchMode::kFComputeFallback;
245  } else {
246  *dispatch_mode = DispatchMode::kFCompute;
247  }
248  }
249  return true;
250 }
251 
252 // string representation of storage id
253 inline std::string storage_str(int storage_id) {
254  std::string str;
255  if (storage_id == -1) {
256  str = "var (-1)";
257  } else if (storage_id == -2) {
258  str = "external storage (-2)";
259  } else {
260  str = "group " + std::to_string(storage_id);
261  }
262  return str;
263 }
264 
265 /* log the static memory plan of the graph. Example:
266  node 0 var
267  node 1 _copy
268  input 0: [80,3,224,224] (47040 KB) -> var storage (-1)
269  output 1: [80,3,224,224] (47040 KB) -> group 0
270  node 2 var
271  node 3 var
272  node 4 var
273  node 5 var
274  node 6 BatchNorm
275  input 1: [80,3,224,224] (47040 KB) -> group 0
276  input 2: [3] (0 KB) -> var storage (-1)
277  input 3: [3] (0 KB) -> var storage (-1)
278  input 4: [3] (0 KB) -> var storage (-1)
279  input 5: [3] (0 KB) -> var storage (-1)
280  output 6: [80,3,224,224] (47040 KB) -> group 1
281  output 7: [3] (0 KB) -> group 3
282  output 8: [3] (0 KB) -> group 2
283  ...
284  */
285 inline void LogMemoryPlan(const nnvm::Graph& g) {
286  const auto &idx = g.indexed_graph();
287  const auto& vshape = g.GetAttr<nnvm::ShapeVector>("shape");
288  const auto& vtype = g.GetAttr<nnvm::DTypeVector>("dtype");
289  const auto& vstorage = g.GetAttr<nnvm::StorageVector>("storage_id");
290  // find node range
291  uint32_t node_start = 0, node_end = idx.num_nodes();
292  if (g.attrs.count("node_range")) {
293  const auto& range = g.GetAttr<std::pair<uint32_t, uint32_t> >("node_range");
294  node_start = range.first;
295  node_end = range.second;
296  }
297  for (uint32_t nid = node_start; nid < node_end; ++nid) {
298  const auto& inode = idx[nid];
299  if (inode.source->is_variable()) {
300  LOG(INFO) << "node " << nid << " var";
301  } else {
302  LOG(INFO) << "node " << nid << " " << inode.source->attrs.op->name;
303  for (const auto& e : inode.inputs) {
304  auto eid = idx.entry_id(e);
305  size_t kilo_bytes = vshape[eid].Size() * mshadow::mshadow_sizeof(vtype[eid]) / 1024;
306  LOG(INFO) << "\t\tinput " << eid << ": " << vshape[eid] << " ("
307  << kilo_bytes << " KB) -> " << storage_str(vstorage[eid]);
308  }
309  for (uint32_t index = 0; index < inode.source->num_outputs(); ++index) {
310  uint32_t eid = idx.entry_id(nid, index);
311  size_t kilo_bytes = vshape[eid].Size() * mshadow::mshadow_sizeof(vtype[eid]) / 1024;
312  LOG(INFO) << "\t\toutput " << eid << ": " << vshape[eid] << " ("
313  << kilo_bytes << " KB) -> " << storage_str(vstorage[eid]);
314  }
315  }
316  }
317 }
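// LogMemoryPlan reads the "shape", "dtype" and "storage_id" graph attributes, so it should
// only be called after shape/dtype inference and memory planning have run. A typical call
// site looks like the following sketch (the environment-variable gate is an assumption here;
// the exact flag name may vary by version):
//
//   if (dmlc::GetEnv("MXNET_MEM_PLAN_VERBOSE_LOGGING", false)) {
//     LogMemoryPlan(g);
//   }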
318 
319 /* log the inferred storage types and dispatch modes of the graph. Example:
320  node 0 var
321  node 1 _copy: fcompute
322  input 0: default
323  output 1: default
324  node 2 var
325  node 3 Convolution: fcompute
326  input 1: default
327  input 2: default
328  output 3: default
329  node 4 var
330  node 5 var
331  node 6 var
332  node 7 var
333  node 8 BatchNorm: fcompute
334  input 3: default
335  input 4: default
336  input 5: default
337  input 6: default
338  input 7: default
339  output 8: default
340  output 9: default
341  output 10: default
342  ...
343  */
344 inline void LogInferStorage(const nnvm::Graph& g) {
345  const auto &idx = g.indexed_graph();
346  const auto& vstorage_type = g.GetAttr<StorageTypeVector>("storage_type");
347  const auto& dispatch_modes = g.GetAttr<DispatchModeVector>("dispatch_mode");
348  uint32_t node_start = 0, node_end = idx.num_nodes();
349  if (g.attrs.count("node_range")) {
350  const auto& range = g.GetAttr<std::pair<uint32_t, uint32_t> >("node_range");
351  node_start = range.first;
352  node_end = range.second;
353  }
354  for (uint32_t nid = node_start; nid < node_end; ++nid) {
355  const auto& inode = idx[nid];
356  if (inode.source->is_variable()) {
357  LOG(INFO) << "node " << nid << " var";
358  } else {
359  LOG(INFO) << "node " << nid << " " << inode.source->attrs.op->name
360  << ": " << dispatch_mode_string(dispatch_modes[nid]);
361  for (const auto& e : inode.inputs) {
362  auto eid = idx.entry_id(e);
363  LOG(INFO) << "\t\tinput " << eid << ": " << stype_string(vstorage_type[eid]);
364  }
365  for (uint32_t index = 0; index < inode.source->num_outputs(); ++index) {
366  uint32_t eid = idx.entry_id(nid, index);
367  LOG(INFO) << "\t\toutput " << eid << ": " << stype_string(vstorage_type[eid]);
368  }
369  }
370  }
371 }
372 
373 // prints a helpful message after shape inference errors in executor.
374 inline void HandleInferShapeError(const size_t num_forward_inputs,
375  const nnvm::IndexedGraph& idx,
376  const nnvm::ShapeVector& inferred_shapes) {
377  int cnt = 10;
378  std::ostringstream oss;
379  for (size_t i = 0; i < num_forward_inputs; ++i) {
380  const uint32_t nid = idx.input_nodes().at(i);
381  const uint32_t eid = idx.entry_id(nid, 0);
382  const TShape& inferred_shape = inferred_shapes[eid];
383  if (inferred_shape.ndim() == 0 || inferred_shape.Size() == 0U) {
384  const std::string& arg_name = idx[nid].source->attrs.name;
385  oss << arg_name << ": " << inferred_shape << ", ";
386  if (--cnt == 0) {
387  oss << "...";
388  break;
389  }
390  }
391  }
392  LOG(FATAL) << "InferShape pass cannot decide shapes for the following arguments "
393  "(0s means unknown dimensions). Please consider providing them as inputs:\n"
394  << oss.str();
395 }
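// A sketch of how this handler is typically used (illustrative; it assumes the graph has
// already been through a shape-inference pass that records its result in the "shape"
// attribute and the number of still-unknown entries in "shape_num_unknown_nodes"):
//
//   if (g.GetAttr<size_t>("shape_num_unknown_nodes") != 0U) {
//     HandleInferShapeError(num_forward_inputs, g.indexed_graph(),
//                           g.GetAttr<nnvm::ShapeVector>("shape"));
//   }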
396 
397 // prints a helpful message after type inference errors in executor.
398 inline void HandleInferTypeError(const size_t num_forward_inputs,
399  const nnvm::IndexedGraph& idx,
400  const nnvm::DTypeVector& inferred_dtypes) {
401  int cnt = 10;
402  std::ostringstream oss;
403  for (size_t i = 0; i < num_forward_inputs; ++i) {
404  const uint32_t nid = idx.input_nodes().at(i);
405  const uint32_t eid = idx.entry_id(nid, 0);
406  const int inferred_dtype = inferred_dtypes[eid];
407  if (inferred_dtype == -1) {
408  const std::string& arg_name = idx[nid].source->attrs.name;
409  oss << arg_name << ": " << inferred_dtype << ", ";
410  if (--cnt == 0) {
411  oss << "...";
412  break;
413  }
414  }
415  }
416  LOG(FATAL) << "InferType pass cannot decide dtypes for the following arguments "
417  "(-1 means unknown dtype). Please consider providing them as inputs:\n"
418  << oss.str();
419 }
420 
421 // prints a helpful message after storage type checking errors in executor.
422 inline void HandleInferStorageTypeError(const size_t num_forward_inputs,
423  const nnvm::IndexedGraph& idx,
424  const StorageTypeVector& inferred_stypes) {
425  int cnt = 10;
426  std::ostringstream oss;
427  for (size_t i = 0; i < num_forward_inputs; ++i) {
428  const uint32_t nid = idx.input_nodes().at(i);
429  const uint32_t eid = idx.entry_id(nid, 0);
430  const int inferred_stype = inferred_stypes[eid];
431  if (inferred_stype == -1) {
432  const std::string& arg_name = idx[nid].source->attrs.name;
433  oss << arg_name << ": " << common::stype_string(inferred_stype) << ", ";
434  if (--cnt == 0) {
435  oss << "...";
436  break;
437  }
438  }
439  }
440  LOG(FATAL) << "InferStorageType pass cannot decide storage type for the following arguments "
441  "(-1 means unknown stype). Please consider providing them as inputs:\n"
442  << oss.str();
443 }
444 
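445 /*
446  * \brief If the requested ndarray's shape size is less than or equal to the corresponding
447  * shared_buffer entry's shape size and the storage type is shareable, reuse that entry's
448  * memory by reshaping it; otherwise create a new ndarray of zeros. Shareable storage types
449  * are kDefaultStorage and, when enable_row_sparse_sharing is true, kRowSparseStorage.
450  * Entries that are newly created with a shareable storage type are recorded in
451  * shared_buffer for later reuse; non-shareable arrays are never cached.
452  */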
453 inline NDArray ReshapeOrCreate(const std::string& name,
454  const TShape& dest_arg_shape,
455  const int dest_arg_dtype,
456  const NDArrayStorageType dest_arg_stype,
457  const Context& ctx,
458  std::unordered_map<std::string, NDArray>* shared_buffer,
459  bool enable_row_sparse_sharing) {
460  bool stype_shareable = dest_arg_stype == kDefaultStorage;
461  if (enable_row_sparse_sharing) {
462  stype_shareable = stype_shareable || dest_arg_stype == kRowSparseStorage;
463  }
464  auto it = shared_buffer->find(name);
465  if (it != shared_buffer->end()) {
466  // check if size is large enough for sharing
467  bool size_shareable = it->second.shape().Size() >= dest_arg_shape.Size();
468  if (size_shareable && stype_shareable) { // memory can be reused
469  CHECK_EQ(it->second.dtype(), dest_arg_dtype)
470  << "Requested arg array's dtype does not match that of the reusable ndarray";
471  CHECK_EQ(it->second.storage_type(), dest_arg_stype)
472  << "Requested arg array's stype does not match that of the reusable ndarray";
473  return it->second.Reshape(dest_arg_shape);
474  } else if (stype_shareable) {
475  LOG(WARNING) << "Bucketing: data " << name << " has a shape " << dest_arg_shape
476  << ", which is larger than already allocated shape " << it->second.shape()
477  << ". Need to re-allocate. Consider putting default bucket key to be "
478  << "the bucket taking the largest input for better memory sharing.";
479  // size is not large enough; create a larger one for sharing
480  // the NDArrays in shared_buffer are guaranteed to be of shareable storages
481  it->second = InitZeros(dest_arg_stype, dest_arg_shape, ctx, dest_arg_dtype);
482  return it->second;
483  } else {
484  // not shareable storage
485  return InitZeros(dest_arg_stype, dest_arg_shape, ctx, dest_arg_dtype);
486  }
487  } else {
488  auto ret = InitZeros(dest_arg_stype, dest_arg_shape, ctx, dest_arg_dtype);
489  if (stype_shareable) {
490  shared_buffer->emplace(name, ret);
491  }
492  return ret;
493  } // if (it != shared_buffer->end())
494 }
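// A usage sketch (illustrative; the names `shared_buffer`, "data" and the shapes are
// hypothetical caller-side values, as used when re-binding a bucketing executor):
//
//   std::unordered_map<std::string, NDArray> shared_buffer;
//   NDArray data = ReshapeOrCreate("data", TShape({32, 3, 224, 224}), mshadow::kFloat32,
//                                  kDefaultStorage, Context::CPU(), &shared_buffer,
//                                  /*enable_row_sparse_sharing=*/false);
//   // a later, smaller bucket reuses the same memory via reshape
//   NDArray data_small = ReshapeOrCreate("data", TShape({16, 3, 224, 224}), mshadow::kFloat32,
//                                        kDefaultStorage, Context::CPU(), &shared_buffer, false);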
495 
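496 /*
497  * \brief Assign a context to every node of the graph.
498  * This is triggered by both the simple_bind and bind flows.
499  */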
500 inline nnvm::Graph AssignContext(nnvm::Graph g,
501  const Context& default_ctx,
502  const std::map<std::string, Context>& ctx_map,
503  const std::vector<Context>& in_arg_ctxes,
504  const std::vector<Context>& arg_grad_ctxes,
505  const std::vector<Context>& aux_state_ctxes,
506  const std::vector<OpReqType>& grad_req_types,
507  size_t num_forward_inputs,
508  size_t num_forward_outputs) {
509  const auto& idx = g.indexed_graph();
510  const auto& mutable_nodes = idx.mutable_input_nodes();
511  // if no group2ctx mapping is given, assign the default context to every node.
512  if (ctx_map.size() == 0) {
513  g.attrs["context"] = std::make_shared<nnvm::any>(
514  exec::ContextVector(idx.num_nodes(), default_ctx));
515  for (const auto& x : in_arg_ctxes) {
516  CHECK(x == default_ctx)
517  << "Input array is in " << x << " while binding with ctx=" << default_ctx
518  << ". All arguments must be in global context (" << default_ctx
519  << ") unless group2ctx is specified for cross-device graph.";
520  }
521  for (const auto& x : arg_grad_ctxes) {
522  CHECK(x == default_ctx)
523  << "Gradient array is in " << x << " while binding with ctx="
524  << default_ctx << ". All gradients must be in global context (" << default_ctx
525  << ") unless group2ctx is specified for cross-device graph.";
526  }
527  return g;
528  }
529 
530  // otherwise, use context assignment.
531  std::map<Context, int> ctx2id; // map ctx to device id
532  std::vector<Context> ctx_list; // index is device id
533  nnvm::DeviceVector device(idx.num_nodes(), -1); // index is node id
534  nnvm::DeviceAssignMap device_map; // map arg name to device id
535 
536  // loop through the user input ctx_map and
537  // populate maps and lists
538  for (auto &kv : ctx_map) {
539  if (ctx2id.count(kv.second) == 0) { // if context has no device id, create one
540  ctx2id[kv.second] = static_cast<int>(ctx_list.size()); // assign device id to ctx
541  ctx_list.push_back(kv.second); // save ctx to the list
542  }
543  // assign device id to the arg name with the corresponding ctx
544  device_map[kv.first] = ctx2id.at(kv.second);
545  }
546 
547  // loop through all the rest of input nodes not specified
548  // in the ctx_map and populate maps and lists
549  size_t arg_top = 0, aux_top = 0;
550  for (size_t i = 0; i < num_forward_inputs; ++i) {
551  const uint32_t nid = idx.input_nodes().at(i);
552  Context ctx;
553  if (mutable_nodes.count(nid)) { // aux node is mutable
554  CHECK_LT(aux_top, aux_state_ctxes.size());
555  ctx = aux_state_ctxes[aux_top];
556  ++aux_top;
557  } else { // regular input node is immutable
558  CHECK_LT(arg_top, in_arg_ctxes.size());
559  ctx = in_arg_ctxes[arg_top];
560  ++arg_top;
561  }
562  if (ctx2id.count(ctx) == 0) { // if the current ctx is not in the map of ctx and device id
563  ctx2id[ctx] = static_cast<int>(ctx_list.size()); // assign the current ctx with device id
564  ctx_list.push_back(ctx); // save the current ctx in the list
565  }
566  device[nid] = ctx2id.at(ctx); // assign device id to the current node
567  }
568 
569  // loop through the gradient outputs appended after the forward outputs and populate
570  // the maps and lists; these entries hold the gradients of the loss w.r.t. the arguments
571  size_t arg_grad_offset = 0;
572  // keep an offset into the arg_grad_ctxes vector,
573  // since g.outputs excludes arg_grads whose req is kNullOp
574  CHECK_GE(grad_req_types.size(), g.outputs.size() - num_forward_outputs)
575  << "insufficient number of grad_reqs";
576  for (size_t i = num_forward_outputs; i < g.outputs.size(); ++i, ++arg_grad_offset) {
577  while (grad_req_types[arg_grad_offset] == kNullOp) ++arg_grad_offset;
578  const uint32_t nid = idx.outputs()[i].node_id;
579  Context ctx = arg_grad_ctxes[arg_grad_offset];
580  if (ctx2id.count(ctx) == 0) {
581  ctx2id[ctx] = static_cast<int>(ctx_list.size());
582  ctx_list.push_back(ctx);
583  }
584  int devid = ctx2id.at(ctx);
585  if (device[nid] != -1) {
586  CHECK_EQ(device[nid], devid) << "device of same output not equal to each other";
587  } else {
588  device[nid] = devid;
589  }
590  }
591 
592  g.attrs["device"] = std::make_shared<dmlc::any>(std::move(device));
593  g = nnvm::pass::PlaceDevice(g, "__ctx_group__", device_map, "_CrossDeviceCopy");
594  const auto& assigned_device = g.GetAttr<nnvm::DeviceVector>("device");
595 
596  exec::ContextVector vcontext;
597  for (size_t i = 0; i < assigned_device.size(); ++i) {
598  if (assigned_device[i] == -1) {
599  vcontext.push_back(default_ctx);
600  } else {
601  vcontext.push_back(ctx_list[assigned_device[i]]);
602  }
603  }
604 
605  // after device planning, check again that the device assigned to each gradient node
606  // matches the context of the corresponding gradient storage; otherwise the gradient
607  // would be computed on one device but stored on another
608  auto &new_idx = g.indexed_graph();
609  arg_grad_offset = 0;
610  for (size_t i = num_forward_outputs; i < g.outputs.size(); ++i, ++arg_grad_offset) {
611  while (grad_req_types[arg_grad_offset] == kNullOp) ++arg_grad_offset;
612  const uint32_t nid = new_idx.outputs()[i].node_id;
613  Context ctx = arg_grad_ctxes[arg_grad_offset];
614  CHECK(ctx == vcontext[nid])
615  << "Trying to save gradient to " << ctx
616  << " while its source node \"" << new_idx[nid].source->attrs.name
617  << "\" computes it on " << vcontext[nid]
618  << ". Check your ctx in NDArray allocation.";
619  }
620 
621  g.attrs["context"] = std::make_shared<nnvm::any>(std::move(vcontext));
622  return g;
623 }
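// A usage sketch (illustrative; group names such as "stage1"/"stage2" and the ctx/req vectors
// are hypothetical and would normally come from the binding call):
//
//   std::map<std::string, Context> group2ctx = {{"stage1", Context::CPU()},
//                                               {"stage2", Context::GPU(0)}};
//   g = AssignContext(std::move(g), Context::CPU(), group2ctx,
//                     in_arg_ctxes, arg_grad_ctxes, aux_state_ctxes, grad_req_types,
//                     num_forward_inputs, num_forward_outputs);
//   const auto& vctx = g.GetAttr<exec::ContextVector>("context");  // per-node contexts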
624 
625 } // namespace common
626 } // namespace mxnet
627 #endif // MXNET_COMMON_EXEC_UTILS_H_
628 