#ifndef MXNET_COMMON_EXEC_UTILS_H_
#define MXNET_COMMON_EXEC_UTILS_H_

#include <nnvm/graph.h>
#include <nnvm/pass_functions.h>
#include <map>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
#include "../common/utils.h"
#include "../executor/exec_pass.h"

namespace mxnet {
namespace common {

/*!
 * \brief Set up default-storage TBlobs from the source NDArrays. For any source
 *        NDArray with non-default storage, create a temporary NDArray with
 *        default storage (or take it from \p bufs) and use its TBlob instead.
 *        The indices of such sources and of their corresponding temporaries are
 *        recorded in \p idx_map.
 * \return true if any source NDArray needs a storage cast
 */
inline bool SetupDefaultBlobsIn(const std::vector<NDArray>& src,
                                const std::vector<NDArray> *bufs,
                                std::vector<TBlob> *blobs,
                                std::vector<NDArray> *temp_src,
                                std::vector<NDArray> *temp_dst,
                                std::unordered_map<uint32_t, uint32_t> *idx_map) {
  bool require_cast = false;
  for (size_t i = 0; i < src.size(); i++) {
    auto& nd = src[i];
    bool is_default = nd.storage_type() == kDefaultStorage;
#if MXNET_USE_MKLDNN == 1
    // We have to make sure it's default storage and default layout.
    is_default = nd.IsDefaultData();
#endif
    if (!is_default) {
      (*idx_map)[i] = temp_dst->size();
      NDArray temp = bufs != nullptr ? bufs->at(i) : NDArray(nd.shape(), nd.ctx(),
                                                             true, nd.dtype());
#if MXNET_USE_MKLDNN == 1
      CHECK(temp.IsDefaultData());
#endif
      temp_src->emplace_back(nd);
      temp_dst->emplace_back(temp);
      blobs->emplace_back(temp.data());
      require_cast = true;
    } else {
      blobs->push_back(nd.data());
    }
  }
  return require_cast;
}
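// Illustrative usage sketch (not part of the original header): how a
// hypothetical caller might pair SetupDefaultBlobsIn with CastNonDefaultStorage
// (declared below) before invoking an FCompute function. The variable names
// (ndinputs, op_ctx, is_gpu, ...) are assumptions for illustration only.
//
//   std::vector<TBlob> input_blobs;
//   std::vector<NDArray> pre_temp_src, pre_temp_dst;
//   std::unordered_map<uint32_t, uint32_t> in_temp_idx_map;
//   if (SetupDefaultBlobsIn(ndinputs, nullptr, &input_blobs,
//                           &pre_temp_src, &pre_temp_dst, &in_temp_idx_map)) {
//     // copy each non-default-storage input into its default-storage temporary
//     CastNonDefaultStorage(pre_temp_src, pre_temp_dst, op_ctx, is_gpu);
//   }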
/*!
 * \brief Set up default-storage TBlobs for output NDArrays, mirroring
 *        SetupDefaultBlobsIn. Write requests may be adjusted when a temporary
 *        output array has to be generated.
 * \return true if any output NDArray needs a storage cast
 */
inline bool SetupDefaultBlobsOut(const std::vector<NDArray>& src,
                                 const std::vector<NDArray> *bufs,
                                 std::vector<OpReqType> *req,
                                 std::vector<TBlob> *blobs,
                                 std::vector<NDArray> *temp_src,
                                 std::vector<NDArray> *temp_dst) {
  bool require_cast = false;
  for (size_t i = 0; i < src.size(); i++) {
    auto& nd = src[i];
    bool is_default = nd.storage_type() == kDefaultStorage;
#if MXNET_USE_MKLDNN == 1
    if (req->at(i) == kWriteInplace && nd.IsMKLDNNData())
      // If it's write inplace and the output array doesn't use the default
      // layout, we'll generate a temporary output array below, which means
      // the input array and the output array are no longer the same array.
      // We should change the request type.
      req->at(i) = kWriteTo;
    // We have to make sure the output array is using the default layout.
    is_default = nd.IsDefaultData();
#endif
    if (!is_default) {
#if MXNET_USE_MKLDNN == 1
      NDArray temp;
      if (bufs != nullptr) {
        temp = bufs->at(i);
      } else if (kAddTo == req->at(i) && nd.IsMKLDNNData()) {
        // kAddTo needs the existing values, so reorder them to default layout
        temp = nd.Reorder2Default();
      } else if (kAddTo == req->at(i)) {
        temp = nd;
      } else {
        temp = NDArray(nd.shape(), nd.ctx(), true, nd.dtype());
      }
      CHECK(temp.IsDefaultData());
#else
      NDArray temp = bufs != nullptr ? bufs->at(i) : NDArray(nd.shape(), nd.ctx(),
                                                             true, nd.dtype());
#endif
      temp_src->emplace_back(nd);
      temp_dst->emplace_back(temp);
      blobs->emplace_back(temp.data());
      require_cast = true;
    } else {
      blobs->push_back(nd.data());
    }
  }
  return require_cast;
}
/*!
 * \brief Set up default-storage TBlobs for both input and output NDArrays of an
 *        operator, and record which mutable inputs must be cast back after the
 *        computation finishes.
 */
inline void SetupDefaultBlobsInOut(const std::vector<NDArray> &ndinputs,
                                   const std::vector<NDArray> &ndoutputs,
                                   const std::vector<NDArray> *in_bufs,
                                   const std::vector<NDArray> *out_bufs,
                                   std::vector<OpReqType> *req,
                                   std::vector<TBlob> *input_blobs,
                                   std::vector<TBlob> *output_blobs,
                                   std::vector<NDArray> *pre_temp_src,
                                   std::vector<NDArray> *pre_temp_dst,
                                   std::vector<NDArray> *post_temp_src,
                                   std::vector<NDArray> *post_temp_dst,
                                   std::unordered_map<uint32_t, uint32_t> *in_temp_idx_map,
                                   const std::vector<uint32_t> &mutate_idx) {
  // populate input blobs
  SetupDefaultBlobsIn(ndinputs, in_bufs, input_blobs, pre_temp_src, pre_temp_dst,
                      in_temp_idx_map);
  // populate output blobs
  SetupDefaultBlobsOut(ndoutputs, out_bufs, req, output_blobs, post_temp_dst,
                       post_temp_src);
  // add mutable inputs to the post-compute temp list: their temporaries must be
  // copied back into the original arrays after the computation
  for (const auto idx : mutate_idx) {
    auto map_iter = in_temp_idx_map->find(idx);
    if (map_iter != in_temp_idx_map->end()) {
      post_temp_src->push_back(pre_temp_dst->at(map_iter->second));
      post_temp_dst->push_back(ndinputs[idx]);
    }
  }
}
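// Illustrative sketch (assumed caller-side pattern, names hypothetical): the
// usual storage-fallback sequence is to set up blobs, cast non-default inputs
// into the temporaries, run the FCompute function on plain TBlobs, then cast
// the results back into the original output arrays.
//
//   SetupDefaultBlobsInOut(ndinputs, ndoutputs, nullptr, nullptr, &req,
//                          &input_blobs, &output_blobs,
//                          &pre_temp_src, &pre_temp_dst,
//                          &post_temp_src, &post_temp_dst,
//                          &in_temp_idx_map, mutate_idx);
//   CastNonDefaultStorage(pre_temp_src, pre_temp_dst, op_ctx, is_gpu);
//   fcompute(attrs, op_ctx, input_blobs, req, output_blobs);
//   CastNonDefaultStorage(post_temp_src, post_temp_dst, op_ctx, is_gpu);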
/*!
 * \brief Cast the NDArrays in \p src and store the results in the NDArrays in
 *        \p dst. This is only used for storage fallback in the executor.
 */
inline void CastNonDefaultStorage(const std::vector<NDArray>& src,
                                  const std::vector<NDArray>& dst,
                                  const OpContext& ctx,
                                  const bool is_gpu) {
  CHECK_EQ(dst.size(), src.size());
  for (size_t i = 0; i < src.size(); i++) {
    if (is_gpu) {
#if MXNET_USE_CUDA
      CastStorageDispatch<gpu>(ctx, src[i], dst[i]);
#else
      LOG(FATAL) << MXNET_GPU_NOT_ENABLED_ERROR;
#endif
    } else {
      CastStorageDispatch<cpu>(ctx, src[i], dst[i]);
    }
  }
}
/*!
 * \brief The default type inference function, which assigns all undefined
 *        types to the same type as one of the inputs or outputs.
 */
inline bool SameType(const nnvm::NodeAttrs& attrs,
                     std::vector<int> *iattr,
                     std::vector<int> *oattr) {
  int def_v = -1;
  for (int v : *oattr) {
    if (v != -1) {
      def_v = v;
      break;
    }
  }
  if (def_v == -1) {
    for (int v : *iattr) {
      if (v != -1) {
        def_v = v;
        break;
      }
    }
  }
  if (def_v == -1) return false;
  for (int& v : *oattr) {
    v = def_v;
  }
  for (int& v : *iattr) {
    v = def_v;
  }
  return true;
}
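// Illustrative sketch (the operator name is hypothetical): SameType matches the
// nnvm::FInferType signature, so an operator can register it directly as its
// type inference function.
//
//   NNVM_REGISTER_OP(_hypothetical_op)
//   .set_attr<nnvm::FInferType>("FInferType", common::SameType);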
/*!
 * \brief The default storage type inference function, which assigns all
 *        undefined storage types to kDefaultStorage. If all input and output
 *        storage types are kDefaultStorage, DispatchMode::kFCompute is assigned
 *        to dispatch_mode; otherwise DispatchMode::kFComputeFallback is assigned.
 */
inline bool DefaultStorageType(const nnvm::NodeAttrs& attrs,
                               const int dev_mask,
                               DispatchMode* dispatch_mode,
                               std::vector<int> *iattr,
                               std::vector<int> *oattr) {
  bool fallback = false;
  for (int& v : *oattr) {
    if (v == -1) v = kDefaultStorage;
    if (v != kDefaultStorage) fallback = true;
  }
  for (int& v : *iattr) {
    if (v == -1) v = kDefaultStorage;
    if (v != kDefaultStorage) fallback = true;
  }
  if (*dispatch_mode == DispatchMode::kUndefined) {
    *dispatch_mode = fallback ? DispatchMode::kFComputeFallback
                              : DispatchMode::kFCompute;
  }
  return true;
}
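// Illustrative sketch (the operator name is hypothetical): DefaultStorageType
// matches the FInferStorageType signature used by MXNet operators.
//
//   NNVM_REGISTER_OP(_hypothetical_op)
//   .set_attr<FInferStorageType>("FInferStorageType", common::DefaultStorageType);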
// string representation of a storage id
inline std::string storage_str(int storage_id) {
  std::string str;
  if (storage_id == -1) {
    str = "var (-1)";
  } else if (storage_id == -2) {
    str = "external storage (-2)";
  } else {
    str = "group " + std::to_string(storage_id);
  }
  return str;
}
/*!
 * \brief Log the static memory allocation plan of the graph: for every node,
 *        the shape, size in KB, and assigned storage of each input and output
 *        entry. Requires the "shape", "dtype" and "storage_id" graph attributes.
 */
inline void LogMemoryPlan(const nnvm::Graph& g) {
  const auto &idx = g.indexed_graph();
  const auto& vshape = g.GetAttr<nnvm::ShapeVector>("shape");
  const auto& vtype = g.GetAttr<nnvm::DTypeVector>("dtype");
  const auto& vstorage = g.GetAttr<nnvm::StorageVector>("storage_id");
  // find the node range to log
  uint32_t node_start = 0, node_end = idx.num_nodes();
  if (g.attrs.count("node_range")) {
    const auto& range = g.GetAttr<std::pair<uint32_t, uint32_t> >("node_range");
    node_start = range.first;
    node_end = range.second;
  }
  for (uint32_t nid = node_start; nid < node_end; ++nid) {
    const auto& inode = idx[nid];
    if (inode.source->is_variable()) {
      LOG(INFO) << "node " << nid << " var";
    } else {
      LOG(INFO) << "node " << nid << " " << inode.source->attrs.op->name;
      for (const auto& e : inode.inputs) {
        auto eid = idx.entry_id(e);
        size_t kilo_bytes = vshape[eid].Size() * mshadow::mshadow_sizeof(vtype[eid]) / 1024;
        LOG(INFO) << "\t\tinput " << eid << ": " << vshape[eid] << " ("
                  << kilo_bytes << " KB) -> " << storage_str(vstorage[eid]);
      }
      for (uint32_t index = 0; index < inode.source->num_outputs(); ++index) {
        uint32_t eid = idx.entry_id(nid, index);
        size_t kilo_bytes = vshape[eid].Size() * mshadow::mshadow_sizeof(vtype[eid]) / 1024;
        LOG(INFO) << "\t\toutput " << eid << ": " << vshape[eid] << " ("
                  << kilo_bytes << " KB) -> " << storage_str(vstorage[eid]);
      }
    }
  }
}
/*!
 * \brief Log the inferred dispatch mode of every node and the inferred storage
 *        type of every entry. Requires the "storage_type" and "dispatch_mode"
 *        graph attributes.
 */
inline void LogInferStorage(const nnvm::Graph& g) {
  const auto &idx = g.indexed_graph();
  const auto& vstorage_type = g.GetAttr<StorageTypeVector>("storage_type");
  const auto& dispatch_modes = g.GetAttr<DispatchModeVector>("dispatch_mode");
  uint32_t node_start = 0, node_end = idx.num_nodes();
  if (g.attrs.count("node_range")) {
    const auto& range = g.GetAttr<std::pair<uint32_t, uint32_t> >("node_range");
    node_start = range.first;
    node_end = range.second;
  }
  for (uint32_t nid = node_start; nid < node_end; ++nid) {
    const auto& inode = idx[nid];
    if (inode.source->is_variable()) {
      LOG(INFO) << "node " << nid << " var";
    } else {
      LOG(INFO) << "node " << nid << " " << inode.source->attrs.op->name
                << ": " << dispatch_mode_string(dispatch_modes[nid]);
      for (const auto& e : inode.inputs) {
        auto eid = idx.entry_id(e);
        LOG(INFO) << "\t\tinput " << eid << ": " << stype_string(vstorage_type[eid]);
      }
      for (uint32_t index = 0; index < inode.source->num_outputs(); ++index) {
        uint32_t eid = idx.entry_id(nid, index);
        LOG(INFO) << "\t\toutput " << eid << ": " << stype_string(vstorage_type[eid]);
      }
    }
  }
}
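// Illustrative sketch (assumed debugging pattern, the flag name is
// hypothetical): an executor can dump the inferred storage decisions and the
// memory plan after running the corresponding graph passes.
//
//   if (log_verbose) {
//     LogInferStorage(g);  // needs "storage_type" and "dispatch_mode" attrs
//     LogMemoryPlan(g);    // needs "shape", "dtype" and "storage_id" attrs
//   }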
/*!
 * \brief Raise a fatal error listing the forward inputs whose shapes could not
 *        be inferred (at most the first ten are reported).
 */
inline void HandleInferShapeError(const size_t num_forward_inputs,
                                  const nnvm::IndexedGraph& idx,
                                  const nnvm::ShapeVector& inferred_shapes) {
  int cnt = 10;
  std::ostringstream oss;
  for (size_t i = 0; i < num_forward_inputs; ++i) {
    const uint32_t nid = idx.input_nodes().at(i);
    const uint32_t eid = idx.entry_id(nid, 0);
    const TShape& inferred_shape = inferred_shapes[eid];
    if (inferred_shape.ndim() == 0 || inferred_shape.Size() == 0U) {
      const std::string& arg_name = idx[nid].source->attrs.name;
      oss << arg_name << ": " << inferred_shape << ", ";
      if (--cnt == 0) {
        oss << "...";
        break;
      }
    }
  }
  LOG(FATAL) << "InferShape pass cannot decide shapes for the following arguments "
                "(0s means unknown dimensions). Please consider providing them as inputs:\n"
             << oss.str();
}

/*!
 * \brief Raise a fatal error listing the forward inputs whose dtypes could not
 *        be inferred (at most the first ten are reported).
 */
inline void HandleInferTypeError(const size_t num_forward_inputs,
                                 const nnvm::IndexedGraph& idx,
                                 const nnvm::DTypeVector& inferred_dtypes) {
  int cnt = 10;
  std::ostringstream oss;
  for (size_t i = 0; i < num_forward_inputs; ++i) {
    const uint32_t nid = idx.input_nodes().at(i);
    const uint32_t eid = idx.entry_id(nid, 0);
    const int inferred_dtype = inferred_dtypes[eid];
    if (inferred_dtype == -1) {
      const std::string& arg_name = idx[nid].source->attrs.name;
      oss << arg_name << ": " << inferred_dtype << ", ";
      if (--cnt == 0) {
        oss << "...";
        break;
      }
    }
  }
  LOG(FATAL) << "InferType pass cannot decide dtypes for the following arguments "
                "(-1 means unknown dtype). Please consider providing them as inputs:\n"
             << oss.str();
}
"InferType pass cannot decide dtypes for the following arguments " 417 "(-1 means unknown dtype). Please consider providing them as inputs:\n" 423 const nnvm::IndexedGraph& idx,
426 std::ostringstream oss;
427 for (
size_t i = 0; i < num_forward_inputs; ++i) {
428 const uint32_t nid = idx.input_nodes().at(i);
429 const uint32_t eid = idx.entry_id(nid, 0);
430 const int inferred_stype = inferred_stypes[eid];
431 if (inferred_stype == -1) {
432 const std::string& arg_name = idx[nid].source->attrs.name;
440 LOG(FATAL) <<
"InferStorageType pass cannot decide storage type for the following arguments " 441 "(-1 means unknown stype). Please consider providing them as inputs:\n" 454 const TShape& dest_arg_shape,
455 const int dest_arg_dtype,
458 std::unordered_map<std::string, NDArray>* shared_buffer,
459 bool enable_row_sparse_sharing) {
461 if (enable_row_sparse_sharing) {
464 auto it = shared_buffer->find(name);
465 if (it != shared_buffer->end()) {
467 bool size_shareable = it->second.shape().Size() >= dest_arg_shape.Size();
468 if (size_shareable && stype_shareable) {
469 CHECK_EQ(it->second.dtype(), dest_arg_dtype)
470 <<
"Requested arg array's dtype does not match that of the reusable ndarray";
471 CHECK_EQ(it->second.storage_type(), dest_arg_stype)
472 <<
"Requested arg array's stype does not match that of the reusable ndarray";
473 return it->second.Reshape(dest_arg_shape);
474 }
else if (stype_shareable) {
475 LOG(WARNING) <<
"Bucketing: data " << name <<
" has a shape " << dest_arg_shape
476 <<
", which is larger than already allocated shape " << it->second.shape()
477 <<
". Need to re-allocate. Consider putting default bucket key to be " 478 <<
"the bucket taking the largest input for better memory sharing.";
481 it->second =
InitZeros(dest_arg_stype, dest_arg_shape, ctx, dest_arg_dtype);
485 return InitZeros(dest_arg_stype, dest_arg_shape, ctx, dest_arg_dtype);
488 auto ret =
InitZeros(dest_arg_stype, dest_arg_shape, ctx, dest_arg_dtype);
489 if (stype_shareable) {
490 shared_buffer->emplace(name, ret);
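// Illustrative sketch (names and shapes are assumptions): sharing one buffer
// across bucketed executors. The second call reuses the first allocation
// because the requested size fits and default storage is shareable.
//
//   std::unordered_map<std::string, NDArray> shared_buffer;
//   NDArray big = ReshapeOrCreate("data", TShape({64, 128}), mshadow::kFloat32,
//                                 kDefaultStorage, Context::CPU(),
//                                 &shared_buffer, false);
//   NDArray small = ReshapeOrCreate("data", TShape({32, 128}), mshadow::kFloat32,
//                                   kDefaultStorage, Context::CPU(),
//                                   &shared_buffer, false);  // reuses "data"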
/*!
 * \brief Assign a context to every node of the graph. This is triggered by both
 *        the simple_bind and bind flows.
 */
inline nnvm::Graph AssignContext(nnvm::Graph g,
                                 const Context& default_ctx,
                                 const std::map<std::string, Context>& ctx_map,
                                 const std::vector<Context>& in_arg_ctxes,
                                 const std::vector<Context>& arg_grad_ctxes,
                                 const std::vector<Context>& aux_state_ctxes,
                                 const std::vector<OpReqType>& grad_req_types,
                                 size_t num_forward_inputs,
                                 size_t num_forward_outputs) {
  const auto& idx = g.indexed_graph();
  const auto& mutable_nodes = idx.mutable_input_nodes();
  // if ctx_map is empty, everything lives in the default context
  if (ctx_map.size() == 0) {
    g.attrs["context"] = std::make_shared<nnvm::any>(
        exec::ContextVector(idx.num_nodes(), default_ctx));
    for (const auto& x : in_arg_ctxes) {
      CHECK(x == default_ctx)
        << "Input array is in " << x << " while binding with ctx=" << default_ctx
        << ". All arguments must be in global context (" << default_ctx
        << ") unless group2ctx is specified for cross-device graph.";
    }
    for (const auto& x : arg_grad_ctxes) {
      CHECK(x == default_ctx)
        << "Gradient array is in " << x << " while binding with ctx="
        << default_ctx << ". All gradients must be in global context (" << default_ctx
        << ") unless group2ctx is specified for cross-device graph.";
    }
    return g;
  }

  // otherwise, use context assignment
  std::map<Context, int> ctx2id;  // map ctx to device id
  std::vector<Context> ctx_list;  // index is device id
  nnvm::DeviceVector device(idx.num_nodes(), -1);  // index is node id
  nnvm::DeviceAssignMap device_map;  // map group name to device id

  // loop through the user-provided ctx_map and populate maps and lists
  for (auto &kv : ctx_map) {
    if (ctx2id.count(kv.second) == 0) {  // if the context has no device id, create one
      ctx2id[kv.second] = static_cast<int>(ctx_list.size());
      ctx_list.push_back(kv.second);
    }
    // assign the device id to the group name with the corresponding ctx
    device_map[kv.first] = ctx2id.at(kv.second);
  }

  // loop through all forward input nodes and populate maps and lists
  size_t arg_top = 0, aux_top = 0;
  for (size_t i = 0; i < num_forward_inputs; ++i) {
    const uint32_t nid = idx.input_nodes().at(i);
    Context ctx;
    if (mutable_nodes.count(nid)) {  // aux node is mutable
      CHECK_LT(aux_top, aux_state_ctxes.size());
      ctx = aux_state_ctxes[aux_top];
      ++aux_top;
    } else {  // regular input node is immutable
      CHECK_LT(arg_top, in_arg_ctxes.size());
      ctx = in_arg_ctxes[arg_top];
      ++arg_top;
    }
    if (ctx2id.count(ctx) == 0) {  // if the current ctx has no device id, create one
      ctx2id[ctx] = static_cast<int>(ctx_list.size());
      ctx_list.push_back(ctx);
    }
    device[nid] = ctx2id.at(ctx);  // assign the device id to the current node
  }

  // loop through the backward outputs (gradients); keep an offset into the
  // arg_grad_ctxes vector, since g.outputs exclude arg_grads whose req is kNullOp
  size_t arg_grad_offset = 0;
  CHECK_GE(grad_req_types.size(), g.outputs.size() - num_forward_outputs)
           << "insufficient number of grad_reqs";
  for (size_t i = num_forward_outputs; i < g.outputs.size(); ++i, ++arg_grad_offset) {
    while (grad_req_types[arg_grad_offset] == kNullOp) ++arg_grad_offset;
    const uint32_t nid = idx.outputs()[i].node_id;
    Context ctx = arg_grad_ctxes[arg_grad_offset];
    if (ctx2id.count(ctx) == 0) {
      ctx2id[ctx] = static_cast<int>(ctx_list.size());
      ctx_list.push_back(ctx);
    }
    int devid = ctx2id.at(ctx);
    if (device[nid] != -1) {
      CHECK_EQ(device[nid], devid) << "device of same output not equal to each other";
    } else {
      device[nid] = devid;
    }
  }

  g.attrs["device"] = std::make_shared<dmlc::any>(std::move(device));
  g = nnvm::pass::PlaceDevice(g, "__ctx_group__", device_map, "_CrossDeviceCopy");
  const auto& assigned_device = g.GetAttr<nnvm::DeviceVector>("device");

  exec::ContextVector vcontext;
  for (size_t i = 0; i < assigned_device.size(); ++i) {
    if (assigned_device[i] == -1) {
      vcontext.push_back(default_ctx);
    } else {
      vcontext.push_back(ctx_list[assigned_device[i]]);
    }
  }

  // after device planning, check again that the assigned device of each
  // gradient output matches the context of the provided gradient array
  auto &new_idx = g.indexed_graph();
  arg_grad_offset = 0;
  for (size_t i = num_forward_outputs; i < g.outputs.size(); ++i, ++arg_grad_offset) {
    while (grad_req_types[arg_grad_offset] == kNullOp) ++arg_grad_offset;
    const uint32_t nid = new_idx.outputs()[i].node_id;
    Context ctx = arg_grad_ctxes[arg_grad_offset];
    CHECK(ctx == vcontext[nid])
      << "Trying to save gradient to " << ctx
      << " while its source node \"" << new_idx[nid].source->attrs.name
      << "\" computes it on " << vcontext[nid]
      << ". Check your ctx in NDArray allocation.";
  }

  g.attrs["context"] = std::make_shared<nnvm::any>(std::move(vcontext));
  return g;
}
}  // namespace common
}  // namespace mxnet

#endif  // MXNET_COMMON_EXEC_UTILS_H_