#ifndef MXNET_COMMON_EXEC_UTILS_H_
#define MXNET_COMMON_EXEC_UTILS_H_

#include <nnvm/graph.h>
#include <nnvm/pass_functions.h>
#include <map>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
#include "../common/utils.h"
#include "../executor/exec_pass.h"

namespace mxnet {
namespace common {

/*!
 * \brief Setup default-storage tblobs from source NDArrays. If a source NDArray has
 *        non-default storage, create a temporary NDArray with default storage and use
 *        its tblob instead, recording in idx_map the index mapping from the source
 *        array to its temporary.
 */
inline bool SetupDefaultBlobsIn(const std::vector<NDArray>& src,
                                const std::vector<NDArray> *bufs,
                                std::vector<TBlob> *blobs,
                                std::vector<NDArray> *temp_src,
                                std::vector<NDArray> *temp_dst,
                                std::unordered_map<uint32_t, uint32_t> *idx_map) {
  bool require_cast = false;
  for (size_t i = 0; i < src.size(); i++) {
    auto& nd = src[i];
    bool is_default = nd.storage_type() == kDefaultStorage;
#if MXNET_USE_MKLDNN == 1
    // with MKLDNN, also require the default (plain) layout, not just default storage
    is_default = nd.IsDefaultData();
#endif
    if (!is_default) {
      (*idx_map)[i] = temp_dst->size();
      NDArray temp = bufs != nullptr ? bufs->at(i)
                                     : NDArray(nd.shape(), nd.ctx(), true, nd.dtype());
#if MXNET_USE_MKLDNN == 1
      CHECK(temp.IsDefaultData());
#endif
      temp_src->emplace_back(nd);
      temp_dst->emplace_back(temp);
      blobs->emplace_back(temp.data());
      require_cast = true;
    } else {
      blobs->push_back(nd.data());
    }
  }
  return require_cast;
}
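
// Usage sketch (hypothetical caller names, for illustration only): densify any
// non-default-storage inputs before dispatching to an FCompute kernel; `inputs`
// and `op_ctx` are assumed to exist in the caller.
//
//   std::vector<TBlob> in_blobs;
//   std::vector<NDArray> pre_src, pre_dst;
//   std::unordered_map<uint32_t, uint32_t> idx_map;
//   if (SetupDefaultBlobsIn(inputs, nullptr, &in_blobs, &pre_src, &pre_dst, &idx_map)) {
//     // some inputs were staged into dense temporaries; copy their contents in
//     CastNonDefaultStorage(pre_src, pre_dst, op_ctx, /*is_gpu=*/false);
//   }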

/*!
 * \brief Setup default-storage tblobs for output NDArrays, staging non-default-storage
 *        outputs into default-storage temporaries and adjusting write requests as needed.
 */
inline bool SetupDefaultBlobsOut(const std::vector<NDArray>& src,
                                 const std::vector<NDArray> *bufs,
                                 std::vector<OpReqType> *req,
                                 std::vector<TBlob> *blobs,
                                 std::vector<NDArray> *temp_src,
                                 std::vector<NDArray> *temp_dst) {
  bool require_cast = false;
  for (size_t i = 0; i < src.size(); i++) {
    auto& nd = src[i];
    bool is_default = nd.storage_type() == kDefaultStorage;
#if MXNET_USE_MKLDNN == 1
    if (req->at(i) == kWriteInplace && nd.IsMKLDNNData())
      // If the output uses an MKLDNN layout, a dense temporary is generated below,
      // so the input and output arrays are no longer the same array and the
      // in-place request must be downgraded to a plain write.
      req->at(i) = kWriteTo;
    // require both default storage and the default (plain) layout
    is_default = nd.IsDefaultData();
#endif
    if (!is_default) {
#if MXNET_USE_MKLDNN == 1
      NDArray temp;
      if (bufs != nullptr) {
        temp = bufs->at(i);
      } else if (kAddTo == req->at(i) && nd.IsMKLDNNData()) {
        temp = nd.Reorder2Default();
      } else if (kAddTo == req->at(i)) {
        temp = nd;
      } else {
        temp = NDArray(nd.shape(), nd.ctx(), true, nd.dtype());
      }
      CHECK(temp.IsDefaultData());
#else
      NDArray temp = bufs != nullptr ? bufs->at(i)
                                     : NDArray(nd.shape(), nd.ctx(), true, nd.dtype());
#endif
      temp_src->emplace_back(nd);
      temp_dst->emplace_back(temp);
      blobs->emplace_back(temp.data());
      require_cast = true;
    } else {
      blobs->push_back(nd.data());
    }
  }
  return require_cast;
}
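
// Usage sketch (hypothetical, for illustration): stage non-default outputs into
// dense temporaries. Note that temp_src receives the original arrays and temp_dst
// the temporaries, so results are copied back with the arguments swapped:
//
//   std::vector<TBlob> out_blobs;
//   std::vector<NDArray> orig, temps;
//   if (SetupDefaultBlobsOut(outputs, nullptr, &reqs, &out_blobs, &orig, &temps)) {
//     // ... run the kernel on out_blobs ...
//     CastNonDefaultStorage(temps, orig, op_ctx, /*is_gpu=*/false);  // temporary -> original
//   }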

/*!
 * \brief Setup both input and output tblobs, and record which mutable inputs were
 *        staged into temporaries so that they can be copied back after execution.
 */
inline void SetupDefaultBlobsInOut(const std::vector<NDArray> &ndinputs,
                                   const std::vector<NDArray> &ndoutputs,
                                   const std::vector<NDArray> *in_bufs,
                                   const std::vector<NDArray> *out_bufs,
                                   std::vector<OpReqType> *req,
                                   std::vector<TBlob> *input_blobs,
                                   std::vector<TBlob> *output_blobs,
                                   std::vector<NDArray> *pre_temp_src,
                                   std::vector<NDArray> *pre_temp_dst,
                                   std::vector<NDArray> *post_temp_src,
                                   std::vector<NDArray> *post_temp_dst,
                                   std::unordered_map<uint32_t, uint32_t> *in_temp_idx_map,
                                   const std::vector<uint32_t> &mutate_idx) {
  // populate input blobs
  SetupDefaultBlobsIn(ndinputs, in_bufs, input_blobs, pre_temp_src, pre_temp_dst,
                      in_temp_idx_map);
  // populate output blobs; src/dst are passed swapped so that post_temp_src holds
  // the dense temporaries and post_temp_dst the original outputs, matching the
  // copy direction (temporary -> original) used after execution
  SetupDefaultBlobsOut(ndoutputs, out_bufs, req, output_blobs, post_temp_dst,
                       post_temp_src);
  // add mutable inputs to the post temp lists
  for (const auto idx : mutate_idx) {
    auto map_iter = in_temp_idx_map->find(idx);
    if (map_iter != in_temp_idx_map->end()) {
      post_temp_src->push_back(pre_temp_dst->at(map_iter->second));
      post_temp_dst->push_back(ndinputs[idx]);
    }
  }
}
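
// Combined usage sketch (hypothetical names; mirrors how an imperative dispatcher
// might drive this helper; `inputs`, `outputs`, `reqs`, `mutate_idx`, `attrs`,
// `op_ctx`, `is_gpu`, and `fcompute` are assumed to exist in the caller):
//
//   std::vector<TBlob> in_blobs, out_blobs;
//   std::vector<NDArray> pre_src, pre_dst, post_src, post_dst;
//   std::unordered_map<uint32_t, uint32_t> in_idx_map;
//   SetupDefaultBlobsInOut(inputs, outputs, nullptr, nullptr, &reqs,
//                          &in_blobs, &out_blobs, &pre_src, &pre_dst,
//                          &post_src, &post_dst, &in_idx_map, mutate_idx);
//   CastNonDefaultStorage(pre_src, pre_dst, op_ctx, is_gpu);   // densify inputs
//   fcompute(attrs, op_ctx, in_blobs, reqs, out_blobs);        // run the kernel
//   CastNonDefaultStorage(post_src, post_dst, op_ctx, is_gpu); // copy results back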

/*!
 * \brief Cast each NDArray in src and store the result in the corresponding entry
 *        of dst, dispatching to the CPU or GPU implementation.
 */
inline void CastNonDefaultStorage(const std::vector<NDArray>& src,
                                  const std::vector<NDArray>& dst,
                                  const OpContext& ctx,
                                  const bool is_gpu) {
  CHECK_EQ(dst.size(), src.size());
  for (size_t i = 0; i < src.size(); i++) {
    if (is_gpu) {
#if MXNET_USE_CUDA
      CastStorageDispatch<gpu>(ctx, src[i], dst[i]);
#else
      LOG(FATAL) << MXNET_GPU_NOT_ENABLED_ERROR;
#endif
    } else {
      CastStorageDispatch<cpu>(ctx, src[i], dst[i]);
    }
  }
}

/*!
 * \brief The default type inference function, which assigns every undefined dtype
 *        in iattr and oattr to the first defined dtype found among them.
 */
inline bool SameType(const nnvm::NodeAttrs& attrs,
                     std::vector<int> *iattr,
                     std::vector<int> *oattr) {
  int def_v = -1;
  for (int v : *oattr) {
    if (v != -1) {
      def_v = v;
      break;
    }
  }
  if (def_v == -1) {
    for (int v : *iattr) {
      if (v != -1) {
        def_v = v;
        break;
      }
    }
  }
  if (def_v == -1) return false;
  for (int& v : *oattr) {
    v = def_v;
  }
  for (int& v : *iattr) {
    v = def_v;
  }
  return true;
}

/*!
 * \brief The default storage type inference function, which assigns all undefined
 *        storage types to kDefaultStorage and requests fallback dispatch whenever
 *        a non-default storage type is present.
 */
inline bool DefaultStorageType(const nnvm::NodeAttrs& attrs,
                               const int dev_mask,
                               DispatchMode* dispatch_mode,
                               std::vector<int> *iattr,
                               std::vector<int> *oattr) {
  bool fallback = false;
  for (int& v : *oattr) {
    if (v == -1) v = kDefaultStorage;
    if (v != kDefaultStorage) fallback = true;
  }
  for (int& v : *iattr) {
    if (v == -1) v = kDefaultStorage;
    if (v != kDefaultStorage) fallback = true;
  }
  if (*dispatch_mode == DispatchMode::kUndefined) {
    *dispatch_mode = fallback ? DispatchMode::kFComputeFallback : DispatchMode::kFCompute;
  }
  return true;
}

/*!
 * \brief Get a human-readable string for a storage_id assigned by the memory planner.
 */
inline std::string storage_str(int storage_id) {
  std::string str;
  if (storage_id == -1) {
    str = "var (-1)";
  } else if (storage_id == -2) {
    str = "external storage (-2)";
  } else {
    str = "group " + std::to_string(storage_id);
  }
  return str;
}

/*!
 * \brief Log the static memory plan of the graph: for each node, the shape, size,
 *        and storage assignment of every input and output entry.
 */
inline void LogMemoryPlan(const nnvm::Graph& g) {
  const auto &idx = g.indexed_graph();
  const auto& vshape = g.GetAttr<nnvm::ShapeVector>("shape");
  const auto& vtype = g.GetAttr<nnvm::DTypeVector>("dtype");
  const auto& vstorage = g.GetAttr<nnvm::StorageVector>("storage_id");
  // find the node range to log
  uint32_t node_start = 0, node_end = idx.num_nodes();
  if (g.attrs.count("node_range")) {
    const auto& range = g.GetAttr<std::pair<uint32_t, uint32_t> >("node_range");
    node_start = range.first;
    node_end = range.second;
  }
  for (uint32_t nid = node_start; nid < node_end; ++nid) {
    const auto& inode = idx[nid];
    if (inode.source->is_variable()) {
      LOG(INFO) << "node " << nid << " var";
    } else {
      LOG(INFO) << "node " << nid << " " << inode.source->attrs.op->name;
      for (const auto& e : inode.inputs) {
        auto eid = idx.entry_id(e);
        size_t kilo_bytes = vshape[eid].Size() * mshadow::mshadow_sizeof(vtype[eid]) / 1024;
        LOG(INFO) << "\t\tinput " << eid << ": " << vshape[eid] << " ("
                  << kilo_bytes << " KB) -> " << storage_str(vstorage[eid]);
      }
      for (uint32_t index = 0; index < inode.source->num_outputs(); ++index) {
        uint32_t eid = idx.entry_id(nid, index);
        size_t kilo_bytes = vshape[eid].Size() * mshadow::mshadow_sizeof(vtype[eid]) / 1024;
        LOG(INFO) << "\t\toutput " << eid << ": " << vshape[eid] << " ("
                  << kilo_bytes << " KB) -> " << storage_str(vstorage[eid]);
      }
    }
  }
}

/*!
 * \brief Log the inferred storage type and dispatch mode of each node in the graph.
 */
inline void LogInferStorage(const nnvm::Graph& g) {
  const auto &idx = g.indexed_graph();
  const auto& vstorage_type = g.GetAttr<StorageTypeVector>("storage_type");
  const auto& dispatch_modes = g.GetAttr<DispatchModeVector>("dispatch_mode");
  uint32_t node_start = 0, node_end = idx.num_nodes();
  if (g.attrs.count("node_range")) {
    const auto& range = g.GetAttr<std::pair<uint32_t, uint32_t> >("node_range");
    node_start = range.first;
    node_end = range.second;
  }
  for (uint32_t nid = node_start; nid < node_end; ++nid) {
    const auto& inode = idx[nid];
    if (inode.source->is_variable()) {
      LOG(INFO) << "node " << nid << " var";
    } else {
      LOG(INFO) << "node " << nid << " " << inode.source->attrs.op->name
                << ": " << dispatch_mode_string(dispatch_modes[nid]);
      for (const auto& e : inode.inputs) {
        auto eid = idx.entry_id(e);
        LOG(INFO) << "\t\tinput " << eid << ": " << stype_string(vstorage_type[eid]);
      }
      for (uint32_t index = 0; index < inode.source->num_outputs(); ++index) {
        uint32_t eid = idx.entry_id(nid, index);
        LOG(INFO) << "\t\toutput " << eid << ": " << stype_string(vstorage_type[eid]);
      }
    }
  }
}

/*!
 * \brief Raise a fatal error listing the forward inputs whose shapes could not be inferred.
 */
inline void HandleInferShapeError(const size_t num_forward_inputs,
                                  const nnvm::IndexedGraph& idx,
                                  const nnvm::ShapeVector& inferred_shapes) {
  int cnt = 10;
  std::ostringstream oss;
  for (size_t i = 0; i < num_forward_inputs; ++i) {
    const uint32_t nid = idx.input_nodes().at(i);
    const uint32_t eid = idx.entry_id(nid, 0);
    const TShape& inferred_shape = inferred_shapes[eid];
    if (inferred_shape.ndim() == 0 || inferred_shape.Size() == 0U) {
      const std::string& arg_name = idx[nid].source->attrs.name;
      oss << arg_name << ": " << inferred_shape << ", ";
      if (--cnt == 0) {
        oss << "...";
        break;
      }
    }
  }
  LOG(FATAL) << "InferShape pass cannot decide shapes for the following arguments "
                "(0s means unknown dimensions). Please consider providing them as inputs:\n"
             << oss.str();
}

/*!
 * \brief Raise a fatal error listing the forward inputs whose dtypes could not be inferred.
 */
inline void HandleInferTypeError(const size_t num_forward_inputs,
                                 const nnvm::IndexedGraph& idx,
                                 const nnvm::DTypeVector& inferred_dtypes) {
  int cnt = 10;
  std::ostringstream oss;
  for (size_t i = 0; i < num_forward_inputs; ++i) {
    const uint32_t nid = idx.input_nodes().at(i);
    const uint32_t eid = idx.entry_id(nid, 0);
    const int inferred_dtype = inferred_dtypes[eid];
    if (inferred_dtype == -1) {
      const std::string& arg_name = idx[nid].source->attrs.name;
      oss << arg_name << ": " << inferred_dtype << ", ";
      if (--cnt == 0) {
        oss << "...";
        break;
      }
    }
  }
  LOG(FATAL) << "InferType pass cannot decide dtypes for the following arguments "
                "(-1 means unknown dtype). Please consider providing them as inputs:\n"
             << oss.str();
}

/*!
 * \brief Raise a fatal error listing the forward inputs whose storage types could
 *        not be inferred.
 */
inline void HandleInferStorageTypeError(const size_t num_forward_inputs,
                                        const nnvm::IndexedGraph& idx,
                                        const StorageTypeVector& inferred_stypes) {
  int cnt = 10;
  std::ostringstream oss;
  for (size_t i = 0; i < num_forward_inputs; ++i) {
    const uint32_t nid = idx.input_nodes().at(i);
    const uint32_t eid = idx.entry_id(nid, 0);
    const int inferred_stype = inferred_stypes[eid];
    if (inferred_stype == -1) {
      const std::string& arg_name = idx[nid].source->attrs.name;
      oss << arg_name << ": " << stype_string(inferred_stype) << ", ";
      if (--cnt == 0) {
        oss << "...";
        break;
      }
    }
  }
  LOG(FATAL) << "InferStorageType pass cannot decide storage type for the following arguments "
                "(-1 means unknown stype). Please consider providing them as inputs:\n"
             << oss.str();
}

/*!
 * \brief If the shared_buffer entry for `name` is large enough and its storage type
 *        is shareable, reshape and return it; otherwise allocate a fresh
 *        zero-initialized NDArray, caching it in shared_buffer when its storage
 *        type allows sharing.
 */
inline NDArray ReshapeOrCreate(const std::string& name,
                               const TShape& dest_arg_shape,
                               const int dest_arg_dtype,
                               const NDArrayStorageType dest_arg_stype,
                               const Context& ctx,
                               std::unordered_map<std::string, NDArray>* shared_buffer,
                               bool enable_row_sparse_sharing) {
  bool stype_shareable = dest_arg_stype == kDefaultStorage;
  if (enable_row_sparse_sharing) {
    stype_shareable = stype_shareable || dest_arg_stype == kRowSparseStorage;
  }
  auto it = shared_buffer->find(name);
  if (it != shared_buffer->end()) {
    // check whether the existing array is large enough for sharing
    bool size_shareable = it->second.shape().Size() >= dest_arg_shape.Size();
    if (size_shareable && stype_shareable) {  // memory can be reused
      CHECK_EQ(it->second.dtype(), dest_arg_dtype)
          << "Requested arg array's dtype does not match that of the reusable ndarray";
      CHECK_EQ(it->second.storage_type(), dest_arg_stype)
          << "Requested arg array's stype does not match that of the reusable ndarray";
      return it->second.Reshape(dest_arg_shape);
    } else if (stype_shareable) {
      LOG(WARNING) << "Bucketing: data " << name << " has a shape " << dest_arg_shape
                   << ", which is larger than already allocated shape " << it->second.shape()
                   << ". Need to re-allocate. Consider putting default bucket key to be "
                   << "the bucket taking the largest input for better memory sharing.";
      // size is not large enough; create a larger one for sharing
      it->second = InitZeros(dest_arg_stype, dest_arg_shape, ctx, dest_arg_dtype);
      return it->second;
    } else {
      // storage type is not shareable; allocate without caching
      return InitZeros(dest_arg_stype, dest_arg_shape, ctx, dest_arg_dtype);
    }
  } else {
    auto ret = InitZeros(dest_arg_stype, dest_arg_shape, ctx, dest_arg_dtype);
    if (stype_shareable) {
      shared_buffer->emplace(name, ret);
    }
    return ret;
  }
}
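
// Usage sketch (hypothetical names): share one dense buffer across buckets of
// different sequence lengths, reallocating only when the request grows:
//
//   std::unordered_map<std::string, NDArray> shared;
//   NDArray a = ReshapeOrCreate("data", TShape({64, 50}), mshadow::kFloat32,
//                               kDefaultStorage, Context::CPU(), &shared, false);
//   NDArray b = ReshapeOrCreate("data", TShape({64, 30}), mshadow::kFloat32,
//                               kDefaultStorage, Context::CPU(), &shared, false);
//   // `b` reuses `a`'s memory, since 64*30 <= 64*50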

/*!
 * \brief Assign context to the graph. This is triggered by both simple_bind and bind flows.
 */
inline nnvm::Graph AssignContext(nnvm::Graph g,
                                 const Context& default_ctx,
                                 const std::map<std::string, Context>& ctx_map,
                                 const std::vector<Context>& in_arg_ctxes,
                                 const std::vector<Context>& arg_grad_ctxes,
                                 const std::vector<Context>& aux_state_ctxes,
                                 const std::vector<OpReqType>& grad_req_types,
                                 size_t num_forward_inputs,
                                 size_t num_forward_outputs) {
  const auto& idx = g.indexed_graph();
  const auto& mutable_nodes = idx.mutable_input_nodes();
  // default case: share a common context across all nodes
  if (ctx_map.size() == 0) {
    g.attrs["context"] = std::make_shared<nnvm::any>(
        exec::ContextVector(idx.num_nodes(), default_ctx));
    for (const auto& x : in_arg_ctxes) {
      CHECK(x == default_ctx)
          << "Input array is in " << x << " while binding with ctx=" << default_ctx
          << ". All arguments must be in global context (" << default_ctx
          << ") unless group2ctx is specified for cross-device graph.";
    }
    for (const auto& x : arg_grad_ctxes) {
      CHECK(x == default_ctx)
          << "Gradient array is in " << x << " while binding with ctx="
          << default_ctx << ". All gradients must be in global context (" << default_ctx
          << ") unless group2ctx is specified for cross-device graph.";
    }
    return g;
  }

  // otherwise, use context assignment
  std::map<Context, int> ctx2id;                   // map ctx to device id
  std::vector<Context> ctx_list;                   // index is device id
  nnvm::DeviceVector device(idx.num_nodes(), -1);  // index is node id
  nnvm::DeviceAssignMap device_map;                // map arg name to device id

  // loop through the user-supplied ctx_map and populate the maps and lists
  for (auto &kv : ctx_map) {
    if (ctx2id.count(kv.second) == 0) {  // if the context has no device id yet, create one
      ctx2id[kv.second] = static_cast<int>(ctx_list.size());
      ctx_list.push_back(kv.second);
    }
    // assign the device id to the arg name with the corresponding ctx
    device_map[kv.first] = ctx2id.at(kv.second);
  }

  // loop through the remaining input nodes not specified in ctx_map
  // and populate the maps and lists
  size_t arg_top = 0, aux_top = 0;
  for (size_t i = 0; i < num_forward_inputs; ++i) {
    const uint32_t nid = idx.input_nodes().at(i);
    Context ctx;
    if (mutable_nodes.count(nid)) {  // aux node is mutable
      CHECK_LT(aux_top, aux_state_ctxes.size());
      ctx = aux_state_ctxes[aux_top];
      ++aux_top;
    } else {  // regular input node is immutable
      CHECK_LT(arg_top, in_arg_ctxes.size());
      ctx = in_arg_ctxes[arg_top];
      ++arg_top;
    }
    if (ctx2id.count(ctx) == 0) {  // if the current ctx has no device id yet, create one
      ctx2id[ctx] = static_cast<int>(ctx_list.size());
      ctx_list.push_back(ctx);
    }
    device[nid] = ctx2id.at(ctx);  // assign the device id to the current node
  }

  // loop through the backward input nodes (the gradients of the loss wrt the
  // outputs) and populate the maps and lists; keep an offset into arg_grad_ctxes,
  // since g.outputs excludes arg_grads whose req is kNullOp
  size_t arg_grad_offset = 0;
  CHECK_GE(grad_req_types.size(), g.outputs.size() - num_forward_outputs)
      << "insufficient number of grad_reqs";
  for (size_t i = num_forward_outputs; i < g.outputs.size(); ++i, ++arg_grad_offset) {
    while (grad_req_types[arg_grad_offset] == kNullOp) ++arg_grad_offset;
    const uint32_t nid = idx.outputs()[i].node_id;
    Context ctx = arg_grad_ctxes[arg_grad_offset];
    if (ctx2id.count(ctx) == 0) {
      ctx2id[ctx] = static_cast<int>(ctx_list.size());
      ctx_list.push_back(ctx);
    }
    int devid = ctx2id.at(ctx);
    if (device[nid] != -1) {
      CHECK_EQ(device[nid], devid) << "device of same output not equal to each other";
    } else {
      device[nid] = devid;
    }
  }

  g.attrs["device"] = std::make_shared<dmlc::any>(std::move(device));
  g = nnvm::pass::PlaceDevice(g, "__ctx_group__", device_map, "_CrossDeviceCopy");
  const auto& assigned_devices = g.GetAttr<nnvm::DeviceVector>("device");

  exec::ContextVector vcontext;
  for (auto context : assigned_devices) {
    if (context == -1) {
      vcontext.push_back(default_ctx);
    } else {
      vcontext.push_back(ctx_list[context]);
    }
  }

  // after device planning, check again that the assigned device of each
  // gradient array matches the device of its source node
  auto &new_idx = g.indexed_graph();
  arg_grad_offset = 0;
  for (size_t i = num_forward_outputs; i < g.outputs.size(); ++i, ++arg_grad_offset) {
    while (grad_req_types[arg_grad_offset] == kNullOp) ++arg_grad_offset;
    const uint32_t nid = new_idx.outputs()[i].node_id;
    Context ctx = arg_grad_ctxes[arg_grad_offset];
    CHECK(ctx == vcontext[nid])
        << "Trying to save gradient to " << ctx
        << " while its source node \"" << new_idx[nid].source->attrs.name
        << "\" computes it on " << vcontext[nid]
        << ". Check your ctx in NDArray allocation.";
  }

  g.attrs["context"] = std::make_shared<nnvm::any>(std::move(vcontext));
  return g;
}

}  // namespace common
}  // namespace mxnet

#endif  // MXNET_COMMON_EXEC_UTILS_H_