28 #include "dmlc/base.h" 31 #include "dmlc/type_traits.h" 32 #include "dmlc/parameter.h" 33 #include "mshadow/tensor.h" 36 #include "nnvm/symbolic.h" 45 #if DMLC_USE_CXX11 && defined(__GNUC__) && !defined(__clang_version__) 46 #if __GNUC__ == 4 && __GNUC_MINOR__ < 8 47 #error "Currently we need g++ 4.8 or higher to fully support c++11 features" 58 #define MXNET_API __declspec(dllexport) 60 #define MXNET_API __declspec(dllimport) 69 #ifndef MXNET_PREDICT_ONLY 70 #define MXNET_PREDICT_ONLY 0 80 #define MXNET_VERSION (MXNET_MAJOR*10000 + MXNET_MINOR*100 + MXNET_PATCH) 82 #define MXNET_MAKE_VERSION(major, minor, patch) ((major)*10000 + (minor)*100 + patch) 86 #define PROFILER_MESSAGE_FUNCNAME (__FUNCTION__) 97 typedef mshadow::default_real_t
real_t;
151 return !(*
this == b);
157 inline void Save(dmlc::Stream *strm)
const {
158 strm->Write(&dev_type,
sizeof(dev_type));
159 strm->Write(&dev_id,
sizeof(dev_id));
166 inline bool Load(dmlc::Stream *strm) {
167 if (strm->Read(&dev_type,
sizeof(dev_type)) !=
sizeof(
dev_type))
return false;
168 if (strm->Read(&dev_id,
sizeof(int32_t)) !=
sizeof(int32_t))
return false;
182 inline static Context CPU(int32_t dev_id = 0);
188 inline static Context GPU(int32_t dev_id = -1);
236 primary_stream_(primary_stream),
237 aux_stream_(primary_stream),
238 gpu_stream_sync_event_(nullptr) {
241 bool primary_has_blas_handle =
242 primary_stream->blas_handle_ownership_ == mshadow::Stream<gpu>::OwnHandle;
243 bool primary_has_dnn_handle =
244 primary_stream->dnn_handle_ownership_ == mshadow::Stream<gpu>::OwnHandle;
245 aux_stream_ = mshadow::NewStream<gpu>(primary_has_blas_handle,
246 primary_has_dnn_handle,
247 primary_stream->dev_id);
248 MSHADOW_CUDA_CALL(cudaEventCreateWithFlags(&gpu_stream_sync_event_, cudaEventDisableTiming));
254 if (aux_stream_ != primary_stream_) {
255 MSHADOW_CATCH_ERROR(mshadow::DeleteStream<gpu>(aux_stream_));
256 MSHADOW_CATCH_ERROR(cudaEventDestroy(gpu_stream_sync_event_));
264 if (aux_stream_ != primary_stream_)
265 StreamSync(primary_stream_, aux_stream_, gpu_stream_sync_event_);
272 if (aux_stream_ != primary_stream_)
273 StreamSync(aux_stream_, primary_stream_, gpu_stream_sync_event_);
276 mshadow::Stream<gpu> *
GetStream() {
return aux_stream_; }
283 static void StreamSync(mshadow::Stream<gpu> *s1, mshadow::Stream<gpu> *s2, cudaEvent_t event) {
284 MSHADOW_CUDA_CALL(cudaEventRecord(event, s1->stream_));
285 MSHADOW_CUDA_CALL(cudaStreamWaitEvent(s2->stream_, event, 0));
289 mshadow::Stream<gpu> *primary_stream_;
290 mshadow::Stream<gpu> *aux_stream_;
291 cudaEvent_t gpu_stream_sync_event_;
309 gpu_aux_stream_->PreAuxStreamUseSync();
313 gpu_aux_stream_->PostAuxStreamUseSync();
325 return gpu_aux_stream_->GetStream();
331 #endif // MXNET_USE_CUDA 357 template<
typename xpu>
359 return static_cast<mshadow::Stream<xpu>*
>(stream);
392 if (dev_type &
kGPU) {
394 CHECK_EQ(cudaGetDevice(&ctx.
dev_id), cudaSuccess);
396 LOG(FATAL) <<
"Please compile with CUDA enabled for cuda features";
423 cudaError_t e = cudaGetDeviceCount(&count);
424 if (e == cudaErrorNoDevice) {
427 CHECK_EQ(e, cudaSuccess) <<
" CUDA: " << cudaGetErrorString(e);
436 const int32_t default_num_streams = 1;
438 static int32_t num_streams =
439 dmlc::GetEnv(
"MXNET_GPU_WORKER_NSTREAMS", default_num_streams) >= 2 ? 2 : 1;
444 uint64_t *total_mem) {
451 e = cudaGetDevice(&curDevice);
452 CHECK_EQ(e, cudaSuccess) <<
" CUDA: " << cudaGetErrorString(e);
454 e = cudaSetDevice(dev);
455 CHECK_EQ(e, cudaSuccess) <<
" CUDA: " << cudaGetErrorString(e);
457 e = cudaMemGetInfo(&memF, &memT);
458 CHECK_EQ(e, cudaSuccess) <<
" CUDA: " << cudaGetErrorString(e);
460 e = cudaSetDevice(curDevice);
461 CHECK_EQ(e, cudaSuccess) <<
" CUDA: " << cudaGetErrorString(e);
463 *free_mem =
static_cast<uint64_t
>(memF);
464 *total_mem =
static_cast<uint64_t
>(memT);
468 <<
"This call is only supported for MXNet built with CUDA support.";
475 const std::string::size_type l = str.find(
'(');
476 CHECK_NE(l, std::string::npos);
477 const std::string::size_type r = str.find(
')');
478 CHECK_EQ(r, str.length()-1);
480 const std::string type = str.substr(0, l);
481 int id = std::stoi(str.substr(l+1, r-l-1));
484 }
else if (type ==
"gpu") {
486 }
else if (type ==
"cpu_pinned") {
488 }
else if (type ==
"cpu_shared") {
491 LOG(FATAL) <<
"Invalid context string " << str;
494 LOG(FATAL) <<
"Invalid context string " << str;
505 out <<
"cpu_pinned(";
507 out <<
"cpu_shared(";
516 #define STRINGIZE_DETAIL(x) #x 517 #define STRINGIZE(x) STRINGIZE_DETAIL(x) 518 #define MXNET_DESCRIBE(...) describe(__VA_ARGS__ "\n\nFrom:" __FILE__ ":" STRINGIZE(__LINE__)) 519 #define ADD_FILELINE "\n\nDefined in " __FILE__ ":L" STRINGIZE(__LINE__) 522 #if MXNET_USE_MKLDNN == 1 523 constexpr
size_t kMKLDNNAlign = 64;
529 template<>
struct hash<mxnet::
Context> {
532 res = dmlc::HashCombine(res, static_cast<size_t>(ctx.
dev_type));
533 res = dmlc::HashCombine(res, static_cast<size_t>(ctx.
dev_id));
538 #if __cplusplus < 201402L && !defined(_MSC_VER) 539 template<
typename T,
typename... Args>
540 inline std::unique_ptr<T> make_unique(Args&&... args) {
541 return std::unique_ptr<T>(
new T(std::forward<Args>(args)...));
548 #endif // MXNET_BASE_H_ DeviceType dev_mask() const
Get corresponding device mask.
Definition: base.h:120
static const int32_t kMaxDevID
the maximal device index
Definition: base.h:174
bool is_bulk
indicator of whether this execution is run in bulk mode
Definition: base.h:351
namespace of mxnet
Definition: base.h:89
static void GetGPUMemoryInformation(int dev, uint64_t *free, uint64_t *total)
get the free and total available memory on a GPU
SyncedGPUAuxStream(GPUAuxStream *gpu_aux_stream)
constructor.
Definition: base.h:308
mshadow::Stream< xpu > * get_stream() const
get mshadow stream from Context
Definition: base.h:358
void PostAuxStreamUseSync()
Makes future primary stream work wait on the completion of existing aux stream work.
Definition: base.h:270
static int32_t GetGPUCount()
bool Load(dmlc::Stream *strm)
load the content from binary stream
Definition: base.h:166
mshadow::default_real_t real_t
data type that will be used to store ndarray
Definition: base.h:97
static Context GPU(int32_t dev_id=-1)
GPUAuxStream(mshadow::Stream< gpu > *primary_stream)
constructor.
Definition: base.h:235
Context ctx
base Context
Definition: base.h:339
bool operator<(const Context &b) const
Comparator, used to enable Context as std::map key.
static const int32_t kMaxDevType
the maximal device type
Definition: base.h:172
execution time context. The information needed in runtime for actual execution.
Definition: base.h:337
Holds an auxiliary mshadow gpu stream that can be synced with a primary stream.
Definition: base.h:229
mshadow::Stream< gpu > * GetStream()
Getter for created auxiliary stream.
Definition: base.h:276
Provides automatic coordination of an auxiliary stream with a primary one. This object, upon construction, prepares an aux stream for use by syncing it with enqueued primary-stream work. Object destruction will sync again so future primary-stream work will wait on enqueued aux-stream work. If MXNET_GPU_WORKER_NSTREAMS == 1, then this defaults simply: the primary stream will equal the aux stream and the syncs will be executed as nops. See ./src/operator/cudnn/cudnn_convolution-inl.h for a usage example.
Definition: base.h:302
void * aux_stream
the auxiliary stream of the device, can be NULL or Stream<gpu>* in GPU mode
Definition: base.h:347
~GPUAuxStream()
destructor
Definition: base.h:252
DeviceType dev_type
the device type we run the op on
Definition: base.h:111
static int32_t GetGPUStreamsPerWorker()
int32_t dev_id
device id we are going to run it on
Definition: base.h:113
mshadow::Stream< gpu > * GetStream() const
Getter for underlying mshadow::Stream<gpu>.
Definition: base.h:324
void * stream
the stream of the device, can be NULL or Stream<gpu>* in GPU mode
Definition: base.h:343
void Save(dmlc::Stream *strm) const
save the content into binary stream
Definition: base.h:157
mshadow::gpu gpu
mxnet gpu
Definition: base.h:93
const Context & get_ctx() const
get the base Context from RunContext
Definition: base.h:371
DeviceType
Type of device.
Definition: base.h:104
static Context CPUShared(int32_t dev_id=0)
mshadow::cpu cpu
mxnet cpu
Definition: base.h:91
~SyncedGPUAuxStream()
destructor
Definition: base.h:312
int real_dev_id() const
Returns dev_id for kGPU and kCPUPinned, 0 otherwise.
Definition: base.h:127
nnvm::Op Op
operator structure from NNVM
Definition: base.h:99
Context()
default constructor
Definition: base.h:115
static Context Create(DeviceType dev_type, int32_t dev_id=-1)
Create a new context.
bool operator!=(const Context &b) const
check if current context does not equal another one
Definition: base.h:150
static Context CPU(int32_t dev_id=0)
std::ostream & operator<<(std::ostream &out, const NDArray &ndarray)
SyncedGPUAuxStream get_gpu_aux_stream() const
get an RAII object that transparently handles the syncing of the auxiliary stream.
Definition: base.h:366
static Context CPUPinned(int32_t dev_id=-1)
void PreAuxStreamUseSync()
Makes future aux stream work wait on the completion of existing primary stream work.
Definition: base.h:262
Data structure Tuple and TShape to store dynamic sized shapes.
static Context FromString(const std::string &str)
static void StreamSync(mshadow::Stream< gpu > *s1, mshadow::Stream< gpu > *s2, cudaEvent_t event)
Make future work enqueued to s2 wait on completion of current work enqueued to s1.
Definition: base.h:283
mshadow::index_t index_t
index type, usually unsigned
Definition: base.h:95
TBlob class that holds common representation of arbitrary dimension tensor, can be used to be transformed...
Context information about the execution environment.
Definition: base.h:102
get features of the MXNet library at runtime
bool operator==(const Context &b) const
check if current context equals another one
Definition: base.h:142
unsigned index_t
Definition: base.h:37