44 #if DMLC_USE_CXX11 && defined(__GNUC__) && !defined(__clang_version__) 45 #if __GNUC__ == 4 && __GNUC_MINOR__ < 8 46 #error "Currently we need g++ 4.8 or higher to fully support c++11 features" 57 #define MXNET_API __declspec(dllexport) 59 #define MXNET_API __declspec(dllimport) 68 #ifndef MXNET_PREDICT_ONLY 69 #define MXNET_PREDICT_ONLY 0 79 #define MXNET_VERSION (MXNET_MAJOR*10000 + MXNET_MINOR*100 + MXNET_PATCH) 81 #define MXNET_MAKE_VERSION(major, minor, patch) ((major)*10000 + (minor)*100 + patch) 85 #define PROFILER_MESSAGE_FUNCNAME (__FUNCTION__) 150 return !(*
this == b);
157 strm->
Write(&dev_type,
sizeof(dev_type));
158 strm->
Write(&dev_id,
sizeof(dev_id));
166 if (strm->
Read(&dev_type,
sizeof(dev_type)) !=
sizeof(
dev_type))
return false;
167 if (strm->
Read(&dev_id,
sizeof(int32_t)) !=
sizeof(int32_t))
return false;
181 inline static Context CPU(int32_t dev_id = 0);
187 inline static Context GPU(int32_t dev_id = -1);
232 static void CudaLibChecks();
235 static void CuDNNLibChecks();
248 primary_stream_(primary_stream),
249 aux_stream_(primary_stream),
250 gpu_stream_sync_event_(nullptr) {
253 bool primary_has_blas_handle =
255 bool primary_has_dnn_handle =
258 primary_has_dnn_handle,
260 MSHADOW_CUDA_CALL(cudaEventCreateWithFlags(&gpu_stream_sync_event_, cudaEventDisableTiming));
266 if (aux_stream_ != primary_stream_) {
276 if (aux_stream_ != primary_stream_)
277 StreamSync(primary_stream_, aux_stream_, gpu_stream_sync_event_);
284 if (aux_stream_ != primary_stream_)
285 StreamSync(aux_stream_, primary_stream_, gpu_stream_sync_event_);
303 cudaEvent_t gpu_stream_sync_event_;
321 gpu_aux_stream_->PreAuxStreamUseSync();
325 gpu_aux_stream_->PostAuxStreamUseSync();
343 #endif // MXNET_USE_CUDA 369 template<
typename xpu>
403 if (dev_type &
kGPU) {
412 CHECK_EQ(cudaGetDevice(&ctx.
dev_id), cudaSuccess);
414 LOG(FATAL) <<
"Please compile with CUDA enabled for cuda features";
438 int cuda_driver_version = 0;
439 CHECK_EQ(cudaDriverGetVersion(&cuda_driver_version), cudaSuccess);
440 return cuda_driver_version > 0;
452 cudaError_t e = cudaGetDeviceCount(&count);
455 if (e == cudaErrorNoDevice || e == cudaErrorInsufficientDriver) {
458 CHECK_EQ(e, cudaSuccess) <<
" CUDA: " << cudaGetErrorString(e);
467 const int32_t default_num_streams = 1;
469 static int32_t num_streams =
470 dmlc::GetEnv(
"MXNET_GPU_WORKER_NSTREAMS", default_num_streams) >= 2 ? 2 : 1;
475 uint64_t *total_mem) {
482 e = cudaGetDevice(&curDevice);
483 CHECK_EQ(e, cudaSuccess) <<
" CUDA: " << cudaGetErrorString(e);
485 e = cudaSetDevice(dev);
486 CHECK_EQ(e, cudaSuccess) <<
" CUDA: " << cudaGetErrorString(e);
488 e = cudaMemGetInfo(&memF, &memT);
489 CHECK_EQ(e, cudaSuccess) <<
" CUDA: " << cudaGetErrorString(e);
491 e = cudaSetDevice(curDevice);
492 CHECK_EQ(e, cudaSuccess) <<
" CUDA: " << cudaGetErrorString(e);
494 *free_mem =
static_cast<uint64_t
>(memF);
495 *total_mem =
static_cast<uint64_t
>(memT);
499 <<
"This call is only supported for MXNet built with CUDA support.";
506 const std::string::size_type l = str.find(
'(');
507 CHECK_NE(l, std::string::npos);
508 const std::string::size_type r = str.find(
')');
509 CHECK_EQ(r, str.length()-1);
511 const std::string type = str.substr(0, l);
512 int id = std::stoi(str.substr(l+1, r-l-1));
515 }
else if (type ==
"gpu") {
517 }
else if (type ==
"cpu_pinned") {
519 }
else if (type ==
"cpu_shared") {
522 LOG(FATAL) <<
"Invalid context string " << str;
525 LOG(FATAL) <<
"Invalid context string " << str;
536 out <<
"cpu_pinned(";
538 out <<
"cpu_shared(";
547 #define STRINGIZE_DETAIL(x) #x 548 #define STRINGIZE(x) STRINGIZE_DETAIL(x) 549 #define MXNET_DESCRIBE(...) describe(__VA_ARGS__ "\n\nFrom:" __FILE__ ":" STRINGIZE(__LINE__)) 550 #define ADD_FILELINE "\n\nDefined in " __FILE__ ":L" STRINGIZE(__LINE__) 553 #if MXNET_USE_MKLDNN == 1 || MXNET_USE_INTGEMM == 1 554 constexpr
size_t kMKLDNNAlign = 64;
560 template<>
struct hash<mxnet::
Context> {
569 #if __cplusplus < 201402L && !defined(_MSC_VER) 570 template<
typename T,
typename... Args>
571 inline std::unique_ptr<T> make_unique(Args&&... args) {
572 return std::unique_ptr<T>(
new T(std::forward<Args>(args)...));
579 #endif // MXNET_BASE_H_ static const int32_t kMaxDevID
the maximal device index
Definition: base.h:173
bool is_bulk
indicator of whether this execution is run in bulk mode
Definition: base.h:363
static cudaStream_t GetStream(Stream< gpu > *stream)
returns actual cudaStream_t given an input GPU stream pointer
Definition: stream_gpu-inl.h:97
HandleState dnn_handle_ownership_
cudnn handle ownership
Definition: stream_gpu-inl.h:60
namespace of mxnet
Definition: api_registry.h:33
static void GetGPUMemoryInformation(int dev, uint64_t *free, uint64_t *total)
get the free and total available memory on a GPU
SyncedGPUAuxStream(GPUAuxStream *gpu_aux_stream)
constructor.
Definition: base.h:320
Definition: stream_gpu-inl.h:37
DeviceType dev_mask() const
Get corresponding device mask.
Definition: base.h:119
bool operator==(const Context &b) const
check if current context equals another one
Definition: base.h:141
void PostAuxStreamUseSync()
Makes future primary stream work wait on the completion of existing aux stream work.
Definition: base.h:282
static int32_t GetGPUCount()
bool Load(dmlc::Stream *strm)
load the content from binary stream
Definition: base.h:165
mshadow::default_real_t real_t
data type that will be used to store ndarray
Definition: base.h:96
bool operator<(const Context &b) const
Comparator, used to enable Context as std::map key.
static Context GPU(int32_t dev_id=-1)
Definition: optional.h:251
GPUAuxStream(mshadow::Stream< gpu > *primary_stream)
constructor.
Definition: base.h:247
Context ctx
base Context
Definition: base.h:351
SyncedGPUAuxStream get_gpu_aux_stream() const
get an RAII object that transparently handles the syncing of the auxiliary stream.
Definition: base.h:378
static const int32_t kMaxDevType
the maximal device type
Definition: base.h:171
execution time context. The information needed in runtime for actual execution.
Definition: base.h:349
interface of stream I/O for serialization
Definition: io.h:30
Holds an auxiliary mshadow gpu stream that can be synced with a primary stream.
Definition: base.h:241
mshadow::Stream< gpu > * GetStream()
Getter for created auxiliary stream.
Definition: base.h:288
Stream< gpu > * NewStream< gpu >(bool create_blas_handle, bool create_dnn_handle, int dev_id)
Definition: stream_gpu-inl.h:210
Provides automatic coordination of an auxiliary stream with a primary one. This object, upon construction, prepares an aux stream for use by syncing it with enqueued primary-stream work. Object destruction will sync again so future primary-stream work will wait on enqueued aux-stream work. If MXNET_GPU_WORKER_NSTREAMS == 1, then this defaults simply: the primary stream will equal the aux stream and the syncs will be executed as nops. See ./src/operator/cudnn/cudnn_convolution-inl.h for a usage example.
Definition: base.h:314
#define MSHADOW_CUDA_CALL(func)
Protected cuda call in mshadow.
Definition: base.h:278
void * aux_stream
the auxiliary stream of the device, can be nullptr or Stream<gpu>* in GPU mode
Definition: base.h:359
~GPUAuxStream()
destructor
Definition: base.h:264
mshadow::Stream< xpu > * get_stream() const
get mshadow stream from Context
Definition: base.h:370
DeviceType dev_type
the device type we run the op on
Definition: base.h:110
static bool GPUDriverPresent()
header file of tensor data structure and functions This lib requires explicit memory allocation and d...
device name CPU
Definition: tensor.h:39
device name GPU
Definition: tensor.h:46
static const int kDevMask
device flag number, identifies this device
Definition: tensor.h:50
HandleState blas_handle_ownership_
cublas handle ownership
Definition: stream_gpu-inl.h:56
static int32_t GetGPUStreamsPerWorker()
size_t HashCombine(size_t key, const T &value)
hash an object and combines the key with previous keys
Definition: common.h:37
int dev_id
dev id
Definition: stream_gpu-inl.h:64
int32_t dev_id
device id we are going to run it on
Definition: base.h:112
#define MSHADOW_CATCH_ERROR(func)
Run function and catch error, log unknown error.
Definition: base.h:292
int32_t index_t
type that will be used for index
Definition: base.h:343
void * stream
the stream of the device, can be nullptr or Stream<gpu>* in GPU mode
Definition: base.h:355
mshadow::gpu gpu
mxnet gpu
Definition: base.h:92
float default_real_t
floating-point type that will be used by default in mshadow
Definition: base.h:355
DeviceType
Type of device.
Definition: base.h:103
int real_dev_id() const
Returns dev_id for kGPU and kCPUPinned, 0 otherwise.
Definition: base.h:126
static Context CPUShared(int32_t dev_id=0)
cudaStream_t stream_
cudaStream
Definition: stream_gpu-inl.h:44
mshadow::cpu cpu
mxnet cpu
Definition: base.h:90
virtual size_t Read(void *ptr, size_t size)=0
reads data from a stream
~SyncedGPUAuxStream()
destructor
Definition: base.h:324
nnvm::Op Op
operator structure from NNVM
Definition: base.h:98
static const int kDevMask
device flag number, identifies this device
Definition: tensor.h:43
Context()
default constructor
Definition: base.h:114
static Context Create(DeviceType dev_type, int32_t dev_id=-1)
Create a new context.
Data structure Tuple and TShape to store dynamic sized shapes.
static Context CPU(int32_t dev_id=0)
virtual void Write(const void *ptr, size_t size)=0
writes data to a stream
void Save(dmlc::Stream *strm) const
save the content into binary stream
Definition: base.h:156
static Context CPUPinned(int32_t dev_id=-1)
Operator information structure.
void PreAuxStreamUseSync()
Makes future aux stream work wait on the completion of existing primary stream work.
Definition: base.h:274
static Context FromString(const std::string &str)
mshadow::Stream< gpu > * GetStream() const
Getter for underlying mshadow::Stream<gpu>.
Definition: base.h:336
const Context & get_ctx() const
get the base Context from RunContext
Definition: base.h:383
static void StreamSync(mshadow::Stream< gpu > *s1, mshadow::Stream< gpu > *s2, cudaEvent_t event)
Make future work enqueued to s2 wait on completion of current work enqueued to s1.
Definition: base.h:295
mshadow::index_t index_t
index type, usually unsigned
Definition: base.h:94
TBlob class that holds common representation of arbitrary dimension tensor, can be used to transformed...
Symbolic graph construction API.
std::ostream & operator<<(std::ostream &os, const optional< T > &t)
serialize an optional object to string.
Definition: optional.h:151
Context information about the execution environment.
Definition: base.h:101
Provide lightweight util to do parameter setup and checking.
type traits information header
Operator structure.
Definition: op.h:103
void DeleteStream< gpu >(Stream< gpu > *stream)
Definition: stream_gpu-inl.h:200
get features of the MXNet library at runtime
bool operator!=(const Context &b) const
check if current context not equals another one
Definition: base.h:149
computation stream structure, used for asynchronous computations
Definition: tensor.h:383