28 #include <dmlc/base.h>
30 #include <dmlc/type_traits.h>
31 #include <dmlc/parameter.h>
32 #include <mshadow/tensor.h>
35 #include <nnvm/tuple.h>
36 #include <nnvm/symbolic.h>
42 #ifndef MXNET_USE_OPENCV
43 #define MXNET_USE_OPENCV 1
49 #ifndef MXNET_USE_CUDA
50 #define MXNET_USE_CUDA MSHADOW_USE_CUDA
56 #ifndef MXNET_USE_CUDNN
57 #define MXNET_USE_CUDNN MSHADOW_USE_CUDNN
63 #ifndef MXNET_USE_CUSOLVER
64 #define MXNET_USE_CUSOLVER MSHADOW_USE_CUSOLVER
68 #define MXNET_GPU_NOT_ENABLED_ERROR "GPU is not enabled"
74 #if DMLC_USE_CXX11 && defined(__GNUC__) && !defined(__clang_version__)
75 #if __GNUC__ == 4 && __GNUC_MINOR__ < 8
76 #error "Currently we need g++ 4.8 or higher to fully support c++11 features"
87 #define MXNET_API __declspec(dllexport)
89 #define MXNET_API __declspec(dllimport)
98 #ifndef MXNET_PREDICT_ONLY
99 #define MXNET_PREDICT_ONLY 0
103 #define MXNET_MAJOR 1
105 #define MXNET_MINOR 4
107 #define MXNET_PATCH 1
109 #define MXNET_VERSION (MXNET_MAJOR*10000 + MXNET_MINOR*100 + MXNET_PATCH)
111 #define MXNET_MAKE_VERSION(major, minor, patch) ((major)*10000 + (minor)*100 + patch)
115 #define PROFILER_MESSAGE_FUNCNAME (__FUNCTION__)
182   return !(*this == b);
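The version macros above pack major/minor/patch into a single integer (1.4.1 becomes 10401), so builds can be gated at compile time. A minimal sketch, assuming only that mxnet/base.h is on the include path:

#include <mxnet/base.h>
#include <iostream>

int main() {
  // Refuse to build against a pre-1.0 header; the comparison is plain
  // preprocessor arithmetic on the packed version numbers.
#if MXNET_VERSION < MXNET_MAKE_VERSION(1, 0, 0)
#error "This example expects an MXNet 1.x header"
#endif
  std::cout << "Compiled against MXNet header version " << MXNET_VERSION << std::endl;
  return 0;
}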
188 inline void Save(dmlc::Stream *strm) const {
189   strm->Write(&dev_type, sizeof(dev_type));
190   strm->Write(&dev_id, sizeof(dev_id));
197 inline bool Load(dmlc::Stream *strm) {
198   if (strm->Read(&dev_type, sizeof(dev_type)) != sizeof(dev_type)) return false;
199   if (strm->Read(&dev_id, sizeof(int32_t)) != sizeof(int32_t)) return false;
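Save and Load write the two POD fields (dev_type, then dev_id) directly, so a Context round-trips through any dmlc::Stream. A sketch using dmlc::MemoryStringStream from dmlc-core as the backing store (an assumption; any Stream implementation behaves the same way):

#include <mxnet/base.h>
#include <dmlc/memory_io.h>
#include <dmlc/logging.h>
#include <string>

int main() {
  std::string buffer;
  mxnet::Context saved = mxnet::Context::GPU(0);
  {
    dmlc::MemoryStringStream out(&buffer);
    saved.Save(&out);                 // writes dev_type, then dev_id
  }
  mxnet::Context restored;
  {
    dmlc::MemoryStringStream in(&buffer);
    CHECK(restored.Load(&in));        // reads the fields back in the same order
  }
  CHECK(saved == restored);
  return 0;
}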
213 inline static Context CPU(int32_t dev_id = 0);
219 inline static Context GPU(int32_t dev_id = -1);
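CPU() and GPU() are thin factories over Create(). Note the asymmetric defaults: CPU() defaults to device 0, while GPU() defaults to -1, meaning "whichever CUDA device is current". A sketch that sticks to explicit ids so it also works in CPU-only builds:

#include <mxnet/base.h>
#include <iostream>

int main() {
  mxnet::Context cpu0 = mxnet::Context::CPU();    // cpu(0)
  mxnet::Context gpu1 = mxnet::Context::GPU(1);   // gpu(1); no CUDA query for an explicit id
  // Calling GPU() with the default dev_id of -1 would instead ask CUDA for the
  // current device, and aborts with LOG(FATAL) in a CPU-only build.
  std::cout << cpu0 << " and " << gpu1 << std::endl;
  return 0;
}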
269 template<typename xpu>
270 inline mshadow::Stream<xpu>* get_stream() const {
271   return static_cast<mshadow::Stream<xpu>*>(stream);
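get_stream simply casts the type-erased stream pointer back to the typed mshadow stream, so the caller is responsible for asking for the right device type. A sketch under the assumption that engine code normally fills in the RunContext; WaitOnStream is a hypothetical helper, not part of this header:

#include <mxnet/base.h>
#include <mshadow/tensor.h>

// Hypothetical helper: recover the typed stream and block until its pending
// work has finished.
template<typename xpu>
void WaitOnStream(const mxnet::RunContext& run_ctx) {
  mshadow::Stream<xpu>* s = run_ctx.get_stream<xpu>();
  if (s != nullptr) {
    s->Wait();  // a no-op for mshadow::cpu, a stream synchronize for mshadow::gpu
  }
}

int main() {
  mxnet::RunContext run_ctx = {};            // normally populated by the engine
  run_ctx.ctx = mxnet::Context::CPU();
  WaitOnStream<mshadow::cpu>(run_ctx);       // stream stays null for plain CPU work
  return 0;
}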
295   if (dev_type & kGPU) {
296 #if MXNET_USE_CUDA
297     CHECK_EQ(cudaGetDevice(&ctx.dev_id), cudaSuccess);
298 #else
299     LOG(FATAL) << "Please compile with CUDA enabled for cuda features";
326 cudaError_t e = cudaGetDeviceCount(&count);
327 if (e == cudaErrorNoDevice) {
330 CHECK_EQ(e, cudaSuccess) << " CUDA: " << cudaGetErrorString(e);
337 inline void Context::GetGPUMemoryInformation(int dev, uint64_t *free_mem,
338                                              uint64_t *total_mem) {
345   e = cudaGetDevice(&curDevice);
346   CHECK_EQ(e, cudaSuccess) << " CUDA: " << cudaGetErrorString(e);
348   e = cudaSetDevice(dev);
349   CHECK_EQ(e, cudaSuccess) << " CUDA: " << cudaGetErrorString(e);
351   e = cudaMemGetInfo(&memF, &memT);
352   CHECK_EQ(e, cudaSuccess) << " CUDA: " << cudaGetErrorString(e);
354   e = cudaSetDevice(curDevice);
355   CHECK_EQ(e, cudaSuccess) << " CUDA: " << cudaGetErrorString(e);
357   *free_mem = static_cast<uint64_t>(memF);
358   *total_mem = static_cast<uint64_t>(memT);
360 #else
361   LOG(FATAL)
362       << "This call is only supported for MXNet built with CUDA support.";
369   const std::string::size_type l = str.find('(');
370   CHECK_NE(l, std::string::npos);
371   const std::string::size_type r = str.find(')');
372   CHECK_EQ(r, str.length()-1);
374   const std::string type = str.substr(0, l);
375   int id = std::stoi(str.substr(l+1, r-l-1));
376   if (type == "cpu") {
377     ret = CPU(id);
378   } else if (type == "gpu") {
379     ret = GPU(id);
380   } else if (type == "cpu_pinned") {
381     ret = CPUPinned(id);
382   } else if (type == "cpu_shared") {
383     ret = CPUShared(id);
384   } else {
385     LOG(FATAL) << "Invalid context string " << str;
386   }
387 } catch (...) {
388   LOG(FATAL) << "Invalid context string " << str;
399     out << "cpu_pinned(";
401     out << "cpu_shared(";
410 #define STRINGIZE_DETAIL(x) #x
411 #define STRINGIZE(x) STRINGIZE_DETAIL(x)
412 #define MXNET_DESCRIBE(...) describe(__VA_ARGS__ "\n\nFrom:" __FILE__ ":" STRINGIZE(__LINE__))
413 #define ADD_FILELINE "\n\nDefined in " __FILE__ ":L" STRINGIZE(__LINE__)
415 #if MXNET_USE_MKLDNN == 1
416 constexpr size_t kMKLDNNAlign = 64;
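STRINGIZE is the standard two-level stringification trick: the extra indirection lets __LINE__ expand to its numeric value before # turns it into a string, which is how ADD_FILELINE and MXNET_DESCRIBE append a source location to operator descriptions. A standalone sketch of the expansion:

#include <mxnet/base.h>
#include <iostream>

int main() {
  // Adjacent string literals concatenate, so this expands to something like
  // "Example operator description.\n\nDefined in <this file>:L<this line>".
  const char* described = "Example operator description." ADD_FILELINE;
  std::cout << described << std::endl;
  return 0;
}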
422 template<> struct hash<mxnet::Context> {
423   size_t operator()(const mxnet::Context& ctx) const {
424     size_t res = 0;
425     res = dmlc::HashCombine(res, static_cast<size_t>(ctx.dev_type));
426     res = dmlc::HashCombine(res, static_cast<size_t>(ctx.dev_id));
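This std::hash specialization, together with operator== above, is what lets Context serve directly as a key in unordered containers. A sketch:

#include <mxnet/base.h>
#include <string>
#include <unordered_map>

int main() {
  std::unordered_map<mxnet::Context, std::string> names;   // uses std::hash<mxnet::Context>
  names[mxnet::Context::CPU()] = "host";
  names[mxnet::Context::GPU(0)] = "device 0";
  return names.at(mxnet::Context::CPU()) == "host" ? 0 : 1;
}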
434 #endif  // MXNET_BASE_H_

DeviceType dev_mask() const
Get corresponding device mask.
Definition: base.h:151
static const int32_t kMaxDevID
the maximal device index
Definition: base.h:205
namespace of mxnet
Definition: base.h:118
static void GetGPUMemoryInformation(int dev, uint64_t *free, uint64_t *total)
get the free and total available memory on a GPU
mshadow::Stream< xpu > * get_stream() const
get mshadow stream from Context
Definition: base.h:270
static int32_t GetGPUCount()
bool Load(dmlc::Stream *strm)
load the content from binary stream
Definition: base.h:197
mshadow::default_real_t real_t
data type that will be used to store ndarray
Definition: base.h:126
static Context GPU(int32_t dev_id=-1)
nnvm::TShape TShape
Shape data structure used to record shape information.
Definition: base.h:128
Context ctx
base Context
Definition: base.h:259
bool operator<(const Context &b) const
Comparator, used to enable Context as std::map key.
static const int32_t kMaxDevType
the maximal device type
Definition: base.h:203
execution time context. The information needed in runtime for actual execution.
Definition: base.h:257
DeviceType dev_type
the device type we run the op on
Definition: base.h:142
int32_t dev_id
device id we are going to run it on
Definition: base.h:144
void * stream
the stream of the device, can be NULL or Stream<gpu>* in GPU mode
Definition: base.h:263
void Save(dmlc::Stream *strm) const
save the content into binary stream
Definition: base.h:188
mshadow::gpu gpu
mxnet gpu
Definition: base.h:122
const Context & get_ctx() const
get the base Context from RunContext
Definition: base.h:274
DeviceType
Type of device.
Definition: base.h:135
static Context CPUShared(int32_t dev_id=0)
mshadow::cpu cpu
mxnet cpu
Definition: base.h:120
int real_dev_id() const
Returns dev_id for kGPU and kCPUPinned, 0 otherwise.
Definition: base.h:158
nnvm::Op Op
operator structure from NNVM
Definition: base.h:130
Context()
default constructor
Definition: base.h:146
static Context Create(DeviceType dev_type, int32_t dev_id=-1)
Create a new context.
bool operator!=(const Context &b) const
check whether the current context does not equal another one
Definition: base.h:181
static Context CPU(int32_t dev_id=0)
std::ostream & operator<<(std::ostream &out, const NDArray &ndarray)
static Context CPUPinned(int32_t dev_id=-1)
static Context FromString(const std::string &str)
mshadow::index_t index_t
index type, usually unsigned
Definition: base.h:124
TBlob class that holds a common representation of a tensor of arbitrary dimension; can be used to transform...
Context information about the execution environment.
Definition: base.h:133
bool operator==(const Context &b) const
check if current context equals another one
Definition: base.h:173
unsigned index_t
Definition: base.h:37