1.5.0/doxygen/include_2mxnet_2base_8h_source.html

 /*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
  * regarding copyright ownership.  The ASF licenses this file
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
  *
  *   http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  * KIND, either express or implied.  See the License for the
  * specific language governing permissions and limitations
  * under the License.
  */

 #ifndef MXNET_BASE_H_
 #define MXNET_BASE_H_

 #include "dmlc/base.h"
 #include <string>
 #include "dmlc/io.h"
 #include "dmlc/type_traits.h"
 #include "dmlc/parameter.h"
 #include "mshadow/tensor.h"
 // nnvm headers for symbolic construction.
 #include "nnvm/op.h"
 #include "nnvm/symbolic.h"
 #include "libinfo.h"
 #include "tuple.h"


 // Compiler gate. It only applies when DMLC_USE_CXX11 is set and the compiler defines
 // __GNUC__ without defining __clang_version__.
 #if DMLC_USE_CXX11 && defined(__GNUC__) && !defined(__clang_version__)
 #if __GNUC__ == 4 && __GNUC_MINOR__ < 8
 // g++ 4.x releases older than 4.8 are rejected with a hard error.
 #error "Currently we need g++ 4.8 or higher to fully support c++11 features"
 // Defined as empty so that the `override` / `final` specifiers vanish on these compilers.
 // NOTE(review): the #error above already fails the build on this branch, so these two
 // defines look like leftovers with no practical effect -- confirm before relying on them.
 #define override
 #define final
 #endif
 #endif

 // MXNET_API: decoration placed on exported API symbols.
 //  - MSVC (_MSC_VER) with MXNET_EXPORTS defined : __declspec(dllexport)
 //  - MSVC (_MSC_VER) without MXNET_EXPORTS      : __declspec(dllimport)
 //  - every other compiler                       : expands to nothing
 #ifdef _MSC_VER
 #ifdef MXNET_EXPORTS
 #define MXNET_API __declspec(dllexport)
 #else
 #define MXNET_API __declspec(dllimport)
 #endif
 #else
 #define MXNET_API
 #endif

 // MXNET_PREDICT_ONLY defaults to 0 unless it was already defined before this header.
 #ifndef MXNET_PREDICT_ONLY
 #define MXNET_PREDICT_ONLY 0
 #endif

 // Major component of the library version.
 #define MXNET_MAJOR 1

 // Minor component of the library version.
 #define MXNET_MINOR 5

 // Patch component of the library version.
 #define MXNET_PATCH 1

 // Whole version packed into one integer: major*10000 + minor*100 + patch
 // (10501 for the values above).
 #define MXNET_VERSION (MXNET_MAJOR*10000 + MXNET_MINOR*100 + MXNET_PATCH)

 // Packs a (major, minor, patch) triple into a single integer using the layout
 // major*10000 + minor*100 + patch, so the result can be compared numerically.
 // Every argument is parenthesized so that expression arguments (for example
 // `a ? b : c` or `x << 1`) are evaluated as a unit before being combined.
 #define MXNET_MAKE_VERSION(major, minor, patch) ((major)*10000 + (minor)*100 + (patch))

 // Expands to the name of the enclosing function (__FUNCTION__) at the point of use.
 // NOTE(review): presumably consumed by profiler messages, judging by the name -- confirm.
 #define PROFILER_MESSAGE_FUNCNAME (__FUNCTION__)

 namespace mxnet {
 // mxnet cpu: alias of mshadow::cpu.
 typedef mshadow::cpu cpu;
 // mxnet gpu: alias of mshadow::gpu.
 typedef mshadow::gpu gpu;
 // Index type: alias of mshadow::index_t.
 typedef mshadow::index_t index_t;
 // Data type that will be used to store ndarray: alias of mshadow::default_real_t.
 typedef mshadow::default_real_t real_t;
 // Operator structure from NNVM.
 using Op = nnvm::Op;

 // Context information about the execution environment: a device type plus a device id.
 struct Context {
   // Type of device.
   enum DeviceType {
     kCPU = cpu::kDevMask,
     kGPU = gpu::kDevMask,
     kCPUPinned = 3,
     kCPUShared = 5,
   };
   // The device type we run the op on.
   DeviceType dev_type;
   // Device id we are going to run it on.
   int32_t dev_id;
   // Default constructor: CPU device with id 0.
   Context() : dev_type(kCPU), dev_id(0) {}
   // Get the corresponding device mask: the pinned and shared CPU types are reported
   // as kCPU, every other type is returned unchanged.
   DeviceType dev_mask() const {
     const bool is_cpu_variant = (dev_type == kCPUPinned) || (dev_type == kCPUShared);
     return is_cpu_variant ? kCPU : dev_type;
   }
   // Returns dev_id for kGPU and kCPUPinned, 0 otherwise.
   int real_dev_id() const {
     const bool id_is_meaningful = (dev_type == kCPUPinned) || (dev_type == kGPU);
     return id_is_meaningful ? dev_id : 0;
   }
   // Comparator, used to enable Context as std::map key (defined out of class).
   inline bool operator<(const Context &b) const;
   // Check if the current context equals another one (same type and same id).
   bool operator==(const Context &b) const {
     if (dev_type != b.dev_type) return false;
     return dev_id == b.dev_id;
   }
   // Check if the current context does not equal another one.
   bool operator!=(const Context &b) const {
     return !operator==(b);
   }
   // Save the content into a binary stream: raw dev_type bytes followed by raw dev_id bytes.
   void Save(dmlc::Stream *strm) const {
     strm->Write(&dev_type, sizeof(DeviceType));
     strm->Write(&dev_id, sizeof(int32_t));
   }
   // Load the content from a binary stream, in the order written by Save().
   // Returns false as soon as one of the two reads delivers fewer bytes than requested.
   bool Load(dmlc::Stream *strm) {
     const bool type_ok = strm->Read(&dev_type, sizeof(dev_type)) == sizeof(dev_type);
     if (!type_ok) return false;
     return strm->Read(&dev_id, sizeof(dev_id)) == sizeof(dev_id);
   }
   // The maximal device type.
   static const int32_t kMaxDevType = 6;
   // The maximal device index.
   static const int32_t kMaxDevID = 16;
   // Create a new context of the given type; see the out-of-class definition for how a
   // negative dev_id is resolved.
   inline static Context Create(DeviceType dev_type, int32_t dev_id = -1);
   // Shorthand for Create(kCPU, dev_id).
   inline static Context CPU(int32_t dev_id = 0);
   // Shorthand for Create(kGPU, dev_id).
   inline static Context GPU(int32_t dev_id = -1);
   // Number of GPUs reported by the CUDA runtime; 0 when built without CUDA.
   inline static int32_t GetGPUCount();
   // Number of GPU streams per worker (1 or 2), driven by MXNET_GPU_WORKER_NSTREAMS.
   inline static int32_t GetGPUStreamsPerWorker();
   // Get the free and total available memory on a GPU.
   inline static void GetGPUMemoryInformation(int dev, uint64_t *free, uint64_t *total);
   // Shorthand for Create(kCPUPinned, dev_id).
   inline static Context CPUPinned(int32_t dev_id = -1);
   // Shorthand for Create(kCPUShared, dev_id).
   inline static Context CPUShared(int32_t dev_id = 0);
   // Parse a context from a string of the form "<type>(<id>)".
   inline static Context FromString(const std::string& str);
 };

 #if MXNET_USE_CUDA

 // Holds an auxiliary mshadow gpu stream that can be synced with a primary stream.
 //
 // When Context::GetGPUStreamsPerWorker() reports fewer than 2 streams, no new stream is
 // created: the "auxiliary" stream is the primary stream itself and both sync calls are
 // no-ops. Otherwise the object owns a newly created stream plus a CUDA event and
 // releases both in its destructor.
 class GPUAuxStream {
  public:
   // Constructor.
   //   primary_stream - the stream the auxiliary stream is paired with; not owned.
   explicit GPUAuxStream(mshadow::Stream<gpu> *primary_stream) :
       primary_stream_(primary_stream),
       aux_stream_(primary_stream),
       gpu_stream_sync_event_(nullptr) {
     if (Context::GetGPUStreamsPerWorker() >= 2) {
       // Create auxiliary stream on the same device with the same properties as the primary stream
       bool primary_has_blas_handle =
           primary_stream->blas_handle_ownership_ == mshadow::Stream<gpu>::OwnHandle;
       bool primary_has_dnn_handle =
           primary_stream->dnn_handle_ownership_ == mshadow::Stream<gpu>::OwnHandle;
       aux_stream_ = mshadow::NewStream<gpu>(primary_has_blas_handle,
                                             primary_has_dnn_handle,
                                             primary_stream->dev_id);
       MSHADOW_CUDA_CALL(cudaEventCreateWithFlags(&gpu_stream_sync_event_, cudaEventDisableTiming));
     }
   }
   // Destructor: releases the auxiliary stream and the sync event, if they were created.
   ~GPUAuxStream() {
     // If the aux_stream_ == primary_stream_, then we created no new streams to destroy.
     if (aux_stream_ != primary_stream_) {
       MSHADOW_CATCH_ERROR(mshadow::DeleteStream<gpu>(aux_stream_));
       MSHADOW_CATCH_ERROR(cudaEventDestroy(gpu_stream_sync_event_));
     }
   }
   // Non-copyable: a copy would share aux_stream_ and gpu_stream_sync_event_ with the
   // original, and both destructors would then delete the same stream and event.
   GPUAuxStream(const GPUAuxStream&) = delete;
   GPUAuxStream& operator=(const GPUAuxStream&) = delete;
   // Makes future aux stream work wait on the completion of existing primary stream work.
   void PreAuxStreamUseSync() {
     // If the aux_stream_ == primary_stream_, then no synchronization is necessary.
     if (aux_stream_ != primary_stream_)
       StreamSync(primary_stream_, aux_stream_, gpu_stream_sync_event_);
   }
   // Makes future primary stream work wait on the completion of existing aux stream work.
   void PostAuxStreamUseSync() {
     // If the aux_stream_ == primary_stream_, then no synchronization is necessary.
     if (aux_stream_ != primary_stream_)
       StreamSync(aux_stream_, primary_stream_, gpu_stream_sync_event_);
   }
   // Getter for the auxiliary stream (the primary stream when no separate one was created).
   mshadow::Stream<gpu> *GetStream() { return aux_stream_; }
   // Make future work enqueued to s2 wait on completion of current work enqueued to s1:
   // records `event` on s1's stream, then makes s2's stream wait for that event.
   static void StreamSync(mshadow::Stream<gpu> *s1, mshadow::Stream<gpu> *s2, cudaEvent_t event) {
     MSHADOW_CUDA_CALL(cudaEventRecord(event, s1->stream_));
     MSHADOW_CUDA_CALL(cudaStreamWaitEvent(s2->stream_, event, 0));
   }

  private:
   // Stream supplied by the caller; never deleted here.
   mshadow::Stream<gpu> *primary_stream_;
   // Either primary_stream_ or a stream created (and later deleted) by this object.
   mshadow::Stream<gpu> *aux_stream_;
   // Event used by StreamSync; only created together with a separate aux stream.
   cudaEvent_t gpu_stream_sync_event_;
 };

 // Provides automatic coordination of an auxiliary stream with a primary one.
 // Construction calls PreAuxStreamUseSync() on the wrapped GPUAuxStream and destruction
 // calls PostAuxStreamUseSync(), so the aux stream is bracketed by the two syncs for the
 // lifetime of this object. The wrapped GPUAuxStream is not owned.
 class SyncedGPUAuxStream {
  public:
   // Constructor: remembers the aux stream holder and performs the pre-use sync.
   explicit SyncedGPUAuxStream(GPUAuxStream *gpu_aux_stream)
       : gpu_aux_stream_(gpu_aux_stream) {
     gpu_aux_stream_->PreAuxStreamUseSync();
   }
   // Destructor: performs the post-use sync.
   ~SyncedGPUAuxStream() { gpu_aux_stream_->PostAuxStreamUseSync(); }

   // Copying is disabled.
   SyncedGPUAuxStream(const SyncedGPUAuxStream&) = delete;
   void operator=(const SyncedGPUAuxStream&) = delete;
   // Moving is allowed.
   // NOTE(review): the defaulted move copies the raw pointer, so a moved-from object still
   // runs PostAuxStreamUseSync() in its destructor -- confirm this extra sync is acceptable.
   SyncedGPUAuxStream(SyncedGPUAuxStream&&) = default;
   SyncedGPUAuxStream& operator=(SyncedGPUAuxStream&&) = default;

   // Getter for underlying mshadow::Stream<gpu>.
   mshadow::Stream<gpu>* GetStream() const {
     mshadow::Stream<gpu> *underlying = gpu_aux_stream_->GetStream();
     return underlying;
   }

  private:
   // Non-owning pointer to the aux stream holder being coordinated.
   GPUAuxStream *gpu_aux_stream_;
 };
 #endif  // MXNET_USE_CUDA

 // Execution time context: the information needed in runtime for actual execution.
 struct RunContext {
   // Base Context.
   Context ctx;
   // The stream of the device, stored type-erased; get_stream<xpu>() casts it back to
   // mshadow::Stream<xpu>*.
   void *stream;
   // The auxiliary stream of the device, stored type-erased; get_gpu_aux_stream() casts
   // it to GPUAuxStream*.
   void *aux_stream;
   // Indicator of whether this execution is run in bulk mode.
   bool is_bulk;

   // Get the mshadow stream from the context, viewed as a stream of device type `xpu`.
   template<typename xpu>
   inline mshadow::Stream<xpu>* get_stream() const {
     using StreamPtr = mshadow::Stream<xpu>*;
     return static_cast<StreamPtr>(stream);
   }
 #if MXNET_USE_CUDA
   // Get an object that syncs the auxiliary stream on construction and again on destruction.
   inline SyncedGPUAuxStream get_gpu_aux_stream() const {
     GPUAuxStream *holder = static_cast<GPUAuxStream*>(aux_stream);
     return SyncedGPUAuxStream(holder);
   }
 #endif

   // Get the base Context from RunContext.
   inline const Context& get_ctx() const { return ctx; }
 };
 }  // namespace mxnet

 namespace mxnet {
 // implementing Context
 // Comparator, used to enable Context as std::map key.
 // Orders by device type first; contexts of the same type are ordered by device id.
 inline bool Context::operator<(const Context &b) const {
   if (dev_type != b.dev_type) {
     return dev_type < b.dev_type;
   }
   return dev_id < b.dev_id;
 }
 // Create a new context.
 //   dev_type - device type of the new context
 //   dev_id   - device id; a non-negative value is used as is. A negative value selects
 //              id 0, except for types that have the kGPU bit set: there the id is taken
 //              from cudaGetDevice(), or the call is fatal when built without CUDA.
 inline Context Context::Create(DeviceType dev_type, int32_t dev_id) {
   Context result;
   result.dev_type = dev_type;
   // Explicit id: nothing to resolve.
   if (dev_id >= 0) {
     result.dev_id = dev_id;
     return result;
   }
   result.dev_id = 0;
   if (dev_type & kGPU) {
 #if MXNET_USE_CUDA
     CHECK_EQ(cudaGetDevice(&result.dev_id), cudaSuccess);
 #else
     LOG(FATAL) << "Please compile with CUDA enabled for cuda features";
 #endif
   }
   return result;
 }
 // Convenience factories: each forwards to Context::Create with a fixed device type.

 // CPU context with the given id.
 inline Context Context::CPU(int32_t dev_id) {
   return Context::Create(Context::kCPU, dev_id);
 }

 // Pinned-CPU context with the given id.
 inline Context Context::CPUPinned(int32_t dev_id) {
   return Context::Create(Context::kCPUPinned, dev_id);
 }

 // Shared-CPU context with the given id.
 inline Context Context::CPUShared(int32_t dev_id) {
   return Context::Create(Context::kCPUShared, dev_id);
 }

 // GPU context with the given id.
 inline Context Context::GPU(int32_t dev_id) {
   return Context::Create(Context::kGPU, dev_id);
 }

 // Number of GPUs reported by cudaGetDeviceCount().
 // Returns 0 when built without CUDA or when the runtime reports cudaErrorNoDevice;
 // any other CUDA error fails the CHECK.
 inline int32_t Context::GetGPUCount() {
 #if MXNET_USE_CUDA
   int32_t num_devices;
   cudaError_t status = cudaGetDeviceCount(&num_devices);
   if (status != cudaErrorNoDevice) {
     CHECK_EQ(status, cudaSuccess) << " CUDA: " << cudaGetErrorString(status);
     return num_devices;
   }
   return 0;
 #else
   return 0;
 #endif
 }

 // Number of GPU streams available to each worker: 2 when the environment variable
 // MXNET_GPU_WORKER_NSTREAMS is set to 2 or more, otherwise 1.
 // The value is computed on the first call and cached in a function-local static.
 inline int32_t Context::GetGPUStreamsPerWorker() {
   static int32_t num_streams = []() -> int32_t {
     // The default number of streams available if the user has not set MXNET_GPU_WORKER_NSTREAMS.
     const int32_t default_num_streams = 1;
     const int32_t requested = dmlc::GetEnv("MXNET_GPU_WORKER_NSTREAMS", default_num_streams);
     // The get_aux_stream() interface can supply one additional stream beyond the standard one.
     return requested >= 2 ? 2 : 1;
   }();
   return num_streams;
 }

 // Get the free and total available memory on a GPU.
 //   dev       - id of the GPU to query
 //   free_mem  - output: free memory as reported by cudaMemGetInfo
 //   total_mem - output: total memory as reported by cudaMemGetInfo
 // The calling thread's current CUDA device is switched to `dev` for the query and then
 // switched back. Any CUDA error fails a CHECK; a build without CUDA is always fatal.
 inline void Context::GetGPUMemoryInformation(int dev, uint64_t *free_mem,
                                              uint64_t *total_mem) {
 #if MXNET_USE_CUDA

   size_t memF, memT;
   cudaError_t e;

   int curDevice;
   e = cudaGetDevice(&curDevice);
   CHECK_EQ(e, cudaSuccess) << " CUDA: " << cudaGetErrorString(e);

   e = cudaSetDevice(dev);
   CHECK_EQ(e, cudaSuccess) << " CUDA: " << cudaGetErrorString(e);

   // Run the query, then switch back to the caller's device *before* inspecting the
   // query result, so a failing cudaMemGetInfo cannot leave the thread on `dev`.
   const cudaError_t query_err = cudaMemGetInfo(&memF, &memT);
   e = cudaSetDevice(curDevice);

   CHECK_EQ(query_err, cudaSuccess) << " CUDA: " << cudaGetErrorString(query_err);
   CHECK_EQ(e, cudaSuccess) << " CUDA: " << cudaGetErrorString(e);

   *free_mem = static_cast<uint64_t>(memF);
   *total_mem = static_cast<uint64_t>(memT);

 #else
   LOG(FATAL)
       << "This call is only supported for MXNet built with CUDA support.";
 #endif
 }

 // Parse a context from a string of the form "<type>(<id>)", where <type> is one of
 // "cpu", "gpu", "cpu_pinned" or "cpu_shared" and <id> is an integer, e.g. "gpu(0)".
 // The closing parenthesis must be the last character and the whole text between the
 // parentheses must be consumed by the integer conversion, so "gpu(0abc)" is rejected.
 // Any malformed input ends in LOG(FATAL) with "Invalid context string <str>".
 inline Context Context::FromString(const std::string& str) {
   Context ret;
   try {
     const std::string::size_type l = str.find('(');
     CHECK_NE(l, std::string::npos);
     const std::string::size_type r = str.find(')');
     CHECK_EQ(r, str.length()-1);

     const std::string type = str.substr(0, l);
     const std::string id_text = str.substr(l+1, r-l-1);
     // std::stoi stops at the first character it cannot convert and reports how many
     // characters it used; requiring all of them rejects trailing junk after the number.
     std::size_t consumed = 0;
     const int id = std::stoi(id_text, &consumed);
     CHECK_EQ(consumed, id_text.length());
     if (type == "cpu") {
       ret = CPU(id);
     } else if (type == "gpu") {
       ret = GPU(id);
     } else if (type == "cpu_pinned") {
       ret = CPUPinned(id);
     } else if (type == "cpu_shared") {
       ret = CPUShared(id);
     } else {
       LOG(FATAL) << "Invalid context string " << str;
     }
   } catch (...) {
     // Every failure above (failed CHECK, std::stoi exception, unknown type) is reported
     // uniformly as an invalid context string.
     LOG(FATAL) << "Invalid context string " << str;
   }
   return ret;
 }

 // Writes a context as "<type>(<dev_id>)", where <type> is cpu, gpu, cpu_pinned or
 // cpu_shared, or "unknown" for any other device type. Returns the stream.
 inline std::ostream& operator<<(std::ostream &out, const Context &ctx) {
   const char *prefix = "unknown(";
   if (ctx.dev_type == Context::kCPU) {
     prefix = "cpu(";
   } else if (ctx.dev_type == Context::kGPU) {
     prefix = "gpu(";
   } else if (ctx.dev_type == Context::kCPUPinned) {
     prefix = "cpu_pinned(";
   } else if (ctx.dev_type == Context::kCPUShared) {
     prefix = "cpu_shared(";
   }
   out << prefix << ctx.dev_id << ")";
   return out;
 }

 // describe op registration point
 // STRINGIZE_DETAIL turns its argument into a string literal exactly as written;
 // STRINGIZE adds one level of indirection so that a macro argument such as __LINE__
 // is expanded before it is turned into a string.
 #define STRINGIZE_DETAIL(x) #x
 #define STRINGIZE(x) STRINGIZE_DETAIL(x)
 // Expands to describe(<args> "\n\nFrom:<file>:<line>"): the given string literal(s)
 // with the file and line of the expansion site appended by literal concatenation.
 #define MXNET_DESCRIBE(...) describe(__VA_ARGS__ "\n\nFrom:" __FILE__ ":" STRINGIZE(__LINE__))
 // String literal "\n\nDefined in <file>:L<line>" for the expansion site.
 #define ADD_FILELINE "\n\nDefined in " __FILE__ ":L" STRINGIZE(__LINE__)


 // Only defined when MXNET_USE_MKLDNN == 1.
 // NOTE(review): presumably the byte alignment required for MKLDNN buffers, judging by
 // the name -- confirm against the code that uses it.
 #if MXNET_USE_MKLDNN == 1
 constexpr size_t kMKLDNNAlign = 64;
 #endif

 }  // namespace mxnet

 namespace std {
 // std::hash specialization for mxnet::Context: combines the device type and the device
 // id, in that order, with dmlc::HashCombine starting from a seed of 0.
 template<> struct hash<mxnet::Context> {
   size_t operator()(const mxnet::Context& ctx) const {
     const size_t type_bits = static_cast<size_t>(ctx.dev_type);
     const size_t id_bits = static_cast<size_t>(ctx.dev_id);
     size_t seed = 0;
     seed = dmlc::HashCombine(seed, type_bits);
     return dmlc::HashCombine(seed, id_bits);
   }
 };

 // make_unique backport, only compiled when the language level is below C++14 and the
 // compiler is not MSVC. Constructs a T from the perfectly forwarded arguments with
 // `new` and returns it wrapped in a std::unique_ptr<T>.
 #if __cplusplus < 201402L && !defined(_MSC_VER)
 template<typename T, typename... Args>
 inline std::unique_ptr<T> make_unique(Args&&... args) {
   std::unique_ptr<T> owner(new T(std::forward<Args>(args)...));
   return owner;
 }
 #endif
 }  // namespace std

 #include "./tensor_blob.h"
 #endif  // MXNET_BASE_H_
mxnet::Context::dev_mask
DeviceType dev_mask() const
Get corresponding device mask.
Definition: base.h:120

mxnet::Context::kMaxDevID
static const int32_t kMaxDevID
the maximal device index
Definition: base.h:174

mxnet::RunContext::is_bulk
bool is_bulk
indicator of whether this execution is run in bulk mode
Definition: base.h:351

mxnet
namespace of mxnet
Definition: base.h:89

mxnet::Context::GetGPUMemoryInformation
static void GetGPUMemoryInformation(int dev, uint64_t *free, uint64_t *total)
get the free and total available memory on a GPU

mxnet::SyncedGPUAuxStream::SyncedGPUAuxStream
SyncedGPUAuxStream(GPUAuxStream *gpu_aux_stream)
constructor.
Definition: base.h:308

mxnet::RunContext::get_stream
mshadow::Stream< xpu > * get_stream() const
get mshadow stream from Context
Definition: base.h:358

mxnet::GPUAuxStream::PostAuxStreamUseSync
void PostAuxStreamUseSync()
Makes future primary stream work wait on the completion of existing aux stream work.
Definition: base.h:270

mxnet::Context::GetGPUCount
static int32_t GetGPUCount()

mxnet::Context::Load
bool Load(dmlc::Stream *strm)
load the content from binary stream
Definition: base.h:166

mxnet::real_t
mshadow::default_real_t real_t
data type that will be used to store ndarray
Definition: base.h:97

mxnet::Context::GPU
static Context GPU(int32_t dev_id=-1)

std
Definition: tuple.h:709

mxnet::GPUAuxStream::GPUAuxStream
GPUAuxStream(mshadow::Stream< gpu > *primary_stream)
constructor.
Definition: base.h:235

mxnet::RunContext::ctx
Context ctx
base Context
Definition: base.h:339

mxnet::Context::operator<
bool operator<(const Context &b) const
Comparator, used to enable Context as std::map key.

mxnet::Context::kMaxDevType
static const int32_t kMaxDevType
the maximal device type
Definition: base.h:172

mxnet::RunContext
execution time context. The information needed in runtime for actual execution.
Definition: base.h:337

mxnet::GPUAuxStream
Holds an auxiliary mshadow gpu stream that can be synced with a primary stream.
Definition: base.h:229

mxnet::GPUAuxStream::GetStream
mshadow::Stream< gpu > * GetStream()
Getter for created auxiliary stream.
Definition: base.h:276

mxnet::SyncedGPUAuxStream
Provides automatic coordination of an auxiliary stream with a primary one. This object, upon construction, prepares an aux stream for use by syncing it with enqueued primary-stream work. Object destruction will sync again so future primary-stream work will wait on enqueued aux-stream work. If MXNET_GPU_WORKER_NSTREAMS == 1, then this defaults simply: the primary stream will equal the aux stream and the syncs will be executed as nops. See ./src/operator/cudnn/cudnn_convolution-inl.h for a usage example.
Definition: base.h:302

mxnet::RunContext::aux_stream
void * aux_stream
the auxiliary stream of the device, can be NULL or Stream<gpu>* in GPU mode
Definition: base.h:347

mxnet::GPUAuxStream::~GPUAuxStream
~GPUAuxStream()
destructor
Definition: base.h:252

mxnet::Context::dev_type
DeviceType dev_type
the device type we run the op on
Definition: base.h:111

mxnet::Context::kCPU
Definition: base.h:105

mxnet::Context::GetGPUStreamsPerWorker
static int32_t GetGPUStreamsPerWorker()

mxnet::Context::dev_id
int32_t dev_id
device id we are going to run it on
Definition: base.h:113

mxnet::Context::kCPUPinned
Definition: base.h:107

mxnet::SyncedGPUAuxStream::GetStream
mshadow::Stream< gpu > * GetStream() const
Getter for underlying mshadow::Stream<gpu>.
Definition: base.h:324

mxnet::RunContext::stream
void * stream
the stream of the device, can be NULL or Stream<gpu>* in GPU mode
Definition: base.h:343

mxnet::Context::Save
void Save(dmlc::Stream *strm) const
save the content into binary stream
Definition: base.h:157

mxnet::gpu
mshadow::gpu gpu
mxnet gpu
Definition: base.h:93

mxnet::RunContext::get_ctx
const Context & get_ctx() const
get the base Context from RunContext
Definition: base.h:371

mxnet::Context::kGPU
Definition: base.h:106

mxnet::Context::DeviceType
DeviceType
Type of device.
Definition: base.h:104

mxnet::Context::CPUShared
static Context CPUShared(int32_t dev_id=0)

mxnet::cpu
mshadow::cpu cpu
mxnet cpu
Definition: base.h:91

mxnet::SyncedGPUAuxStream::~SyncedGPUAuxStream
~SyncedGPUAuxStream()
destructor
Definition: base.h:312

mxnet::Context::real_dev_id
int real_dev_id() const
Returns dev_id for kGPU and kCPUPinned, 0 otherwise.
Definition: base.h:127

mxnet::Op
nnvm::Op Op
operator structure from NNVM
Definition: base.h:99

mxnet::Context::Context
Context()
default constructor
Definition: base.h:115

mxnet::Context::Create
static Context Create(DeviceType dev_type, int32_t dev_id=-1)
Create a new context.

mxnet::Context::operator!=
bool operator!=(const Context &b) const
check if current context not equals another one
Definition: base.h:150

mxnet::Context::CPU
static Context CPU(int32_t dev_id=0)

mxnet::cpp::operator<<
std::ostream & operator<<(std::ostream &out, const NDArray &ndarray)

mxnet::RunContext::get_gpu_aux_stream
SyncedGPUAuxStream get_gpu_aux_stream() const
get an RAII object that transparently handles the syncing of the auxiliary stream.
Definition: base.h:366

mxnet::Context::CPUPinned
static Context CPUPinned(int32_t dev_id=-1)

mxnet::GPUAuxStream::PreAuxStreamUseSync
void PreAuxStreamUseSync()
Makes future aux stream work wait on the completion of existing primary stream work.
Definition: base.h:262

tuple.h
Data structure Tuple and TShape to store dynamic sized shapes.

mxnet::Context::kCPUShared
Definition: base.h:108

mxnet::Context::FromString
static Context FromString(const std::string &str)

mxnet::GPUAuxStream::StreamSync
static void StreamSync(mshadow::Stream< gpu > *s1, mshadow::Stream< gpu > *s2, cudaEvent_t event)
Make future work enqueued to s2 wait on completion of current work enqueued to s1.
Definition: base.h:283

mxnet::index_t
mshadow::index_t index_t
index type, usually unsigned
Definition: base.h:95

tensor_blob.h
TBlob class that holds common representation of arbitrary dimension tensor, can be used to transformed...

mxnet::Context
Context information about the execution environment.
Definition: base.h:102

libinfo.h
get features of the MXNet library at runtime

mxnet::Context::operator==
bool operator==(const Context &b) const
check if current context equals another one
Definition: base.h:142

mxnet::cpp::index_t
unsigned index_t
Definition: base.h:37