mxnet
Namespaces | Macros | Functions
cuda_utils.h File Reference

CUDA debugging utilities. More...

#include <dmlc/logging.h>
#include <dmlc/parameter.h>
#include <dmlc/optional.h>
#include <mshadow/base.h>
#include <cuda_runtime.h>
#include <cublas_v2.h>
#include <curand.h>
Include dependency graph for cuda_utils.h:

Go to the source code of this file.

Namespaces

 mxnet
 namespace of mxnet
 
 mxnet::common
 
 mxnet::common::cuda
 common utils for cuda
 

Macros

#define CHECK_CUDA_ERROR(msg)
 Check CUDA error. More...
 
#define CUDA_CALL(func)
 Protected CUDA call. More...
 
#define CUBLAS_CALL(func)
 Protected cuBLAS call. More...
 
#define CUSOLVER_CALL(func)
 Protected cuSolver call. More...
 
#define CURAND_CALL(func)
 Protected cuRAND call. More...
 
#define NVRTC_CALL(x)
 Protected NVRTC call. More...
 
#define CUDA_DRIVER_CALL(func)
 Protected CUDA driver call. More...
 
#define CUDA_UNROLL   _Pragma("unroll")
 
#define CUDA_NOUNROLL   _Pragma("nounroll")
 
#define MXNET_CUDA_ALLOW_TENSOR_CORE_DEFAULT   true
 

Functions

const char * mxnet::common::cuda::CublasGetErrorString (cublasStatus_t error)
 Get string representation of cuBLAS errors. More...
 
const char * mxnet::common::cuda::CusolverGetErrorString (cusolverStatus_t error)
 Get string representation of cuSOLVER errors. More...
 
const char * mxnet::common::cuda::CurandGetErrorString (curandStatus_t status)
 Get string representation of cuRAND errors. More...
 
template<typename DType >
DType __device__ mxnet::common::cuda::CudaMax (DType a, DType b)
 
template<typename DType >
DType __device__ mxnet::common::cuda::CudaMin (DType a, DType b)
 
int ComputeCapabilityMajor (int device_id)
 Determine major version number of the gpu's cuda compute architecture. More...
 
int ComputeCapabilityMinor (int device_id)
 Determine minor version number of the gpu's cuda compute architecture. More...
 
int SMArch (int device_id)
 Return the integer SM architecture (e.g. Volta = 70). More...
 
bool SupportsFloat16Compute (int device_id)
 Determine whether a cuda-capable gpu's architecture supports float16 math. Assume not if device_id is negative. More...
 
bool SupportsTensorCore (int device_id)
 Determine whether a cuda-capable gpu's architecture supports Tensor Core math. Assume not if device_id is negative. More...
 
bool GetEnvAllowTensorCore ()
 Returns global policy for TensorCore algo use. More...
 

Detailed Description

CUDA debugging utilities.

Macro Definition Documentation

#define CHECK_CUDA_ERROR (   msg)
Value:
{ \
cudaError_t e = cudaGetLastError(); \
CHECK_EQ(e, cudaSuccess) << (msg) << " CUDA: " << cudaGetErrorString(e); \
}

Check CUDA error.

Parameters
msg: Message to print if an error occurred.
#define CUBLAS_CALL (   func)
Value:
{ \
cublasStatus_t e = (func); \
CHECK_EQ(e, CUBLAS_STATUS_SUCCESS) \
<< "cuBLAS: " << mxnet::common::cuda::CublasGetErrorString(e); \
}

See also: const char * CublasGetErrorString(cublasStatus_t error) — get string representation of cuBLAS errors (defined at cuda_utils.h:64).

Protected cuBLAS call.

Parameters
func: Expression to call.

It checks for cuBLAS errors after invocation of the expression.

#define CUDA_CALL (   func)
Value:
{ \
cudaError_t e = (func); \
CHECK(e == cudaSuccess || e == cudaErrorCudartUnloading) \
<< "CUDA: " << cudaGetErrorString(e); \
}

Protected CUDA call.

Parameters
func: Expression to call.

It checks for CUDA errors after invocation of the expression.

#define CUDA_DRIVER_CALL (   func)
Value:
{ \
CUresult e = (func); \
if (e != CUDA_SUCCESS) { \
char const * err_msg = nullptr; \
if (cuGetErrorString(e, &err_msg) == CUDA_ERROR_INVALID_VALUE) { \
LOG(FATAL) << "CUDA Driver: Unknown error " << e; \
} else { \
LOG(FATAL) << "CUDA Driver: " << err_msg; \
} \
} \
}

Protected CUDA driver call.

Parameters
func: Expression to call.

It checks for CUDA driver errors after invocation of the expression.

#define CUDA_NOUNROLL   _Pragma("nounroll")
#define CUDA_UNROLL   _Pragma("unroll")
#define CURAND_CALL (   func)
Value:
{ \
curandStatus_t e = (func); \
CHECK_EQ(e, CURAND_STATUS_SUCCESS) \
<< "cuRAND: " << mxnet::common::cuda::CurandGetErrorString(e); \
}

See also: const char * CurandGetErrorString(curandStatus_t status) — get string representation of cuRAND errors (defined at cuda_utils.h:124).

Protected cuRAND call.

Parameters
func: Expression to call.

It checks for cuRAND errors after invocation of the expression.

#define CUSOLVER_CALL (   func)
Value:
{ \
cusolverStatus_t e = (func); \
CHECK_EQ(e, CUSOLVER_STATUS_SUCCESS) \
<< "cuSolver: " << mxnet::common::cuda::CusolverGetErrorString(e); \
}

See also: const char * CusolverGetErrorString(cusolverStatus_t error) — get string representation of cuSOLVER errors (defined at cuda_utils.h:95).

Protected cuSolver call.

Parameters
func: Expression to call.

It checks for cuSolver errors after invocation of the expression.

#define MXNET_CUDA_ALLOW_TENSOR_CORE_DEFAULT   true
#define NVRTC_CALL (   x)
Value:
{ \
nvrtcResult result = x; \
CHECK_EQ(result, NVRTC_SUCCESS) \
<< #x " failed with error " \
<< nvrtcGetErrorString(result); \
}

Protected NVRTC call.

Parameters
x: Expression to call.

It checks for NVRTC errors after invocation of the expression.

Function Documentation

int ComputeCapabilityMajor ( int  device_id)
inline

Determine major version number of the gpu's cuda compute architecture.

Parameters
device_id: The device index of the cuda-capable gpu of interest.
Returns
the major version number of the gpu's cuda compute architecture.
int ComputeCapabilityMinor ( int  device_id)
inline

Determine minor version number of the gpu's cuda compute architecture.

Parameters
device_id: The device index of the cuda-capable gpu of interest.
Returns
the minor version number of the gpu's cuda compute architecture.
bool GetEnvAllowTensorCore ( )
inline

Returns global policy for TensorCore algo use.

Returns
whether to allow TensorCore algo (if not specified by the Operator locally).
int SMArch ( int  device_id)
inline

Return the integer SM architecture (e.g. Volta = 70).

Parameters
device_id: The device index of the cuda-capable gpu of interest.
Returns
the gpu's cuda compute architecture as an int.
bool SupportsFloat16Compute ( int  device_id)
inline

Determine whether a cuda-capable gpu's architecture supports float16 math. Assume not if device_id is negative.

Parameters
device_id: The device index of the cuda-capable gpu of interest.
Returns
whether the gpu's architecture supports float16 math.
bool SupportsTensorCore ( int  device_id)
inline

Determine whether a cuda-capable gpu's architecture supports Tensor Core math. Assume not if device_id is negative.

Parameters
device_id: The device index of the cuda-capable gpu of interest.
Returns
whether the gpu's architecture supports Tensor Core math.