CUDA debugging utilities.
More...
#include <dmlc/logging.h>
#include <dmlc/parameter.h>
#include <dmlc/optional.h>
#include <mshadow/base.h>
#include <cuda_runtime.h>
#include <cublas_v2.h>
#include <curand.h>
Go to the source code of this file.
CUDA debugging utilities.
Copyright (c) 2015 by Contributors
#define CHECK_CUDA_ERROR(msg)
Value:{ \
cudaError_t e = cudaGetLastError(); \
CHECK_EQ(e, cudaSuccess) << (msg) << " CUDA: " << cudaGetErrorString(e); \
}
Macros/inlines to assist CLion to parse CUDA files (*.cu, *.cuh).
When compiling a device function, check that the architecture is >= Kepler (3.0). Note that __CUDA_ARCH__ is not defined outside of a device function.
Check CUDA error.
- Parameters
-
msg | Message to print if an error occurred. |
#define CUBLAS_CALL(func)
Value:{ \
cublasStatus_t e = (func); \
CHECK_EQ(e, CUBLAS_STATUS_SUCCESS) \
<< "cuBLAS: " << mxnet::common::cuda::CublasGetErrorString(e); \
}
const char * CublasGetErrorString(cublasStatus_t error)
Get string representation of cuBLAS errors.
Definition: cuda_utils.h:184
Protected cuBLAS call.
- Parameters
-
func | Expression to call. |
It checks for cuBLAS errors after invocation of the expression.
#define CUDA_CALL(func)
Value:{ \
cudaError_t e = (func); \
CHECK(e == cudaSuccess || e == cudaErrorCudartUnloading) \
<< "CUDA: " << cudaGetErrorString(e); \
}
Protected CUDA call.
- Parameters
-
func | Expression to call. |
It checks for CUDA errors after invocation of the expression.
#define CUDA_DRIVER_CALL(func)
Value:{ \
CUresult e = (func); \
if (e != CUDA_SUCCESS) { \
char const * err_msg = nullptr; \
if (cuGetErrorString(e, &err_msg) == CUDA_ERROR_INVALID_VALUE) { \
LOG(FATAL) << "CUDA Driver: Unknown error " << e; \
} else { \
LOG(FATAL) << "CUDA Driver: " << err_msg; \
} \
} \
}
Protected CUDA driver call.
- Parameters
-
func | Expression to call. |
It checks for CUDA driver errors after invocation of the expression.
#define CUDA_NOUNROLL _Pragma("nounroll")
#define CUDA_UNROLL _Pragma("unroll")
#define CURAND_CALL(func)
Value:{ \
curandStatus_t e = (func); \
CHECK_EQ(e, CURAND_STATUS_SUCCESS) \
<< "cuRAND: " << mxnet::common::cuda::CurandGetErrorString(e); \
}
const char * CurandGetErrorString(curandStatus_t status)
Get string representation of cuRAND errors.
Definition: cuda_utils.h:244
Protected cuRAND call.
- Parameters
-
func | Expression to call. |
It checks for cuRAND errors after invocation of the expression.
#define CUSOLVER_CALL(func)
Value:{ \
cusolverStatus_t e = (func); \
CHECK_EQ(e, CUSOLVER_STATUS_SUCCESS) \
<< "cuSolver: " << mxnet::common::cuda::CusolverGetErrorString(e); \
}
const char * CusolverGetErrorString(cusolverStatus_t error)
Get string representation of cuSOLVER errors.
Definition: cuda_utils.h:215
Protected cuSolver call.
- Parameters
-
func | Expression to call. |
It checks for cuSolver errors after invocation of the expression.
#define MXNET_CUDA_ALLOW_TENSOR_CORE_DEFAULT true
#define MXNET_CUDA_TENSOR_OP_MATH_ALLOW_CONVERSION_DEFAULT false
#define NVRTC_CALL(x)
Value:{ \
nvrtcResult result = x; \
CHECK_EQ(result, NVRTC_SUCCESS) \
<< #x " failed with error " \
<< nvrtcGetErrorString(result); \
}
Protected NVRTC call.
- Parameters
-
x | Expression to call. |
It checks for NVRTC errors after invocation of the expression.
inline int ComputeCapabilityMajor(int device_id)
Determine major version number of the gpu's cuda compute architecture.
- Parameters
-
device_id | The device index of the cuda-capable gpu of interest. |
- Returns
- the major version number of the gpu's cuda compute architecture.
inline int ComputeCapabilityMinor(int device_id)
Determine minor version number of the gpu's cuda compute architecture.
- Parameters
-
device_id | The device index of the cuda-capable gpu of interest. |
- Returns
- the minor version number of the gpu's cuda compute architecture.
inline bool GetEnvAllowTensorCore()
Returns global policy for TensorCore algo use.
- Returns
- whether to allow TensorCore algo (if not specified by the Operator locally).
inline bool GetEnvAllowTensorCoreConversion()
Returns global policy for TensorCore implicit type casting.
inline int SMArch(int device_id)
Return the integer SM architecture (e.g. Volta = 70).
- Parameters
-
device_id | The device index of the cuda-capable gpu of interest. |
- Returns
- the gpu's cuda compute architecture as an int.
inline bool SupportsFloat16Compute(int device_id)
Determine whether a cuda-capable gpu's architecture supports float16 math. Assume not if device_id is negative.
- Parameters
-
device_id | The device index of the cuda-capable gpu of interest. |
- Returns
- whether the gpu's architecture supports float16 math.
inline bool SupportsTensorCore(int device_id)
Determine whether a cuda-capable gpu's architecture supports Tensor Core math. Assume not if device_id is negative.
- Parameters
-
device_id | The device index of the cuda-capable gpu of interest. |
- Returns
- whether the gpu's architecture supports Tensor Core math.