CUDA debugging utilities. More...

#include <dmlc/logging.h>
#include <dmlc/parameter.h>
#include <dmlc/optional.h>
#include <mshadow/base.h>
#include <cuda_runtime.h>
#include <cublas_v2.h>
#include <curand.h>

Include dependency graph for cuda_utils.h:

Go to the source code of this file.

Classes
class	mxnet::common::cuda::DeviceStore

Namespaces
	mxnet
	namespace of mxnet

	mxnet::common

	mxnet::common::cuda
	common utils for cuda

Macros
#define	CHECK_CUDA_ERROR(msg)
	Macros/inlines to assist CLion to parse Cuda files (*.cu, *.cuh) More...

#define	CUDA_CALL(func)
	Protected CUDA call. More...

#define	CUBLAS_CALL(func)
	Protected cuBLAS call. More...

#define	CUSOLVER_CALL(func)
	Protected cuSolver call. More...

#define	CURAND_CALL(func)
	Protected cuRAND call. More...

#define	NVRTC_CALL(x)
	Protected NVRTC call. More...

#define	CUDA_DRIVER_CALL(func)
	Protected CUDA driver call. More...

#define	CUDA_UNROLL _Pragma("unroll")

#define	CUDA_NOUNROLL _Pragma("nounroll")

#define	MXNET_CUDA_ALLOW_TENSOR_CORE_DEFAULT true

#define	MXNET_CUDA_TENSOR_OP_MATH_ALLOW_CONVERSION_DEFAULT false

Functions
const char *	mxnet::common::cuda::CublasGetErrorString (cublasStatus_t error)
	Get string representation of cuBLAS errors. More...

const char *	mxnet::common::cuda::CusolverGetErrorString (cusolverStatus_t error)
	Get string representation of cuSOLVER errors. More...

const char *	mxnet::common::cuda::CurandGetErrorString (curandStatus_t status)
	Get string representation of cuRAND errors. More...

template<typename DType >
DType __device__	mxnet::common::cuda::CudaMax (DType a, DType b)

template<typename DType >
DType __device__	mxnet::common::cuda::CudaMin (DType a, DType b)

int	ComputeCapabilityMajor (int device_id)
	Determine major version number of the gpu's cuda compute architecture. More...

int	ComputeCapabilityMinor (int device_id)
	Determine minor version number of the gpu's cuda compute architecture. More...

int	SMArch (int device_id)
	Return the integer SM architecture (e.g. Volta = 70). More...

bool	SupportsFloat16Compute (int device_id)
	Determine whether a cuda-capable gpu's architecture supports float16 math. Assume not if device_id is negative. More...

bool	SupportsTensorCore (int device_id)
	Determine whether a cuda-capable gpu's architecture supports Tensor Core math. Assume not if device_id is negative. More...

bool	GetEnvAllowTensorCore ()
	Returns global policy for TensorCore algo use. More...

bool	GetEnvAllowTensorCoreConversion ()
	Returns global policy for TensorCore implicit type casting. More...

Detailed Description

CUDA debugging utilities.

Macro Definition Documentation

#define CHECK_CUDA_ERROR ( msg )

Value:

{                                                                          \
    cudaError_t e = cudaGetLastError();                                      \
    CHECK_EQ(e, cudaSuccess) << (msg) << " CUDA: " << cudaGetErrorString(e); \
  }

Macros/inlines to assist CLion to parse Cuda files (*.cu, *.cuh)

When compiling a device function, check that the architecture is >= Kepler (3.0). Note that __CUDA_ARCH__ is not defined outside of a device function.

Check CUDA error.

Parameters

msg	Message to print if an error occurred.

#define CUBLAS_CALL ( func )

Value:

{                                                             \
    cublasStatus_t e = (func);                                  \
    CHECK_EQ(e, CUBLAS_STATUS_SUCCESS)                          \
        << "cuBLAS: " << mxnet::common::cuda::CublasGetErrorString(e); \
  }

Protected cuBLAS call.

Parameters

func	Expression to call.

It checks for cuBLAS errors after invocation of the expression.

#define CUDA_CALL ( func )

Value:

{                                                                \
    cudaError_t e = (func);                                        \
    CHECK(e == cudaSuccess || e == cudaErrorCudartUnloading)       \
        << "CUDA: " << cudaGetErrorString(e);                      \
  }

Protected CUDA call.

Parameters

func	Expression to call.

It checks for CUDA errors after invocation of the expression.

#define CUDA_DRIVER_CALL ( func )

Value:

{                                                                     \
    CUresult e = (func);                                                \
    if (e != CUDA_SUCCESS) {                                            \
      char const * err_msg = nullptr;                                         \
      if (cuGetErrorString(e, &err_msg) == CUDA_ERROR_INVALID_VALUE) {  \
        LOG(FATAL) << "CUDA Driver: Unknown error " << e;               \
      } else {                                                          \
        LOG(FATAL) << "CUDA Driver: " << err_msg;                       \
      }                                                                 \
    }                                                                   \
  }

Protected CUDA driver call.

Parameters

func	Expression to call.

It checks for CUDA driver errors after invocation of the expression.

#define CUDA_NOUNROLL _Pragma("nounroll")

#define CUDA_UNROLL _Pragma("unroll")

#define CURAND_CALL ( func )

Value:

{                                                             \
    curandStatus_t e = (func);                                  \
    CHECK_EQ(e, CURAND_STATUS_SUCCESS)                          \
        << "cuRAND: " << mxnet::common::cuda::CurandGetErrorString(e); \
  }

Protected cuRAND call.

Parameters

func	Expression to call.

It checks for cuRAND errors after invocation of the expression.

#define CUSOLVER_CALL ( func )

Value:

{                                                                 \
    cusolverStatus_t e = (func);                                    \
    CHECK_EQ(e, CUSOLVER_STATUS_SUCCESS)                            \
        << "cuSolver: " << mxnet::common::cuda::CusolverGetErrorString(e); \
  }

Protected cuSolver call.

Parameters

func	Expression to call.

It checks for cuSolver errors after invocation of the expression.

#define MXNET_CUDA_ALLOW_TENSOR_CORE_DEFAULT true

#define MXNET_CUDA_TENSOR_OP_MATH_ALLOW_CONVERSION_DEFAULT false

#define NVRTC_CALL ( x )

Value:

{                                                     \
    nvrtcResult result = x;                             \
    CHECK_EQ(result, NVRTC_SUCCESS)                     \
      << #x " failed with error "                       \
      << nvrtcGetErrorString(result);                   \
  }

Protected NVRTC call.

Parameters

x	Expression to call.

It checks for NVRTC errors after invocation of the expression.

Function Documentation

int ComputeCapabilityMajor ( int device_id )

inline

Determine major version number of the gpu's cuda compute architecture.

Parameters

device_id The device index of the cuda-capable gpu of interest.

Returns: the major version number of the gpu's cuda compute architecture.

int ComputeCapabilityMinor ( int device_id )

inline

Determine minor version number of the gpu's cuda compute architecture.

Parameters

device_id The device index of the cuda-capable gpu of interest.

Returns: the minor version number of the gpu's cuda compute architecture.

bool GetEnvAllowTensorCore ( )

inline

Returns global policy for TensorCore algo use.

Returns: whether to allow TensorCore algo (if not specified by the Operator locally).

bool GetEnvAllowTensorCoreConversion ( )

inline

Returns global policy for TensorCore implicit type casting.

int SMArch ( int device_id )

inline

Return the integer SM architecture (e.g. Volta = 70).

Parameters

device_id The device index of the cuda-capable gpu of interest.

Returns: the gpu's cuda compute architecture as an int.

bool SupportsFloat16Compute ( int device_id )

inline

Determine whether a cuda-capable gpu's architecture supports float16 math. Assume not if device_id is negative.

Parameters

device_id The device index of the cuda-capable gpu of interest.

Returns: whether the gpu's architecture supports float16 math.

bool SupportsTensorCore ( int device_id )

inline

Determine whether a cuda-capable gpu's architecture supports Tensor Core math. Assume not if device_id is negative.

Parameters

device_id The device index of the cuda-capable gpu of interest.

Returns: whether the gpu's architecture supports Tensor Core math.

Classes

Namespaces

Macros

Functions

Detailed Description

Macro Definition Documentation

Function Documentation