CUDA debugging utilities. More...

#include <dmlc/logging.h>
#include <dmlc/parameter.h>
#include <dmlc/optional.h>
#include <mshadow/base.h>
#include <cuda_runtime.h>
#include <cublas_v2.h>
#include <curand.h>

Include dependency graph for cuda_utils.h:

This graph shows which files directly or indirectly include this file:

Go to the source code of this file.

Namespaces
	mxnet
	namespace of mxnet

	mxnet::common

	mxnet::common::cuda
	common utils for cuda

Macros
#define	CHECK_CUDA_ERROR(msg)
	Check CUDA error. More...

#define	CUDA_CALL(func)
	Protected CUDA call. More...

#define	CUBLAS_CALL(func)
	Protected cuBLAS call. More...

#define	CUSOLVER_CALL(func)
	Protected cuSolver call. More...

#define	CURAND_CALL(func)
	Protected cuRAND call. More...

#define	NVRTC_CALL(x)
	Protected NVRTC call. More...

#define	CUDA_DRIVER_CALL(func)
	Protected CUDA driver call. More...

#define	CUDA_UNROLL _Pragma("unroll")

#define	CUDA_NOUNROLL _Pragma("nounroll")

#define	MXNET_CUDA_ALLOW_TENSOR_CORE_DEFAULT true

Functions
const char *	mxnet::common::cuda::CublasGetErrorString (cublasStatus_t error)
	Get string representation of cuBLAS errors. More...

const char *	mxnet::common::cuda::CusolverGetErrorString (cusolverStatus_t error)
	Get string representation of cuSOLVER errors. More...

const char *	mxnet::common::cuda::CurandGetErrorString (curandStatus_t status)
	Get string representation of cuRAND errors. More...

template<typename DType >
DType __device__	mxnet::common::cuda::CudaMax (DType a, DType b)

template<typename DType >
DType __device__	mxnet::common::cuda::CudaMin (DType a, DType b)

int	ComputeCapabilityMajor (int device_id)
	Determine major version number of the gpu's cuda compute architecture. More...

int	ComputeCapabilityMinor (int device_id)
	Determine minor version number of the gpu's cuda compute architecture. More...

int	SMArch (int device_id)
	Return the integer SM architecture (e.g. Volta = 70). More...

bool	SupportsFloat16Compute (int device_id)
	Determine whether a cuda-capable gpu's architecture supports float16 math. Assume not if device_id is negative. More...

bool	SupportsTensorCore (int device_id)
	Determine whether a cuda-capable gpu's architecture supports Tensor Core math. Assume not if device_id is negative. More...

bool	GetEnvAllowTensorCore ()
	Returns global policy for TensorCore algo use. More...

Detailed Description

CUDA debugging utilities.

Macro Definition Documentation

#define CHECK_CUDA_ERROR ( msg )

Value:

{                                                                          \
    cudaError_t e = cudaGetLastError();                                      \
    CHECK_EQ(e, cudaSuccess) << (msg) << " CUDA: " << cudaGetErrorString(e); \
  }

Check CUDA error.

Parameters

msg	Message to print if an error occurred.

#define CUBLAS_CALL ( func )

Value:

{                                                             \
    cublasStatus_t e = (func);                                  \
    CHECK_EQ(e, CUBLAS_STATUS_SUCCESS)                          \
        << "cuBLAS: " << mxnet::common::cuda::CublasGetErrorString(e); \
  }

Protected cuBLAS call.

Parameters

func	Expression to call.

It checks for cuBLAS errors after invocation of the expression.

#define CUDA_CALL ( func )

Value:

{                                                                \
    cudaError_t e = (func);                                        \
    CHECK(e == cudaSuccess || e == cudaErrorCudartUnloading)       \
        << "CUDA: " << cudaGetErrorString(e);                      \
  }

Protected CUDA call.

Parameters

func	Expression to call.

It checks for CUDA errors after invocation of the expression.

#define CUDA_DRIVER_CALL ( func )

Value:

{                                                                     \
    CUresult e = (func);                                                \
    if (e != CUDA_SUCCESS) {                                            \
      char const * err_msg = nullptr;                                         \
      if (cuGetErrorString(e, &err_msg) == CUDA_ERROR_INVALID_VALUE) {  \
        LOG(FATAL) << "CUDA Driver: Unknown error " << e;               \
      } else {                                                          \
        LOG(FATAL) << "CUDA Driver: " << err_msg;                       \
      }                                                                 \
    }                                                                   \
  }

Protected CUDA driver call.

Parameters

func	Expression to call.

It checks for CUDA driver errors after invocation of the expression.

#define CUDA_NOUNROLL _Pragma("nounroll")

#define CUDA_UNROLL _Pragma("unroll")

#define CURAND_CALL ( func )

Value:

{                                                             \
    curandStatus_t e = (func);                                  \
    CHECK_EQ(e, CURAND_STATUS_SUCCESS)                          \
        << "cuRAND: " << mxnet::common::cuda::CurandGetErrorString(e); \
  }

Protected cuRAND call.

Parameters

func	Expression to call.

It checks for cuRAND errors after invocation of the expression.

#define CUSOLVER_CALL ( func )

Value:

{                                                                 \
    cusolverStatus_t e = (func);                                    \
    CHECK_EQ(e, CUSOLVER_STATUS_SUCCESS)                            \
        << "cuSolver: " << mxnet::common::cuda::CusolverGetErrorString(e); \
  }

Protected cuSolver call.

Parameters

func	Expression to call.

It checks for cuSolver errors after invocation of the expression.

#define MXNET_CUDA_ALLOW_TENSOR_CORE_DEFAULT true

#define NVRTC_CALL ( x )

Value:

{                                                     \
    nvrtcResult result = x;                             \
    CHECK_EQ(result, NVRTC_SUCCESS)                     \
      << #x " failed with error "                       \
      << nvrtcGetErrorString(result);                   \
  }

Protected NVRTC call.

Parameters

x	Expression to call.

It checks for NVRTC errors after invocation of the expression.

Function Documentation

int ComputeCapabilityMajor ( int device_id )

inline

Determine major version number of the gpu's cuda compute architecture.

Parameters

device_id The device index of the cuda-capable gpu of interest.

Returns: the major version number of the gpu's cuda compute architecture.

int ComputeCapabilityMinor ( int device_id )

inline

Determine minor version number of the gpu's cuda compute architecture.

Parameters

device_id The device index of the cuda-capable gpu of interest.

Returns: the minor version number of the gpu's cuda compute architecture.

bool GetEnvAllowTensorCore ( )

inline

Returns global policy for TensorCore algo use.

Returns: whether to allow TensorCore algo (if not specified by the Operator locally).

int SMArch ( int device_id )

inline

Return the integer SM architecture (e.g. Volta = 70).

Parameters

device_id The device index of the cuda-capable gpu of interest.

Returns: the gpu's cuda compute architecture as an int.

bool SupportsFloat16Compute ( int device_id )

inline

Determine whether a cuda-capable gpu's architecture supports float16 math. Assume not if device_id is negative.

Parameters

device_id The device index of the cuda-capable gpu of interest.

Returns: whether the gpu's architecture supports float16 math.

bool SupportsTensorCore ( int device_id )

inline

Determine whether a cuda-capable gpu's architecture supports Tensor Core math. Assume not if device_id is negative.

Parameters

device_id The device index of the cuda-capable gpu of interest.

Returns: whether the gpu's architecture supports Tensor Core math.

Namespaces

Macros

Functions

Detailed Description

Macro Definition Documentation

Function Documentation