docs/api/stream__gpu-inl_8h_source.html

 /*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
  * regarding copyright ownership.  The ASF licenses this file
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
  *
  *   http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  * KIND, either express or implied.  See the License for the
  * specific language governing permissions and limitations
  * under the License.
  */

 #ifndef MSHADOW_STREAM_GPU_INL_H_
 #define MSHADOW_STREAM_GPU_INL_H_
 #include <memory>
 #include "./base.h"
 #include "./tensor.h"
 #include "./logging.h"

 namespace mshadow {
 #if MSHADOW_USE_CUDA == 1
 // Stream allocation
 // actual implementation of GPU stream in CUDA
 template<>
 struct Stream<gpu> {
   enum HandleState {
     NoHandle = 0,
     OwnHandle = 1,
   };
   cudaStream_t stream_;
   cublasHandle_t blas_handle_;
   #if MSHADOW_USE_CUSOLVER == 1
   cusolverDnHandle_t solver_handle_;
   #endif

   #if MSHADOW_USE_CUDNN == 1
   cudnnHandle_t dnn_handle_;
   #endif

   HandleState blas_handle_ownership_;
   HandleState solver_handle_ownership_;
   HandleState dnn_handle_ownership_;
   cudaDeviceProp prop;
   int dev_id;

   Stream(void)
     : stream_(0)
       , blas_handle_(0)
 #if MSHADOW_USE_CUDNN == 1
       , dnn_handle_(0)
 #endif
       , blas_handle_ownership_(NoHandle)
       , solver_handle_ownership_(NoHandle)
       , dnn_handle_ownership_(NoHandle) {}
   inline void Wait(void) {
     MSHADOW_CUDA_CALL(cudaStreamSynchronize(stream_));
   }
   inline bool CheckIdle(void) {
     cudaError_t err = cudaStreamQuery(stream_);
     if (err == cudaSuccess) return true;
     if (err == cudaErrorNotReady) return false;
     LOG(FATAL) << cudaGetErrorString(err);
     return false;
   }
   inline static cudaStream_t GetStream(Stream<gpu> *stream) {
     if (stream == NULL) {
 #if MSHADOW_FORCE_STREAM
       LOG(FATAL) << "Default GPU stream was used when MSHADOW_FORCE_STREAM was on";
 #endif
       return 0;
     } else {
       return stream->stream_;
     }
   }
   inline static cublasHandle_t GetBlasHandle(Stream<gpu> *stream) {
     if (stream == NULL) {
       return 0;
     } else {
       CHECK_NE(stream->blas_handle_ownership_, NoHandle)
         << "No handle exist in source stream";
       return stream->blas_handle_;
     }
   }
   inline void DestroyBlasHandle() {
     if (blas_handle_ownership_ == OwnHandle) {
       cublasStatus_t err = cublasDestroy(blas_handle_);
       blas_handle_ownership_ = NoHandle;
       CHECK_EQ(err, CUBLAS_STATUS_SUCCESS) << "Destory cublas handle failed";
     }
   }
   inline void CreateBlasHandle() {
     this->DestroyBlasHandle();
     cublasStatus_t err = cublasCreate(&blas_handle_);
     blas_handle_ownership_ = OwnHandle;
     CHECK_EQ(err, CUBLAS_STATUS_SUCCESS) << "Create cublas handle failed";
     err = cublasSetStream(blas_handle_, stream_);
     CHECK_EQ(err, CUBLAS_STATUS_SUCCESS) << "Setting cublas stream failed";
   }
 #if MSHADOW_USE_CUSOLVER == 1
   inline static cusolverDnHandle_t GetSolverHandle(Stream<gpu> *stream) {
     if (stream == NULL) {
       return 0;
     } else {
       CHECK_NE(stream->solver_handle_ownership_, NoHandle) << "No handle exist in source stream";
       return stream->solver_handle_;
     }
   }
 #endif
   inline void DestroySolverHandle() {
 #if MSHADOW_USE_CUSOLVER == 1
     if (solver_handle_ownership_ == OwnHandle) {
       cusolverStatus_t err = cusolverDnDestroy(solver_handle_);
       CHECK_EQ(err, CUSOLVER_STATUS_SUCCESS) << "Destory cusolver handle failed";
     }
 #endif
   }
   inline void CreateSolverHandle() {
 #if MSHADOW_USE_CUSOLVER == 1
     this->DestroySolverHandle();
     cusolverStatus_t err = cusolverDnCreate(&solver_handle_);
     CHECK_EQ(err, CUSOLVER_STATUS_SUCCESS) << "Create cusolver handle failed";
     err = cusolverDnSetStream(solver_handle_, stream_);
     CHECK_EQ(err, CUSOLVER_STATUS_SUCCESS) << "Setting cusolver stream failed";
     this->solver_handle_ownership_ = OwnHandle;
 #endif
   }
 // #if MSHADOW_USE_CUDNN && defined(__CUDACC__)
 #if MSHADOW_USE_CUDNN == 1
   inline static cudnnHandle_t GetDnnHandle(Stream<gpu> *stream) {
     if (stream == NULL) {
       return 0;
     } else {
       CHECK_NE(stream->dnn_handle_ownership_, NoHandle) << "No handle exist in source stream";
       return stream->dnn_handle_;
     }
   }
 #endif
   inline void DestroyDnnHandle() {
 // #if MSHADOW_USE_CUDNN && defined(__CUDACC__)
 #if MSHADOW_USE_CUDNN == 1
     if (dnn_handle_ownership_ == OwnHandle) {
       cudnnStatus_t err = cudnnDestroy(dnn_handle_);
       this->dnn_handle_ownership_ = NoHandle;
       CHECK_EQ(err, CUDNN_STATUS_SUCCESS) << cudnnGetErrorString(err);
     }
 #endif
   }
   inline void CreateDnnHandle() {
 // #if MSHADOW_USE_CUDNN == 1 && defined(__CUDACC__)
 #if MSHADOW_USE_CUDNN == 1
     this->DestroyDnnHandle();
     cudnnStatus_t err = cudnnCreate(&dnn_handle_);
     CHECK_EQ(err, CUDNN_STATUS_SUCCESS) << cudnnGetErrorString(err);
     // At this point, we have the resource which may need to be freed
     this->dnn_handle_ownership_ = OwnHandle;
     err = cudnnSetStream(dnn_handle_, stream_);
     CHECK_EQ(err, CUDNN_STATUS_SUCCESS) << cudnnGetErrorString(err);
 #endif
   }
 };
 /*!
  * \brief Release a GPU stream: destroy its CUDA stream, destroy every library
  *  handle it owns, then free the Stream<gpu> object itself.
  * \param stream stream to delete; NULL is ignored
  */
 template<>
 inline void DeleteStream<gpu>(Stream<gpu> *stream) {
   if (stream) {
     // stream_ is 0 until a CUDA stream has been created for this object (the
     // constructor initializes it to 0, which GetStream treats as the default
     // stream). There is nothing to destroy in that state, and NewStream<gpu>
     // uses this function as its cleanup path when stream creation itself
     // failed, so do not hand 0 to cudaStreamDestroy.
     if (stream->stream_ != 0) {
       MSHADOW_CUDA_CALL(cudaStreamDestroy(stream->stream_));
     }
     stream->DestroyBlasHandle();
     stream->DestroySolverHandle();
     stream->DestroyDnnHandle();
     delete stream;
   }
 }
 /*!
  * \brief Allocate a Stream<gpu>, create its CUDA stream and, on request, the
  *  library handles bound to it.
  * \param create_blas_handle whether to create the cublas handle (the cusolver
  *  handle is created together with it)
  * \param create_dnn_handle whether to create the cudnn handle
  * \param dev_id device id stored in the stream; when it is not -1 the device
  *  properties of that device are queried into Stream<gpu>::prop
  * \return the new stream; release it with DeleteStream<gpu>
  */
 template<>
 inline Stream<gpu> *NewStream<gpu>(bool create_blas_handle,
                                    bool create_dnn_handle,
                                    int dev_id) {
   // RAII on Cuda exception: until release() the guard hands the partially
   // built stream to DeleteStream<gpu> when the scope is left early.
   struct StreamDeleter {
     void operator()(Stream<gpu> *ptr) const { DeleteStream<gpu>(ptr); }
   };
   typedef std::unique_ptr<Stream<gpu>, StreamDeleter> StreamGuard;
   StreamGuard guard(new Stream<gpu>());
   Stream<gpu> &created = *guard;

   MSHADOW_CUDA_CALL(cudaStreamCreate(&created.stream_));
   if (create_blas_handle) {
     created.CreateBlasHandle();
     created.CreateSolverHandle();
   }
   if (create_dnn_handle) {
     created.CreateDnnHandle();
   }

   created.dev_id = dev_id;
   if (dev_id != -1) {
     MSHADOW_CUDA_CALL(cudaGetDeviceProperties(&created.prop, dev_id));
   }
   return guard.release();
 }
 #endif
 }  // namespace mshadow
 #endif  // MSHADOW_STREAM_GPU_INL_H_
mshadow::Stream< gpu >::GetStream
static cudaStream_t GetStream(Stream< gpu > *stream)
returns actual cudaStream_t given an input GPU stream pointer
Definition: stream_gpu-inl.h:97

mshadow::Stream< gpu >::dnn_handle_ownership_
HandleState dnn_handle_ownership_
cudnn handle ownership
Definition: stream_gpu-inl.h:60

mshadow::Stream< gpu >
Definition: stream_gpu-inl.h:37

mshadow::Stream< gpu >::CheckIdle
bool CheckIdle(void)
query whether the stream is idle
Definition: stream_gpu-inl.h:86

mshadow::Stream< gpu >::GetSolverHandle
static cusolverDnHandle_t GetSolverHandle(Stream< gpu > *stream)
Definition: stream_gpu-inl.h:138

mshadow::Stream< gpu >::HandleState
HandleState
handle state
Definition: stream_gpu-inl.h:39

mshadow::Stream< gpu >::Stream
Stream(void)
Definition: stream_gpu-inl.h:66

mshadow::NewStream< gpu >
Stream< gpu > * NewStream< gpu >(bool create_blas_handle, bool create_dnn_handle, int dev_id)
Definition: stream_gpu-inl.h:210

mshadow::Stream< gpu >::DestroySolverHandle
void DestroySolverHandle()
Definition: stream_gpu-inl.h:147

MSHADOW_CUDA_CALL
#define MSHADOW_CUDA_CALL(func)
Protected cuda call in mshadow.
Definition: base.h:278

mshadow::Stream< gpu >::prop
cudaDeviceProp prop
cudaDeviceProp
Definition: stream_gpu-inl.h:62

tensor.h
header file of tensor data structure and functions This lib requires explicit memory allocation and d...

mshadow::gpu
device name GPU
Definition: tensor.h:46

mshadow::Stream< gpu >::blas_handle_ownership_
HandleState blas_handle_ownership_
cublas handle ownership
Definition: stream_gpu-inl.h:56

mshadow::Stream< gpu >::dev_id
int dev_id
dev id
Definition: stream_gpu-inl.h:64

mshadow::Stream< gpu >::solver_handle_ownership_
HandleState solver_handle_ownership_
cusolver handle ownership
Definition: stream_gpu-inl.h:58

mshadow::Stream< gpu >::CreateBlasHandle
void CreateBlasHandle()
Destroy original blas handle and create a new one.
Definition: stream_gpu-inl.h:129

mshadow::Stream< gpu >::stream_
cudaStream_t stream_
cudaStream
Definition: stream_gpu-inl.h:44

mshadow::Stream< gpu >::blas_handle_
cublasHandle_t blas_handle_
cublas handle
Definition: stream_gpu-inl.h:46

mshadow::Stream< gpu >::DestroyDnnHandle
void DestroyDnnHandle()
Definition: stream_gpu-inl.h:176

mshadow::Stream< gpu >::Wait
void Wait(void)
wait for all the computation associated with this stream to complete
Definition: stream_gpu-inl.h:79

mshadow::Stream< gpu >::GetBlasHandle
static cublasHandle_t GetBlasHandle(Stream< gpu > *stream)
return actual cublasHandle
Definition: stream_gpu-inl.h:111

mshadow
overloaded + operator between half_t and bf16_t
Definition: base.h:334

mshadow::Stream< gpu >::CreateDnnHandle
void CreateDnnHandle()
Definition: stream_gpu-inl.h:186

mshadow::Stream< gpu >::DestroyBlasHandle
void DestroyBlasHandle()
Destroy the cublas handle if this stream owns it.
Definition: stream_gpu-inl.h:121

base.h

mshadow::Stream< gpu >::solver_handle_
cusolverDnHandle_t solver_handle_
cusolver handle
Definition: stream_gpu-inl.h:49

MSHADOW_USE_CUDNN
#define MSHADOW_USE_CUDNN
use CUDNN support, must ensure that the cudnn include path is correct
Definition: base.h:125

mshadow::Stream< gpu >::CreateSolverHandle
void CreateSolverHandle()
Definition: stream_gpu-inl.h:155

mshadow::DeleteStream< gpu >
void DeleteStream< gpu >(Stream< gpu > *stream)
Definition: stream_gpu-inl.h:200

mshadow::Stream
computation stream structure, used for asynchronous computations
Definition: tensor.h:383