mxnet
stream_gpu-inl.h
Go to the documentation of this file.
1 /*
2  * Licensed to the Apache Software Foundation (ASF) under one
3  * or more contributor license agreements. See the NOTICE file
4  * distributed with this work for additional information
5  * regarding copyright ownership. The ASF licenses this file
6  * to you under the Apache License, Version 2.0 (the
7  * "License"); you may not use this file except in compliance
8  * with the License. You may obtain a copy of the License at
9  *
10  * http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing,
13  * software distributed under the License is distributed on an
14  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15  * KIND, either express or implied. See the License for the
16  * specific language governing permissions and limitations
17  * under the License.
18  */
19 
25 #ifndef MSHADOW_STREAM_GPU_INL_H_
26 #define MSHADOW_STREAM_GPU_INL_H_
27 #include <memory>
28 #include "./base.h"
29 #include "./tensor.h"
30 #include "./logging.h"
31 
32 namespace mshadow {
33 #if MSHADOW_USE_CUDA == 1
34 // Stream allocation
35 // actual implementation of GPU stream in CUDA
36 template<>
37 struct Stream<gpu> {
39  enum HandleState {
40  NoHandle = 0,
41  OwnHandle = 1,
42  };
44  cudaStream_t stream_;
46  cublasHandle_t blas_handle_;
48  #if MSHADOW_USE_CUSOLVER == 1
49  cusolverDnHandle_t solver_handle_;
50  #endif
51 
52  #if MSHADOW_USE_CUDNN == 1
53  cudnnHandle_t dnn_handle_;
54  #endif
55 
62  cudaDeviceProp prop;
64  int dev_id;
65 
66  Stream(void)
67  : stream_(0)
68  , blas_handle_(0)
69 #if MSHADOW_USE_CUDNN == 1
70  , dnn_handle_(0)
71 #endif
72  , blas_handle_ownership_(NoHandle)
73  , solver_handle_ownership_(NoHandle)
74  , dnn_handle_ownership_(NoHandle) {}
79  inline void Wait(void) {
80  MSHADOW_CUDA_CALL(cudaStreamSynchronize(stream_));
81  }
86  inline bool CheckIdle(void) {
87  cudaError_t err = cudaStreamQuery(stream_);
88  if (err == cudaSuccess) return true;
89  if (err == cudaErrorNotReady) return false;
90  LOG(FATAL) << cudaGetErrorString(err);
91  return false;
92  }
97  inline static cudaStream_t GetStream(Stream<gpu> *stream) {
98  if (stream == NULL) {
99 #if MSHADOW_FORCE_STREAM
100  LOG(FATAL) << "Default GPU stream was used when MSHADOW_FORCE_STREAM was on";
101 #endif
102  return 0;
103  } else {
104  return stream->stream_;
105  }
106  }
111  inline static cublasHandle_t GetBlasHandle(Stream<gpu> *stream) {
112  if (stream == NULL) {
113  return 0;
114  } else {
115  CHECK_NE(stream->blas_handle_ownership_, NoHandle)
116  << "No handle exist in source stream";
117  return stream->blas_handle_;
118  }
119  }
121  inline void DestroyBlasHandle() {
122  if (blas_handle_ownership_ == OwnHandle) {
123  cublasStatus_t err = cublasDestroy(blas_handle_);
124  blas_handle_ownership_ = NoHandle;
125  CHECK_EQ(err, CUBLAS_STATUS_SUCCESS) << "Destory cublas handle failed";
126  }
127  }
129  inline void CreateBlasHandle() {
130  this->DestroyBlasHandle();
131  cublasStatus_t err = cublasCreate(&blas_handle_);
132  blas_handle_ownership_ = OwnHandle;
133  CHECK_EQ(err, CUBLAS_STATUS_SUCCESS) << "Create cublas handle failed";
134  err = cublasSetStream(blas_handle_, stream_);
135  CHECK_EQ(err, CUBLAS_STATUS_SUCCESS) << "Setting cublas stream failed";
136  }
137 #if MSHADOW_USE_CUSOLVER == 1
138  inline static cusolverDnHandle_t GetSolverHandle(Stream<gpu> *stream) {
139  if (stream == NULL) {
140  return 0;
141  } else {
142  CHECK_NE(stream->solver_handle_ownership_, NoHandle) << "No handle exist in source stream";
143  return stream->solver_handle_;
144  }
145  }
146 #endif
147  inline void DestroySolverHandle() {
148 #if MSHADOW_USE_CUSOLVER == 1
149  if (solver_handle_ownership_ == OwnHandle) {
150  cusolverStatus_t err = cusolverDnDestroy(solver_handle_);
151  CHECK_EQ(err, CUSOLVER_STATUS_SUCCESS) << "Destory cusolver handle failed";
152  }
153 #endif
154  }
155  inline void CreateSolverHandle() {
156 #if MSHADOW_USE_CUSOLVER == 1
157  this->DestroySolverHandle();
158  cusolverStatus_t err = cusolverDnCreate(&solver_handle_);
159  CHECK_EQ(err, CUSOLVER_STATUS_SUCCESS) << "Create cusolver handle failed";
160  err = cusolverDnSetStream(solver_handle_, stream_);
161  CHECK_EQ(err, CUSOLVER_STATUS_SUCCESS) << "Setting cusolver stream failed";
162  this->solver_handle_ownership_ = OwnHandle;
163 #endif
164  }
165 // #if MSHADOW_USE_CUDNN && defined(__CUDACC__)
166 #if MSHADOW_USE_CUDNN == 1
167  inline static cudnnHandle_t GetDnnHandle(Stream<gpu> *stream) {
168  if (stream == NULL) {
169  return 0;
170  } else {
171  CHECK_NE(stream->dnn_handle_ownership_, NoHandle) << "No handle exist in source stream";
172  return stream->dnn_handle_;
173  }
174  }
175 #endif
176  inline void DestroyDnnHandle() {
177 // #if MSHADOW_USE_CUDNN && defined(__CUDACC__)
178 #if MSHADOW_USE_CUDNN == 1
179  if (dnn_handle_ownership_ == OwnHandle) {
180  cudnnStatus_t err = cudnnDestroy(dnn_handle_);
181  this->dnn_handle_ownership_ = NoHandle;
182  CHECK_EQ(err, CUDNN_STATUS_SUCCESS) << cudnnGetErrorString(err);
183  }
184 #endif
185  }
186  inline void CreateDnnHandle() {
187 // #if MSHADOW_USE_CUDNN == 1 && defined(__CUDACC__)
188 #if MSHADOW_USE_CUDNN == 1
189  this->DestroyDnnHandle();
190  cudnnStatus_t err = cudnnCreate(&dnn_handle_);
191  CHECK_EQ(err, CUDNN_STATUS_SUCCESS) << cudnnGetErrorString(err);
192  // At this point, we have the resource which may need to be freed
193  this->dnn_handle_ownership_ = OwnHandle;
194  err = cudnnSetStream(dnn_handle_, stream_);
195  CHECK_EQ(err, CUDNN_STATUS_SUCCESS) << cudnnGetErrorString(err);
196 #endif
197  }
198 };
199 template<>
200 inline void DeleteStream<gpu>(Stream<gpu> *stream) {
201  if (stream) {
202  MSHADOW_CUDA_CALL(cudaStreamDestroy(stream->stream_));
203  stream->DestroyBlasHandle();
204  stream->DestroySolverHandle();
205  stream->DestroyDnnHandle();
206  delete stream;
207  }
208 }
209 template<>
210 inline Stream<gpu> *NewStream<gpu>(bool create_blas_handle,
211  bool create_dnn_handle,
212  int dev_id) {
213  // RAII on Cuda exception
214  struct StreamDeleter { void operator()(Stream<gpu> *ptr) const { DeleteStream<gpu>(ptr); } };
215  std::unique_ptr<Stream<gpu>, StreamDeleter> st(new Stream<gpu>());
216  MSHADOW_CUDA_CALL(cudaStreamCreate(&st->stream_));
217  if (create_blas_handle) {
218  st->CreateBlasHandle();
219  st->CreateSolverHandle();
220  }
221  if (create_dnn_handle) {
222  st->CreateDnnHandle();
223  }
224  st->dev_id = dev_id;
225  if (dev_id != -1) {
226  MSHADOW_CUDA_CALL(cudaGetDeviceProperties(&st->prop, dev_id));
227  }
228  return st.release();
229 }
230 #endif
231 } // namespace mshadow
232 #endif // MSHADOW_STREAM_GPU_INL_H_
static cudaStream_t GetStream(Stream< gpu > *stream)
returns actual cudaStream_t given an input GPU stream pointer
Definition: stream_gpu-inl.h:97
HandleState dnn_handle_ownership_
cudnn handle ownership
Definition: stream_gpu-inl.h:60
Definition: stream_gpu-inl.h:37
bool CheckIdle(void)
query whether the stream is idle
Definition: stream_gpu-inl.h:86
static cusolverDnHandle_t GetSolverHandle(Stream< gpu > *stream)
Definition: stream_gpu-inl.h:138
HandleState
handle state
Definition: stream_gpu-inl.h:39
Stream(void)
Definition: stream_gpu-inl.h:66
Stream< gpu > * NewStream< gpu >(bool create_blas_handle, bool create_dnn_handle, int dev_id)
Definition: stream_gpu-inl.h:210
void DestroySolverHandle()
Definition: stream_gpu-inl.h:147
#define MSHADOW_CUDA_CALL(func)
Protected cuda call in mshadow.
Definition: base.h:278
cudaDeviceProp prop
cudaDeviceProp
Definition: stream_gpu-inl.h:62
header file of tensor data structure and functions This lib requires explicit memory allocation and d...
device name GPU
Definition: tensor.h:46
HandleState blas_handle_ownership_
cublas handle ownership
Definition: stream_gpu-inl.h:56
int dev_id
dev id
Definition: stream_gpu-inl.h:64
HandleState solver_handle_ownership_
cusolver handle ownership
Definition: stream_gpu-inl.h:58
void CreateBlasHandle()
Destroy original blas handle and create a new one.
Definition: stream_gpu-inl.h:129
cudaStream_t stream_
cudaStream
Definition: stream_gpu-inl.h:44
cublasHandle_t blas_handle_
cublas handle
Definition: stream_gpu-inl.h:46
void DestroyDnnHandle()
Definition: stream_gpu-inl.h:176
void Wait(void)
wait for all the computation associated with this stream to complete
Definition: stream_gpu-inl.h:79
static cublasHandle_t GetBlasHandle(Stream< gpu > *stream)
return actual cublasHandle
Definition: stream_gpu-inl.h:111
overloaded + operator between half_t and bf16_t
Definition: base.h:334
void CreateDnnHandle()
Definition: stream_gpu-inl.h:186
void DestroyBlasHandle()
Destroy cublas handle if own it.
Definition: stream_gpu-inl.h:121
cusolverDnHandle_t solver_handle_
cusolver handle
Definition: stream_gpu-inl.h:49
#define MSHADOW_USE_CUDNN
use CUDNN support, must ensure that the cudnn include path is correct
Definition: base.h:125
void CreateSolverHandle()
Definition: stream_gpu-inl.h:155
void DeleteStream< gpu >(Stream< gpu > *stream)
Definition: stream_gpu-inl.h:200
computation stream structure, used for asynchronous computations
Definition: tensor.h:383