// fmm_pts_gpu.hpp
  #ifndef _CUDA_FUNC_HPP_
  #define _CUDA_FUNC_HPP_
  3. #ifdef __cplusplus
  4. extern "C" {
  5. #endif
  6. void in_perm_gpu_f(char* precomp_data, float * input_data, char* buff_in , size_t* input_perm, size_t vec_cnt, size_t M_dim0, cudaStream_t* stream);
  7. void in_perm_gpu_d(char* precomp_data, double* input_data, char* buff_in , size_t* input_perm, size_t vec_cnt, size_t M_dim0, cudaStream_t* stream);
  8. void out_perm_gpu_f(char* precomp_data, float * output_data, char* buff_out, size_t* output_perm, size_t vec_cnt, size_t M_dim1, cudaStream_t* stream);
  9. void out_perm_gpu_d(char* precomp_data, double* output_data, char* buff_out, size_t* output_perm, size_t vec_cnt, size_t M_dim1, cudaStream_t* stream);
  10. #ifdef __cplusplus
  11. }
  12. #endif
  13. template <class Real_t>
  14. void in_perm_gpu(char* precomp_data, Real_t* input_data, char* buff_in , size_t* input_perm, size_t vec_cnt, size_t M_dim0, cudaStream_t* stream);
  15. template <class Real_t>
  16. void out_perm_gpu(char* precomp_data, Real_t* output_data, char* buff_out, size_t* output_perm, size_t vec_cnt, size_t M_dim1, cudaStream_t* stream);
  17. template<> inline void in_perm_gpu<float >(char* precomp_data, float * input_data, char* buff_in , size_t* input_perm, size_t vec_cnt, size_t M_dim0, cudaStream_t* stream){
  18. in_perm_gpu_f (precomp_data, input_data, buff_in , input_perm, vec_cnt, M_dim0, stream);
  19. }
  20. template<> inline void in_perm_gpu<double>(char* precomp_data, double* input_data, char* buff_in , size_t* input_perm, size_t vec_cnt, size_t M_dim0, cudaStream_t* stream){
  21. in_perm_gpu_d (precomp_data, input_data, buff_in , input_perm, vec_cnt, M_dim0, stream);
  22. }
  23. template<> inline void out_perm_gpu<float >(char* precomp_data, float * output_data, char* buff_out, size_t* output_perm, size_t vec_cnt, size_t M_dim1, cudaStream_t* stream){
  24. out_perm_gpu_f(precomp_data, output_data, buff_out, output_perm, vec_cnt, M_dim1, stream);
  25. }
  26. template<> inline void out_perm_gpu<double>(char* precomp_data, double* output_data, char* buff_out, size_t* output_perm, size_t vec_cnt, size_t M_dim1, cudaStream_t* stream){
  27. out_perm_gpu_d(precomp_data, output_data, buff_out, output_perm, vec_cnt, M_dim1, stream);
  28. }
  #endif //_CUDA_FUNC_HPP_