[Mlir-commits] [mlir] 4ebd836 - [mlir][sparse][gpu] fix F32 bug for SpMV and SpMM

Aart Bik llvmlistbot at llvm.org
Tue May 23 17:36:12 PDT 2023


Author: Aart Bik
Date: 2023-05-23T17:36:03-07:00
New Revision: 4ebd836d9e7b342ece764d81bed13d3163c8af62

URL: https://github.com/llvm/llvm-project/commit/4ebd836d9e7b342ece764d81bed13d3163c8af62
DIFF: https://github.com/llvm/llvm-project/commit/4ebd836d9e7b342ece764d81bed13d3163c8af62.diff

LOG: [mlir][sparse][gpu] fix F32 bug for SpMV and SpMM

The alpha/beta variables, residing on the host, should have the
32-bit or 64-bit width of the result type. They were formerly always
passed as double.

Reviewed By: Peiming

Differential Revision: https://reviews.llvm.org/D151255

Added: 
    

Modified: 
    mlir/lib/ExecutionEngine/CudaRuntimeWrappers.cpp

Removed: 
    


################################################################################
diff  --git a/mlir/lib/ExecutionEngine/CudaRuntimeWrappers.cpp b/mlir/lib/ExecutionEngine/CudaRuntimeWrappers.cpp
index d5eb9ca1731b9..cbfae90e63f11 100644
--- a/mlir/lib/ExecutionEngine/CudaRuntimeWrappers.cpp
+++ b/mlir/lib/ExecutionEngine/CudaRuntimeWrappers.cpp
@@ -246,6 +246,19 @@ static inline cusparseIndexType_t idxTp(int32_t width) {
   }
 }
 
+// Host alpha/beta = 1.0; <name>p points at the float (w == 32) or double copy.
+#define ALPHABETA(w, alpha, beta)                                              \
+  float(alpha##f) = 1.0, (beta##f) = 1.0;                                      \
+  double(alpha##d) = 1.0, (beta##d) = 1.0;                                     \
+  void *(alpha##p), *(beta##p);                                                \
+  if ((w) == 32) {                                                             \
+    (alpha##p) = reinterpret_cast<void *>(&(alpha##f));                        \
+    (beta##p) = reinterpret_cast<void *>(&(beta##f));                          \
+  } else {                                                                     \
+    (alpha##p) = reinterpret_cast<void *>(&(alpha##d));                        \
+    (beta##p) = reinterpret_cast<void *>(&(beta##d));                          \
+  }
+
 extern "C" MLIR_CUDA_WRAPPERS_EXPORT void *
 mgpuCreateSparseEnv(CUstream /*stream*/) {
   cusparseHandle_t handle = nullptr;
@@ -329,11 +342,10 @@ extern "C" MLIR_CUDA_WRAPPERS_EXPORT intptr_t mgpuSpMVBufferSize(
   cusparseDnVecDescr_t vecX = reinterpret_cast<cusparseDnVecDescr_t>(x);
   cusparseDnVecDescr_t vecY = reinterpret_cast<cusparseDnVecDescr_t>(y);
   cudaDataType_t dtp = dataTp(dw);
-  double alpha = 1.0;
-  double beta = 1.0;
+  ALPHABETA(dw, alpha, beta)
   size_t bufferSize = 0;
   CUSPARSE_REPORT_IF_ERROR(cusparseSpMV_bufferSize(
-      handle, CUSPARSE_OPERATION_NON_TRANSPOSE, &alpha, matA, vecX, &beta, vecY,
+      handle, CUSPARSE_OPERATION_NON_TRANSPOSE, alphap, matA, vecX, betap, vecY,
       dtp, CUSPARSE_SPMV_ALG_DEFAULT, &bufferSize))
   return bufferSize == 0 ? 1 : bufferSize; // avoid zero-alloc
 }
@@ -347,11 +359,10 @@ extern "C" MLIR_CUDA_WRAPPERS_EXPORT void mgpuSpMV(void *h, void *a, void *x,
   cusparseDnVecDescr_t vecX = reinterpret_cast<cusparseDnVecDescr_t>(x);
   cusparseDnVecDescr_t vecY = reinterpret_cast<cusparseDnVecDescr_t>(y);
   cudaDataType_t dtp = dataTp(dw);
-  double alpha = 1.0;
-  double beta = 1.0;
+  ALPHABETA(dw, alpha, beta)
   CUSPARSE_REPORT_IF_ERROR(
-      cusparseSpMV(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, &alpha, matA, vecX,
-                   &beta, vecY, dtp, CUSPARSE_SPMV_ALG_DEFAULT, buf))
+      cusparseSpMV(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, alphap, matA, vecX,
+                   betap, vecY, dtp, CUSPARSE_SPMV_ALG_DEFAULT, buf))
 }
 
 extern "C" MLIR_CUDA_WRAPPERS_EXPORT intptr_t mgpuSpMMBufferSize(
@@ -361,12 +372,11 @@ extern "C" MLIR_CUDA_WRAPPERS_EXPORT intptr_t mgpuSpMMBufferSize(
   cusparseDnMatDescr_t matB = reinterpret_cast<cusparseDnMatDescr_t>(b);
   cusparseDnMatDescr_t matC = reinterpret_cast<cusparseDnMatDescr_t>(c);
   cudaDataType_t dtp = dataTp(dw);
-  double alpha = 1.0;
-  double beta = 1.0;
+  ALPHABETA(dw, alpha, beta)
   size_t bufferSize = 0;
   CUSPARSE_REPORT_IF_ERROR(cusparseSpMM_bufferSize(
       handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
-      CUSPARSE_OPERATION_NON_TRANSPOSE, &alpha, matA, matB, &beta, matC, dtp,
+      CUSPARSE_OPERATION_NON_TRANSPOSE, alphap, matA, matB, betap, matC, dtp,
       CUSPARSE_SPMM_ALG_DEFAULT, &bufferSize))
   return bufferSize == 0 ? 1 : bufferSize; // avoid zero-alloc
 }
@@ -380,10 +390,9 @@ extern "C" MLIR_CUDA_WRAPPERS_EXPORT void mgpuSpMM(void *h, void *a, void *b,
   cusparseDnMatDescr_t matB = reinterpret_cast<cusparseDnMatDescr_t>(b);
   cusparseDnMatDescr_t matC = reinterpret_cast<cusparseDnMatDescr_t>(c);
   cudaDataType_t dtp = dataTp(dw);
-  double alpha = 1.0;
-  double beta = 1.0;
+  ALPHABETA(dw, alpha, beta)
   CUSPARSE_REPORT_IF_ERROR(
       cusparseSpMM(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
-                   CUSPARSE_OPERATION_NON_TRANSPOSE, &alpha, matA, matB, &beta,
+                   CUSPARSE_OPERATION_NON_TRANSPOSE, alphap, matA, matB, betap,
                    matC, dtp, CUSPARSE_SPMM_ALG_DEFAULT, buf))
 }


        


More information about the Mlir-commits mailing list