[Mlir-commits] [mlir] 4ebd836 - [mlir][sparse][gpu] fix F32 bug for SpMV and SpMM
Aart Bik
llvmlistbot at llvm.org
Tue May 23 17:36:12 PDT 2023
Author: Aart Bik
Date: 2023-05-23T17:36:03-07:00
New Revision: 4ebd836d9e7b342ece764d81bed13d3163c8af62
URL: https://github.com/llvm/llvm-project/commit/4ebd836d9e7b342ece764d81bed13d3163c8af62
DIFF: https://github.com/llvm/llvm-project/commit/4ebd836d9e7b342ece764d81bed13d3163c8af62.diff
LOG: [mlir][sparse][gpu] fix F32 bug for SpMV and SpMM
The alpha/beta variables, residing on the host, should have the
32-bit or 64-bit width of the result type. They were formerly always
passed as double.
Reviewed By: Peiming
Differential Revision: https://reviews.llvm.org/D151255
Added:
Modified:
mlir/lib/ExecutionEngine/CudaRuntimeWrappers.cpp
Removed:
################################################################################
diff --git a/mlir/lib/ExecutionEngine/CudaRuntimeWrappers.cpp b/mlir/lib/ExecutionEngine/CudaRuntimeWrappers.cpp
index d5eb9ca1731b9..cbfae90e63f11 100644
--- a/mlir/lib/ExecutionEngine/CudaRuntimeWrappers.cpp
+++ b/mlir/lib/ExecutionEngine/CudaRuntimeWrappers.cpp
@@ -246,6 +246,19 @@ static inline cusparseIndexType_t idxTp(int32_t width) {
}
}
+// Some macro magic to get float/double alpha and beta on host.
+#define ALPHABETA(w, alpha, beta) \
+ float(alpha##f) = 1.0, (beta##f) = 1.0; \
+ double(alpha##d) = 1.0, (beta##d) = 1.0; \
+ void *(alpha##p), *(beta##p); \
+ if ((w) == 32) { \
+ (alpha##p) = reinterpret_cast<void *>(&(alpha##f)); \
+ (beta##p) = reinterpret_cast<void *>(&(beta##f)); \
+ } else { \
+ (alpha##p) = reinterpret_cast<void *>(&(alpha##d)); \
+ (beta##p) = reinterpret_cast<void *>(&(beta##d)); \
+ }
+
extern "C" MLIR_CUDA_WRAPPERS_EXPORT void *
mgpuCreateSparseEnv(CUstream /*stream*/) {
cusparseHandle_t handle = nullptr;
@@ -329,11 +342,10 @@ extern "C" MLIR_CUDA_WRAPPERS_EXPORT intptr_t mgpuSpMVBufferSize(
cusparseDnVecDescr_t vecX = reinterpret_cast<cusparseDnVecDescr_t>(x);
cusparseDnVecDescr_t vecY = reinterpret_cast<cusparseDnVecDescr_t>(y);
cudaDataType_t dtp = dataTp(dw);
- double alpha = 1.0;
- double beta = 1.0;
+ ALPHABETA(dw, alpha, beta)
size_t bufferSize = 0;
CUSPARSE_REPORT_IF_ERROR(cusparseSpMV_bufferSize(
- handle, CUSPARSE_OPERATION_NON_TRANSPOSE, &alpha, matA, vecX, &beta, vecY,
+ handle, CUSPARSE_OPERATION_NON_TRANSPOSE, alphap, matA, vecX, betap, vecY,
dtp, CUSPARSE_SPMV_ALG_DEFAULT, &bufferSize))
return bufferSize == 0 ? 1 : bufferSize; // avoid zero-alloc
}
@@ -347,11 +359,10 @@ extern "C" MLIR_CUDA_WRAPPERS_EXPORT void mgpuSpMV(void *h, void *a, void *x,
cusparseDnVecDescr_t vecX = reinterpret_cast<cusparseDnVecDescr_t>(x);
cusparseDnVecDescr_t vecY = reinterpret_cast<cusparseDnVecDescr_t>(y);
cudaDataType_t dtp = dataTp(dw);
- double alpha = 1.0;
- double beta = 1.0;
+ ALPHABETA(dw, alpha, beta)
CUSPARSE_REPORT_IF_ERROR(
- cusparseSpMV(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, &alpha, matA, vecX,
- &beta, vecY, dtp, CUSPARSE_SPMV_ALG_DEFAULT, buf))
+ cusparseSpMV(handle, CUSPARSE_OPERATION_NON_TRANSPOSE, alphap, matA, vecX,
+ betap, vecY, dtp, CUSPARSE_SPMV_ALG_DEFAULT, buf))
}
extern "C" MLIR_CUDA_WRAPPERS_EXPORT intptr_t mgpuSpMMBufferSize(
@@ -361,12 +372,11 @@ extern "C" MLIR_CUDA_WRAPPERS_EXPORT intptr_t mgpuSpMMBufferSize(
cusparseDnMatDescr_t matB = reinterpret_cast<cusparseDnMatDescr_t>(b);
cusparseDnMatDescr_t matC = reinterpret_cast<cusparseDnMatDescr_t>(c);
cudaDataType_t dtp = dataTp(dw);
- double alpha = 1.0;
- double beta = 1.0;
+ ALPHABETA(dw, alpha, beta)
size_t bufferSize = 0;
CUSPARSE_REPORT_IF_ERROR(cusparseSpMM_bufferSize(
handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
- CUSPARSE_OPERATION_NON_TRANSPOSE, &alpha, matA, matB, &beta, matC, dtp,
+ CUSPARSE_OPERATION_NON_TRANSPOSE, alphap, matA, matB, betap, matC, dtp,
CUSPARSE_SPMM_ALG_DEFAULT, &bufferSize))
return bufferSize == 0 ? 1 : bufferSize; // avoid zero-alloc
}
@@ -380,10 +390,9 @@ extern "C" MLIR_CUDA_WRAPPERS_EXPORT void mgpuSpMM(void *h, void *a, void *b,
cusparseDnMatDescr_t matB = reinterpret_cast<cusparseDnMatDescr_t>(b);
cusparseDnMatDescr_t matC = reinterpret_cast<cusparseDnMatDescr_t>(c);
cudaDataType_t dtp = dataTp(dw);
- double alpha = 1.0;
- double beta = 1.0;
+ ALPHABETA(dw, alpha, beta)
CUSPARSE_REPORT_IF_ERROR(
cusparseSpMM(handle, CUSPARSE_OPERATION_NON_TRANSPOSE,
- CUSPARSE_OPERATION_NON_TRANSPOSE, &alpha, matA, matB, &beta,
+ CUSPARSE_OPERATION_NON_TRANSPOSE, alphap, matA, matB, betap,
matC, dtp, CUSPARSE_SPMM_ALG_DEFAULT, buf))
}
More information about the Mlir-commits
mailing list