[flang-commits] [flang] 24dfcc0 - [flang][cuda] Use the nvvm.vote.sync op for all and any (#134433)
via flang-commits
flang-commits at lists.llvm.org
Fri Apr 4 13:45:06 PDT 2025
Author: Valentin Clement (バレンタイン クレメン)
Date: 2025-04-04T13:45:03-07:00
New Revision: 24dfcc0c024f9ab8ba61c0994513f57e882961fc
URL: https://github.com/llvm/llvm-project/commit/24dfcc0c024f9ab8ba61c0994513f57e882961fc
DIFF: https://github.com/llvm/llvm-project/commit/24dfcc0c024f9ab8ba61c0994513f57e882961fc.diff
LOG: [flang][cuda] Use the nvvm.vote.sync op for all and any (#134433)
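The NVVM dialect now provides the nvvm.vote.sync operation for the `all` and
`any` vote kinds as well (previously only `ballot` was lowered to the op, while
`all` and `any` were emitted as calls to the llvm.nvvm.vote.*.sync intrinsics).
Use the op for them too, and clean up the generation code so that a single
function handles all three vote sync kinds.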
Added:
Modified:
flang/include/flang/Optimizer/Builder/IntrinsicCall.h
flang/lib/Optimizer/Builder/IntrinsicCall.cpp
flang/test/Lower/CUDA/cuda-device-proc.cuf
Removed:
################################################################################
diff --git a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
index 68617d6e37d7c..17052113859e1 100644
--- a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
+++ b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
@@ -19,6 +19,7 @@
#include "flang/Runtime/iostat-consts.h"
#include "mlir/Dialect/Complex/IR/Complex.h"
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
+#include "mlir/Dialect/LLVMIR/NVVMDialect.h"
#include "mlir/Dialect/Math/IR/Math.h"
#include <optional>
@@ -450,9 +451,8 @@ struct IntrinsicLibrary {
llvm::ArrayRef<fir::ExtendedValue> args);
fir::ExtendedValue genUnpack(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
fir::ExtendedValue genVerify(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
- mlir::Value genVoteAllSync(mlir::Type, llvm::ArrayRef<mlir::Value>);
- mlir::Value genVoteAnySync(mlir::Type, llvm::ArrayRef<mlir::Value>);
- mlir::Value genVoteBallotSync(mlir::Type, llvm::ArrayRef<mlir::Value>);
+ template <mlir::NVVM::VoteSyncKind kind>
+ mlir::Value genVoteSync(mlir::Type, llvm::ArrayRef<mlir::Value>);
/// Implement all conversion functions like DBLE, the first argument is
/// the value to convert. There may be an additional KIND arguments that
diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
index 93c00b6b28140..0248586344ad9 100644
--- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
+++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
@@ -48,7 +48,6 @@
#include "mlir/Dialect/Complex/IR/Complex.h"
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
#include "mlir/Dialect/LLVMIR/LLVMTypes.h"
-#include "mlir/Dialect/LLVMIR/NVVMDialect.h"
#include "mlir/Dialect/Math/IR/Math.h"
#include "mlir/Dialect/Vector/IR/VectorOps.h"
#include "llvm/Support/CommandLine.h"
@@ -262,7 +261,7 @@ static constexpr IntrinsicHandler handlers[]{
{{{"mask", asAddr}, {"dim", asValue}}},
/*isElemental=*/false},
{"all_sync",
- &I::genVoteAllSync,
+ &I::genVoteSync<mlir::NVVM::VoteSyncKind::all>,
{{{"mask", asValue}, {"pred", asValue}}},
/*isElemental=*/false},
{"allocated",
@@ -275,7 +274,7 @@ static constexpr IntrinsicHandler handlers[]{
{{{"mask", asAddr}, {"dim", asValue}}},
/*isElemental=*/false},
{"any_sync",
- &I::genVoteAnySync,
+ &I::genVoteSync<mlir::NVVM::VoteSyncKind::any>,
{{{"mask", asValue}, {"pred", asValue}}},
/*isElemental=*/false},
{"asind", &I::genAsind},
@@ -341,7 +340,7 @@ static constexpr IntrinsicHandler handlers[]{
{"atomicsubl", &I::genAtomicSub, {{{"a", asAddr}, {"v", asValue}}}, false},
{"atomicxori", &I::genAtomicXor, {{{"a", asAddr}, {"v", asValue}}}, false},
{"ballot_sync",
- &I::genVoteBallotSync,
+ &I::genVoteSync<mlir::NVVM::VoteSyncKind::ballot>,
{{{"mask", asValue}, {"pred", asValue}}},
/*isElemental=*/false},
{"bessel_jn",
@@ -6583,46 +6582,20 @@ IntrinsicLibrary::genMatchAllSync(mlir::Type resultType,
return value;
}
-static mlir::Value genVoteSync(fir::FirOpBuilder &builder, mlir::Location loc,
- llvm::StringRef funcName, mlir::Type resTy,
- llvm::ArrayRef<mlir::Value> args) {
- mlir::MLIRContext *context = builder.getContext();
- mlir::Type i32Ty = builder.getI32Type();
- mlir::Type i1Ty = builder.getI1Type();
- mlir::FunctionType ftype =
- mlir::FunctionType::get(context, {i32Ty, i1Ty}, {resTy});
- auto funcOp = builder.createFunction(loc, funcName, ftype);
- llvm::SmallVector<mlir::Value> filteredArgs;
- return builder.create<fir::CallOp>(loc, funcOp, args).getResult(0);
-}
-
-// ALL_SYNC
-mlir::Value IntrinsicLibrary::genVoteAllSync(mlir::Type resultType,
- llvm::ArrayRef<mlir::Value> args) {
- assert(args.size() == 2);
- return genVoteSync(builder, loc, "llvm.nvvm.vote.all.sync",
- builder.getI1Type(), args);
-}
-
-// ANY_SYNC
-mlir::Value IntrinsicLibrary::genVoteAnySync(mlir::Type resultType,
- llvm::ArrayRef<mlir::Value> args) {
- assert(args.size() == 2);
- return genVoteSync(builder, loc, "llvm.nvvm.vote.any.sync",
- builder.getI1Type(), args);
-}
-
-// BALLOT_SYNC
-mlir::Value
-IntrinsicLibrary::genVoteBallotSync(mlir::Type resultType,
- llvm::ArrayRef<mlir::Value> args) {
+// ALL_SYNC, ANY_SYNC, BALLOT_SYNC
+template <mlir::NVVM::VoteSyncKind kind>
+mlir::Value IntrinsicLibrary::genVoteSync(mlir::Type resultType,
+ llvm::ArrayRef<mlir::Value> args) {
assert(args.size() == 2);
mlir::Value arg1 =
builder.create<fir::ConvertOp>(loc, builder.getI1Type(), args[1]);
- return builder
- .create<mlir::NVVM::VoteSyncOp>(loc, resultType, args[0], arg1,
- mlir::NVVM::VoteSyncKind::ballot)
- .getResult();
+ mlir::Type resTy = kind == mlir::NVVM::VoteSyncKind::ballot
+ ? builder.getI32Type()
+ : builder.getI1Type();
+ auto voteRes =
+ builder.create<mlir::NVVM::VoteSyncOp>(loc, resTy, args[0], arg1, kind)
+ .getResult();
+ return builder.create<fir::ConvertOp>(loc, resultType, voteRes);
}
// MATCH_ANY_SYNC
diff --git a/flang/test/Lower/CUDA/cuda-device-proc.cuf b/flang/test/Lower/CUDA/cuda-device-proc.cuf
index 7d6d920dfb2e8..8f5e6dd36da4e 100644
--- a/flang/test/Lower/CUDA/cuda-device-proc.cuf
+++ b/flang/test/Lower/CUDA/cuda-device-proc.cuf
@@ -301,8 +301,8 @@ attributes(device) subroutine testVote()
end subroutine
! CHECK-LABEL: func.func @_QPtestvote()
-! CHECK: fir.call @llvm.nvvm.vote.all.sync
-! CHECK: fir.call @llvm.nvvm.vote.any.sync
+! CHECK: %{{.*}} = nvvm.vote.sync all %{{.*}}, %{{.*}} -> i1
+! CHECK: %{{.*}} = nvvm.vote.sync any %{{.*}}, %{{.*}} -> i1
! CHECK: %{{.*}} = nvvm.vote.sync ballot %{{.*}}, %{{.*}} -> i32
! CHECK-DAG: func.func private @__ldca_i4x4_(!fir.ref<!fir.array<4xi32>>, !fir.ref<!fir.array<4xi32>>)
More information about the flang-commits
mailing list