[flang-commits] [flang] 24dfcc0 - [flang][cuda] Use the nvvm.vote.sync op for all and any (#134433)

via flang-commits flang-commits at lists.llvm.org
Fri Apr 4 13:45:06 PDT 2025


Author: Valentin Clement (バレンタイン クレメン)
Date: 2025-04-04T13:45:03-07:00
New Revision: 24dfcc0c024f9ab8ba61c0994513f57e882961fc

URL: https://github.com/llvm/llvm-project/commit/24dfcc0c024f9ab8ba61c0994513f57e882961fc
DIFF: https://github.com/llvm/llvm-project/commit/24dfcc0c024f9ab8ba61c0994513f57e882961fc.diff

LOG: [flang][cuda] Use the nvvm.vote.sync op for all and any (#134433)

NVVM operations are now available for the `all` and `any` vote kinds as
well. Use the nvvm.vote.sync op for them and clean up the generation
function so that it handles all three vote sync kinds (all, any, ballot).

Added: 
    

Modified: 
    flang/include/flang/Optimizer/Builder/IntrinsicCall.h
    flang/lib/Optimizer/Builder/IntrinsicCall.cpp
    flang/test/Lower/CUDA/cuda-device-proc.cuf

Removed: 
    


################################################################################
diff  --git a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
index 68617d6e37d7c..17052113859e1 100644
--- a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
+++ b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
@@ -19,6 +19,7 @@
 #include "flang/Runtime/iostat-consts.h"
 #include "mlir/Dialect/Complex/IR/Complex.h"
 #include "mlir/Dialect/LLVMIR/LLVMDialect.h"
+#include "mlir/Dialect/LLVMIR/NVVMDialect.h"
 #include "mlir/Dialect/Math/IR/Math.h"
 #include <optional>
 
@@ -450,9 +451,8 @@ struct IntrinsicLibrary {
                                llvm::ArrayRef<fir::ExtendedValue> args);
   fir::ExtendedValue genUnpack(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
   fir::ExtendedValue genVerify(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
-  mlir::Value genVoteAllSync(mlir::Type, llvm::ArrayRef<mlir::Value>);
-  mlir::Value genVoteAnySync(mlir::Type, llvm::ArrayRef<mlir::Value>);
-  mlir::Value genVoteBallotSync(mlir::Type, llvm::ArrayRef<mlir::Value>);
+  template <mlir::NVVM::VoteSyncKind kind>
+  mlir::Value genVoteSync(mlir::Type, llvm::ArrayRef<mlir::Value>);
 
   /// Implement all conversion functions like DBLE, the first argument is
   /// the value to convert. There may be an additional KIND arguments that

diff  --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
index 93c00b6b28140..0248586344ad9 100644
--- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
+++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
@@ -48,7 +48,6 @@
 #include "mlir/Dialect/Complex/IR/Complex.h"
 #include "mlir/Dialect/LLVMIR/LLVMDialect.h"
 #include "mlir/Dialect/LLVMIR/LLVMTypes.h"
-#include "mlir/Dialect/LLVMIR/NVVMDialect.h"
 #include "mlir/Dialect/Math/IR/Math.h"
 #include "mlir/Dialect/Vector/IR/VectorOps.h"
 #include "llvm/Support/CommandLine.h"
@@ -262,7 +261,7 @@ static constexpr IntrinsicHandler handlers[]{
      {{{"mask", asAddr}, {"dim", asValue}}},
      /*isElemental=*/false},
     {"all_sync",
-     &I::genVoteAllSync,
+     &I::genVoteSync<mlir::NVVM::VoteSyncKind::all>,
      {{{"mask", asValue}, {"pred", asValue}}},
      /*isElemental=*/false},
     {"allocated",
@@ -275,7 +274,7 @@ static constexpr IntrinsicHandler handlers[]{
      {{{"mask", asAddr}, {"dim", asValue}}},
      /*isElemental=*/false},
     {"any_sync",
-     &I::genVoteAnySync,
+     &I::genVoteSync<mlir::NVVM::VoteSyncKind::any>,
      {{{"mask", asValue}, {"pred", asValue}}},
      /*isElemental=*/false},
     {"asind", &I::genAsind},
@@ -341,7 +340,7 @@ static constexpr IntrinsicHandler handlers[]{
     {"atomicsubl", &I::genAtomicSub, {{{"a", asAddr}, {"v", asValue}}}, false},
     {"atomicxori", &I::genAtomicXor, {{{"a", asAddr}, {"v", asValue}}}, false},
     {"ballot_sync",
-     &I::genVoteBallotSync,
+     &I::genVoteSync<mlir::NVVM::VoteSyncKind::ballot>,
      {{{"mask", asValue}, {"pred", asValue}}},
      /*isElemental=*/false},
     {"bessel_jn",
@@ -6583,46 +6582,20 @@ IntrinsicLibrary::genMatchAllSync(mlir::Type resultType,
   return value;
 }
 
-static mlir::Value genVoteSync(fir::FirOpBuilder &builder, mlir::Location loc,
-                               llvm::StringRef funcName, mlir::Type resTy,
-                               llvm::ArrayRef<mlir::Value> args) {
-  mlir::MLIRContext *context = builder.getContext();
-  mlir::Type i32Ty = builder.getI32Type();
-  mlir::Type i1Ty = builder.getI1Type();
-  mlir::FunctionType ftype =
-      mlir::FunctionType::get(context, {i32Ty, i1Ty}, {resTy});
-  auto funcOp = builder.createFunction(loc, funcName, ftype);
-  llvm::SmallVector<mlir::Value> filteredArgs;
-  return builder.create<fir::CallOp>(loc, funcOp, args).getResult(0);
-}
-
-// ALL_SYNC
-mlir::Value IntrinsicLibrary::genVoteAllSync(mlir::Type resultType,
-                                             llvm::ArrayRef<mlir::Value> args) {
-  assert(args.size() == 2);
-  return genVoteSync(builder, loc, "llvm.nvvm.vote.all.sync",
-                     builder.getI1Type(), args);
-}
-
-// ANY_SYNC
-mlir::Value IntrinsicLibrary::genVoteAnySync(mlir::Type resultType,
-                                             llvm::ArrayRef<mlir::Value> args) {
-  assert(args.size() == 2);
-  return genVoteSync(builder, loc, "llvm.nvvm.vote.any.sync",
-                     builder.getI1Type(), args);
-}
-
-// BALLOT_SYNC
-mlir::Value
-IntrinsicLibrary::genVoteBallotSync(mlir::Type resultType,
-                                    llvm::ArrayRef<mlir::Value> args) {
+// ALL_SYNC, ANY_SYNC, BALLOT_SYNC
+template <mlir::NVVM::VoteSyncKind kind>
+mlir::Value IntrinsicLibrary::genVoteSync(mlir::Type resultType,
+                                          llvm::ArrayRef<mlir::Value> args) {
   assert(args.size() == 2);
   mlir::Value arg1 =
       builder.create<fir::ConvertOp>(loc, builder.getI1Type(), args[1]);
-  return builder
-      .create<mlir::NVVM::VoteSyncOp>(loc, resultType, args[0], arg1,
-                                      mlir::NVVM::VoteSyncKind::ballot)
-      .getResult();
+  mlir::Type resTy = kind == mlir::NVVM::VoteSyncKind::ballot
+                         ? builder.getI32Type()
+                         : builder.getI1Type();
+  auto voteRes =
+      builder.create<mlir::NVVM::VoteSyncOp>(loc, resTy, args[0], arg1, kind)
+          .getResult();
+  return builder.create<fir::ConvertOp>(loc, resultType, voteRes);
 }
 
 // MATCH_ANY_SYNC

diff  --git a/flang/test/Lower/CUDA/cuda-device-proc.cuf b/flang/test/Lower/CUDA/cuda-device-proc.cuf
index 7d6d920dfb2e8..8f5e6dd36da4e 100644
--- a/flang/test/Lower/CUDA/cuda-device-proc.cuf
+++ b/flang/test/Lower/CUDA/cuda-device-proc.cuf
@@ -301,8 +301,8 @@ attributes(device) subroutine testVote()
 end subroutine
 
 ! CHECK-LABEL: func.func @_QPtestvote()
-! CHECK: fir.call @llvm.nvvm.vote.all.sync
-! CHECK: fir.call @llvm.nvvm.vote.any.sync
+! CHECK: %{{.*}} = nvvm.vote.sync all %{{.*}}, %{{.*}} -> i1
+! CHECK: %{{.*}} = nvvm.vote.sync any %{{.*}}, %{{.*}} -> i1
 ! CHECK: %{{.*}} = nvvm.vote.sync ballot %{{.*}}, %{{.*}} -> i32
 
 ! CHECK-DAG: func.func private @__ldca_i4x4_(!fir.ref<!fir.array<4xi32>>, !fir.ref<!fir.array<4xi32>>)


        


More information about the flang-commits mailing list