[flang-commits] [flang] 7288f1b - [flang][cuda] Use nvvm operation for match any (#134283)
via flang-commits
flang-commits at lists.llvm.org
Thu Apr 3 12:08:34 PDT 2025
Author: Valentin Clement (バレンタイン クレメン)
Date: 2025-04-03T12:08:30-07:00
New Revision: 7288f1bc32c1964c4de50aa305b696b32d0c0f1a
URL: https://github.com/llvm/llvm-project/commit/7288f1bc32c1964c4de50aa305b696b32d0c0f1a
DIFF: https://github.com/llvm/llvm-project/commit/7288f1bc32c1964c4de50aa305b696b32d0c0f1a.diff
LOG: [flang][cuda] Use nvvm operation for match any (#134283)
The string used for the intrinsic, "llvm.nvvm.match.any.sync.i32p", was not
the correct one: there was an extra `p` at the end.
Use the NVVM operation instead so we don't duplicate it.
Added:
Modified:
flang/include/flang/Optimizer/Support/InitFIR.h
flang/lib/Optimizer/Builder/IntrinsicCall.cpp
flang/test/Lower/CUDA/cuda-device-proc.cuf
Removed:
################################################################################
diff --git a/flang/include/flang/Optimizer/Support/InitFIR.h b/flang/include/flang/Optimizer/Support/InitFIR.h
index e999796c23718..f509fdfcf4918 100644
--- a/flang/include/flang/Optimizer/Support/InitFIR.h
+++ b/flang/include/flang/Optimizer/Support/InitFIR.h
@@ -22,6 +22,7 @@
#include "mlir/Dialect/Affine/Passes.h"
#include "mlir/Dialect/Complex/IR/Complex.h"
#include "mlir/Dialect/Func/Extensions/InlinerExtension.h"
+#include "mlir/Dialect/LLVMIR/NVVMDialect.h"
#include "mlir/Dialect/OpenACC/Transforms/Passes.h"
#include "mlir/InitAllDialects.h"
#include "mlir/Pass/Pass.h"
@@ -37,7 +38,8 @@ namespace fir::support {
mlir::scf::SCFDialect, mlir::arith::ArithDialect, \
mlir::cf::ControlFlowDialect, mlir::func::FuncDialect, \
mlir::vector::VectorDialect, mlir::math::MathDialect, \
- mlir::complex::ComplexDialect, mlir::DLTIDialect, cuf::CUFDialect
+ mlir::complex::ComplexDialect, mlir::DLTIDialect, cuf::CUFDialect, \
+ mlir::NVVM::NVVMDialect
#define FLANG_CODEGEN_DIALECT_LIST FIRCodeGenDialect, mlir::LLVM::LLVMDialect
diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
index 4988b6bfb3d3f..a562d9b7e461c 100644
--- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
+++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
@@ -48,6 +48,7 @@
#include "mlir/Dialect/Complex/IR/Complex.h"
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
#include "mlir/Dialect/LLVMIR/LLVMTypes.h"
+#include "mlir/Dialect/LLVMIR/NVVMDialect.h"
#include "mlir/Dialect/Math/IR/Math.h"
#include "mlir/Dialect/Vector/IR/VectorOps.h"
#include "llvm/Support/CommandLine.h"
@@ -6552,23 +6553,15 @@ IntrinsicLibrary::genMatchAnySync(mlir::Type resultType,
assert(args.size() == 2);
bool is32 = args[1].getType().isInteger(32) || args[1].getType().isF32();
- llvm::StringRef funcName =
- is32 ? "llvm.nvvm.match.any.sync.i32p" : "llvm.nvvm.match.any.sync.i64p";
- mlir::MLIRContext *context = builder.getContext();
- mlir::Type i32Ty = builder.getI32Type();
- mlir::Type i64Ty = builder.getI64Type();
- mlir::Type valTy = is32 ? i32Ty : i64Ty;
+ mlir::Value arg1 = args[1];
+ if (arg1.getType().isF32() || arg1.getType().isF64())
+ arg1 = builder.create<fir::ConvertOp>(
+ loc, is32 ? builder.getI32Type() : builder.getI64Type(), arg1);
- mlir::FunctionType ftype =
- mlir::FunctionType::get(context, {i32Ty, valTy}, {i32Ty});
- auto funcOp = builder.createFunction(loc, funcName, ftype);
- llvm::SmallVector<mlir::Value> filteredArgs;
- filteredArgs.push_back(args[0]);
- if (args[1].getType().isF32() || args[1].getType().isF64())
- filteredArgs.push_back(builder.create<fir::ConvertOp>(loc, valTy, args[1]));
- else
- filteredArgs.push_back(args[1]);
- return builder.create<fir::CallOp>(loc, funcOp, filteredArgs).getResult(0);
+ return builder
+ .create<mlir::NVVM::MatchSyncOp>(loc, resultType, args[0], arg1,
+ mlir::NVVM::MatchSyncKind::any)
+ .getResult();
}
// MATMUL
diff --git a/flang/test/Lower/CUDA/cuda-device-proc.cuf b/flang/test/Lower/CUDA/cuda-device-proc.cuf
index a4a4750dd61e6..dbce4a5fa47dd 100644
--- a/flang/test/Lower/CUDA/cuda-device-proc.cuf
+++ b/flang/test/Lower/CUDA/cuda-device-proc.cuf
@@ -143,12 +143,10 @@ attributes(device) subroutine testMatchAny()
end subroutine
! CHECK-LABEL: func.func @_QPtestmatchany()
-! CHECK: fir.call @llvm.nvvm.match.any.sync.i32p
-! CHECK: fir.call @llvm.nvvm.match.any.sync.i64p
-! CHECK: fir.convert %{{.*}} : (f32) -> i32
-! CHECK: fir.call @llvm.nvvm.match.any.sync.i32p
-! CHECK: fir.convert %{{.*}} : (f64) -> i64
-! CHECK: fir.call @llvm.nvvm.match.any.sync.i64p
+! CHECK: %{{.*}} = nvvm.match.sync any %{{.*}}, %{{.*}} : i32 -> i32
+! CHECK: %{{.*}} = nvvm.match.sync any %{{.*}}, %{{.*}} : i64 -> i32
+! CHECK: %{{.*}} = nvvm.match.sync any %{{.*}}, %{{.*}} : i32 -> i32
+! CHECK: %{{.*}} = nvvm.match.sync any %{{.*}}, %{{.*}} : i64 -> i32
attributes(device) subroutine testAtomic(aa, n)
integer :: aa(*)
More information about the flang-commits mailing list