[flang-commits] [flang] [flang][cuda] Allow array pointer for atomicexch and atomiccas (PR #130363)
Valentin Clement バレンタイン クレメン via flang-commits
flang-commits at lists.llvm.org
Fri Mar 7 14:52:27 PST 2025
https://github.com/clementval created https://github.com/llvm/llvm-project/pull/130363
Similar to #130331, but fixes `atomicexch` and `atomiccas` to accept an array pointer. Their interfaces have `ignore_tkr (rd)` on the `address` dummy argument.
From 10a956d2eee61c779544019d48889f03b5ef66d0 Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Fri, 7 Mar 2025 14:50:07 -0800
Subject: [PATCH] [flang][cuda] Allow array pointer for atomicexch and
atomiccas
---
.../flang/Optimizer/Builder/IntrinsicCall.h | 6 +++--
flang/lib/Optimizer/Builder/IntrinsicCall.cpp | 23 +++++++++++--------
flang/test/Lower/CUDA/cuda-device-proc.cuf | 5 ++++
3 files changed, 23 insertions(+), 11 deletions(-)
diff --git a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
index 778fcf1d93d8b..54a9e0b867d6c 100644
--- a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
+++ b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
@@ -187,9 +187,11 @@ struct IntrinsicLibrary {
mlir::Value genAtanpi(mlir::Type, llvm::ArrayRef<mlir::Value>);
mlir::Value genAtomicAdd(mlir::Type, llvm::ArrayRef<mlir::Value>);
mlir::Value genAtomicAnd(mlir::Type, llvm::ArrayRef<mlir::Value>);
- mlir::Value genAtomicCas(mlir::Type, llvm::ArrayRef<mlir::Value>);
+ fir::ExtendedValue genAtomicCas(mlir::Type,
+ llvm::ArrayRef<fir::ExtendedValue>);
mlir::Value genAtomicDec(mlir::Type, llvm::ArrayRef<mlir::Value>);
- mlir::Value genAtomicExch(mlir::Type, llvm::ArrayRef<mlir::Value>);
+ fir::ExtendedValue genAtomicExch(mlir::Type,
+ llvm::ArrayRef<fir::ExtendedValue>);
mlir::Value genAtomicInc(mlir::Type, llvm::ArrayRef<mlir::Value>);
mlir::Value genAtomicMax(mlir::Type, llvm::ArrayRef<mlir::Value>);
mlir::Value genAtomicMin(mlir::Type, llvm::ArrayRef<mlir::Value>);
diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
index 0a70ed3c6bdf1..63127f046cf42 100644
--- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
+++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
@@ -2740,15 +2740,17 @@ mlir::Value IntrinsicLibrary::genAtomicOr(mlir::Type resultType,
}
// ATOMICCAS
-mlir::Value IntrinsicLibrary::genAtomicCas(mlir::Type resultType,
- llvm::ArrayRef<mlir::Value> args) {
+fir::ExtendedValue
+IntrinsicLibrary::genAtomicCas(mlir::Type resultType,
+ llvm::ArrayRef<fir::ExtendedValue> args) {
assert(args.size() == 3);
auto successOrdering = mlir::LLVM::AtomicOrdering::acq_rel;
auto failureOrdering = mlir::LLVM::AtomicOrdering::monotonic;
auto llvmPtrTy = mlir::LLVM::LLVMPointerType::get(resultType.getContext());
- mlir::Value arg1 = args[1];
- mlir::Value arg2 = args[2];
+ mlir::Value arg0 = fir::getBase(args[0]);
+ mlir::Value arg1 = fir::getBase(args[1]);
+ mlir::Value arg2 = fir::getBase(args[2]);
auto bitCastFloat = [&](mlir::Value arg) -> mlir::Value {
if (mlir::isa<mlir::Float32Type>(arg.getType()))
@@ -2769,7 +2771,7 @@ mlir::Value IntrinsicLibrary::genAtomicCas(mlir::Type resultType,
}
auto address =
- builder.create<mlir::UnrealizedConversionCastOp>(loc, llvmPtrTy, args[0])
+ builder.create<mlir::UnrealizedConversionCastOp>(loc, llvmPtrTy, arg0)
.getResult(0);
auto cmpxchg = builder.create<mlir::LLVM::AtomicCmpXchgOp>(
loc, address, arg1, arg2, successOrdering, failureOrdering);
@@ -2786,13 +2788,16 @@ mlir::Value IntrinsicLibrary::genAtomicDec(mlir::Type resultType,
}
// ATOMICEXCH
-mlir::Value IntrinsicLibrary::genAtomicExch(mlir::Type resultType,
- llvm::ArrayRef<mlir::Value> args) {
+fir::ExtendedValue
+IntrinsicLibrary::genAtomicExch(mlir::Type resultType,
+ llvm::ArrayRef<fir::ExtendedValue> args) {
assert(args.size() == 2);
- assert(args[1].getType().isIntOrFloat());
+ mlir::Value arg0 = fir::getBase(args[0]);
+ mlir::Value arg1 = fir::getBase(args[1]);
+ assert(arg1.getType().isIntOrFloat());
mlir::LLVM::AtomicBinOp binOp = mlir::LLVM::AtomicBinOp::xchg;
- return genAtomBinOp(builder, loc, binOp, args[0], args[1]);
+ return genAtomBinOp(builder, loc, binOp, arg0, arg1);
}
mlir::Value IntrinsicLibrary::genAtomicInc(mlir::Type resultType,
diff --git a/flang/test/Lower/CUDA/cuda-device-proc.cuf b/flang/test/Lower/CUDA/cuda-device-proc.cuf
index 5c7f334bae8ba..d683e8b4caf85 100644
--- a/flang/test/Lower/CUDA/cuda-device-proc.cuf
+++ b/flang/test/Lower/CUDA/cuda-device-proc.cuf
@@ -161,6 +161,8 @@ attributes(device) subroutine testAtomic(aa, n)
istat = atomiccas(a, i, 14)
do i = 1, n
istat = atomicxor(aa, i)
+ istat = atomiccas(aa, i, 14)
+ istat = atomicexch(aa, 0)
end do
end subroutine
@@ -172,6 +174,9 @@ end subroutine
! CHECK: llvm.cmpxchg %[[ADDR]], %{{.*}}, %c14{{.*}} acq_rel monotonic : !llvm.ptr, i32
! CHECK: fir.do_loop
! CHECK: llvm.atomicrmw _xor %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, i32
+! CHECK: %[[ADDR:.*]] = builtin.unrealized_conversion_cast %{{.*}}#1 : !fir.ref<!fir.array<?xi32>> to !llvm.ptr
+! CHECK: llvm.cmpxchg %[[ADDR]], %{{.*}}, %c14{{.*}} acq_rel monotonic : !llvm.ptr, i32
+! CHECK: llvm.atomicrmw xchg %{{.*}}, %c0{{.*}} seq_cst : !llvm.ptr, i32
attributes(device) subroutine testAtomic2()
integer(8) :: a, i, istat
More information about the flang-commits
mailing list