[flang-commits] [flang] [flang][cuda] Add interfaces and lowering for atomicaddvector (PR #166275)
Valentin Clement バレンタイン クレメン via flang-commits
flang-commits at lists.llvm.org
Mon Nov 3 17:02:55 PST 2025
https://github.com/clementval updated https://github.com/llvm/llvm-project/pull/166275
>From 8473ab3a33be6b1de82bf12ad5eb563af15d80c7 Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Mon, 3 Nov 2025 16:46:07 -0800
Subject: [PATCH 1/3] [flang][cuda] Add interfaces and lowering for
atomicaddvector
---
.../flang/Optimizer/Builder/IntrinsicCall.h | 2 +
flang/lib/Optimizer/Builder/IntrinsicCall.cpp | 50 +++++++++++++++++++
flang/module/cudadevice.f90 | 16 ++++++
flang/test/Lower/CUDA/cuda-atomicadd.cuf | 19 +++++++
4 files changed, 87 insertions(+)
create mode 100644 flang/test/Lower/CUDA/cuda-atomicadd.cuf
diff --git a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
index 9f15ce68eb3d5..bbdef481a2085 100644
--- a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
+++ b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
@@ -190,6 +190,8 @@ struct IntrinsicLibrary {
mlir::Value genAtomicAdd(mlir::Type, llvm::ArrayRef<mlir::Value>);
fir::ExtendedValue genAtomicAddR2(mlir::Type,
llvm::ArrayRef<fir::ExtendedValue>);
+ fir::ExtendedValue genAtomicAddVector(mlir::Type,
+ llvm::ArrayRef<fir::ExtendedValue>);
mlir::Value genAtomicAnd(mlir::Type, llvm::ArrayRef<mlir::Value>);
fir::ExtendedValue genAtomicCas(mlir::Type,
llvm::ArrayRef<fir::ExtendedValue>);
diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
index 6ebd52dcd42ea..d329bd9f14cc5 100644
--- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
+++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
@@ -290,6 +290,14 @@ static constexpr IntrinsicHandler handlers[]{
{"atan2pi", &I::genAtanpi},
{"atand", &I::genAtand},
{"atanpi", &I::genAtanpi},
+ {"atomicadd_r2x2",
+ &I::genAtomicAddVector,
+ {{{"a", asAddr}, {"v", asAddr}}},
+ false},
+ {"atomicadd_r4x2",
+ &I::genAtomicAddVector,
+ {{{"a", asAddr}, {"v", asAddr}}},
+ false},
{"atomicaddd", &I::genAtomicAdd, {{{"a", asAddr}, {"v", asValue}}}, false},
{"atomicaddf", &I::genAtomicAdd, {{{"a", asAddr}, {"v", asValue}}}, false},
{"atomicaddi", &I::genAtomicAdd, {{{"a", asAddr}, {"v", asValue}}}, false},
@@ -3168,6 +3176,48 @@ IntrinsicLibrary::genAtomicAddR2(mlir::Type resultType,
mlir::ArrayRef<int64_t>{0});
}
+fir::ExtendedValue
+IntrinsicLibrary::genAtomicAddVector(mlir::Type resultType,
+ llvm::ArrayRef<fir::ExtendedValue> args) {
+ assert(args.size() == 2);
+ mlir::Value res = fir::AllocaOp::create(
+ builder, loc, fir::SequenceType::get({2}, resultType));
+ mlir::Value a = fir::getBase(args[0]);
+ if (mlir::isa<fir::BaseBoxType>(a.getType())) {
+ a = fir::BoxAddrOp::create(builder, loc, a);
+ }
+ auto eleTy = fir::unwrapSequenceType(resultType);
+ auto loc = builder.getUnknownLoc();
+ auto i32Ty = builder.getI32Type();
+ auto vecTy = mlir::VectorType::get({2}, eleTy);
+ mlir::Type idxTy = builder.getIndexType();
+ auto refTy = fir::ReferenceType::get(eleTy);
+ auto zero = builder.createIntegerConstant(loc, idxTy, 0);
+ auto one = builder.createIntegerConstant(loc, idxTy, 1);
+ auto v1Coord = fir::CoordinateOp::create(builder, loc, refTy,
+ fir::getBase(args[1]), zero);
+ auto v2Coord = fir::CoordinateOp::create(builder, loc, refTy,
+ fir::getBase(args[1]), one);
+ auto v1 = fir::LoadOp::create(builder, loc, v1Coord);
+ auto v2 = fir::LoadOp::create(builder, loc, v2Coord);
+ mlir::Value undef = mlir::LLVM::UndefOp::create(builder, loc, vecTy);
+ mlir::Value vec1 = mlir::LLVM::InsertElementOp::create(
+ builder, loc, undef, v1, builder.createIntegerConstant(loc, i32Ty, 0));
+ mlir::Value vec2 = mlir::LLVM::InsertElementOp::create(
+ builder, loc, vec1, v2, builder.createIntegerConstant(loc, i32Ty, 1));
+ auto add = genAtomBinOp(builder, loc, mlir::LLVM::AtomicBinOp::fadd, a, vec2);
+ auto r1 = mlir::LLVM::ExtractElementOp::create(
+ builder, loc, add, builder.createIntegerConstant(loc, i32Ty, 0));
+ auto r2 = mlir::LLVM::ExtractElementOp::create(
+ builder, loc, add, builder.createIntegerConstant(loc, i32Ty, 1));
+ auto c1 = fir::CoordinateOp::create(builder, loc, refTy, res, zero);
+ auto c2 = fir::CoordinateOp::create(builder, loc, refTy, res, one);
+ fir::StoreOp::create(builder, loc, r1, c1);
+ fir::StoreOp::create(builder, loc, r2, c2);
+ mlir::Value ext = builder.createIntegerConstant(loc, idxTy, 2);
+ return fir::ArrayBoxValue(res, {ext});
+}
+
mlir::Value IntrinsicLibrary::genAtomicSub(mlir::Type resultType,
llvm::ArrayRef<mlir::Value> args) {
assert(args.size() == 2);
diff --git a/flang/module/cudadevice.f90 b/flang/module/cudadevice.f90
index 7a764b589dc56..b1aef95cba8c9 100644
--- a/flang/module/cudadevice.f90
+++ b/flang/module/cudadevice.f90
@@ -1178,6 +1178,22 @@ attributes(device) pure integer(4) function atomicaddr2(address, val)
end function
end interface
+ interface atomicaddvector
+ attributes(device) pure function atomicadd_r2x2(address, val) result(z)
+ !dir$ ignore_tkr (rd) address, (d) val
+ real(2), dimension(2), intent(inout) :: address
+ real(2), dimension(2), intent(in) :: val
+ real(2), dimension(2) :: z
+ end function
+
+ attributes(device) pure function atomicadd_r4x2(address, val) result(z)
+ !dir$ ignore_tkr (rd) address, (d) val
+ real(4), dimension(2), intent(inout) :: address
+ real(4), dimension(2), intent(in) :: val
+ real(4), dimension(2) :: z
+ end function
+ end interface
+
interface atomicsub
attributes(device) pure integer function atomicsubi(address, val)
!dir$ ignore_tkr (d) address, (d) val
diff --git a/flang/test/Lower/CUDA/cuda-atomicadd.cuf b/flang/test/Lower/CUDA/cuda-atomicadd.cuf
new file mode 100644
index 0000000000000..1669674e8d4ce
--- /dev/null
+++ b/flang/test/Lower/CUDA/cuda-atomicadd.cuf
@@ -0,0 +1,19 @@
+! RUN: bbc -emit-hlfir -fcuda %s -o - | FileCheck %s
+
+! Test CUDA Fortran atomicadd functions available in the cudadevice module
+
+attributes(global) subroutine atomicaddvector_r2()
+ real(2), device :: a(2), tmp1(2), tmp2(2)
+ tmp1 = atomicAddVector(a, tmp2)
+end subroutine
+
+! CHECK-LABEL: func.func @_QPatomicaddvector_r2() attributes {cuf.proc_attr = #cuf.cuda_proc<global>}
+! CHECK: llvm.atomicrmw fadd %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, vector<2xf16>
+
+attributes(global) subroutine atomicaddvector_r4()
+ real(4), device :: a(2), tmp1(2), tmp2(2)
+ tmp1 = atomicAddVector(a, tmp2)
+end subroutine
+
+! CHECK-LABEL: func.func @_QPatomicaddvector_r4() attributes {cuf.proc_attr = #cuf.cuda_proc<global>}
+! CHECK: llvm.atomicrmw fadd %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, vector<2xf32>
>From c5df064ad1d84a947ac06cb9db0877fcfa016870 Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Mon, 3 Nov 2025 16:56:49 -0800
Subject: [PATCH 2/3] cleanup
---
flang/lib/Optimizer/Builder/IntrinsicCall.cpp | 6 ++----
1 file changed, 2 insertions(+), 4 deletions(-)
diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
index d329bd9f14cc5..de3adfa0e8d40 100644
--- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
+++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
@@ -3186,12 +3186,10 @@ IntrinsicLibrary::genAtomicAddVector(mlir::Type resultType,
if (mlir::isa<fir::BaseBoxType>(a.getType())) {
a = fir::BoxAddrOp::create(builder, loc, a);
}
- auto eleTy = fir::unwrapSequenceType(resultType);
- auto loc = builder.getUnknownLoc();
auto i32Ty = builder.getI32Type();
- auto vecTy = mlir::VectorType::get({2}, eleTy);
+ auto vecTy = mlir::VectorType::get({2}, resultType);
mlir::Type idxTy = builder.getIndexType();
- auto refTy = fir::ReferenceType::get(eleTy);
+ auto refTy = fir::ReferenceType::get(resultType);
auto zero = builder.createIntegerConstant(loc, idxTy, 0);
auto one = builder.createIntegerConstant(loc, idxTy, 1);
auto v1Coord = fir::CoordinateOp::create(builder, loc, refTy,
>From 5487e9e342427e33ab0670b9d979c3d9ce586d95 Mon Sep 17 00:00:00 2001
From: Valentin Clement <clementval at gmail.com>
Date: Mon, 3 Nov 2025 17:02:15 -0800
Subject: [PATCH 3/3] More cleanup
---
flang/lib/Optimizer/Builder/IntrinsicCall.cpp | 31 ++++++++++---------
1 file changed, 16 insertions(+), 15 deletions(-)
diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
index de3adfa0e8d40..18217310e2707 100644
--- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
+++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
@@ -3186,30 +3186,31 @@ IntrinsicLibrary::genAtomicAddVector(mlir::Type resultType,
if (mlir::isa<fir::BaseBoxType>(a.getType())) {
a = fir::BoxAddrOp::create(builder, loc, a);
}
- auto i32Ty = builder.getI32Type();
auto vecTy = mlir::VectorType::get({2}, resultType);
- mlir::Type idxTy = builder.getIndexType();
auto refTy = fir::ReferenceType::get(resultType);
- auto zero = builder.createIntegerConstant(loc, idxTy, 0);
- auto one = builder.createIntegerConstant(loc, idxTy, 1);
- auto v1Coord = fir::CoordinateOp::create(builder, loc, refTy,
- fir::getBase(args[1]), zero);
- auto v2Coord = fir::CoordinateOp::create(builder, loc, refTy,
- fir::getBase(args[1]), one);
- auto v1 = fir::LoadOp::create(builder, loc, v1Coord);
- auto v2 = fir::LoadOp::create(builder, loc, v2Coord);
+ mlir::Type i32Ty = builder.getI32Type();
+ mlir::Type idxTy = builder.getIndexType();
+ mlir::Value zero = builder.createIntegerConstant(loc, idxTy, 0);
+ mlir::Value one = builder.createIntegerConstant(loc, idxTy, 1);
+ mlir::Value v1Coord = fir::CoordinateOp::create(builder, loc, refTy,
+ fir::getBase(args[1]), zero);
+ mlir::Value v2Coord = fir::CoordinateOp::create(builder, loc, refTy,
+ fir::getBase(args[1]), one);
+ mlir::Value v1 = fir::LoadOp::create(builder, loc, v1Coord);
+ mlir::Value v2 = fir::LoadOp::create(builder, loc, v2Coord);
mlir::Value undef = mlir::LLVM::UndefOp::create(builder, loc, vecTy);
mlir::Value vec1 = mlir::LLVM::InsertElementOp::create(
builder, loc, undef, v1, builder.createIntegerConstant(loc, i32Ty, 0));
mlir::Value vec2 = mlir::LLVM::InsertElementOp::create(
builder, loc, vec1, v2, builder.createIntegerConstant(loc, i32Ty, 1));
- auto add = genAtomBinOp(builder, loc, mlir::LLVM::AtomicBinOp::fadd, a, vec2);
- auto r1 = mlir::LLVM::ExtractElementOp::create(
+ mlir::Value add =
+ genAtomBinOp(builder, loc, mlir::LLVM::AtomicBinOp::fadd, a, vec2);
+ mlir::Value r1 = mlir::LLVM::ExtractElementOp::create(
builder, loc, add, builder.createIntegerConstant(loc, i32Ty, 0));
- auto r2 = mlir::LLVM::ExtractElementOp::create(
+ mlir::Value r2 = mlir::LLVM::ExtractElementOp::create(
builder, loc, add, builder.createIntegerConstant(loc, i32Ty, 1));
- auto c1 = fir::CoordinateOp::create(builder, loc, refTy, res, zero);
- auto c2 = fir::CoordinateOp::create(builder, loc, refTy, res, one);
+ mlir::Value c1 = fir::CoordinateOp::create(builder, loc, refTy, res, zero);
+ mlir::Value c2 = fir::CoordinateOp::create(builder, loc, refTy, res, one);
fir::StoreOp::create(builder, loc, r1, c1);
fir::StoreOp::create(builder, loc, r2, c2);
mlir::Value ext = builder.createIntegerConstant(loc, idxTy, 2);
More information about the flang-commits
mailing list