[flang-commits] [flang] 0ce03c2 - [flang][cuda] Add interface and lowering for atomicadd_r4x2 and atomicadd_r4x4 (#166308)
via flang-commits
flang-commits at lists.llvm.org
Tue Nov 4 09:33:13 PST 2025
Author: Valentin Clement (バレンタイン クレメン)
Date: 2025-11-04T09:33:09-08:00
New Revision: 0ce03c2be4c43d19e4b63d805b13838f56621f3c
URL: https://github.com/llvm/llvm-project/commit/0ce03c2be4c43d19e4b63d805b13838f56621f3c
DIFF: https://github.com/llvm/llvm-project/commit/0ce03c2be4c43d19e4b63d805b13838f56621f3c.diff
LOG: [flang][cuda] Add interface and lowering for atomicadd_r4x2 and atomicadd_r4x4 (#166308)
Added:
Modified:
flang/include/flang/Optimizer/Builder/IntrinsicCall.h
flang/lib/Optimizer/Builder/IntrinsicCall.cpp
flang/module/cudadevice.f90
flang/test/Lower/CUDA/cuda-atomicadd.cuf
Removed:
################################################################################
diff --git a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
index bbdef481a2085..b64419f5ae6da 100644
--- a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
+++ b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
@@ -190,6 +190,7 @@ struct IntrinsicLibrary {
mlir::Value genAtomicAdd(mlir::Type, llvm::ArrayRef<mlir::Value>);
fir::ExtendedValue genAtomicAddR2(mlir::Type,
llvm::ArrayRef<fir::ExtendedValue>);
+ template <int extent>
fir::ExtendedValue genAtomicAddVector(mlir::Type,
llvm::ArrayRef<fir::ExtendedValue>);
mlir::Value genAtomicAnd(mlir::Type, llvm::ArrayRef<mlir::Value>);
diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
index b9ea8b125b780..3156c8cb4332c 100644
--- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
+++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
@@ -290,12 +290,12 @@ static constexpr IntrinsicHandler handlers[]{
{"atan2pi", &I::genAtanpi},
{"atand", &I::genAtand},
{"atanpi", &I::genAtanpi},
- {"atomicadd_r2x2",
- &I::genAtomicAddVector,
+ {"atomicadd_r4x2",
+ &I::genAtomicAddVector<2>,
{{{"a", asAddr}, {"v", asAddr}}},
false},
- {"atomicadd_r4x2",
- &I::genAtomicAddVector,
+ {"atomicadd_r4x4",
+ &I::genAtomicAddVector<4>,
{{{"a", asAddr}, {"v", asAddr}}},
false},
{"atomicaddd", &I::genAtomicAdd, {{{"a", asAddr}, {"v", asValue}}}, false},
@@ -306,6 +306,14 @@ static constexpr IntrinsicHandler handlers[]{
&I::genAtomicAddR2,
{{{"a", asAddr}, {"v", asAddr}}},
false},
+ {"atomicaddvector_r2x2",
+ &I::genAtomicAddVector<2>,
+ {{{"a", asAddr}, {"v", asAddr}}},
+ false},
+ {"atomicaddvector_r4x2",
+ &I::genAtomicAddVector<2>,
+ {{{"a", asAddr}, {"v", asAddr}}},
+ false},
{"atomicandi", &I::genAtomicAnd, {{{"a", asAddr}, {"v", asValue}}}, false},
{"atomiccasd",
&I::genAtomicCas,
@@ -3176,44 +3184,51 @@ IntrinsicLibrary::genAtomicAddR2(mlir::Type resultType,
mlir::ArrayRef<int64_t>{0});
}
+template <int extent>
fir::ExtendedValue
IntrinsicLibrary::genAtomicAddVector(mlir::Type resultType,
llvm::ArrayRef<fir::ExtendedValue> args) {
assert(args.size() == 2);
mlir::Value res = fir::AllocaOp::create(
- builder, loc, fir::SequenceType::get({2}, resultType));
+ builder, loc, fir::SequenceType::get({extent}, resultType));
mlir::Value a = fir::getBase(args[0]);
if (mlir::isa<fir::BaseBoxType>(a.getType())) {
a = fir::BoxAddrOp::create(builder, loc, a);
}
- auto vecTy = mlir::VectorType::get({2}, resultType);
+ auto vecTy = mlir::VectorType::get({extent}, resultType);
auto refTy = fir::ReferenceType::get(resultType);
mlir::Type i32Ty = builder.getI32Type();
mlir::Type idxTy = builder.getIndexType();
- mlir::Value zero = builder.createIntegerConstant(loc, idxTy, 0);
- mlir::Value one = builder.createIntegerConstant(loc, idxTy, 1);
- mlir::Value v1Coord = fir::CoordinateOp::create(builder, loc, refTy,
- fir::getBase(args[1]), zero);
- mlir::Value v2Coord = fir::CoordinateOp::create(builder, loc, refTy,
- fir::getBase(args[1]), one);
- mlir::Value v1 = fir::LoadOp::create(builder, loc, v1Coord);
- mlir::Value v2 = fir::LoadOp::create(builder, loc, v2Coord);
+
+ // Extract the values from the array.
+ llvm::SmallVector<mlir::Value> values;
+ for (unsigned i = 0; i < extent; ++i) {
+ mlir::Value pos = builder.createIntegerConstant(loc, idxTy, i);
+ mlir::Value coord = fir::CoordinateOp::create(builder, loc, refTy,
+ fir::getBase(args[1]), pos);
+ mlir::Value value = fir::LoadOp::create(builder, loc, coord);
+ values.push_back(value);
+ }
+ // Pack extracted values into a vector to call the atomic add.
mlir::Value undef = mlir::LLVM::UndefOp::create(builder, loc, vecTy);
- mlir::Value vec1 = mlir::LLVM::InsertElementOp::create(
- builder, loc, undef, v1, builder.createIntegerConstant(loc, i32Ty, 0));
- mlir::Value vec2 = mlir::LLVM::InsertElementOp::create(
- builder, loc, vec1, v2, builder.createIntegerConstant(loc, i32Ty, 1));
+ for (unsigned i = 0; i < extent; ++i) {
+ mlir::Value insert = mlir::LLVM::InsertElementOp::create(
+ builder, loc, undef, values[i],
+ builder.createIntegerConstant(loc, i32Ty, i));
+ undef = insert;
+ }
+ // Atomic operation with a vector of values.
mlir::Value add =
- genAtomBinOp(builder, loc, mlir::LLVM::AtomicBinOp::fadd, a, vec2);
- mlir::Value r1 = mlir::LLVM::ExtractElementOp::create(
- builder, loc, add, builder.createIntegerConstant(loc, i32Ty, 0));
- mlir::Value r2 = mlir::LLVM::ExtractElementOp::create(
- builder, loc, add, builder.createIntegerConstant(loc, i32Ty, 1));
- mlir::Value c1 = fir::CoordinateOp::create(builder, loc, refTy, res, zero);
- mlir::Value c2 = fir::CoordinateOp::create(builder, loc, refTy, res, one);
- fir::StoreOp::create(builder, loc, r1, c1);
- fir::StoreOp::create(builder, loc, r2, c2);
- mlir::Value ext = builder.createIntegerConstant(loc, idxTy, 2);
+ genAtomBinOp(builder, loc, mlir::LLVM::AtomicBinOp::fadd, a, undef);
+ // Store results in the result array.
+ for (unsigned i = 0; i < extent; ++i) {
+ mlir::Value r = mlir::LLVM::ExtractElementOp::create(
+ builder, loc, add, builder.createIntegerConstant(loc, i32Ty, i));
+ mlir::Value c = fir::CoordinateOp::create(
+ builder, loc, refTy, res, builder.createIntegerConstant(loc, idxTy, i));
+ fir::StoreOp::create(builder, loc, r, c);
+ }
+ mlir::Value ext = builder.createIntegerConstant(loc, idxTy, extent);
return fir::ArrayBoxValue(res, {ext});
}
diff --git a/flang/module/cudadevice.f90 b/flang/module/cudadevice.f90
index b1aef95cba8c9..27097193aaa9b 100644
--- a/flang/module/cudadevice.f90
+++ b/flang/module/cudadevice.f90
@@ -1179,13 +1179,22 @@ attributes(device) pure integer(4) function atomicaddr2(address, val)
end interface
interface atomicaddvector
- attributes(device) pure function atomicadd_r2x2(address, val) result(z)
+ attributes(device) pure function atomicaddvector_r2x2(address, val) result(z)
!dir$ ignore_tkr (rd) address, (d) val
real(2), dimension(2), intent(inout) :: address
real(2), dimension(2), intent(in) :: val
real(2), dimension(2) :: z
end function
+ attributes(device) pure function atomicaddvector_r4x2(address, val) result(z)
+ !dir$ ignore_tkr (rd) address, (d) val
+ real(4), dimension(2), intent(inout) :: address
+ real(4), dimension(2), intent(in) :: val
+ real(4), dimension(2) :: z
+ end function
+ end interface
+
+ interface atomicaddreal4x2
attributes(device) pure function atomicadd_r4x2(address, val) result(z)
!dir$ ignore_tkr (rd) address, (d) val
real(4), dimension(2), intent(inout) :: address
@@ -1194,6 +1203,15 @@ attributes(device) pure function atomicadd_r4x2(address, val) result(z)
end function
end interface
+ interface atomicaddreal4x4
+ attributes(device) pure function atomicadd_r4x4(address, val) result(z)
+ !dir$ ignore_tkr (rd) address, (d) val
+ real(4), dimension(4), intent(inout) :: address
+ real(4), dimension(4), intent(in) :: val
+ real(4), dimension(4) :: z
+ end function
+ end interface
+
interface atomicsub
attributes(device) pure integer function atomicsubi(address, val)
!dir$ ignore_tkr (d) address, (d) val
diff --git a/flang/test/Lower/CUDA/cuda-atomicadd.cuf b/flang/test/Lower/CUDA/cuda-atomicadd.cuf
index baa6cdb3d5869..6669b4afa291d 100644
--- a/flang/test/Lower/CUDA/cuda-atomicadd.cuf
+++ b/flang/test/Lower/CUDA/cuda-atomicadd.cuf
@@ -2,18 +2,34 @@
! Test CUDA Fortran atomicadd functions available in the cudadevice module
-attributes(global) subroutine atomicaddvector_r2()
+attributes(global) subroutine test_atomicaddvector_r2()
real(2), device :: a(2), tmp1(2), tmp2(2)
tmp1 = atomicAddVector(a, tmp2)
end subroutine
-! CHECK-LABEL: func.func @_QPatomicaddvector_r2() attributes {cuf.proc_attr = #cuf.cuda_proc<global>}
+! CHECK-LABEL: func.func @_QPtest_atomicaddvector_r2() attributes {cuf.proc_attr = #cuf.cuda_proc<global>}
! CHECK: llvm.atomicrmw fadd %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, vector<2xf16>
-attributes(global) subroutine atomicaddvector_r4()
+attributes(global) subroutine test_atomicaddvector_r4()
real(4), device :: a(2), tmp1(2), tmp2(2)
tmp1 = atomicAddVector(a, tmp2)
end subroutine
-! CHECK-LABEL: func.func @_QPatomicaddvector_r4() attributes {cuf.proc_attr = #cuf.cuda_proc<global>}
+! CHECK-LABEL: func.func @_QPtest_atomicaddvector_r4() attributes {cuf.proc_attr = #cuf.cuda_proc<global>}
! CHECK: llvm.atomicrmw fadd %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, vector<2xf32>
+
+attributes(global) subroutine test_atomicadd_r2x4()
+ real(4), device :: a(2), tmp1(2), tmp2(2)
+ tmp1 = atomicaddreal4x2(a, tmp2)
+end subroutine
+
+! CHECK-LABEL: func.func @_QPtest_atomicadd_r2x4() attributes {cuf.proc_attr = #cuf.cuda_proc<global>}
+! CHECK: llvm.atomicrmw fadd %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, vector<2xf32>
+
+attributes(global) subroutine test_atomicadd_r4x4()
+ real(4), device :: a(4), tmp1(4), tmp2(4)
+ tmp1 = atomicaddreal4x4(a, tmp2)
+end subroutine
+
+! CHECK-LABEL: func.func @_QPtest_atomicadd_r4x4() attributes {cuf.proc_attr = #cuf.cuda_proc<global>}
+! CHECK: llvm.atomicrmw fadd %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, vector<4xf32>
More information about the flang-commits
mailing list