[flang-commits] [flang] 56a0a7f - [flang][cuda] Adding support for more atomic calls (#124671)
via flang-commits
flang-commits at lists.llvm.org
Tue Jan 28 08:36:47 PST 2025
Author: Renaud Kauffmann
Date: 2025-01-28T08:36:43-08:00
New Revision: 56a0a7f6d188b13be69962654f068dc01dfd37b5
URL: https://github.com/llvm/llvm-project/commit/56a0a7f6d188b13be69962654f068dc01dfd37b5
DIFF: https://github.com/llvm/llvm-project/commit/56a0a7f6d188b13be69962654f068dc01dfd37b5.diff
LOG: [flang][cuda] Adding support for more atomic calls (#124671)
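This PR follows up on the earlier PR for atomic operation support in CUF
(https://github.com/llvm/llvm-project/pull/123840).
It adds the atomicsub, atomicmax, atomicmin, atomicand, atomicor, atomicinc
and atomicdec device interfaces and lowers them to llvm.atomicrmw operations.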
Added:
Modified:
flang/include/flang/Optimizer/Builder/IntrinsicCall.h
flang/lib/Optimizer/Builder/IntrinsicCall.cpp
flang/module/cudadevice.f90
flang/test/Lower/CUDA/cuda-device-proc.cuf
Removed:
################################################################################
diff --git a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
index e2ea89483ef11f..52ada485033323 100644
--- a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
+++ b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
@@ -186,6 +186,13 @@ struct IntrinsicLibrary {
fir::ExtendedValue genAny(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
mlir::Value genAtanpi(mlir::Type, llvm::ArrayRef<mlir::Value>);
mlir::Value genAtomicAdd(mlir::Type, llvm::ArrayRef<mlir::Value>);
+ mlir::Value genAtomicAnd(mlir::Type, llvm::ArrayRef<mlir::Value>);
+ mlir::Value genAtomicOr(mlir::Type, llvm::ArrayRef<mlir::Value>);
+ mlir::Value genAtomicDec(mlir::Type, llvm::ArrayRef<mlir::Value>);
+ mlir::Value genAtomicInc(mlir::Type, llvm::ArrayRef<mlir::Value>);
+ mlir::Value genAtomicMax(mlir::Type, llvm::ArrayRef<mlir::Value>);
+ mlir::Value genAtomicMin(mlir::Type, llvm::ArrayRef<mlir::Value>);
+ mlir::Value genAtomicSub(mlir::Type, llvm::ArrayRef<mlir::Value>);
fir::ExtendedValue
genCommandArgumentCount(mlir::Type, llvm::ArrayRef<fir::ExtendedValue>);
mlir::Value genAsind(mlir::Type, llvm::ArrayRef<mlir::Value>);
diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
index db9918c265164d..e75a29c968d177 100644
--- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
+++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
@@ -151,6 +151,22 @@ static constexpr IntrinsicHandler handlers[]{
{"atomicaddf", &I::genAtomicAdd, {{{"a", asAddr}, {"v", asValue}}}, false},
{"atomicaddi", &I::genAtomicAdd, {{{"a", asAddr}, {"v", asValue}}}, false},
{"atomicaddl", &I::genAtomicAdd, {{{"a", asAddr}, {"v", asValue}}}, false},
+ {"atomicandi", &I::genAtomicAnd, {{{"a", asAddr}, {"v", asValue}}}, false},
+ {"atomicdeci", &I::genAtomicDec, {{{"a", asAddr}, {"v", asValue}}}, false},
+ {"atomicinci", &I::genAtomicInc, {{{"a", asAddr}, {"v", asValue}}}, false},
+ {"atomicmaxd", &I::genAtomicMax, {{{"a", asAddr}, {"v", asValue}}}, false},
+ {"atomicmaxf", &I::genAtomicMax, {{{"a", asAddr}, {"v", asValue}}}, false},
+ {"atomicmaxi", &I::genAtomicMax, {{{"a", asAddr}, {"v", asValue}}}, false},
+ {"atomicmaxl", &I::genAtomicMax, {{{"a", asAddr}, {"v", asValue}}}, false},
+ {"atomicmind", &I::genAtomicMin, {{{"a", asAddr}, {"v", asValue}}}, false},
+ {"atomicminf", &I::genAtomicMin, {{{"a", asAddr}, {"v", asValue}}}, false},
+ {"atomicmini", &I::genAtomicMin, {{{"a", asAddr}, {"v", asValue}}}, false},
+ {"atomicminl", &I::genAtomicMin, {{{"a", asAddr}, {"v", asValue}}}, false},
+ {"atomicori", &I::genAtomicOr, {{{"a", asAddr}, {"v", asValue}}}, false},
+ {"atomicsubd", &I::genAtomicSub, {{{"a", asAddr}, {"v", asValue}}}, false},
+ {"atomicsubf", &I::genAtomicSub, {{{"a", asAddr}, {"v", asValue}}}, false},
+ {"atomicsubi", &I::genAtomicSub, {{{"a", asAddr}, {"v", asValue}}}, false},
+ {"atomicsubl", &I::genAtomicSub, {{{"a", asAddr}, {"v", asValue}}}, false},
{"bessel_jn",
&I::genBesselJn,
{{{"n1", asValue}, {"n2", asValue}, {"x", asValue}}},
@@ -2600,6 +2616,75 @@ mlir::Value IntrinsicLibrary::genAtomicAdd(mlir::Type resultType,
return genAtomBinOp(builder, loc, binOp, args[0], args[1]);
}
+mlir::Value IntrinsicLibrary::genAtomicSub(mlir::Type resultType,
+ llvm::ArrayRef<mlir::Value> args) {
+ assert(args.size() == 2);
+
+ mlir::LLVM::AtomicBinOp binOp =
+ mlir::isa<mlir::IntegerType>(args[1].getType())
+ ? mlir::LLVM::AtomicBinOp::sub
+ : mlir::LLVM::AtomicBinOp::fsub;
+ return genAtomBinOp(builder, loc, binOp, args[0], args[1]);
+}
+
+mlir::Value IntrinsicLibrary::genAtomicAnd(mlir::Type resultType,
+ llvm::ArrayRef<mlir::Value> args) {
+ assert(args.size() == 2);
+ assert(mlir::isa<mlir::IntegerType>(args[1].getType()));
+
+ mlir::LLVM::AtomicBinOp binOp = mlir::LLVM::AtomicBinOp::_and;
+ return genAtomBinOp(builder, loc, binOp, args[0], args[1]);
+}
+
+mlir::Value IntrinsicLibrary::genAtomicOr(mlir::Type resultType,
+ llvm::ArrayRef<mlir::Value> args) {
+ assert(args.size() == 2);
+ assert(mlir::isa<mlir::IntegerType>(args[1].getType()));
+
+ mlir::LLVM::AtomicBinOp binOp = mlir::LLVM::AtomicBinOp::_or;
+ return genAtomBinOp(builder, loc, binOp, args[0], args[1]);
+}
+
+mlir::Value IntrinsicLibrary::genAtomicDec(mlir::Type resultType,
+ llvm::ArrayRef<mlir::Value> args) {
+ assert(args.size() == 2);
+ assert(mlir::isa<mlir::IntegerType>(args[1].getType()));
+
+ mlir::LLVM::AtomicBinOp binOp = mlir::LLVM::AtomicBinOp::udec_wrap;
+ return genAtomBinOp(builder, loc, binOp, args[0], args[1]);
+}
+
+mlir::Value IntrinsicLibrary::genAtomicInc(mlir::Type resultType,
+ llvm::ArrayRef<mlir::Value> args) {
+ assert(args.size() == 2);
+ assert(mlir::isa<mlir::IntegerType>(args[1].getType()));
+
+ mlir::LLVM::AtomicBinOp binOp = mlir::LLVM::AtomicBinOp::uinc_wrap;
+ return genAtomBinOp(builder, loc, binOp, args[0], args[1]);
+}
+
+mlir::Value IntrinsicLibrary::genAtomicMax(mlir::Type resultType,
+ llvm::ArrayRef<mlir::Value> args) {
+ assert(args.size() == 2);
+
+ mlir::LLVM::AtomicBinOp binOp =
+ mlir::isa<mlir::IntegerType>(args[1].getType())
+ ? mlir::LLVM::AtomicBinOp::max
+ : mlir::LLVM::AtomicBinOp::fmax;
+ return genAtomBinOp(builder, loc, binOp, args[0], args[1]);
+}
+
+mlir::Value IntrinsicLibrary::genAtomicMin(mlir::Type resultType,
+ llvm::ArrayRef<mlir::Value> args) {
+ assert(args.size() == 2);
+
+ mlir::LLVM::AtomicBinOp binOp =
+ mlir::isa<mlir::IntegerType>(args[1].getType())
+ ? mlir::LLVM::AtomicBinOp::min
+ : mlir::LLVM::AtomicBinOp::fmin;
+ return genAtomBinOp(builder, loc, binOp, args[0], args[1]);
+}
+
// ASSOCIATED
fir::ExtendedValue
IntrinsicLibrary::genAssociated(mlir::Type resultType,
diff --git a/flang/module/cudadevice.f90 b/flang/module/cudadevice.f90
index 53b6beaaf1ad8f..af516a1866fa97 100644
--- a/flang/module/cudadevice.f90
+++ b/flang/module/cudadevice.f90
@@ -106,10 +106,10 @@ attributes(device) pure real function atomicaddf(address, val)
real, intent(inout) :: address
real, value :: val
end function
- attributes(device) pure real*8 function atomicaddd(address, val)
+ attributes(device) pure real(8) function atomicaddd(address, val)
!dir$ ignore_tkr (d) address, (d) val
- real*8, intent(inout) :: address
- real*8, value :: val
+ real(8), intent(inout) :: address
+ real(8), value :: val
end function
attributes(device) pure integer(8) function atomicaddl(address, val)
!dir$ ignore_tkr (d) address, (d) val
@@ -117,6 +117,115 @@ attributes(device) pure integer(8) function atomicaddl(address, val)
integer(8), value :: val
end function
end interface
-public :: atomicadd
+ public :: atomicadd
+
+ interface atomicsub
+ attributes(device) pure integer function atomicsubi(address, val)
+ !dir$ ignore_tkr (d) address, (d) val
+ integer, intent(inout) :: address
+ integer, value :: val
+ end function
+ attributes(device) pure real function atomicsubf(address, val)
+ !dir$ ignore_tkr (d) address, (d) val
+ real, intent(inout) :: address
+ real, value :: val
+ end function
+ attributes(device) pure real(8) function atomicsubd(address, val)
+ !dir$ ignore_tkr (d) address, (d) val
+ real(8), intent(inout) :: address
+ real(8), value :: val
+ end function
+ attributes(device) pure integer(8) function atomicsubl(address, val)
+ !dir$ ignore_tkr (d) address, (dk) val
+ integer(8), intent(inout) :: address
+ integer(8), value :: val
+ end function
+ end interface
+ public :: atomicsub
+
+ interface atomicmax
+ attributes(device) pure integer function atomicmaxi(address, val)
+ !dir$ ignore_tkr (d) address, (d) val
+ integer, intent(inout) :: address
+ integer, value :: val
+ end function
+ attributes(device) pure real function atomicmaxf(address, val)
+ !dir$ ignore_tkr (d) address, (d) val
+ real, intent(inout) :: address
+ real, value :: val
+ end function
+ attributes(device) pure real(8) function atomicmaxd(address, val)
+ !dir$ ignore_tkr (d) address, (d) val
+ real(8), intent(inout) :: address
+ real(8), value :: val
+ end function
+ attributes(device) pure integer(8) function atomicmaxl(address, val)
+ !dir$ ignore_tkr (d) address, (dk) val
+ integer(8), intent(inout) :: address
+ integer(8), value :: val
+ end function
+ end interface
+ public :: atomicmax
+
+ interface atomicmin
+ attributes(device) pure integer function atomicmini(address, val)
+ !dir$ ignore_tkr (d) address, (d) val
+ integer, intent(inout) :: address
+ integer, value :: val
+ end function
+ attributes(device) pure real function atomicminf(address, val)
+ !dir$ ignore_tkr (d) address, (d) val
+ real, intent(inout) :: address
+ real, value :: val
+ end function
+ attributes(device) pure real(8) function atomicmind(address, val)
+ !dir$ ignore_tkr (d) address, (d) val
+ real(8), intent(inout) :: address
+ real(8), value :: val
+ end function
+ attributes(device) pure integer(8) function atomicminl(address, val)
+ !dir$ ignore_tkr (d) address, (dk) val
+ integer(8), intent(inout) :: address
+ integer(8), value :: val
+ end function
+ end interface
+ public :: atomicmin
+
+ interface atomicand
+ attributes(device) pure integer function atomicandi(address, val)
+ !dir$ ignore_tkr (d) address, (d) val
+ integer, intent(inout) :: address
+ integer, value :: val
+ end function
+ end interface
+ public :: atomicand
+
+ interface atomicor
+ attributes(device) pure integer function atomicori(address, val)
+ !dir$ ignore_tkr (d) address, (d) val
+ integer, intent(inout) :: address
+ integer, value :: val
+ end function
+ end interface
+ public :: atomicor
+
+ interface atomicinc
+ attributes(device) pure integer function atomicinci(address, val)
+ !dir$ ignore_tkr (d) address, (d) val
+ integer, intent(inout) :: address
+ integer, value :: val
+ end function
+ end interface
+ public :: atomicinc
+
+ interface atomicdec
+ attributes(device) pure integer function atomicdeci(address, val)
+ !dir$ ignore_tkr (d) address, (d) val
+ integer, intent(inout) :: address
+ integer, value :: val
+ end function
+ end interface
+ public :: atomicdec
+
end module
diff --git a/flang/test/Lower/CUDA/cuda-device-proc.cuf b/flang/test/Lower/CUDA/cuda-device-proc.cuf
index 661e5728bf85b8..7ef391c7d308ba 100644
--- a/flang/test/Lower/CUDA/cuda-device-proc.cuf
+++ b/flang/test/Lower/CUDA/cuda-device-proc.cuf
@@ -23,6 +23,26 @@ attributes(global) subroutine devsub()
al = atomicadd(al, 1_8)
af = atomicadd(af, 1.0_4)
ad = atomicadd(ad, 1.0_8)
+
+ ai = atomicsub(ai, 1_4)
+ al = atomicsub(al, 1_8)
+ af = atomicsub(af, 1.0_4)
+ ad = atomicsub(ad, 1.0_8)
+
+ ai = atomicmax(ai, 1_4)
+ al = atomicmax(al, 1_8)
+ af = atomicmax(af, 1.0_4)
+ ad = atomicmax(ad, 1.0_8)
+
+ ai = atomicmin(ai, 1_4)
+ al = atomicmin(al, 1_8)
+ af = atomicmin(af, 1.0_4)
+ ad = atomicmin(ad, 1.0_8)
+
+ ai = atomicand(ai, 1_4)
+ ai = atomicor(ai, 1_4)
+ ai = atomicinc(ai, 1_4)
+ ai = atomicdec(ai, 1_4)
end
! CHECK-LABEL: func.func @_QPdevsub() attributes {cuf.proc_attr = #cuf.cuda_proc<global>}
@@ -39,6 +59,26 @@ end
! CHECK: %{{.*}} = llvm.atomicrmw fadd %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, f32
! CHECK: %{{.*}} = llvm.atomicrmw fadd %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, f64
+! CHECK: %{{.*}} = llvm.atomicrmw sub %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, i32
+! CHECK: %{{.*}} = llvm.atomicrmw sub %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, i64
+! CHECK: %{{.*}} = llvm.atomicrmw fsub %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, f32
+! CHECK: %{{.*}} = llvm.atomicrmw fsub %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, f64
+
+! CHECK: %{{.*}} = llvm.atomicrmw max %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, i32
+! CHECK: %{{.*}} = llvm.atomicrmw max %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, i64
+! CHECK: %{{.*}} = llvm.atomicrmw fmax %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, f32
+! CHECK: %{{.*}} = llvm.atomicrmw fmax %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, f64
+
+! CHECK: %{{.*}} = llvm.atomicrmw min %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, i32
+! CHECK: %{{.*}} = llvm.atomicrmw min %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, i64
+! CHECK: %{{.*}} = llvm.atomicrmw fmin %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, f32
+! CHECK: %{{.*}} = llvm.atomicrmw fmin %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, f64
+
+! CHECK: %{{.*}} = llvm.atomicrmw _and %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, i32
+! CHECK: %{{.*}} = llvm.atomicrmw _or %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, i32
+! CHECK: %{{.*}} = llvm.atomicrmw uinc_wrap %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, i32
+! CHECK: %{{.*}} = llvm.atomicrmw udec_wrap %{{.*}}, %{{.*}} seq_cst : !llvm.ptr, i32
+
! CHECK: func.func private @llvm.nvvm.barrier0()
! CHECK: func.func private @__syncwarp(!fir.ref<i32> {cuf.data_attr = #cuf.cuda<device>}) attributes {cuf.proc_attr = #cuf.cuda_proc<device>, fir.bindc_name = "__syncwarp", fir.proc_attrs = #fir.proc_attrs<bind_c>}
! CHECK: func.func private @llvm.nvvm.membar.gl()
More information about the flang-commits
mailing list