[flang] [llvm] [mlir] [flang][OMPIRBuilder][MLIR][llvm] Backend support for atomic control options (PR #151579)
Anchu Rajendran S via llvm-commits
llvm-commits at lists.llvm.org
Thu Aug 7 11:38:05 PDT 2025
https://github.com/anchuraj updated https://github.com/llvm/llvm-project/pull/151579
>From 70547b5446c008bef574368fb8c388c4f9dea0d5 Mon Sep 17 00:00:00 2001
From: Anchu Rajendran <asudhaku at amd.com>
Date: Thu, 31 Jul 2025 13:56:32 -0500
Subject: [PATCH 1/2] [flang][OMPIRBuilder][MLIR][llvm] Backend support for
atomic control options
---
flang/test/Driver/atomic-control-options.f90 | 20 +++++++++++
.../llvm/Frontend/OpenMP/OMPIRBuilder.h | 11 ++++--
llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 36 +++++++++++++------
.../OpenMP/OpenMPToLLVMIRTranslation.cpp | 29 +++++++++++++--
.../omptarget-atomic-control-options.mlir | 36 +++++++++++++++++++
5 files changed, 117 insertions(+), 15 deletions(-)
create mode 100644 flang/test/Driver/atomic-control-options.f90
create mode 100644 mlir/test/Target/LLVMIR/omptarget-atomic-control-options.mlir
diff --git a/flang/test/Driver/atomic-control-options.f90 b/flang/test/Driver/atomic-control-options.f90
new file mode 100644
index 0000000000000..cb382f96a9d5f
--- /dev/null
+++ b/flang/test/Driver/atomic-control-options.f90
@@ -0,0 +1,20 @@
+! REQUIRES: amdgpu-registered-target
+! RUN: %flang_fc1 -emit-llvm -triple amdgcn-amd-amdhsa -fopenmp -fopenmp-is-device -munsafe-fp-atomics %s -o -|FileCheck -check-prefix=UNSAFE-FP-ATOMICS %s
+! RUN: %flang_fc1 -emit-llvm -triple amdgcn-amd-amdhsa -fopenmp -fopenmp-is-device -fatomic-ignore-denormal-mode %s -o -|FileCheck -check-prefix=IGNORE-DENORMAL-MODE %s
+! RUN: %flang_fc1 -emit-llvm -triple amdgcn-amd-amdhsa -fopenmp -fopenmp-is-device -fatomic-fine-grained-memory %s -o -|FileCheck -check-prefix=FINE-GRAINED-MEMORY %s
+! RUN: %flang_fc1 -emit-llvm -triple amdgcn-amd-amdhsa -fopenmp -fopenmp-is-device -fatomic-remote-memory %s -o -|FileCheck -check-prefix=REMOTE-MEMORY %s
+program test
+ implicit none
+ integer :: A, threads
+ threads = 128
+ A = 0
+ !$omp target parallel num_threads(threads)
+ !$omp atomic
+ A = A + 1
+ !$omp end target parallel
+end program test
+
+!UNSAFE-FP-ATOMICS: %{{.*}} = atomicrmw add ptr {{.*}}, i32 1 monotonic, align 4, !amdgpu.ignore.denormal.mode !{{.*}}, !amdgpu.no.fine.grained.memory !{{.*}}, !amdgpu.no.remote.memory !{{.*}}
+!IGNORE-DENORMAL-MODE: %{{.*}} = atomicrmw add ptr {{.*}}, i32 1 monotonic, align 4, !amdgpu.ignore.denormal.mode !{{.*}}, !amdgpu.no.fine.grained.memory !{{.*}}, !amdgpu.no.remote.memory !{{.*}}
+!FINE-GRAINED-MEMORY: %{{.*}} = atomicrmw add ptr {{.*}}, i32 1 monotonic, align 4, !amdgpu.no.remote.memory !{{.*}}
+!REMOTE-MEMORY: %{{.*}} = atomicrmw add ptr {{.*}}, i32 1 monotonic, align 4, !amdgpu.no.fine.grained.memory !{{.*}}
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
index 206ad4a4ef85f..b681ea8413726 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h
@@ -3286,7 +3286,8 @@ class OpenMPIRBuilder {
emitAtomicUpdate(InsertPointTy AllocaIP, Value *X, Type *XElemTy, Value *Expr,
AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp,
AtomicUpdateCallbackTy &UpdateOp, bool VolatileX,
- bool IsXBinopExpr);
+ bool IsXBinopExpr, bool IsIgnoreDenormalMode,
+ bool IsFineGrainedMemory, bool IsRemoteMemory);
/// Emit the binary op. described by \p RMWOp, using \p Src1 and \p Src2 .
///
@@ -3359,7 +3360,9 @@ class OpenMPIRBuilder {
LLVM_ABI InsertPointOrErrorTy createAtomicUpdate(
const LocationDescription &Loc, InsertPointTy AllocaIP, AtomicOpValue &X,
Value *Expr, AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp,
- AtomicUpdateCallbackTy &UpdateOp, bool IsXBinopExpr);
+ AtomicUpdateCallbackTy &UpdateOp, bool IsXBinopExpr,
+ bool IsIgnoreDenormalMode = false, bool IsFineGrainedMemory = false,
+ bool IsRemoteMemory = false);
/// Emit atomic update for constructs: --- Only Scalar data types
/// V = X; X = X BinOp Expr ,
@@ -3394,7 +3397,9 @@ class OpenMPIRBuilder {
const LocationDescription &Loc, InsertPointTy AllocaIP, AtomicOpValue &X,
AtomicOpValue &V, Value *Expr, AtomicOrdering AO,
AtomicRMWInst::BinOp RMWOp, AtomicUpdateCallbackTy &UpdateOp,
- bool UpdateExpr, bool IsPostfixUpdate, bool IsXBinopExpr);
+ bool UpdateExpr, bool IsPostfixUpdate, bool IsXBinopExpr,
+ bool IsIgnoreDenormalMode = false, bool IsFineGrainedMemory = false,
+ bool IsRemoteMemory = false);
/// Emit atomic compare for constructs: --- Only scalar data types
/// cond-expr-stmt:
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 79287729fbfd1..0aa4f303e96cc 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -8957,7 +8957,8 @@ OpenMPIRBuilder::createAtomicWrite(const LocationDescription &Loc,
OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createAtomicUpdate(
const LocationDescription &Loc, InsertPointTy AllocaIP, AtomicOpValue &X,
Value *Expr, AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp,
- AtomicUpdateCallbackTy &UpdateOp, bool IsXBinopExpr) {
+ AtomicUpdateCallbackTy &UpdateOp, bool IsXBinopExpr,
+ bool IsIgnoreDenormalMode, bool IsFineGrainedMemory, bool IsRemoteMemory) {
assert(!isConflictIP(Loc.IP, AllocaIP) && "IPs must not be ambiguous");
if (!updateToLocation(Loc))
return Loc.IP;
@@ -8975,9 +8976,9 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createAtomicUpdate(
"OpenMP atomic does not support LT or GT operations");
});
- Expected<std::pair<Value *, Value *>> AtomicResult =
- emitAtomicUpdate(AllocaIP, X.Var, X.ElemTy, Expr, AO, RMWOp, UpdateOp,
- X.IsVolatile, IsXBinopExpr);
+ Expected<std::pair<Value *, Value *>> AtomicResult = emitAtomicUpdate(
+ AllocaIP, X.Var, X.ElemTy, Expr, AO, RMWOp, UpdateOp, X.IsVolatile,
+ IsXBinopExpr, IsIgnoreDenormalMode, IsFineGrainedMemory, IsRemoteMemory);
if (!AtomicResult)
return AtomicResult.takeError();
checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Update);
@@ -9024,7 +9025,8 @@ Value *OpenMPIRBuilder::emitRMWOpAsInstruction(Value *Src1, Value *Src2,
Expected<std::pair<Value *, Value *>> OpenMPIRBuilder::emitAtomicUpdate(
InsertPointTy AllocaIP, Value *X, Type *XElemTy, Value *Expr,
AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp,
- AtomicUpdateCallbackTy &UpdateOp, bool VolatileX, bool IsXBinopExpr) {
+ AtomicUpdateCallbackTy &UpdateOp, bool VolatileX, bool IsXBinopExpr,
+ bool IsIgnoreDenormalMode, bool IsFineGrainedMemory, bool IsRemoteMemory) {
// TODO: handle the case where XElemTy is not byte-sized or not a power of 2
// or a complex datatype.
bool emitRMWOp = false;
@@ -9047,7 +9049,20 @@ Expected<std::pair<Value *, Value *>> OpenMPIRBuilder::emitAtomicUpdate(
std::pair<Value *, Value *> Res;
if (emitRMWOp) {
- Res.first = Builder.CreateAtomicRMW(RMWOp, X, Expr, llvm::MaybeAlign(), AO);
+ AtomicRMWInst *RMWInst =
+ Builder.CreateAtomicRMW(RMWOp, X, Expr, llvm::MaybeAlign(), AO);
+ if (T.isAMDGPU()) {
+ if (IsIgnoreDenormalMode)
+ RMWInst->setMetadata("amdgpu.ignore.denormal.mode",
+ llvm::MDNode::get(Builder.getContext(), {}));
+ if (!IsFineGrainedMemory)
+ RMWInst->setMetadata("amdgpu.no.fine.grained.memory",
+ llvm::MDNode::get(Builder.getContext(), {}));
+ if (!IsRemoteMemory)
+ RMWInst->setMetadata("amdgpu.no.remote.memory",
+ llvm::MDNode::get(Builder.getContext(), {}));
+ }
+ Res.first = RMWInst;
// not needed except in case of postfix captures. Generate anyway for
// consistency with the else part. Will be removed with any DCE pass.
// AtomicRMWInst::Xchg does not have a coressponding instruction.
@@ -9179,7 +9194,8 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createAtomicCapture(
const LocationDescription &Loc, InsertPointTy AllocaIP, AtomicOpValue &X,
AtomicOpValue &V, Value *Expr, AtomicOrdering AO,
AtomicRMWInst::BinOp RMWOp, AtomicUpdateCallbackTy &UpdateOp,
- bool UpdateExpr, bool IsPostfixUpdate, bool IsXBinopExpr) {
+ bool UpdateExpr, bool IsPostfixUpdate, bool IsXBinopExpr,
+ bool IsIgnoreDenormalMode, bool IsFineGrainedMemory, bool IsRemoteMemory) {
if (!updateToLocation(Loc))
return Loc.IP;
@@ -9198,9 +9214,9 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createAtomicCapture(
// If UpdateExpr is 'x' updated with some `expr` not based on 'x',
// 'x' is simply atomically rewritten with 'expr'.
AtomicRMWInst::BinOp AtomicOp = (UpdateExpr ? RMWOp : AtomicRMWInst::Xchg);
- Expected<std::pair<Value *, Value *>> AtomicResult =
- emitAtomicUpdate(AllocaIP, X.Var, X.ElemTy, Expr, AO, AtomicOp, UpdateOp,
- X.IsVolatile, IsXBinopExpr);
+ Expected<std::pair<Value *, Value *>> AtomicResult = emitAtomicUpdate(
+ AllocaIP, X.Var, X.ElemTy, Expr, AO, AtomicOp, UpdateOp, X.IsVolatile,
+ IsXBinopExpr, IsIgnoreDenormalMode, IsFineGrainedMemory, IsRemoteMemory);
if (!AtomicResult)
return AtomicResult.takeError();
Value *CapturedVal =
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 9f18199c75b4b..2465de77ce03c 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -3269,13 +3269,25 @@ convertOmpAtomicUpdate(omp::AtomicUpdateOp &opInst,
return moduleTranslation.lookupValue(yieldop.getResults()[0]);
};
+ bool isIgnoreDenormalMode = false;
+ bool isFineGrainedMemory = false;
+ bool isRemoteMemory = false;
+ if (opInst->hasAttr(opInst.getAtomicControlAttrName())) {
+ mlir::omp::AtomicControlAttr atomicControlAttr =
+ opInst.getAtomicControlAttr();
+ isIgnoreDenormalMode = atomicControlAttr.getIgnoreDenormalMode();
+ isFineGrainedMemory = atomicControlAttr.getFineGrainedMemory();
+ isRemoteMemory = atomicControlAttr.getRemoteMemory();
+ }
+
// Handle ambiguous alloca, if any.
auto allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
ompBuilder->createAtomicUpdate(ompLoc, allocaIP, llvmAtomicX, llvmExpr,
atomicOrdering, binop, updateFn,
- isXBinopExpr);
+ isXBinopExpr, isIgnoreDenormalMode,
+ isFineGrainedMemory, isRemoteMemory);
if (failed(handleError(afterIP, *opInst)))
return failure();
@@ -3364,13 +3376,26 @@ convertOmpAtomicCapture(omp::AtomicCaptureOp atomicCaptureOp,
return moduleTranslation.lookupValue(yieldop.getResults()[0]);
};
+ bool isIgnoreDenormalMode = false;
+ bool isFineGrainedMemory = false;
+ bool isRemoteMemory = false;
+ if (atomicUpdateOp &&
+ atomicUpdateOp->hasAttr(atomicUpdateOp.getAtomicControlAttrName())) {
+ mlir::omp::AtomicControlAttr atomicControlAttr =
+ atomicUpdateOp.getAtomicControlAttr();
+ isIgnoreDenormalMode = atomicControlAttr.getIgnoreDenormalMode();
+ isFineGrainedMemory = atomicControlAttr.getFineGrainedMemory();
+ isRemoteMemory = atomicControlAttr.getRemoteMemory();
+ }
+
// Handle ambiguous alloca, if any.
auto allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
llvm::OpenMPIRBuilder::InsertPointOrErrorTy afterIP =
ompBuilder->createAtomicCapture(
ompLoc, allocaIP, llvmAtomicX, llvmAtomicV, llvmExpr, atomicOrdering,
- binop, updateFn, atomicUpdateOp, isPostfixUpdate, isXBinopExpr);
+ binop, updateFn, atomicUpdateOp, isPostfixUpdate, isXBinopExpr,
+ isIgnoreDenormalMode, isFineGrainedMemory, isRemoteMemory);
if (failed(handleError(afterIP, *atomicCaptureOp)))
return failure();
diff --git a/mlir/test/Target/LLVMIR/omptarget-atomic-control-options.mlir b/mlir/test/Target/LLVMIR/omptarget-atomic-control-options.mlir
new file mode 100644
index 0000000000000..3b0005bd20798
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/omptarget-atomic-control-options.mlir
@@ -0,0 +1,36 @@
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+
+// CHECK: atomicrmw add ptr %loadgep_, i32 1 monotonic, align 4, !amdgpu.ignore.denormal.mode !{{.*}}, !amdgpu.no.fine.grained.memory !{{.*}}, !amdgpu.no.remote.memory !{{.*}}
+
+module attributes {dlti.dl_spec = #dlti.dl_spec<!llvm.ptr = dense<64> : vector<4xi64>, !llvm.ptr<1> = dense<64> : vector<4xi64>, !llvm.ptr<2> = dense<32> : vector<4xi64>, !llvm.ptr<3> = dense<32> : vector<4xi64>, !llvm.ptr<4> = dense<64> : vector<4xi64>, !llvm.ptr<5> = dense<32> : vector<4xi64>, !llvm.ptr<6> = dense<32> : vector<4xi64>, !llvm.ptr<7> = dense<[160, 256, 256, 32]> : vector<4xi64>, !llvm.ptr<8> = dense<[128, 128, 128, 48]> : vector<4xi64>, !llvm.ptr<9> = dense<[192, 256, 256, 32]> : vector<4xi64>, i64 = dense<64> : vector<2xi64>, i1 = dense<8> : vector<2xi64>, i8 = dense<8> : vector<2xi64>, i16 = dense<16> : vector<2xi64>, i32 = dense<32> : vector<2xi64>, f16 = dense<16> : vector<2xi64>, f64 = dense<64> : vector<2xi64>, f128 = dense<128> : vector<2xi64>, "dlti.endianness" = "little", "dlti.legal_int_widths" = array<i32: 32, 64>, "dlti.stack_alignment" = 32 : i64, "dlti.alloca_memory_space" = 5 : ui64, "dlti.global_memory_space" = 1 : ui64>, fir.atomic_ignore_denormal_mode, fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", fir.target_cpu = "generic-hsa", llvm.data_layout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128:128:48-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9", llvm.target_triple = "amdgcn-amd-amdhsa", omp.flags = #omp.flags<openmp_device_version = 31>, omp.is_gpu = true, omp.is_target_device = true, omp.requires = #omp<clause_requires none>, omp.target_triples = [], omp.version = #omp.version<version = 31>} {
+ llvm.func @_QQmain() attributes {fir.bindc_name = "TEST", omp.declare_target = #omp.declaretarget<device_type = (host), capture_clause = (to)>, target_cpu = "generic-hsa"} {
+ %0 = llvm.mlir.constant(1 : i64) : i64
+ %1 = llvm.alloca %0 x i32 {bindc_name = "threads"} : (i64) -> !llvm.ptr<5>
+ %2 = llvm.addrspacecast %1 : !llvm.ptr<5> to !llvm.ptr
+ %3 = llvm.mlir.constant(1 : i64) : i64
+ %4 = llvm.alloca %3 x i32 {bindc_name = "a"} : (i64) -> !llvm.ptr<5>
+ %5 = llvm.addrspacecast %4 : !llvm.ptr<5> to !llvm.ptr
+ %6 = llvm.mlir.constant(0 : i32) : i32
+ %7 = llvm.mlir.constant(128 : i32) : i32
+ %8 = llvm.mlir.constant(1 : i64) : i64
+ %9 = llvm.mlir.constant(1 : i64) : i64
+ llvm.store %7, %2 : i32, !llvm.ptr
+ llvm.store %6, %5 : i32, !llvm.ptr
+ %10 = omp.map.info var_ptr(%2 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "threads"}
+ %11 = omp.map.info var_ptr(%5 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "a"}
+ omp.target map_entries(%10 -> %arg0, %11 -> %arg1 : !llvm.ptr, !llvm.ptr) {
+ %12 = llvm.mlir.constant(1 : i32) : i32
+ %13 = llvm.load %arg0 : !llvm.ptr -> i32
+ omp.parallel num_threads(%13 : i32) {
+ omp.atomic.update %arg1 : !llvm.ptr {
+ ^bb0(%arg2: i32):
+ %14 = llvm.add %arg2, %12 : i32
+ omp.yield(%14 : i32)
+ } {atomic_control = #omp.atomic_control<ignore_denormal_mode = true>}
+ omp.terminator
+ }
+ omp.terminator
+ }
+ llvm.return
+ }
+}
>From 49cc2dcd22008935b954c96cd30968eea6f10304 Mon Sep 17 00:00:00 2001
From: Anchu Rajendran <asudhaku at amd.com>
Date: Thu, 7 Aug 2025 12:56:52 -0500
Subject: [PATCH 2/2] R2: Addressing review comments
---
.../OpenMP/OpenMPToLLVMIRTranslation.cpp | 50 ++++++++++---------
...target-atomic-capture-control-options.mlir | 44 ++++++++++++++++
...target-atomic-update-control-options.mlir} | 0
3 files changed, 71 insertions(+), 23 deletions(-)
create mode 100644 mlir/test/Target/LLVMIR/omptarget-atomic-capture-control-options.mlir
rename mlir/test/Target/LLVMIR/{omptarget-atomic-control-options.mlir => omptarget-atomic-update-control-options.mlir} (100%)
diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
index 2465de77ce03c..4b74d5c4e1152 100644
--- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
+++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp
@@ -3205,6 +3205,23 @@ llvm::AtomicRMWInst::BinOp convertBinOpToAtomic(Operation &op) {
.Default(llvm::AtomicRMWInst::BinOp::BAD_BINOP);
}
+/// Copies the flags of the optional `atomic_control` attribute on
+/// \p atomicUpdateOp into the out-parameters; all three are left false when
+/// the op handle is null or the attribute is absent.
+static void extractAtomicControlFlags(omp::AtomicUpdateOp atomicUpdateOp,
+                                      bool &isIgnoreDenormalMode,
+                                      bool &isFineGrainedMemory,
+                                      bool &isRemoteMemory) {
+  isIgnoreDenormalMode = isFineGrainedMemory = isRemoteMemory = false;
+  if (atomicUpdateOp &&
+      atomicUpdateOp->hasAttr(atomicUpdateOp.getAtomicControlAttrName())) {
+    omp::AtomicControlAttr attr = atomicUpdateOp.getAtomicControlAttr();
+    isIgnoreDenormalMode = attr.getIgnoreDenormalMode();
+    isFineGrainedMemory = attr.getFineGrainedMemory();
+    isRemoteMemory = attr.getRemoteMemory();
+  }
+}
+
/// Converts an OpenMP atomic update operation using OpenMPIRBuilder.
static LogicalResult
convertOmpAtomicUpdate(omp::AtomicUpdateOp &opInst,
@@ -3269,17 +3286,11 @@ convertOmpAtomicUpdate(omp::AtomicUpdateOp &opInst,
return moduleTranslation.lookupValue(yieldop.getResults()[0]);
};
- bool isIgnoreDenormalMode = false;
- bool isFineGrainedMemory = false;
- bool isRemoteMemory = false;
- if (opInst->hasAttr(opInst.getAtomicControlAttrName())) {
- mlir::omp::AtomicControlAttr atomicControlAttr =
- opInst.getAtomicControlAttr();
- isIgnoreDenormalMode = atomicControlAttr.getIgnoreDenormalMode();
- isFineGrainedMemory = atomicControlAttr.getFineGrainedMemory();
- isRemoteMemory = atomicControlAttr.getRemoteMemory();
- }
-
+ bool isIgnoreDenormalMode;
+ bool isFineGrainedMemory;
+ bool isRemoteMemory;
+ extractAtomicControlFlags(opInst, isIgnoreDenormalMode, isFineGrainedMemory,
+ isRemoteMemory);
// Handle ambiguous alloca, if any.
auto allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
@@ -3376,18 +3387,11 @@ convertOmpAtomicCapture(omp::AtomicCaptureOp atomicCaptureOp,
return moduleTranslation.lookupValue(yieldop.getResults()[0]);
};
- bool isIgnoreDenormalMode = false;
- bool isFineGrainedMemory = false;
- bool isRemoteMemory = false;
- if (atomicUpdateOp &&
- atomicUpdateOp->hasAttr(atomicUpdateOp.getAtomicControlAttrName())) {
- mlir::omp::AtomicControlAttr atomicControlAttr =
- atomicUpdateOp.getAtomicControlAttr();
- isIgnoreDenormalMode = atomicControlAttr.getIgnoreDenormalMode();
- isFineGrainedMemory = atomicControlAttr.getFineGrainedMemory();
- isRemoteMemory = atomicControlAttr.getRemoteMemory();
- }
-
+ bool isIgnoreDenormalMode;
+ bool isFineGrainedMemory;
+ bool isRemoteMemory;
+ extractAtomicControlFlags(atomicUpdateOp, isIgnoreDenormalMode,
+ isFineGrainedMemory, isRemoteMemory);
// Handle ambiguous alloca, if any.
auto allocaIP = findAllocaInsertPoint(builder, moduleTranslation);
llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
diff --git a/mlir/test/Target/LLVMIR/omptarget-atomic-capture-control-options.mlir b/mlir/test/Target/LLVMIR/omptarget-atomic-capture-control-options.mlir
new file mode 100644
index 0000000000000..355390719322f
--- /dev/null
+++ b/mlir/test/Target/LLVMIR/omptarget-atomic-capture-control-options.mlir
@@ -0,0 +1,44 @@
+// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s
+
+// CHECK: atomicrmw add ptr %loadgep_, i32 1 monotonic, align 4, !amdgpu.no.remote.memory !{{.*}}
+
+module attributes {dlti.dl_spec = #dlti.dl_spec<!llvm.ptr = dense<64> : vector<4xi64>, !llvm.ptr<1> = dense<64> : vector<4xi64>, !llvm.ptr<2> = dense<32> : vector<4xi64>, !llvm.ptr<3> = dense<32> : vector<4xi64>, !llvm.ptr<4> = dense<64> : vector<4xi64>, !llvm.ptr<5> = dense<32> : vector<4xi64>, !llvm.ptr<6> = dense<32> : vector<4xi64>, !llvm.ptr<7> = dense<[160, 256, 256, 32]> : vector<4xi64>, !llvm.ptr<8> = dense<[128, 128, 128, 48]> : vector<4xi64>, !llvm.ptr<9> = dense<[192, 256, 256, 32]> : vector<4xi64>, i64 = dense<64> : vector<2xi64>, i1 = dense<8> : vector<2xi64>, i8 = dense<8> : vector<2xi64>, i16 = dense<16> : vector<2xi64>, i32 = dense<32> : vector<2xi64>, f16 = dense<16> : vector<2xi64>, f64 = dense<64> : vector<2xi64>, f128 = dense<128> : vector<2xi64>, "dlti.endianness" = "little", "dlti.legal_int_widths" = array<i32: 32, 64>, "dlti.stack_alignment" = 32 : i64, "dlti.alloca_memory_space" = 5 : ui64, "dlti.global_memory_space" = 1 : ui64>, fir.atomic_fine_grained_memory, fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", fir.target_cpu = "generic-hsa", llvm.data_layout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128:128:48-p9:192:256:256:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9", llvm.target_triple = "amdgcn-amd-amdhsa", omp.flags = #omp.flags<openmp_device_version = 31>, omp.is_gpu = true, omp.is_target_device = true, omp.requires = #omp<clause_requires none>, omp.target_triples = [], omp.version = #omp.version<version = 31>} {
+ llvm.func @_QQmain() attributes {fir.bindc_name = "TEST", omp.declare_target = #omp.declaretarget<device_type = (host), capture_clause = (to)>, target_cpu = "generic-hsa"} {
+ %0 = llvm.mlir.constant(1 : i64) : i64
+ %1 = llvm.alloca %0 x i32 {bindc_name = "threads"} : (i64) -> !llvm.ptr<5>
+ %2 = llvm.addrspacecast %1 : !llvm.ptr<5> to !llvm.ptr
+ %3 = llvm.mlir.constant(1 : i64) : i64
+ %4 = llvm.alloca %3 x i32 {bindc_name = "capture"} : (i64) -> !llvm.ptr<5>
+ %5 = llvm.addrspacecast %4 : !llvm.ptr<5> to !llvm.ptr
+ %6 = llvm.mlir.constant(1 : i64) : i64
+ %7 = llvm.alloca %6 x i32 {bindc_name = "a"} : (i64) -> !llvm.ptr<5>
+ %8 = llvm.addrspacecast %7 : !llvm.ptr<5> to !llvm.ptr
+ %9 = llvm.mlir.constant(0 : i32) : i32
+ %10 = llvm.mlir.constant(128 : i32) : i32
+ %11 = llvm.mlir.constant(1 : i64) : i64
+ %12 = llvm.mlir.constant(1 : i64) : i64
+ %13 = llvm.mlir.constant(1 : i64) : i64
+ llvm.store %10, %2 : i32, !llvm.ptr
+ llvm.store %9, %8 : i32, !llvm.ptr
+ %14 = omp.map.info var_ptr(%2 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "threads"}
+ %15 = omp.map.info var_ptr(%5 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "capture"}
+ %16 = omp.map.info var_ptr(%8 : !llvm.ptr, i32) map_clauses(implicit, exit_release_or_enter_alloc) capture(ByCopy) -> !llvm.ptr {name = "a"}
+ omp.target map_entries(%14 -> %arg0, %15 -> %arg1, %16 -> %arg2 : !llvm.ptr, !llvm.ptr, !llvm.ptr) {
+ %17 = llvm.mlir.constant(1 : i32) : i32
+ %18 = llvm.load %arg0 : !llvm.ptr -> i32
+ omp.parallel num_threads(%18 : i32) {
+ omp.atomic.capture {
+ omp.atomic.read %arg1 = %arg2 : !llvm.ptr, !llvm.ptr, i32
+ omp.atomic.update %arg2 : !llvm.ptr {
+ ^bb0(%arg3: i32):
+ %19 = llvm.add %arg3, %17 : i32
+ omp.yield(%19 : i32)
+ } {atomic_control = #omp.atomic_control<fine_grained_memory = true>}
+ }
+ omp.terminator
+ }
+ omp.terminator
+ }
+ llvm.return
+ }
+}
diff --git a/mlir/test/Target/LLVMIR/omptarget-atomic-control-options.mlir b/mlir/test/Target/LLVMIR/omptarget-atomic-update-control-options.mlir
similarity index 100%
rename from mlir/test/Target/LLVMIR/omptarget-atomic-control-options.mlir
rename to mlir/test/Target/LLVMIR/omptarget-atomic-update-control-options.mlir
More information about the llvm-commits
mailing list