[clang] clang: Allow targets to set custom metadata on atomics (PR #96906)
via cfe-commits
cfe-commits at lists.llvm.org
Thu Jun 27 06:22:05 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Matt Arsenault (arsenm)
<details>
<summary>Changes</summary>
Use this to replace the emission of the amdgpu-unsafe-fp-atomics
attribute in favor of per-instruction metadata. In the future,
new fine-grained controls should be introduced that also cover
the integer cases.
Add a wrapper around CreateAtomicRMW that appends the metadata,
and update a few call sites to use it.
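For illustration only (not part of the patch), the intended effect on the emitted IR is roughly the following sketch: the metadata names come from the tests below, the old attribute is the one named above, and the function and value names are made up.

```llvm
; Before (sketch): unsafe FP atomics were signalled with a per-function attribute.
define float @sketch_old(ptr %p, float %v) #0 {
  %old = atomicrmw fadd ptr %p, float %v seq_cst, align 4
  ret float %old
}
attributes #0 = { "amdgpu-unsafe-fp-atomics"="true" }

; After (sketch): the same intent is carried per instruction as metadata,
; attached by the target's setTargetAtomicMetadata hook.
define float @sketch_new(ptr %p, float %v) {
  %old = atomicrmw fadd ptr %p, float %v seq_cst, align 4, !amdgpu.no.fine.grained.memory !0, !amdgpu.ignore.denormal.mode !0
  ret float %old
}
!0 = !{}
```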
---
Patch is 38.16 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/96906.diff
9 Files Affected:
- (modified) clang/lib/CodeGen/CGAtomic.cpp (+12-1)
- (modified) clang/lib/CodeGen/CGExprScalar.cpp (+7-6)
- (modified) clang/lib/CodeGen/CGStmtOpenMP.cpp (+2-2)
- (modified) clang/lib/CodeGen/CodeGenFunction.h (+7)
- (modified) clang/lib/CodeGen/TargetInfo.h (+4)
- (modified) clang/lib/CodeGen/Targets/AMDGPU.cpp (+19)
- (added) clang/test/CodeGen/AMDGPU/amdgpu-atomic-float.c (+316)
- (modified) clang/test/CodeGenCUDA/amdgpu-atomic-ops.cu (+79-23)
- (added) clang/test/OpenMP/amdgpu-unsafe-fp-atomics.cpp (+59)
``````````diff
diff --git a/clang/lib/CodeGen/CGAtomic.cpp b/clang/lib/CodeGen/CGAtomic.cpp
index fbf942d06ca6e..fbe9569e50ef6 100644
--- a/clang/lib/CodeGen/CGAtomic.cpp
+++ b/clang/lib/CodeGen/CGAtomic.cpp
@@ -727,7 +727,7 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest,
llvm::Value *LoadVal1 = CGF.Builder.CreateLoad(Val1);
llvm::AtomicRMWInst *RMWI =
- CGF.Builder.CreateAtomicRMW(Op, Ptr, LoadVal1, Order, Scope);
+ CGF.emitAtomicRMWInst(Op, Ptr, LoadVal1, Order, Scope);
RMWI->setVolatile(E->isVolatile());
// For __atomic_*_fetch operations, perform the operation again to
@@ -2034,6 +2034,17 @@ std::pair<RValue, llvm::Value *> CodeGenFunction::EmitAtomicCompareExchange(
IsWeak);
}
+llvm::AtomicRMWInst *
+CodeGenFunction::emitAtomicRMWInst(llvm::AtomicRMWInst::BinOp Op, Address Addr,
+ llvm::Value *Val, llvm::AtomicOrdering Order,
+ llvm::SyncScope::ID SSID) {
+
+ llvm::AtomicRMWInst *RMW =
+ Builder.CreateAtomicRMW(Op, Addr, Val, Order, SSID);
+ getTargetHooks().setTargetAtomicMetadata(*this, *RMW);
+ return RMW;
+}
+
void CodeGenFunction::EmitAtomicUpdate(
LValue LVal, llvm::AtomicOrdering AO,
const llvm::function_ref<RValue(RValue)> &UpdateOp, bool IsVolatile) {
diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp
index f40f3c273206b..8eb7a64c144c8 100644
--- a/clang/lib/CodeGen/CGExprScalar.cpp
+++ b/clang/lib/CodeGen/CGExprScalar.cpp
@@ -2835,9 +2835,10 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV,
isInc ? llvm::Instruction::FAdd : llvm::Instruction::FSub;
llvm::Value *amt = llvm::ConstantFP::get(
VMContext, llvm::APFloat(static_cast<float>(1.0)));
- llvm::Value *old =
- Builder.CreateAtomicRMW(aop, LV.getAddress(), amt,
- llvm::AtomicOrdering::SequentiallyConsistent);
+ llvm::AtomicRMWInst *old =
+ CGF.emitAtomicRMWInst(aop, LV.getAddress(), amt,
+ llvm::AtomicOrdering::SequentiallyConsistent);
+
return isPre ? Builder.CreateBinOp(op, old, amt) : old;
}
value = EmitLoadOfLValue(LV, E->getExprLoc());
@@ -3577,9 +3578,9 @@ LValue ScalarExprEmitter::EmitCompoundAssignLValue(
EmitScalarConversion(OpInfo.RHS, E->getRHS()->getType(), LHSTy,
E->getExprLoc()),
LHSTy);
- Value *OldVal = Builder.CreateAtomicRMW(
- AtomicOp, LHSLV.getAddress(), Amt,
- llvm::AtomicOrdering::SequentiallyConsistent);
+
+ llvm::AtomicRMWInst *OldVal =
+ CGF.emitAtomicRMWInst(AtomicOp, LHSLV.getAddress(), Amt);
// Since operation is atomic, the result type is guaranteed to be the
// same as the input in LLVM terms.
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index f73d32de7c484..8c152fef73557 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -6235,8 +6235,8 @@ static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X,
UpdateVal = CGF.Builder.CreateCast(llvm::Instruction::CastOps::UIToFP, IC,
X.getAddress().getElementType());
}
- llvm::Value *Res =
- CGF.Builder.CreateAtomicRMW(RMWOp, X.getAddress(), UpdateVal, AO);
+ llvm::AtomicRMWInst *Res =
+ CGF.emitAtomicRMWInst(RMWOp, X.getAddress(), UpdateVal, AO);
return std::make_pair(true, RValue::get(Res));
}
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index 13f12b5d878a6..6cfcb76eea42a 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -4153,6 +4153,13 @@ class CodeGenFunction : public CodeGenTypeCache {
llvm::AtomicOrdering::SequentiallyConsistent,
bool IsWeak = false, AggValueSlot Slot = AggValueSlot::ignored());
+ /// Emit an atomicrmw instruction, applying relevant metadata when
+ /// applicable.
+ llvm::AtomicRMWInst *emitAtomicRMWInst(
+ llvm::AtomicRMWInst::BinOp Op, Address Addr, llvm::Value *Val,
+ llvm::AtomicOrdering Order = llvm::AtomicOrdering::SequentiallyConsistent,
+ llvm::SyncScope::ID SSID = llvm::SyncScope::System);
+
void EmitAtomicUpdate(LValue LVal, llvm::AtomicOrdering AO,
const llvm::function_ref<RValue(RValue)> &UpdateOp,
bool IsVolatile);
diff --git a/clang/lib/CodeGen/TargetInfo.h b/clang/lib/CodeGen/TargetInfo.h
index f242d9e36ed40..1bd821e7206b9 100644
--- a/clang/lib/CodeGen/TargetInfo.h
+++ b/clang/lib/CodeGen/TargetInfo.h
@@ -333,6 +333,10 @@ class TargetCodeGenInfo {
llvm::AtomicOrdering Ordering,
llvm::LLVMContext &Ctx) const;
+ /// Allow the target to apply other metadata to an atomic instruction
+ virtual void setTargetAtomicMetadata(CodeGenFunction &CGF,
+ llvm::AtomicRMWInst &RMW) const {}
+
/// Interface class for filling custom fields of a block literal for OpenCL.
class TargetOpenCLBlockHelper {
public:
diff --git a/clang/lib/CodeGen/Targets/AMDGPU.cpp b/clang/lib/CodeGen/Targets/AMDGPU.cpp
index 4d3275e17c386..37e6af3d4196a 100644
--- a/clang/lib/CodeGen/Targets/AMDGPU.cpp
+++ b/clang/lib/CodeGen/Targets/AMDGPU.cpp
@@ -311,6 +311,8 @@ class AMDGPUTargetCodeGenInfo : public TargetCodeGenInfo {
SyncScope Scope,
llvm::AtomicOrdering Ordering,
llvm::LLVMContext &Ctx) const override;
+ void setTargetAtomicMetadata(CodeGenFunction &CGF,
+ llvm::AtomicRMWInst &RMW) const override;
llvm::Value *createEnqueuedBlockKernel(CodeGenFunction &CGF,
llvm::Function *BlockInvokeFunc,
llvm::Type *BlockTy) const override;
@@ -546,6 +548,23 @@ AMDGPUTargetCodeGenInfo::getLLVMSyncScopeID(const LangOptions &LangOpts,
return Ctx.getOrInsertSyncScopeID(Name);
}
+void AMDGPUTargetCodeGenInfo::setTargetAtomicMetadata(
+ CodeGenFunction &CGF, llvm::AtomicRMWInst &RMW) const {
+ if (!CGF.getTarget().allowAMDGPUUnsafeFPAtomics())
+ return;
+
+ // TODO: Introduce new, more controlled options that also work for integers,
+ // and deprecate allowAMDGPUUnsafeFPAtomics.
+ llvm::AtomicRMWInst::BinOp RMWOp = RMW.getOperation();
+ if (llvm::AtomicRMWInst::isFPOperation(RMWOp)) {
+ llvm::MDNode *Empty = llvm::MDNode::get(CGF.getLLVMContext(), {});
+ RMW.setMetadata("amdgpu.no.fine.grained.memory", Empty);
+
+ if (RMWOp == llvm::AtomicRMWInst::FAdd && RMW.getType()->isFloatTy())
+ RMW.setMetadata("amdgpu.ignore.denormal.mode", Empty);
+ }
+}
+
bool AMDGPUTargetCodeGenInfo::shouldEmitStaticExternCAliases() const {
return false;
}
diff --git a/clang/test/CodeGen/AMDGPU/amdgpu-atomic-float.c b/clang/test/CodeGen/AMDGPU/amdgpu-atomic-float.c
new file mode 100644
index 0000000000000..6deff1116e1d8
--- /dev/null
+++ b/clang/test/CodeGen/AMDGPU/amdgpu-atomic-float.c
@@ -0,0 +1,316 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
+// RUN: %clang_cc1 -fnative-half-arguments-and-returns -triple amdgcn-amd-amdhsa-gnu -target-cpu gfx900 -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK,SAFE %s
+// RUN: %clang_cc1 -fnative-half-arguments-and-returns -triple amdgcn-amd-amdhsa-gnu -target-cpu gfx900 -emit-llvm -munsafe-fp-atomics -o - %s | FileCheck -check-prefixes=CHECK,UNSAFE %s
+
+// SAFE-LABEL: define dso_local float @test_float_post_inc(
+// SAFE-SAME: ) #[[ATTR0:[0-9]+]] {
+// SAFE-NEXT: [[ENTRY:.*:]]
+// SAFE-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5)
+// SAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// SAFE-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @test_float_post_inc.n to ptr), float 1.000000e+00 seq_cst, align 4
+// SAFE-NEXT: ret float [[TMP0]]
+//
+// UNSAFE-LABEL: define dso_local float @test_float_post_inc(
+// UNSAFE-SAME: ) #[[ATTR0:[0-9]+]] {
+// UNSAFE-NEXT: [[ENTRY:.*:]]
+// UNSAFE-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5)
+// UNSAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// UNSAFE-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @test_float_post_inc.n to ptr), float 1.000000e+00 seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META3:![0-9]+]], !amdgpu.ignore.denormal.mode [[META3]]
+// UNSAFE-NEXT: ret float [[TMP0]]
+//
+float test_float_post_inc()
+{
+ static _Atomic float n;
+ return n++;
+}
+
+// SAFE-LABEL: define dso_local float @test_float_post_dc(
+// SAFE-SAME: ) #[[ATTR0]] {
+// SAFE-NEXT: [[ENTRY:.*:]]
+// SAFE-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5)
+// SAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// SAFE-NEXT: [[TMP0:%.*]] = atomicrmw fsub ptr addrspacecast (ptr addrspace(1) @test_float_post_dc.n to ptr), float 1.000000e+00 seq_cst, align 4
+// SAFE-NEXT: ret float [[TMP0]]
+//
+// UNSAFE-LABEL: define dso_local float @test_float_post_dc(
+// UNSAFE-SAME: ) #[[ATTR0]] {
+// UNSAFE-NEXT: [[ENTRY:.*:]]
+// UNSAFE-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5)
+// UNSAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// UNSAFE-NEXT: [[TMP0:%.*]] = atomicrmw fsub ptr addrspacecast (ptr addrspace(1) @test_float_post_dc.n to ptr), float 1.000000e+00 seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META3]]
+// UNSAFE-NEXT: ret float [[TMP0]]
+//
+float test_float_post_dc()
+{
+ static _Atomic float n;
+ return n--;
+}
+
+// SAFE-LABEL: define dso_local float @test_float_pre_dc(
+// SAFE-SAME: ) #[[ATTR0]] {
+// SAFE-NEXT: [[ENTRY:.*:]]
+// SAFE-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5)
+// SAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// SAFE-NEXT: [[TMP0:%.*]] = atomicrmw fsub ptr addrspacecast (ptr addrspace(1) @test_float_pre_dc.n to ptr), float 1.000000e+00 seq_cst, align 4
+// SAFE-NEXT: [[TMP1:%.*]] = fsub float [[TMP0]], 1.000000e+00
+// SAFE-NEXT: ret float [[TMP1]]
+//
+// UNSAFE-LABEL: define dso_local float @test_float_pre_dc(
+// UNSAFE-SAME: ) #[[ATTR0]] {
+// UNSAFE-NEXT: [[ENTRY:.*:]]
+// UNSAFE-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5)
+// UNSAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// UNSAFE-NEXT: [[TMP0:%.*]] = atomicrmw fsub ptr addrspacecast (ptr addrspace(1) @test_float_pre_dc.n to ptr), float 1.000000e+00 seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META3]]
+// UNSAFE-NEXT: [[TMP1:%.*]] = fsub float [[TMP0]], 1.000000e+00
+// UNSAFE-NEXT: ret float [[TMP1]]
+//
+float test_float_pre_dc()
+{
+ static _Atomic float n;
+ return --n;
+}
+
+// SAFE-LABEL: define dso_local float @test_float_pre_inc(
+// SAFE-SAME: ) #[[ATTR0]] {
+// SAFE-NEXT: [[ENTRY:.*:]]
+// SAFE-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5)
+// SAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// SAFE-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @test_float_pre_inc.n to ptr), float 1.000000e+00 seq_cst, align 4
+// SAFE-NEXT: [[TMP1:%.*]] = fadd float [[TMP0]], 1.000000e+00
+// SAFE-NEXT: ret float [[TMP1]]
+//
+// UNSAFE-LABEL: define dso_local float @test_float_pre_inc(
+// UNSAFE-SAME: ) #[[ATTR0]] {
+// UNSAFE-NEXT: [[ENTRY:.*:]]
+// UNSAFE-NEXT: [[RETVAL:%.*]] = alloca float, align 4, addrspace(5)
+// UNSAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// UNSAFE-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @test_float_pre_inc.n to ptr), float 1.000000e+00 seq_cst, align 4, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.ignore.denormal.mode [[META3]]
+// UNSAFE-NEXT: [[TMP1:%.*]] = fadd float [[TMP0]], 1.000000e+00
+// UNSAFE-NEXT: ret float [[TMP1]]
+//
+float test_float_pre_inc()
+{
+ static _Atomic float n;
+ return ++n;
+}
+
+// SAFE-LABEL: define dso_local double @test_double_post_inc(
+// SAFE-SAME: ) #[[ATTR0]] {
+// SAFE-NEXT: [[ENTRY:.*:]]
+// SAFE-NEXT: [[RETVAL:%.*]] = alloca double, align 8, addrspace(5)
+// SAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// SAFE-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @test_double_post_inc.n to ptr), float 1.000000e+00 seq_cst, align 8
+// SAFE-NEXT: store float [[TMP0]], ptr [[RETVAL_ASCAST]], align 8
+// SAFE-NEXT: [[TMP1:%.*]] = load double, ptr [[RETVAL_ASCAST]], align 8
+// SAFE-NEXT: ret double [[TMP1]]
+//
+// UNSAFE-LABEL: define dso_local double @test_double_post_inc(
+// UNSAFE-SAME: ) #[[ATTR0]] {
+// UNSAFE-NEXT: [[ENTRY:.*:]]
+// UNSAFE-NEXT: [[RETVAL:%.*]] = alloca double, align 8, addrspace(5)
+// UNSAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// UNSAFE-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @test_double_post_inc.n to ptr), float 1.000000e+00 seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.ignore.denormal.mode [[META3]]
+// UNSAFE-NEXT: store float [[TMP0]], ptr [[RETVAL_ASCAST]], align 8
+// UNSAFE-NEXT: [[TMP1:%.*]] = load double, ptr [[RETVAL_ASCAST]], align 8
+// UNSAFE-NEXT: ret double [[TMP1]]
+//
+double test_double_post_inc()
+{
+ static _Atomic double n;
+ return n++;
+}
+
+// SAFE-LABEL: define dso_local double @test_double_post_dc(
+// SAFE-SAME: ) #[[ATTR0]] {
+// SAFE-NEXT: [[ENTRY:.*:]]
+// SAFE-NEXT: [[RETVAL:%.*]] = alloca double, align 8, addrspace(5)
+// SAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// SAFE-NEXT: [[TMP0:%.*]] = atomicrmw fsub ptr addrspacecast (ptr addrspace(1) @test_double_post_dc.n to ptr), float 1.000000e+00 seq_cst, align 8
+// SAFE-NEXT: store float [[TMP0]], ptr [[RETVAL_ASCAST]], align 8
+// SAFE-NEXT: [[TMP1:%.*]] = load double, ptr [[RETVAL_ASCAST]], align 8
+// SAFE-NEXT: ret double [[TMP1]]
+//
+// UNSAFE-LABEL: define dso_local double @test_double_post_dc(
+// UNSAFE-SAME: ) #[[ATTR0]] {
+// UNSAFE-NEXT: [[ENTRY:.*:]]
+// UNSAFE-NEXT: [[RETVAL:%.*]] = alloca double, align 8, addrspace(5)
+// UNSAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// UNSAFE-NEXT: [[TMP0:%.*]] = atomicrmw fsub ptr addrspacecast (ptr addrspace(1) @test_double_post_dc.n to ptr), float 1.000000e+00 seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META3]]
+// UNSAFE-NEXT: store float [[TMP0]], ptr [[RETVAL_ASCAST]], align 8
+// UNSAFE-NEXT: [[TMP1:%.*]] = load double, ptr [[RETVAL_ASCAST]], align 8
+// UNSAFE-NEXT: ret double [[TMP1]]
+//
+double test_double_post_dc()
+{
+ static _Atomic double n;
+ return n--;
+}
+
+// SAFE-LABEL: define dso_local double @test_double_pre_dc(
+// SAFE-SAME: ) #[[ATTR0]] {
+// SAFE-NEXT: [[ENTRY:.*:]]
+// SAFE-NEXT: [[RETVAL:%.*]] = alloca double, align 8, addrspace(5)
+// SAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// SAFE-NEXT: [[TMP0:%.*]] = atomicrmw fsub ptr addrspacecast (ptr addrspace(1) @test_double_pre_dc.n to ptr), float 1.000000e+00 seq_cst, align 8
+// SAFE-NEXT: [[TMP1:%.*]] = fsub float [[TMP0]], 1.000000e+00
+// SAFE-NEXT: store float [[TMP1]], ptr [[RETVAL_ASCAST]], align 8
+// SAFE-NEXT: [[TMP2:%.*]] = load double, ptr [[RETVAL_ASCAST]], align 8
+// SAFE-NEXT: ret double [[TMP2]]
+//
+// UNSAFE-LABEL: define dso_local double @test_double_pre_dc(
+// UNSAFE-SAME: ) #[[ATTR0]] {
+// UNSAFE-NEXT: [[ENTRY:.*:]]
+// UNSAFE-NEXT: [[RETVAL:%.*]] = alloca double, align 8, addrspace(5)
+// UNSAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// UNSAFE-NEXT: [[TMP0:%.*]] = atomicrmw fsub ptr addrspacecast (ptr addrspace(1) @test_double_pre_dc.n to ptr), float 1.000000e+00 seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META3]]
+// UNSAFE-NEXT: [[TMP1:%.*]] = fsub float [[TMP0]], 1.000000e+00
+// UNSAFE-NEXT: store float [[TMP1]], ptr [[RETVAL_ASCAST]], align 8
+// UNSAFE-NEXT: [[TMP2:%.*]] = load double, ptr [[RETVAL_ASCAST]], align 8
+// UNSAFE-NEXT: ret double [[TMP2]]
+//
+double test_double_pre_dc()
+{
+ static _Atomic double n;
+ return --n;
+}
+
+// SAFE-LABEL: define dso_local double @test_double_pre_inc(
+// SAFE-SAME: ) #[[ATTR0]] {
+// SAFE-NEXT: [[ENTRY:.*:]]
+// SAFE-NEXT: [[RETVAL:%.*]] = alloca double, align 8, addrspace(5)
+// SAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// SAFE-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @test_double_pre_inc.n to ptr), float 1.000000e+00 seq_cst, align 8
+// SAFE-NEXT: [[TMP1:%.*]] = fadd float [[TMP0]], 1.000000e+00
+// SAFE-NEXT: store float [[TMP1]], ptr [[RETVAL_ASCAST]], align 8
+// SAFE-NEXT: [[TMP2:%.*]] = load double, ptr [[RETVAL_ASCAST]], align 8
+// SAFE-NEXT: ret double [[TMP2]]
+//
+// UNSAFE-LABEL: define dso_local double @test_double_pre_inc(
+// UNSAFE-SAME: ) #[[ATTR0]] {
+// UNSAFE-NEXT: [[ENTRY:.*:]]
+// UNSAFE-NEXT: [[RETVAL:%.*]] = alloca double, align 8, addrspace(5)
+// UNSAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// UNSAFE-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @test_double_pre_inc.n to ptr), float 1.000000e+00 seq_cst, align 8, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.ignore.denormal.mode [[META3]]
+// UNSAFE-NEXT: [[TMP1:%.*]] = fadd float [[TMP0]], 1.000000e+00
+// UNSAFE-NEXT: store float [[TMP1]], ptr [[RETVAL_ASCAST]], align 8
+// UNSAFE-NEXT: [[TMP2:%.*]] = load double, ptr [[RETVAL_ASCAST]], align 8
+// UNSAFE-NEXT: ret double [[TMP2]]
+//
+double test_double_pre_inc()
+{
+ static _Atomic double n;
+ return ++n;
+}
+
+// SAFE-LABEL: define dso_local half @test__Float16_post_inc(
+// SAFE-SAME: ) #[[ATTR0]] {
+// SAFE-NEXT: [[ENTRY:.*:]]
+// SAFE-NEXT: [[RETVAL:%.*]] = alloca half, align 2, addrspace(5)
+// SAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// SAFE-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @test__Float16_post_inc.n to ptr), float 1.000000e+00 seq_cst, align 2
+// SAFE-NEXT: store float [[TMP0]], ptr [[RETVAL_ASCAST]], align 2
+// SAFE-NEXT: [[TMP1:%.*]] = load half, ptr [[RETVAL_ASCAST]], align 2
+// SAFE-NEXT: ret half [[TMP1]]
+//
+// UNSAFE-LABEL: define dso_local half @test__Float16_post_inc(
+// UNSAFE-SAME: ) #[[ATTR0]] {
+// UNSAFE-NEXT: [[ENTRY:.*:]]
+// UNSAFE-NEXT: [[RETVAL:%.*]] = alloca half, align 2, addrspace(5)
+// UNSAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// UNSAFE-NEXT: [[TMP0:%.*]] = atomicrmw fadd ptr addrspacecast (ptr addrspace(1) @test__Float16_post_inc.n to ptr), float 1.000000e+00 seq_cst, align 2, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.ignore.denormal.mode [[META3]]
+// UNSAFE-NEXT: store float [[TMP0]], ptr [[RETVAL_ASCAST]], align 2
+// UNSAFE-NEXT: [[TMP1:%.*]] = load half, ptr [[RETVAL_ASCAST]], align 2
+// UNSAFE-NEXT: ret half [[TMP1]]
+//
+_Float16 test__Float16_post_inc()
+{
+ static _Atomic _Float16 n;
+ return n++;
+}
+
+// SAFE-LABEL: define dso_local half @test__Float16_post_dc(
+// SAFE-SAME: ) #[[ATTR0]] {
+// SAFE-NEXT: [[ENTRY:.*:]]
+// SAFE-NEXT: [[RETVAL:%.*]] = alloca half, align 2, addrspace(5)
+// SAFE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// SAFE-NEXT: [[TMP0:%.*]] = atomicrmw fsub ptr addrspacecast (ptr addrspace(1) @test__Float16_post_dc.n to ptr), float 1.000000e+00 seq_cst, align 2
+// SAFE-NEXT: store float [[TMP0]], ptr [[RETVAL_ASCAST]], align 2...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/96906