[llvm] [AMDGPU] Handle amdgpu.last.use metadata (PR #83816)
Mirko Brkušanin via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 4 06:14:44 PST 2024
https://github.com/mbrkusanin updated https://github.com/llvm/llvm-project/pull/83816
>From 6a158a8a0d297b544b01beb393e101b3ceb5fe78 Mon Sep 17 00:00:00 2001
From: Mirko Brkusanin <Mirko.Brkusanin at amd.com>
Date: Mon, 4 Mar 2024 11:03:10 +0100
Subject: [PATCH 1/2] [AMDGPU] Handle amdgpu.last.use metadata
Convert !amdgpu.last.use metadata into a MachineMemOperand flag for last use
and handle it in SIMemoryLegalizer similarly to nontemporal and volatile.
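For illustration, here is a minimal, hedged sketch (not part of the patch) of
how a frontend could attach this marker through the C++ API; the tests below
express the same thing in textual IR as !amdgpu.last.use !{}:

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Metadata.h"

// Sketch: tag a load as a last use. An empty MDNode suffices; only the
// presence of the metadata kind is checked later in the backend.
static llvm::LoadInst *emitLastUseLoad(llvm::IRBuilder<> &B, llvm::Value *Ptr) {
  llvm::LoadInst *LI = B.CreateLoad(B.getInt32Ty(), Ptr);
  LI->setMetadata("amdgpu.last.use", llvm::MDNode::get(B.getContext(), {}));
  return LI;
}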
---
llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 7 +-
llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp | 56 ++++++++++++++-
.../AMDGPU/memory-legalizer-flat-lastuse.ll | 66 +++++++++++++++++
.../AMDGPU/memory-legalizer-global-lastuse.ll | 65 +++++++++++++++++
.../memory-legalizer-private-lastuse.ll | 70 +++++++++++++++++++
5 files changed, 259 insertions(+), 5 deletions(-)
create mode 100644 llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-lastuse.ll
create mode 100644 llvm/test/CodeGen/AMDGPU/memory-legalizer-global-lastuse.ll
create mode 100644 llvm/test/CodeGen/AMDGPU/memory-legalizer-private-lastuse.ll
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 9476c33acc34ac..047ca258168035 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -16197,9 +16197,12 @@ bool SITargetLowering::isReassocProfitable(MachineRegisterInfo &MRI,
MachineMemOperand::Flags
SITargetLowering::getTargetMMOFlags(const Instruction &I) const {
// Propagate metadata set by AMDGPUAnnotateUniformValues to the MMO of a load.
+ MachineMemOperand::Flags Flags = MachineMemOperand::MONone;
if (I.getMetadata("amdgpu.noclobber"))
- return MONoClobber;
- return MachineMemOperand::MONone;
+ Flags |= MONoClobber;
+ if (I.getMetadata("amdgpu.last.use"))
+ Flags |= MOLastUse;
+ return Flags;
}
bool SITargetLowering::checkForPhysRegDependency(
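For context, MOLastUse is an AMDGPU target-specific MMO flag. A hedged sketch
of how such a flag is declared, by analogy with MONoClobber in SIInstrInfo.h
(the exact MOTargetFlag slot is an assumption, not taken from this patch):

// Target flags live in the MachineMemOperand::MOTargetFlag* slots;
// MONoClobber already occupies one, so MOLastUse would take the next.
static const llvm::MachineMemOperand::Flags MOLastUse =
    llvm::MachineMemOperand::MOTargetFlag2;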
diff --git a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
index 73c23f0f987c3d..ff3e6bc9f8b08f 100644
--- a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
@@ -98,6 +98,7 @@ class SIMemOpInfo final {
bool IsCrossAddressSpaceOrdering = false;
bool IsVolatile = false;
bool IsNonTemporal = false;
+ bool IsLastUse = false;
SIMemOpInfo(AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent,
SIAtomicScope Scope = SIAtomicScope::SYSTEM,
@@ -107,13 +108,15 @@ class SIMemOpInfo final {
AtomicOrdering FailureOrdering =
AtomicOrdering::SequentiallyConsistent,
bool IsVolatile = false,
- bool IsNonTemporal = false)
+ bool IsNonTemporal = false,
+ bool IsLastUse = false)
: Ordering(Ordering), FailureOrdering(FailureOrdering),
Scope(Scope), OrderingAddrSpace(OrderingAddrSpace),
InstrAddrSpace(InstrAddrSpace),
IsCrossAddressSpaceOrdering(IsCrossAddressSpaceOrdering),
IsVolatile(IsVolatile),
- IsNonTemporal(IsNonTemporal) {
+ IsNonTemporal(IsNonTemporal),
+ IsLastUse(IsLastUse) {
if (Ordering == AtomicOrdering::NotAtomic) {
assert(Scope == SIAtomicScope::NONE &&
@@ -201,6 +204,12 @@ class SIMemOpInfo final {
return IsNonTemporal;
}
+ /// \returns True if the memory access of the machine instruction used to
+ /// create this SIMemOpInfo is a last use, false otherwise.
+ bool isLastUse() const {
+ return IsLastUse;
+ }
+
/// \returns True if ordering constraint of the machine instruction used to
/// create this SIMemOpInfo is unordered or higher, false otherwise.
bool isAtomic() const {
@@ -316,6 +325,12 @@ class SICacheControl {
return false;
};
+ /// Update memory instruction \p MI to indicate it is a last use. Return true
+ /// iff the instruction was modified.
+ virtual bool enableLastUse(MachineInstr &MI, bool IsLastUse) const {
+ return false;
+ }
+
/// Inserts any necessary instructions at position \p Pos relative
/// to instruction \p MI to ensure memory instructions before \p Pos of kind
/// \p Op associated with address spaces \p AddrSpace have completed. Used
@@ -592,6 +607,10 @@ class SIGfx12CacheControl : public SIGfx11CacheControl {
// MI. \returns Returns true if \p MI is modified, false otherwise.
bool setScope(const MachineBasicBlock::iterator MI,
AMDGPU::CPol::CPol Value) const;
+ // Checks if the CPol operand is present in instruction \p MI and if the
+ // current scope policy is the same as \p Value.
+ bool isScope(const MachineBasicBlock::iterator MI,
+ AMDGPU::CPol::CPol Value) const;
// Stores with system scope (SCOPE_SYS) need to wait for:
// - loads or atomics(returning) - wait for {LOAD|SAMPLE|BVH|KM}CNT==0
@@ -618,6 +637,9 @@ class SIGfx12CacheControl : public SIGfx11CacheControl {
bool IsNonTemporal) const override;
bool expandSystemScopeStore(MachineBasicBlock::iterator &MI) const override;
+
+ bool enableLastUse(MachineInstr &MI,
+ bool IsLastUse) const override;
};
class SIMemoryLegalizer final : public MachineFunctionPass {
@@ -745,12 +767,14 @@ std::optional<SIMemOpInfo> SIMemOpAccess::constructFromMIWithMMO(
SIAtomicAddrSpace InstrAddrSpace = SIAtomicAddrSpace::NONE;
bool IsNonTemporal = true;
bool IsVolatile = false;
+ bool IsLastUse = false;
// Validator should check whether or not MMOs cover the entire set of
// locations accessed by the memory instruction.
for (const auto &MMO : MI->memoperands()) {
IsNonTemporal &= MMO->isNonTemporal();
IsVolatile |= MMO->isVolatile();
+ IsLastUse |= MMO->getFlags() & MOLastUse;
InstrAddrSpace |=
toSIAtomicAddrSpace(MMO->getPointerInfo().getAddrSpace());
AtomicOrdering OpOrdering = MMO->getSuccessOrdering();
@@ -792,7 +816,7 @@ std::optional<SIMemOpInfo> SIMemOpAccess::constructFromMIWithMMO(
}
return SIMemOpInfo(Ordering, Scope, OrderingAddrSpace, InstrAddrSpace,
IsCrossAddressSpaceOrdering, FailureOrdering, IsVolatile,
- IsNonTemporal);
+ IsNonTemporal, IsLastUse);
}
std::optional<SIMemOpInfo>
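The aggregation in that loop is deliberately asymmetric; a standalone sketch
of the rule, using only accessors that appear in this file:

// Nontemporal must hold for every memoperand (AND-accumulated), while
// volatile and last-use are sticky if any memoperand carries them (OR).
bool IsNonTemporal = true, IsVolatile = false, IsLastUse = false;
for (const llvm::MachineMemOperand *MMO : MI->memoperands()) {
  IsNonTemporal &= MMO->isNonTemporal();
  IsVolatile |= MMO->isVolatile();
  IsLastUse |= (MMO->getFlags() & MOLastUse) != 0;
}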
@@ -2209,6 +2233,15 @@ bool SIGfx12CacheControl::setScope(const MachineBasicBlock::iterator MI,
return false;
}
+bool SIGfx12CacheControl::isScope(const MachineBasicBlock::iterator MI,
+ AMDGPU::CPol::CPol Value) const {
+ MachineOperand *CPol = TII->getNamedOperand(*MI, OpName::cpol);
+ if (!CPol)
+ return false;
+
+ return (CPol->getImm() & AMDGPU::CPol::SCOPE) == Value;
+}
+
bool SIGfx12CacheControl::insertWaitsBeforeSystemScopeStore(
const MachineBasicBlock::iterator MI) const {
// TODO: implement flag for frontend to give us a hint not to insert waits.
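A hypothetical illustration of the masked comparison in isScope(): the cpol
immediate packs several fields, and AMDGPU::CPol::SCOPE isolates just the
scope bits before comparing:

// Sketch: the volatile path sets SCOPE_SYS in cpol; isScope() recovers it.
uint64_t Imm = AMDGPU::CPol::SCOPE_SYS;             // e.g. from a volatile load
bool AtSys = (Imm & AMDGPU::CPol::SCOPE) == AMDGPU::CPol::SCOPE_SYS; // true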
@@ -2415,6 +2448,16 @@ bool SIGfx12CacheControl::enableVolatileAndOrNonTemporal(
return Changed;
}
+bool SIGfx12CacheControl::enableLastUse(MachineInstr &MI,
+ bool IsLastUse) const {
+ assert(MI.mayLoad() && !MI.mayStore());
+
+ if (IsLastUse && !isScope(MI, AMDGPU::CPol::SCOPE_SYS))
+ return setTH(MI, AMDGPU::CPol::TH_LU);;
+
+ return false;
+}
+
bool SIGfx12CacheControl::expandSystemScopeStore(
MachineBasicBlock::iterator &MI) const {
MachineOperand *CPol = TII->getNamedOperand(*MI, OpName::cpol);
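enableLastUse defers to setTH(), which already exists in this file. A hedged,
paraphrased sketch of its shape (not verbatim from the tree): it rewrites only
the temporal-hint bits of the cpol operand and reports whether anything
changed. The !isScope(MI, SCOPE_SYS) guard presumably keeps TH_LU from being
applied on top of the system-scope policy that volatile handling has already
forced onto the same operand.

// Sketch of the setTH() pattern assumed by enableLastUse() above.
static bool setTHSketch(const llvm::MachineBasicBlock::iterator MI,
                        llvm::AMDGPU::CPol::CPol Value,
                        const llvm::SIInstrInfo *TII) {
  llvm::MachineOperand *CPol =
      TII->getNamedOperand(*MI, llvm::AMDGPU::OpName::cpol);
  if (!CPol)
    return false;
  uint64_t NewTH = Value & llvm::AMDGPU::CPol::TH;  // keep only the TH field
  if ((CPol->getImm() & llvm::AMDGPU::CPol::TH) == NewTH)
    return false;                                   // already set; no change
  CPol->setImm((CPol->getImm() & ~uint64_t(llvm::AMDGPU::CPol::TH)) | NewTH);
  return true;
}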
@@ -2471,12 +2514,19 @@ bool SIMemoryLegalizer::expandLoad(const SIMemOpInfo &MOI,
return Changed;
}
+ // enableVolatileAndOrNonTemporal can insert instructions and advance the
+ // iterator MI, so keep a reference to the original instruction for last use.
+ MachineInstr &Inst = *MI;
+
// Atomic instructions already bypass caches to the scope specified by the
// SyncScope operand. Only non-atomic volatile and nontemporal instructions
// need additional treatment.
Changed |= CC->enableVolatileAndOrNonTemporal(MI, MOI.getInstrAddrSpace(),
SIMemOp::LOAD, MOI.isVolatile(),
MOI.isNonTemporal());
+
+ Changed |= CC->enableLastUse(Inst, MOI.isLastUse());
+
return Changed;
}
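The Inst reference above exists purely for iterator stability; a tiny hedged
sketch of the idiom (the surrounding calls are hypothetical placeholders):

// Sketch: capture a reference to the instruction itself before any call
// that may insert instructions and advance the iterator MI.
static void lastUseAfterExpansion(llvm::MachineBasicBlock::iterator MI) {
  llvm::MachineInstr &Inst = *MI; // stable reference to the original load
  // e.g. enableVolatileAndOrNonTemporal(MI, ...) may advance MI here
  (void)Inst; // Inst still names the original instruction afterwards
}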
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-lastuse.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-lastuse.ll
new file mode 100644
index 00000000000000..e3ce16fe741f7b
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-lastuse.ll
@@ -0,0 +1,66 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12,GFX12-WGP %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX12,GFX12-CU %s
+
+define amdgpu_kernel void @flat_last_use_load_0(ptr %in, ptr %out) {
+; GFX12-LABEL: flat_last_use_load_0:
+; GFX12: ; %bb.0: ; %entry
+; GFX12-NEXT: s_load_b128 s[0:3], s[0:1], 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
+; GFX12-NEXT: flat_load_b32 v2, v[0:1] th:TH_LOAD_LU
+; GFX12-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: flat_store_b32 v[0:1], v2
+; GFX12-NEXT: s_endpgm
+entry:
+ %val = load i32, ptr %in, align 4, !amdgpu.last.use !{}
+ store i32 %val, ptr %out
+ ret void
+}
+
+define amdgpu_kernel void @flat_last_use_load_1(ptr %in, ptr %out) {
+; GFX12-LABEL: flat_last_use_load_1:
+; GFX12: ; %bb.0: ; %entry
+; GFX12-NEXT: s_load_b128 s[0:3], s[0:1], 0x0
+; GFX12-NEXT: v_lshlrev_b32_e32 v0, 2, v0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX12-NEXT: v_add_co_u32 v0, s0, s0, v0
+; GFX12-NEXT: v_add_co_ci_u32_e64 v1, null, s1, 0, s0
+; GFX12-NEXT: flat_load_b32 v2, v[0:1] th:TH_LOAD_LU
+; GFX12-NEXT: v_dual_mov_b32 v1, s3 :: v_dual_mov_b32 v0, s2
+; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX12-NEXT: flat_store_b32 v[0:1], v2
+; GFX12-NEXT: s_endpgm
+entry:
+ %tid = call i32 @llvm.amdgcn.workitem.id.x()
+ %val.gep = getelementptr inbounds i32, ptr %in, i32 %tid
+ %val = load i32, ptr %val.gep, align 4, !amdgpu.last.use !{}
+ store i32 %val, ptr %out
+ ret void
+}
+
+define amdgpu_kernel void @flat_last_use_volatile_load(ptr %in, ptr %out) {
+; GFX12-LABEL: flat_last_use_volatile_load:
+; GFX12: ; %bb.0: ; %entry
+; GFX12-NEXT: s_load_b128 s[0:3], s[0:1], 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
+; GFX12-NEXT: flat_load_b32 v2, v[0:1] scope:SCOPE_SYS
+; GFX12-NEXT: s_wait_loadcnt 0x0
+; GFX12-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
+; GFX12-NEXT: s_wait_dscnt 0x0
+; GFX12-NEXT: flat_store_b32 v[0:1], v2
+; GFX12-NEXT: s_endpgm
+entry:
+ %val = load volatile i32, ptr %in, align 4, !amdgpu.last.use !{}
+ store i32 %val, ptr %out
+ ret void
+}
+
+!0 = !{i32 1}
+declare i32 @llvm.amdgcn.workitem.id.x()
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GFX12-CU: {{.*}}
+; GFX12-WGP: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-lastuse.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-lastuse.ll
new file mode 100644
index 00000000000000..56b1a366708127
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-lastuse.ll
@@ -0,0 +1,65 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12,GFX12-WGP %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX12,GFX12-CU %s
+
+define amdgpu_kernel void @global_last_use_load_0(ptr addrspace(1) %in, ptr addrspace(1) %out) {
+; GFX12-LABEL: global_last_use_load_0:
+; GFX12: ; %bb.0: ; %entry
+; GFX12-NEXT: s_load_b128 s[0:3], s[0:1], 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: s_load_b32 s0, s[0:1], 0x0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s0
+; GFX12-NEXT: global_store_b32 v0, v1, s[2:3]
+; GFX12-NEXT: s_nop 0
+; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX12-NEXT: s_endpgm
+entry:
+ %val = load i32, ptr addrspace(1) %in, align 4, !amdgpu.last.use !{}
+ store i32 %val, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_kernel void @global_last_use_load_1(ptr addrspace(1) %in, ptr addrspace(1) %out) {
+; GFX12-LABEL: global_last_use_load_1:
+; GFX12: ; %bb.0: ; %entry
+; GFX12-NEXT: s_load_b128 s[0:3], s[0:1], 0x0
+; GFX12-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_lshlrev_b32 v0, 2, v0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: global_load_b32 v0, v0, s[0:1] th:TH_LOAD_LU
+; GFX12-NEXT: s_wait_loadcnt 0x0
+; GFX12-NEXT: global_store_b32 v1, v0, s[2:3]
+; GFX12-NEXT: s_nop 0
+; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX12-NEXT: s_endpgm
+entry:
+ %tid = call i32 @llvm.amdgcn.workitem.id.x()
+ %val.gep = getelementptr inbounds i32, ptr addrspace(1) %in, i32 %tid
+ %val = load i32, ptr addrspace(1) %val.gep, align 4, !amdgpu.last.use !{}
+ store i32 %val, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_kernel void @global_last_use_volatile_load(ptr addrspace(1) %in, ptr addrspace(1) %out) {
+; GFX12-LABEL: global_last_use_volatile_load:
+; GFX12: ; %bb.0: ; %entry
+; GFX12-NEXT: s_load_b128 s[0:3], s[0:1], 0x0
+; GFX12-NEXT: v_mov_b32_e32 v0, 0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: global_load_b32 v1, v0, s[0:1] scope:SCOPE_SYS
+; GFX12-NEXT: s_wait_loadcnt 0x0
+; GFX12-NEXT: global_store_b32 v0, v1, s[2:3]
+; GFX12-NEXT: s_nop 0
+; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX12-NEXT: s_endpgm
+entry:
+ %val = load volatile i32, ptr addrspace(1) %in, align 4, !amdgpu.last.use !{}
+ store i32 %val, ptr addrspace(1) %out
+ ret void
+}
+
+!0 = !{i32 1}
+declare i32 @llvm.amdgcn.workitem.id.x()
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GFX12-CU: {{.*}}
+; GFX12-WGP: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-lastuse.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-lastuse.ll
new file mode 100644
index 00000000000000..f54e7819e1b56b
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-private-lastuse.ll
@@ -0,0 +1,70 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX12,GFX12-WGP %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 -mattr=+cumode < %s | FileCheck --check-prefixes=GFX12,GFX12-CU %s
+
+define amdgpu_kernel void @private_last_use_load_0(ptr addrspace(5) %in, ptr addrspace(1) %out) {
+; GFX12-LABEL: private_last_use_load_0:
+; GFX12: ; %bb.0: ; %entry
+; GFX12-NEXT: s_clause 0x1
+; GFX12-NEXT: s_load_b32 s2, s[0:1], 0x0
+; GFX12-NEXT: s_load_b64 s[0:1], s[0:1], 0x8
+; GFX12-NEXT: v_mov_b32_e32 v1, 0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: scratch_load_b32 v0, off, s2 th:TH_LOAD_LU
+; GFX12-NEXT: s_wait_loadcnt 0x0
+; GFX12-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX12-NEXT: s_nop 0
+; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX12-NEXT: s_endpgm
+entry:
+ %val = load i32, ptr addrspace(5) %in, align 4, !amdgpu.last.use !{}
+ store i32 %val, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_kernel void @private_last_use_load_1(ptr addrspace(5) %in, ptr addrspace(1) %out) {
+; GFX12-LABEL: private_last_use_load_1:
+; GFX12: ; %bb.0: ; %entry
+; GFX12-NEXT: s_load_b32 s2, s[0:1], 0x0
+; GFX12-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_lshlrev_b32 v0, 2, v0
+; GFX12-NEXT: s_load_b64 s[0:1], s[0:1], 0x8
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: scratch_load_b32 v0, v0, s2 th:TH_LOAD_LU
+; GFX12-NEXT: s_wait_loadcnt 0x0
+; GFX12-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX12-NEXT: s_nop 0
+; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX12-NEXT: s_endpgm
+entry:
+ %tid = call i32 @llvm.amdgcn.workitem.id.x()
+ %val.gep = getelementptr inbounds i32, ptr addrspace(5) %in, i32 %tid
+ %val = load i32, ptr addrspace(5) %val.gep, align 4, !amdgpu.last.use !{}
+ store i32 %val, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_kernel void @private_last_use_volatile_load(ptr addrspace(5) %in, ptr addrspace(1) %out) {
+; GFX12-LABEL: private_last_use_volatile_load:
+; GFX12: ; %bb.0: ; %entry
+; GFX12-NEXT: s_clause 0x1
+; GFX12-NEXT: s_load_b32 s2, s[0:1], 0x0
+; GFX12-NEXT: s_load_b64 s[0:1], s[0:1], 0x8
+; GFX12-NEXT: v_mov_b32_e32 v1, 0
+; GFX12-NEXT: s_wait_kmcnt 0x0
+; GFX12-NEXT: scratch_load_b32 v0, off, s2 scope:SCOPE_SYS
+; GFX12-NEXT: s_wait_loadcnt 0x0
+; GFX12-NEXT: global_store_b32 v1, v0, s[0:1]
+; GFX12-NEXT: s_nop 0
+; GFX12-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX12-NEXT: s_endpgm
+entry:
+ %val = load volatile i32, ptr addrspace(5) %in, align 4, !amdgpu.last.use !{}
+ store i32 %val, ptr addrspace(1) %out
+ ret void
+}
+
+!0 = !{i32 1}
+declare i32 @llvm.amdgcn.workitem.id.x()
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GFX12-CU: {{.*}}
+; GFX12-WGP: {{.*}}
>From f0f3ffa2487fca54e9b9395656fbff1cf39b55d9 Mon Sep 17 00:00:00 2001
From: Mirko Brkusanin <Mirko.Brkusanin at amd.com>
Date: Mon, 4 Mar 2024 11:26:09 +0100
Subject: [PATCH 2/2] clang-format
---
llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp | 40 +++++++++-----------
1 file changed, 17 insertions(+), 23 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
index ff3e6bc9f8b08f..534a9380b8b77b 100644
--- a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
@@ -100,23 +100,20 @@ class SIMemOpInfo final {
bool IsNonTemporal = false;
bool IsLastUse = false;
- SIMemOpInfo(AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent,
- SIAtomicScope Scope = SIAtomicScope::SYSTEM,
- SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::ATOMIC,
- SIAtomicAddrSpace InstrAddrSpace = SIAtomicAddrSpace::ALL,
- bool IsCrossAddressSpaceOrdering = true,
- AtomicOrdering FailureOrdering =
- AtomicOrdering::SequentiallyConsistent,
- bool IsVolatile = false,
- bool IsNonTemporal = false,
- bool IsLastUse = false)
- : Ordering(Ordering), FailureOrdering(FailureOrdering),
- Scope(Scope), OrderingAddrSpace(OrderingAddrSpace),
- InstrAddrSpace(InstrAddrSpace),
- IsCrossAddressSpaceOrdering(IsCrossAddressSpaceOrdering),
- IsVolatile(IsVolatile),
- IsNonTemporal(IsNonTemporal),
- IsLastUse(IsLastUse) {
+ SIMemOpInfo(
+ AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent,
+ SIAtomicScope Scope = SIAtomicScope::SYSTEM,
+ SIAtomicAddrSpace OrderingAddrSpace = SIAtomicAddrSpace::ATOMIC,
+ SIAtomicAddrSpace InstrAddrSpace = SIAtomicAddrSpace::ALL,
+ bool IsCrossAddressSpaceOrdering = true,
+ AtomicOrdering FailureOrdering = AtomicOrdering::SequentiallyConsistent,
+ bool IsVolatile = false, bool IsNonTemporal = false,
+ bool IsLastUse = false)
+ : Ordering(Ordering), FailureOrdering(FailureOrdering), Scope(Scope),
+ OrderingAddrSpace(OrderingAddrSpace), InstrAddrSpace(InstrAddrSpace),
+ IsCrossAddressSpaceOrdering(IsCrossAddressSpaceOrdering),
+ IsVolatile(IsVolatile), IsNonTemporal(IsNonTemporal),
+ IsLastUse(IsLastUse) {
if (Ordering == AtomicOrdering::NotAtomic) {
assert(Scope == SIAtomicScope::NONE &&
@@ -206,9 +203,7 @@ class SIMemOpInfo final {
/// \returns True if the memory access of the machine instruction used to
/// create this SIMemOpInfo is a last use, false otherwise.
- bool isLastUse() const {
- return IsLastUse;
- }
+ bool isLastUse() const { return IsLastUse; }
/// \returns True if ordering constraint of the machine instruction used to
/// create this SIMemOpInfo is unordered or higher, false otherwise.
@@ -638,8 +633,7 @@ class SIGfx12CacheControl : public SIGfx11CacheControl {
bool expandSystemScopeStore(MachineBasicBlock::iterator &MI) const override;
- bool enableLastUse(MachineInstr &MI,
- bool IsLastUse) const override;
+ bool enableLastUse(MachineInstr &MI, bool IsLastUse) const override;
};
class SIMemoryLegalizer final : public MachineFunctionPass {
@@ -2453,7 +2447,7 @@ bool SIGfx12CacheControl::enableLastUse(MachineInstr &MI,
assert(MI.mayLoad() && !MI.mayStore());
if (IsLastUse && !isScope(MI, AMDGPU::CPol::SCOPE_SYS))
- return setTH(MI, AMDGPU::CPol::TH_LU);;
+ return setTH(MI, AMDGPU::CPol::TH_LU);
return false;
}