[PATCH] D101380: [AMDGPU] Move insertion of function entry waitcnt later
Austin Kerbow via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Wed May 5 17:59:32 PDT 2021
This revision was automatically updated to reflect the committed changes.
Closed by commit rG6617a5a5eaee: [AMDGPU] Move insertion of function entry waitcnt later (authored by kerbowa).
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D101380/new/
https://reviews.llvm.org/D101380
Files:
llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
llvm/test/CodeGen/AMDGPU/waitcnt-preexisting.mir
Index: llvm/test/CodeGen/AMDGPU/waitcnt-preexisting.mir
===================================================================
--- llvm/test/CodeGen/AMDGPU/waitcnt-preexisting.mir
+++ llvm/test/CodeGen/AMDGPU/waitcnt-preexisting.mir
@@ -192,4 +192,17 @@
$vgpr0 = FLAT_LOAD_DWORD $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
S_WAITCNT 3952
KILL $vgpr0
+
+# Combine preexisting waitcnt with wait added to the start of a non-entry function.
+
+---
+name: test_waitcnt_preexisting_func_start
+body: |
+ bb.0:
+ ; GFX9-LABEL: name: test_waitcnt_preexisting_func_start
+ ; GFX9: S_WAITCNT 0
+ ; GFX9-NOT: S_WAITCNT 0
+ ; GFX9: S_ENDPGM 0
+ S_WAITCNT 0
+ S_ENDPGM 0
...
Index: llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -1600,6 +1600,28 @@
TrackedWaitcntSet.clear();
BlockInfos.clear();
+ bool Modified = false;
+
+ if (!MFI->isEntryFunction()) {
+ // Wait for any outstanding memory operations that the input registers may
+ // depend on. We can't track them and it's better to do the wait after the
+ // costly call sequence.
+
+ // TODO: Could insert earlier and schedule more liberally with operations
+ // that only use caller preserved registers.
+ MachineBasicBlock &EntryBB = MF.front();
+ MachineBasicBlock::iterator I = EntryBB.begin();
+ for (MachineBasicBlock::iterator E = EntryBB.end();
+ I != E && (I->isPHI() || I->isMetaInstruction()); ++I)
+ ;
+ BuildMI(EntryBB, I, DebugLoc(), TII->get(AMDGPU::S_WAITCNT)).addImm(0);
+ if (ST->hasVscnt())
+ BuildMI(EntryBB, I, DebugLoc(), TII->get(AMDGPU::S_WAITCNT_VSCNT))
+ .addReg(AMDGPU::SGPR_NULL, RegState::Undef)
+ .addImm(0);
+
+ Modified = true;
+ }
// Keep iterating over the blocks in reverse post order, inserting and
// updating s_waitcnt where needed, until a fix point is reached.
@@ -1607,7 +1629,6 @@
BlockInfos.insert({MBB, BlockInfo(MBB)});
std::unique_ptr<WaitcntBrackets> Brackets;
- bool Modified = false;
bool Repeat;
do {
Repeat = false;
@@ -1707,26 +1728,5 @@
}
}
- if (!MFI->isEntryFunction()) {
- // Wait for any outstanding memory operations that the input registers may
- // depend on. We can't track them and it's better to the wait after the
- // costly call sequence.
-
- // TODO: Could insert earlier and schedule more liberally with operations
- // that only use caller preserved registers.
- MachineBasicBlock &EntryBB = MF.front();
- MachineBasicBlock::iterator I = EntryBB.begin();
- for (MachineBasicBlock::iterator E = EntryBB.end();
- I != E && (I->isPHI() || I->isMetaInstruction()); ++I)
- ;
- BuildMI(EntryBB, I, DebugLoc(), TII->get(AMDGPU::S_WAITCNT)).addImm(0);
- if (ST->hasVscnt())
- BuildMI(EntryBB, I, DebugLoc(), TII->get(AMDGPU::S_WAITCNT_VSCNT))
- .addReg(AMDGPU::SGPR_NULL, RegState::Undef)
- .addImm(0);
-
- Modified = true;
- }
-
return Modified;
}
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D101380.343253.patch
Type: text/x-patch
Size: 3179 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20210506/c8d8c789/attachment.bin>
More information about the llvm-commits mailing list