[llvm] r304311 - [AMDGPU] Fix bugs in new waitcnt pass. Add test.
Mark Searles via llvm-commits
llvm-commits at lists.llvm.org
Wed May 31 09:44:23 PDT 2017
Author: msearles
Date: Wed May 31 11:44:23 2017
New Revision: 304311
URL: http://llvm.org/viewvc/llvm-project?rev=304311&view=rev
Log:
[AMDGPU] Fix bugs in new waitcnt pass. Add test.
- new waitcnt pass remains off by default; -enable-si-insert-waitcnts=1 to enable it
- fix handling of PERMUTE ops
- fix insertion of waitcnt instrs at function begin/end ( port of analogous code that was added to old waitcnt pass )
- add new test
Differential Revision: https://reviews.llvm.org/D33114
Added:
llvm/trunk/test/CodeGen/AMDGPU/waitcnt-permute.mir
Modified:
llvm/trunk/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
Modified: llvm/trunk/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInsertWaitcnts.cpp?rev=304311&r1=304310&r2=304311&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInsertWaitcnts.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInsertWaitcnts.cpp Wed May 31 11:44:23 2017
@@ -826,7 +826,8 @@ MachineInstr *SIInsertWaitcnts::generate
// NOTE: this could be improved with knowledge of all call sites or
// with knowledge of the called routines.
if (MI.getOpcode() == AMDGPU::RETURN ||
- MI.getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG) {
+ MI.getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG ||
+ MI.getOpcode() == AMDGPU::S_SETPC_B64_return) {
for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
T = (enum InstCounterType)(T + 1)) {
if (ScoreBrackets->getScoreUB(T) > ScoreBrackets->getScoreLB(T)) {
@@ -1149,8 +1150,10 @@ void SIInsertWaitcnts::updateEventWaitCn
// instruction, update the upper-bound of the appropriate counter's
// bracket and the destination operand scores.
// TODO: Use the (TSFlags & SIInstrFlags::LGKM_CNT) property everywhere.
- if (TII->isDS(Inst) && (Inst.mayLoad() || Inst.mayStore())) {
- if (TII->getNamedOperand(Inst, AMDGPU::OpName::gds)->getImm() != 0) {
+ uint64_t TSFlags = Inst.getDesc().TSFlags;
+ if (TII->isDS(Inst) && (TSFlags & SIInstrFlags::LGKM_CNT)) {
+ if (TII->getNamedOperand(Inst, AMDGPU::OpName::gds) &&
+ TII->getNamedOperand(Inst, AMDGPU::OpName::gds)->getImm() != 0) {
ScoreBrackets->updateByEvent(TII, TRI, MRI, GDS_ACCESS, Inst);
ScoreBrackets->updateByEvent(TII, TRI, MRI, GDS_GPR_LOCK, Inst);
} else {
@@ -1183,7 +1186,7 @@ void SIInsertWaitcnts::updateEventWaitCn
Inst.getOpcode() != AMDGPU::BUFFER_WBINVL1_VOL) {
ScoreBrackets->updateByEvent(TII, TRI, MRI, VMEM_ACCESS, Inst);
if ( // TODO: assumed yes -- target_info->MemWriteNeedsExpWait() &&
- (Inst.mayStore() || AMDGPU::getAtomicNoRetOp(Inst.getOpcode()))) {
+ (Inst.mayStore() || AMDGPU::getAtomicNoRetOp(Inst.getOpcode()) != -1)) {
ScoreBrackets->updateByEvent(TII, TRI, MRI, VMW_GPR_LOCK, Inst);
}
} else if (TII->isSMRD(Inst)) {
@@ -1715,6 +1718,7 @@ bool SIInsertWaitcnts::runOnMachineFunct
MRI = &MF.getRegInfo();
MLI = &getAnalysis<MachineLoopInfo>();
IV = AMDGPU::IsaInfo::getIsaVersion(ST->getFeatureBits());
+ const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
AMDGPUASI = ST->getAMDGPUAS();
HardwareLimits.VmcntMax = AMDGPU::getVmcntBitMask(IV);
@@ -1859,5 +1863,19 @@ bool SIInsertWaitcnts::runOnMachineFunct
}
}
+ if (!MFI->isEntryFunction()) {
+ // Wait for any outstanding memory operations that the input registers may
+ // depend on. We can't track them and it's better to to the wait after the
+ // costly call sequence.
+
+ // TODO: Could insert earlier and schedule more liberally with operations
+ // that only use caller preserved registers.
+ MachineBasicBlock &EntryBB = MF.front();
+ BuildMI(EntryBB, EntryBB.getFirstNonPHI(), DebugLoc(), TII->get(AMDGPU::S_WAITCNT))
+ .addImm(0);
+
+ Modified = true;
+ }
+
return Modified;
}
Added: llvm/trunk/test/CodeGen/AMDGPU/waitcnt-permute.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/waitcnt-permute.mir?rev=304311&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/waitcnt-permute.mir (added)
+++ llvm/trunk/test/CodeGen/AMDGPU/waitcnt-permute.mir Wed May 31 11:44:23 2017
@@ -0,0 +1,33 @@
+# RUN: llc -mtriple=amdgcn -mcpu=fiji -verify-machineinstrs -run-pass si-insert-waits -o - %s | FileCheck %s
+
+--- |
+ define float @waitcnt-permute(i32 %x, i32 %y) {
+ entry:
+ %0 = call i32 @llvm.amdgcn.ds.bpermute(i32 %x, i32 %y)
+ %1 = bitcast i32 %0 to float
+ %2 = fadd float 1.000000e+00, %1
+ ret float %2
+ }
+
+ declare i32 @llvm.amdgcn.ds.bpermute(i32, i32)
+
+...
+---
+# CHECK-LABEL: name: waitcnt-permute{{$}}
+# CHECK: DS_BPERMUTE_B32
+# CHECK-NEXT: S_WAITCNT 127
+
+name: waitcnt-permute
+liveins:
+ - { reg: '%vgpr0' }
+ - { reg: '%vgpr1' }
+ - { reg: '%sgpr30_sgpr31' }
+body: |
+ bb.0:
+ liveins: %vgpr0, %vgpr1, %sgpr30_sgpr31
+
+ %vgpr0 = DS_BPERMUTE_B32 killed %vgpr0, killed %vgpr1, 0, implicit %exec
+ %vgpr0 = V_ADD_F32_e32 1065353216, killed %vgpr0, implicit %exec
+ S_SETPC_B64_return killed %sgpr30_sgpr31, implicit killed %vgpr0
+
+...
More information about the llvm-commits
mailing list