[PATCH] D43350: [AMDGPU] Combine adjacent waitcounts in a single strongest wait
Stanislav Mekhanoshin via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Thu Feb 15 14:06:13 PST 2018
This revision was automatically updated to reflect the committed changes.
Closed by commit rL325299: [AMDGPU] Combine adjacent waitcounts in a single strongest wait (authored by rampitec).
Changed prior to commit:
https://reviews.llvm.org/D43350?vs=134497&id=134504#toc
Repository:
rL LLVM
https://reviews.llvm.org/D43350
Files:
llvm/trunk/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.s.waitcnt.ll
Index: llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.s.waitcnt.ll
===================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.s.waitcnt.ll
+++ llvm/trunk/test/CodeGen/AMDGPU/llvm.amdgcn.s.waitcnt.ll
@@ -21,8 +21,7 @@
; CHECK-NOT: s_waitcnt
; CHECK: image_load
; CHECK-NEXT: v_lshlrev_b32
-; CHECK-NEXT: s_waitcnt
-; CHECK: s_waitcnt vmcnt(0){{$}}
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0){{$}}
; CHECK-NEXT: image_store
define amdgpu_ps void @test2(<8 x i32> inreg %rsrc, i32 %c) {
%t = call <4 x float> @llvm.amdgcn.image.load.v4f32.i32.v8i32(i32 %c, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0)
Index: llvm/trunk/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
===================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ llvm/trunk/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -406,6 +406,8 @@
MachineBasicBlock *loopBottom(const MachineLoop *Loop);
void insertWaitcntInBlock(MachineFunction &MF, MachineBasicBlock &Block);
void insertWaitcntBeforeCF(MachineBasicBlock &Block, MachineInstr *Inst);
+ bool isWaitcntStronger(unsigned LHS, unsigned RHS);
+ unsigned combineWaitcnt(unsigned LHS, unsigned RHS);
};
} // end anonymous namespace
@@ -789,6 +791,29 @@
!MI.getOperand(1).isUndef();
}
+/// \brief Given wait count encodings checks if LHS is at least as strong as RHS: returns true only when none of the vmcnt, lgkmcnt and expcnt values decoded from LHS exceeds the matching value decoded from RHS (identical encodings therefore also return true).
+bool SIInsertWaitcnts::isWaitcntStronger(unsigned LHS, unsigned RHS) {
+ if (AMDGPU::decodeVmcnt(IV, LHS) > AMDGPU::decodeVmcnt(IV, RHS))
+ return false; // LHS has the larger vmcnt, so it is weaker on that counter.
+ if (AMDGPU::decodeLgkmcnt(IV, LHS) > AMDGPU::decodeLgkmcnt(IV, RHS))
+ return false; // LHS has the larger lgkmcnt, so it is weaker on that counter.
+ if (AMDGPU::decodeExpcnt(IV, LHS) > AMDGPU::decodeExpcnt(IV, RHS))
+ return false; // LHS has the larger expcnt, so it is weaker on that counter.
+ return true; // Every LHS counter is <= its RHS counterpart.
+}
+
+/// \brief Given wait count encodings create a new encoding which is stronger
+/// or equal to both, built by taking the minimum of each decoded counter (vmcnt, lgkmcnt, expcnt) across LHS and RHS and re-encoding the result.
+unsigned SIInsertWaitcnts::combineWaitcnt(unsigned LHS, unsigned RHS) {
+ unsigned VmCnt = std::min(AMDGPU::decodeVmcnt(IV, LHS),
+ AMDGPU::decodeVmcnt(IV, RHS));
+ unsigned LgkmCnt = std::min(AMDGPU::decodeLgkmcnt(IV, LHS),
+ AMDGPU::decodeLgkmcnt(IV, RHS));
+ unsigned ExpCnt = std::min(AMDGPU::decodeExpcnt(IV, LHS),
+ AMDGPU::decodeExpcnt(IV, RHS));
+ return AMDGPU::encodeWaitcnt(IV, VmCnt, ExpCnt, LgkmCnt); // Argument order here is vmcnt, expcnt, lgkmcnt - not the order the locals are computed in above.
+}
+
/// \brief Generate s_waitcnt instruction to be placed before cur_Inst.
/// Instructions of a given type are returned in order,
/// but instructions of different types can complete out of order.
@@ -1134,18 +1159,30 @@
// whomever. e.g., for memory model, inserted the prev waitcnt really
// wants it there.
bool insertSWaitInst = true;
- if (MI.getIterator() != MI.getParent()->begin()) {
- MachineInstr *MIPrevInst = &*std::prev(MI.getIterator());
- if (MIPrevInst &&
- MIPrevInst->getOpcode() == AMDGPU::S_WAITCNT &&
- MIPrevInst->getOperand(0).getImm() == Enc) {
- insertSWaitInst = false;
+ for (MachineBasicBlock::iterator I = MI.getIterator(),
+ B = MI.getParent()->begin();
+ insertSWaitInst && I != B; --I) {
+ if (I == MI.getIterator())
+ continue;
+
+ switch (I->getOpcode()) {
+ case AMDGPU::S_WAITCNT:
+ if (isWaitcntStronger(I->getOperand(0).getImm(), Enc))
+ insertSWaitInst = false;
+ else if (!OldWaitcnt) {
+ OldWaitcnt = &*I;
+ Enc = combineWaitcnt(I->getOperand(0).getImm(), Enc);
+ }
+ break;
+ // TODO: skip over instructions which never require wait.
}
+ break;
}
if (insertSWaitInst) {
if (OldWaitcnt && OldWaitcnt->getOpcode() == AMDGPU::S_WAITCNT) {
OldWaitcnt->getOperand(0).setImm(Enc);
- MI.getParent()->insert(MI, OldWaitcnt);
+ if (!OldWaitcnt->getParent())
+ MI.getParent()->insert(MI, OldWaitcnt);
DEBUG(dbgs() << "updateWaitcntInBlock\n"
<< "Old Instr: " << MI << '\n'
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D43350.134504.patch
Type: text/x-patch
Size: 4142 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20180215/80cd77b8/attachment.bin>
More information about the llvm-commits mailing list