[llvm] r325524 - [AMDGPU] Make note of existing waitcnt instrs; this is add-on work related to suppression of redundant waitcnt instrs. It is necessary to make note of these existing waitcnt instrs so that we do not fall into an infinite loop when handling loops. Also, [NFC] some minor code clean-up.

Mark Searles via llvm-commits llvm-commits at lists.llvm.org
Mon Feb 19 11:19:59 PST 2018


Author: msearles
Date: Mon Feb 19 11:19:59 2018
New Revision: 325524

URL: http://llvm.org/viewvc/llvm-project?rev=325524&view=rev
Log:
[AMDGPU] Make note of existing waitcnt instrs; this is add-on work related to suppression of redundant waitcnt instrs. It is necessary to make note of these existing waitcnt instrs so that we do not fall into an infinite loop when handling loops. Also, [NFC] some minor code clean-up.

Modified:
    llvm/trunk/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
    llvm/trunk/test/CodeGen/AMDGPU/waitcnt-no-redundant.mir

Modified: llvm/trunk/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInsertWaitcnts.cpp?rev=325524&r1=325523&r2=325524&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInsertWaitcnts.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInsertWaitcnts.cpp Mon Feb 19 11:19:59 2018
@@ -1125,7 +1125,8 @@ void SIInsertWaitcnts::generateSWaitCntI
     if (EmitSwaitcnt != 0) {
       MachineInstr *OldWaitcnt = ScoreBrackets->getWaitcnt();
       int Imm = (!OldWaitcnt) ? 0 : OldWaitcnt->getOperand(0).getImm();
-      if (!OldWaitcnt || (AMDGPU::decodeVmcnt(IV, Imm) !=
+      if (!OldWaitcnt ||
+          (AMDGPU::decodeVmcnt(IV, Imm) !=
                           (CntVal[VM_CNT] & AMDGPU::getVmcntBitMask(IV))) ||
           (AMDGPU::decodeExpcnt(IV, Imm) !=
            (CntVal[EXP_CNT] & AMDGPU::getExpcntBitMask(IV))) ||
@@ -1143,7 +1144,7 @@ void SIInsertWaitcnts::generateSWaitCntI
             ScoreBracket = BlockWaitcntBracketsMap[TBB].get();
           }
           ScoreBracket->setRevisitLoop(true);
-          DEBUG(dbgs() << "set-revisit: block"
+          DEBUG(dbgs() << "set-revisit: Block"
                        << ContainingLoop->getHeader()->getNumber() << '\n';);
         }
       }
@@ -1151,18 +1152,14 @@ void SIInsertWaitcnts::generateSWaitCntI
       // Update an existing waitcount, or make a new one.
       unsigned Enc = AMDGPU::encodeWaitcnt(IV, CntVal[VM_CNT],
                                            CntVal[EXP_CNT], CntVal[LGKM_CNT]);
-      // We don't (yet) track waitcnts that existed prior to the waitcnt
-      // pass (we just skip over them); because the waitcnt pass is ignorant
-      // of them, it may insert a redundant waitcnt. To avoid this, check
-      // the prev instr. If it and the to-be-inserted waitcnt are the
-      // same, keep the prev waitcnt and skip the insertion. We assume that
-      // whomever. e.g., for memory model, inserted the prev waitcnt really
-      // wants it there.
+      // We don't remove waitcnts that existed prior to the waitcnt
+      // pass. Check if the waitcnt to-be-inserted can be avoided
+      // or if the prev waitcnt can be updated.
       bool insertSWaitInst = true;
       for (MachineBasicBlock::iterator I = MI.getIterator(),
                                        B = MI.getParent()->begin();
            insertSWaitInst && I != B; --I) {
-        if (I ==  MI.getIterator())
+        if (I == MI.getIterator())
           continue;
 
         switch (I->getOpcode()) {
@@ -1585,15 +1582,16 @@ void SIInsertWaitcnts::insertWaitcntInBl
     MachineInstr &Inst = *Iter;
     // Remove any previously existing waitcnts.
     if (Inst.getOpcode() == AMDGPU::S_WAITCNT) {
-      // TODO: Register the old waitcnt and optimize the following waitcnts.
-      // Leaving the previously existing waitcnts is conservatively correct.
+      // Leave pre-existing waitcnts, but note their existence via setWaitcnt.
+      // Remove the waitcnt-pass-generated waitcnts; the pass will add them back
+      // as needed.
       if (!TrackedWaitcntSet.count(&Inst))
         ++Iter;
       else {
-        ScoreBrackets->setWaitcnt(&Inst);
         ++Iter;
         Inst.removeFromParent();
       }
+      ScoreBrackets->setWaitcnt(&Inst);
       continue;
     }
 
@@ -1684,8 +1682,8 @@ void SIInsertWaitcnts::insertWaitcntInBl
     if (WaitcntData->getIterCnt() > 2) {
       // To ensure convergence, need to make wait events at loop footer be no
       // more than those from the previous iteration.
-      // As a simplification, Instead of tracking individual scores and
-      // generate the precise wait count, just wait on 0.
+      // As a simplification, instead of tracking individual scores and
+      // generating the precise wait count, just wait on 0.
       bool HasPending = false;
       MachineInstr *SWaitInst = WaitcntData->getWaitcnt();
       for (enum InstCounterType T = VM_CNT; T < NUM_INST_CNTS;
@@ -1722,7 +1720,7 @@ void SIInsertWaitcnts::insertWaitcntInBl
 
         // Add this waitcnt to the block. It is either newly created or
         // created in previous iterations and added back since block traversal
-        // always remove waitcnt.
+        // always removes waitcnts.
         insertWaitcntBeforeCF(Block, SWaitInst);
         WaitcntData->setWaitcnt(SWaitInst);
       }
@@ -1788,7 +1786,7 @@ bool SIInsertWaitcnts::runOnMachineFunct
     if (ContainingLoop && ContainingLoop->getHeader() == &MBB && J < I &&
         (!BlockWaitcntProcessedSet.count(&MBB))) {
       BlockWaitcntBracketsMap[&MBB]->setRevisitLoop(true);
-      DEBUG(dbgs() << "set-revisit: block"
+      DEBUG(dbgs() << "set-revisit: Block"
                    << ContainingLoop->getHeader()->getNumber() << '\n';);
     }
 
@@ -1819,7 +1817,7 @@ bool SIInsertWaitcnts::runOnMachineFunct
         }
         LoopWaitcntData *WaitcntData = LoopWaitcntDataMap[ContainingLoop].get();
         WaitcntData->incIterCnt();
-        DEBUG(dbgs() << "revisit: block" << EntryBB->getNumber() << '\n';);
+        DEBUG(dbgs() << "revisit: Block" << EntryBB->getNumber() << '\n';);
         continue;
       } else {
         LoopWaitcntData *WaitcntData = LoopWaitcntDataMap[ContainingLoop].get();

Modified: llvm/trunk/test/CodeGen/AMDGPU/waitcnt-no-redundant.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/waitcnt-no-redundant.mir?rev=325524&r1=325523&r2=325524&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/waitcnt-no-redundant.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/waitcnt-no-redundant.mir Mon Feb 19 11:19:59 2018
@@ -1,24 +1,25 @@
 # RUN: llc -mtriple=amdgcn -verify-machineinstrs -run-pass si-insert-waitcnts -o - %s | FileCheck %s
 
 # Check that the waitcnt pass does *not* insert a redundant waitcnt instr.
-# In this testcase, ensure that pass does not insert redundant S_WAITCNT 127
-# or S_WAITCNT 3952
+# In this testcase, ensure that pass does not insert redundant S_WAITCNT 3952
 
 ...
 # CHECK-LABEL: name: waitcnt-no-redundant
-# CHECK: DS_READ_B64
-# CHECK-NEXT: S_WAITCNT 127
+# CHECK: S_WAITCNT 3952
 # CHECK-NEXT: FLAT_ATOMIC_CMPSWAP
 # CHECK-NEXT: S_WAITCNT 3952
-# CHECK-NEXT: BUFFER_WBINVL1_VOL
+# CHECK-NEXT: BUFFER_WBINVL1
 
 name: waitcnt-no-redundant
 body: |
-    bb.0:
-    renamable $vgpr0_vgpr1 = DS_READ_B64 killed renamable $vgpr0, 0, 0, implicit $m0, implicit $exec
-    S_WAITCNT 127
-    FLAT_ATOMIC_CMPSWAP killed renamable $vgpr0_vgpr1, killed renamable $vgpr3_vgpr4, 0, 0, implicit $exec, implicit $flat_scr
+  bb.0:
+    renamable $vgpr0 = V_MOV_B32_e32 0, implicit $exec
+
+  bb.1:
+    S_WAITCNT 3952
+    FLAT_ATOMIC_CMPSWAP undef renamable $vgpr0_vgpr1, renamable $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr
     S_WAITCNT 3952
-    BUFFER_WBINVL1_VOL implicit $exec
-    S_ENDPGM
+    BUFFER_WBINVL1 implicit $exec
+    S_BRANCH %bb.1
+
 ...




More information about the llvm-commits mailing list