[llvm] e5972f2 - [AMDGPU] Simplify VCCZ bug handling

Wed Oct 30 10:11:44 PDT 2019

Author: Jay Foad
Date: 2019-10-30T17:09:07Z
New Revision: e5972f2a04ee48a9190cd25f0d5b24cbca4d47f2

URL: https://github.com/llvm/llvm-project/commit/e5972f2a04ee48a9190cd25f0d5b24cbca4d47f2
DIFF: https://github.com/llvm/llvm-project/commit/e5972f2a04ee48a9190cd25f0d5b24cbca4d47f2.diff

LOG: [AMDGPU] Simplify VCCZ bug handling

Summary:
VCCZBugHandledSet was used to make sure we don't apply the same
workaround more than once to a single cbranch instruction, but it's not
necessary because the workaround involves inserting an s_waitcnt
instruction, which is enough for subsequent iterations to detect that no
further workaround is necessary.

Also beef up the test case to check that the workaround was only applied
once. I have also manually verified that the test still passes even if I
hack the big do-while loop in runOnMachineFunction to run a minimum of
five iterations.

Subscribers: arsenm, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D69621

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
    llvm/test/CodeGen/AMDGPU/smrd-vccz-bug.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index 6a688b70554d..14b12f901979 100644

--- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -372,7 +372,6 @@ class SIInsertWaitcnts : public MachineFunctionPass {
   AMDGPU::IsaVersion IV;
 
   DenseSet<MachineInstr *> TrackedWaitcntSet;
-  DenseSet<MachineInstr *> VCCZBugHandledSet;
 
   struct BlockInfo {
     MachineBasicBlock *MBB;
@@ -1388,8 +1387,7 @@ bool SIInsertWaitcnts::insertWaitcntInBlock(MachineFunction &MF,
     }
 
     bool VCCZBugWorkAround = false;
-    if (readsVCCZ(Inst) &&
-        (!VCCZBugHandledSet.count(&Inst))) {
+    if (readsVCCZ(Inst)) {
       if (ScoreBrackets.getScoreLB(LGKM_CNT) <
               ScoreBrackets.getScoreUB(LGKM_CNT) &&
           ScoreBrackets.hasPendingEvent(SMEM_ACCESS)) {
@@ -1431,7 +1429,6 @@ bool SIInsertWaitcnts::insertWaitcntInBlock(MachineFunction &MF,
               TII->get(ST->isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64),
               TRI->getVCC())
           .addReg(TRI->getVCC());
-      VCCZBugHandledSet.insert(&Inst);
       Modified = true;
     }
 
@@ -1471,7 +1468,6 @@ bool SIInsertWaitcnts::runOnMachineFunction(MachineFunction &MF) {
       RegisterEncoding.SGPR0 + HardwareLimits.NumSGPRsMax - 1;
 
   TrackedWaitcntSet.clear();
-  VCCZBugHandledSet.clear();
   RpotIdxMap.clear();
   BlockInfos.clear();
 

diff  --git a/llvm/test/CodeGen/AMDGPU/smrd-vccz-bug.ll b/llvm/test/CodeGen/AMDGPU/smrd-vccz-bug.ll
index 823785dc752d..146e631eebbf 100644
--- a/llvm/test/CodeGen/AMDGPU/smrd-vccz-bug.ll
+++ b/llvm/test/CodeGen/AMDGPU/smrd-vccz-bug.ll
@@ -1,13 +1,13 @@
 ; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VCCZ-BUG %s
 ; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VCCZ-BUG %s
-; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=NOVCCZ-BUG %s
+; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
 
 ; GCN-FUNC: {{^}}vccz_workaround:
 ; GCN: s_load_dword s{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0x0
 ; GCN: v_cmp_neq_f32_e64 {{[^,]*}}, s{{[0-9]+}}, 0{{$}}
 ; VCCZ-BUG: s_waitcnt lgkmcnt(0)
 ; VCCZ-BUG: s_mov_b64 vcc, vcc
-; NOVCCZ-BUG-NOT: s_mov_b64 vcc, vcc
+; GCN-NOT: s_mov_b64 vcc, vcc
 ; GCN: s_cbranch_vccnz [[EXIT:[0-9A-Za-z_]+]]
 ; GCN: buffer_store_dword
 ; GCN: [[EXIT]]:
@@ -28,6 +28,8 @@ endif:
 
 ; GCN-FUNC: {{^}}vccz_noworkaround:
 ; GCN: v_cmp_neq_f32_e32 vcc, 0, v{{[0-9]+}}
+; GCN-NOT: s_waitcnt lgkmcnt(0)
+; GCN-NOT: s_mov_b64 vcc, vcc
 ; GCN: s_cbranch_vccnz [[EXIT:[0-9A-Za-z_]+]]
 ; GCN: buffer_store_dword
 ; GCN: [[EXIT]]: