[PATCH] D69621: [AMDGPU] Simplify VCCZ bug handling

Wed Oct 30 09:24:00 PDT 2019

foad created this revision.
Herald added subscribers: hiraditya, t-tye, tpr, dstuttard, yaxunl, nhaehnle, wdng, jvesely, kzhuravl, arsenm.
Herald added a project: LLVM.

VCCZBugHandledSet was used to make sure we don't apply the same
workaround more than once to a single cbranch instruction. It is not
necessary, because the workaround involves inserting an s_waitcnt
instruction, and that alone is enough for subsequent iterations to
detect that no further workaround is needed.

Also strengthen the test case to check that the workaround is applied
only once. I have also manually verified that the test still passes even
when the main do-while loop in runOnMachineFunction is hacked to run a
minimum of five iterations.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D69621

Files:
  llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
  llvm/test/CodeGen/AMDGPU/smrd-vccz-bug.ll


Index: llvm/test/CodeGen/AMDGPU/smrd-vccz-bug.ll
===================================================================

--- llvm/test/CodeGen/AMDGPU/smrd-vccz-bug.ll
+++ llvm/test/CodeGen/AMDGPU/smrd-vccz-bug.ll
@@ -1,13 +1,13 @@
 ; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VCCZ-BUG %s
 ; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VCCZ-BUG %s
-; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=NOVCCZ-BUG %s
+; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
 
 ; GCN-FUNC: {{^}}vccz_workaround:
 ; GCN: s_load_dword s{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0x0
 ; GCN: v_cmp_neq_f32_e64 {{[^,]*}}, s{{[0-9]+}}, 0{{$}}
 ; VCCZ-BUG: s_waitcnt lgkmcnt(0)
 ; VCCZ-BUG: s_mov_b64 vcc, vcc
-; NOVCCZ-BUG-NOT: s_mov_b64 vcc, vcc
+; GCN-NOT: s_mov_b64 vcc, vcc
 ; GCN: s_cbranch_vccnz [[EXIT:[0-9A-Za-z_]+]]
 ; GCN: buffer_store_dword
 ; GCN: [[EXIT]]:
@@ -28,6 +28,8 @@
 
 ; GCN-FUNC: {{^}}vccz_noworkaround:
 ; GCN: v_cmp_neq_f32_e32 vcc, 0, v{{[0-9]+}}
+; GCN-NOT: s_waitcnt lgkmcnt(0)
+; GCN-NOT: s_mov_b64 vcc, vcc
 ; GCN: s_cbranch_vccnz [[EXIT:[0-9A-Za-z_]+]]
 ; GCN: buffer_store_dword
 ; GCN: [[EXIT]]:
Index: llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -372,7 +372,6 @@
   AMDGPU::IsaVersion IV;
 
   DenseSet<MachineInstr *> TrackedWaitcntSet;
-  DenseSet<MachineInstr *> VCCZBugHandledSet;
 
   struct BlockInfo {
     MachineBasicBlock *MBB;
@@ -1388,8 +1387,7 @@
     }
 
     bool VCCZBugWorkAround = false;
-    if (readsVCCZ(Inst) &&
-        (!VCCZBugHandledSet.count(&Inst))) {
+    if (readsVCCZ(Inst)) {
       if (ScoreBrackets.getScoreLB(LGKM_CNT) <
               ScoreBrackets.getScoreUB(LGKM_CNT) &&
           ScoreBrackets.hasPendingEvent(SMEM_ACCESS)) {
@@ -1431,7 +1429,6 @@
               TII->get(ST->isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64),
               TRI->getVCC())
           .addReg(TRI->getVCC());
-      VCCZBugHandledSet.insert(&Inst);
       Modified = true;
     }
 
@@ -1471,7 +1468,6 @@
       RegisterEncoding.SGPR0 + HardwareLimits.NumSGPRsMax - 1;
 
   TrackedWaitcntSet.clear();
-  VCCZBugHandledSet.clear();
   RpotIdxMap.clear();
   BlockInfos.clear();
 


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D69621.227122.patch
Type: text/x-patch
Size: 2555 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20191030/ec8e7e52/attachment.bin>