[llvm] 6e17708 - AMDGPU: Fix SI_IF lowering when the save exec reg has terminator uses

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Sun Feb 9 14:59:40 PST 2020


Author: Matt Arsenault
Date: 2020-02-09T17:59:19-05:00
New Revision: 6e1770821fbd05bd5180530aca17e1455d1c29d8

URL: https://github.com/llvm/llvm-project/commit/6e1770821fbd05bd5180530aca17e1455d1c29d8
DIFF: https://github.com/llvm/llvm-project/commit/6e1770821fbd05bd5180530aca17e1455d1c29d8.diff

LOG: AMDGPU: Fix SI_IF lowering when the save exec reg has terminator uses

Reverts part of 6524a7a2b9ca072bd7f7b4355d1230e70c679d2f. Since that
commit, the expansion was ignoring the actual save exec register
produced by the instruction, and looking at other instructions. I do
not understand why it was looking at other instructions, but relying
on this scan was wrong.

Fixes verifier errors after SI_IF is tail duplicated, which should be
correct to do. The results were fed into a phi, which was lowered to
the S_MOV_B64_term instructions.

Added: 
    llvm/test/CodeGen/AMDGPU/si-if-lower-user-terminators.mir

Modified: 
    llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
index 61d2719a3aad..06173551e92e 100644
--- a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
+++ b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
@@ -98,8 +98,6 @@ class SILowerControlFlow : public MachineFunctionPass {
   void emitLoop(MachineInstr &MI);
   void emitEndCf(MachineInstr &MI);
 
-  Register getSaveExec(MachineInstr* MI);
-
   void findMaskOperands(MachineInstr &MI, unsigned OpNo,
                         SmallVectorImpl<MachineOperand> &Src) const;
 
@@ -177,29 +175,11 @@ static bool isSimpleIf(const MachineInstr &MI, const MachineRegisterInfo *MRI,
   return true;
 }
 
-Register SILowerControlFlow::getSaveExec(MachineInstr *MI) {
-  MachineBasicBlock *MBB = MI->getParent();
-  MachineOperand &SaveExec = MI->getOperand(0);
-  assert(SaveExec.getSubReg() == AMDGPU::NoSubRegister);
-
-  Register SaveExecReg = SaveExec.getReg();
-  unsigned FalseTermOpc =
-      TII->isWave32() ? AMDGPU::S_MOV_B32_term : AMDGPU::S_MOV_B64_term;
-  MachineBasicBlock::iterator I = (MI);
-  MachineBasicBlock::iterator J = std::next(I);
-  if (J != MBB->end() && J->getOpcode() == FalseTermOpc &&
-      J->getOperand(1).isReg() && J->getOperand(1).getReg() == SaveExecReg) {
-    SaveExecReg = J->getOperand(0).getReg();
-    J->eraseFromParent();
-  }
-  return SaveExecReg;
-}
-
 void SILowerControlFlow::emitIf(MachineInstr &MI) {
   MachineBasicBlock &MBB = *MI.getParent();
   const DebugLoc &DL = MI.getDebugLoc();
   MachineBasicBlock::iterator I(&MI);
-  Register SaveExecReg = getSaveExec(&MI);
+  Register SaveExecReg = MI.getOperand(0).getReg();
   MachineOperand& Cond = MI.getOperand(1);
   assert(Cond.getSubReg() == AMDGPU::NoSubRegister);
 
@@ -282,7 +262,7 @@ void SILowerControlFlow::emitElse(MachineInstr &MI) {
   MachineBasicBlock &MBB = *MI.getParent();
   const DebugLoc &DL = MI.getDebugLoc();
 
-  Register DstReg = getSaveExec(&MI);
+  Register DstReg = MI.getOperand(0).getReg();
 
   bool ExecModified = MI.getOperand(3).getImm() != 0;
   MachineBasicBlock::iterator Start = MBB.begin();
@@ -354,7 +334,7 @@ void SILowerControlFlow::emitElse(MachineInstr &MI) {
 void SILowerControlFlow::emitIfBreak(MachineInstr &MI) {
   MachineBasicBlock &MBB = *MI.getParent();
   const DebugLoc &DL = MI.getDebugLoc();
-  auto Dst = getSaveExec(&MI);
+  auto Dst = MI.getOperand(0).getReg();
 
   // Skip ANDing with exec if the break condition is already masked by exec
   // because it is a V_CMP in the same basic block. (We know the break

diff  --git a/llvm/test/CodeGen/AMDGPU/si-if-lower-user-terminators.mir b/llvm/test/CodeGen/AMDGPU/si-if-lower-user-terminators.mir
new file mode 100644
index 000000000000..5850a3b27bce
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/si-if-lower-user-terminators.mir
@@ -0,0 +1,75 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=si-lower-control-flow -verify-machineinstrs -o - %s | FileCheck %s
+
+# The save exec result register of SI_IF is used by other terminators
+# inserted to behave as a lowered phi. The output register of SI_IF
+# was ignored, and the def was removed, so the S_MOV_B64_term uses
+# would fail the verifier.
+
+---
+name:            si_if_use
+alignment:       1
+legalized:       true
+regBankSelected: true
+selected:        true
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: name: si_if_use
+  ; CHECK: bb.0:
+  ; CHECK:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; CHECK:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+  ; CHECK:   [[COPY:%[0-9]+]]:vgpr_32 = COPY killed $vgpr0
+  ; CHECK:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY killed $vgpr1
+  ; CHECK:   [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U32_e64 killed [[COPY]], killed [[COPY1]], implicit $exec
+  ; CHECK:   [[COPY2:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec
+  ; CHECK:   [[S_AND_B64_:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY2]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc
+  ; CHECK:   [[S_XOR_B64_:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[S_AND_B64_]], [[COPY2]], implicit-def dead $scc
+  ; CHECK:   $exec = S_MOV_B64_term killed [[S_AND_B64_]]
+  ; CHECK:   S_CBRANCH_EXECZ %bb.1, implicit $exec
+  ; CHECK:   [[S_MOV_B64_term:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term [[S_XOR_B64_]], implicit $exec
+  ; CHECK:   [[S_MOV_B64_term1:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term [[S_XOR_B64_]], implicit $exec
+  ; CHECK:   S_BRANCH %bb.2
+  ; CHECK: bb.1:
+  ; CHECK:   successors: %bb.2(0x80000000)
+  ; CHECK:   [[COPY3:%[0-9]+]]:sreg_64_xexec = COPY [[S_MOV_B64_term1]]
+  ; CHECK:   dead %7:vgpr_32 = GLOBAL_LOAD_DWORD undef %8:vreg_64, 0, 0, 0, 0, implicit $exec :: (volatile load 4, addrspace 1)
+  ; CHECK:   [[COPY4:%[0-9]+]]:sreg_64_xexec = COPY [[COPY3]]
+  ; CHECK: bb.2:
+  ; CHECK:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; CHECK:   [[COPY5:%[0-9]+]]:sreg_64_xexec = COPY [[COPY4]]
+  ; CHECK:   $exec = S_OR_B64 $exec, killed [[COPY5]], implicit-def $scc
+  ; CHECK:   S_SLEEP 1
+  ; CHECK:   [[COPY6:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec
+  ; CHECK:   [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY6]], [[V_CMP_EQ_U32_e64_]], implicit-def dead $scc
+  ; CHECK:   [[S_XOR_B64_1:%[0-9]+]]:sreg_64_xexec = S_XOR_B64 [[S_AND_B64_1]], [[COPY6]], implicit-def dead $scc
+  ; CHECK:   $exec = S_MOV_B64_term killed [[S_AND_B64_1]]
+  ; CHECK:   S_CBRANCH_EXECZ %bb.1, implicit $exec
+  ; CHECK:   [[S_MOV_B64_term1:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term [[S_XOR_B64_1]], implicit $exec
+  ; CHECK:   [[S_MOV_B64_term2:%[0-9]+]]:sreg_64_xexec = S_MOV_B64_term [[S_XOR_B64_1]], implicit $exec
+  ; CHECK:   S_BRANCH %bb.2
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+
+    %0:vgpr_32 = COPY killed $vgpr0
+    %1:vgpr_32 = COPY killed $vgpr1
+    %3:sreg_64_xexec = V_CMP_EQ_U32_e64 killed %0, killed %1, implicit $exec
+    %10:sreg_64_xexec = SI_IF %3, %bb.1, implicit-def $exec, implicit-def dead $scc, implicit $exec
+    %14:sreg_64_xexec = S_MOV_B64_term %10, implicit $exec
+    %13:sreg_64_xexec = S_MOV_B64_term %10, implicit $exec
+    S_BRANCH %bb.2
+
+  bb.1:
+    %11:sreg_64_xexec = COPY %13
+    dead %6:vgpr_32 = GLOBAL_LOAD_DWORD undef %8:vreg_64, 0, 0, 0, 0, implicit $exec :: (volatile load 4, addrspace 1)
+    %14:sreg_64_xexec = COPY %11
+
+  bb.2:
+    %12:sreg_64_xexec = COPY %14
+    SI_END_CF killed %12, implicit-def $exec, implicit-def dead $scc, implicit $exec
+    S_SLEEP 1
+    %9:sreg_64_xexec = SI_IF %3, %bb.1, implicit-def $exec, implicit-def dead $scc, implicit $exec
+    %14:sreg_64_xexec = S_MOV_B64_term %9, implicit $exec
+    %13:sreg_64_xexec = S_MOV_B64_term %9, implicit $exec
+    S_BRANCH %bb.2
+
+...


        


More information about the llvm-commits mailing list