[llvm] 611959f - [AMDGPU] Fixed v_swap_b32 match

Stanislav Mekhanoshin via llvm-commits llvm-commits at lists.llvm.org
Wed Oct 21 10:14:34 PDT 2020


Author: Stanislav Mekhanoshin
Date: 2020-10-21T10:14:24-07:00
New Revision: 611959f004d78f99e45fdc940a7a4322b85d04d9

URL: https://github.com/llvm/llvm-project/commit/611959f004d78f99e45fdc940a7a4322b85d04d9
DIFF: https://github.com/llvm/llvm-project/commit/611959f004d78f99e45fdc940a7a4322b85d04d9.diff

LOG: [AMDGPU] Fixed v_swap_b32 match

1. Fixed liveness issue with implicit kills.
2. Fixed potential problem with an indirect mov.

Fixes: SWDEV-256848

Differential Revision: https://reviews.llvm.org/D89599

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
    llvm/test/CodeGen/AMDGPU/v_swap_b32.mir

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
index 94ade095e7a9..29cb1001fcad 100644
--- a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
+++ b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
@@ -437,6 +437,22 @@ getSubRegForIndex(Register Reg, unsigned Sub, unsigned I,
   return TargetInstrInfo::RegSubRegPair(Reg, Sub);
 }
 
+static void dropInstructionKeepingImpDefs(MachineInstr &MI,
+                                          const SIInstrInfo *TII) {
+  for (unsigned i = MI.getDesc().getNumOperands() +
+         MI.getDesc().getNumImplicitUses() +
+         MI.getDesc().getNumImplicitDefs(), e = MI.getNumOperands();
+       i != e; ++i) {
+    const MachineOperand &Op = MI.getOperand(i);
+    if (!Op.isDef())
+      continue;
+    BuildMI(*MI.getParent(), MI.getIterator(), MI.getDebugLoc(),
+            TII->get(AMDGPU::IMPLICIT_DEF), Op.getReg());
+  }
+
+  MI.eraseFromParent();
+}
+
 // Match:
 // mov t, x
 // mov x, y
@@ -476,18 +492,25 @@ static MachineInstr* matchSwap(MachineInstr &MovT, MachineRegisterInfo &MRI,
   if (!TRI.isVGPR(MRI, X))
     return nullptr;
 
+  if (MovT.hasRegisterImplicitUseOperand(AMDGPU::M0))
+    return nullptr;
+
   const unsigned SearchLimit = 16;
   unsigned Count = 0;
+  bool KilledT = false;
   for (auto Iter = std::next(MovT.getIterator()),
             E = MovT.getParent()->instr_end();
-       Iter != E && Count < SearchLimit; ++Iter, ++Count) {
+       Iter != E && Count < SearchLimit && !KilledT; ++Iter, ++Count) {
 
     MachineInstr *MovY = &*Iter;
+    KilledT = MovY->killsRegister(T, &TRI);
+
     if ((MovY->getOpcode() != AMDGPU::V_MOV_B32_e32 &&
          MovY->getOpcode() != AMDGPU::COPY) ||
         !MovY->getOperand(1).isReg()        ||
         MovY->getOperand(1).getReg() != T   ||
-        MovY->getOperand(1).getSubReg() != Tsub)
+        MovY->getOperand(1).getSubReg() != Tsub ||
+        MovY->hasRegisterImplicitUseOperand(AMDGPU::M0))
       continue;
 
     Register Y = MovY->getOperand(0).getReg();
@@ -521,32 +544,53 @@ static MachineInstr* matchSwap(MachineInstr &MovT, MachineRegisterInfo &MRI,
         MovX = nullptr;
         break;
       }
+      // Implicit use of M0 is an indirect move.
+      if (I->hasRegisterImplicitUseOperand(AMDGPU::M0))
+        continue;
+
+      if (Size > 1 && (I->getNumImplicitOperands() > (I->isCopy() ? 0 : 1)))
+        continue;
+
       MovX = &*I;
     }
 
     if (!MovX)
       continue;
 
-    LLVM_DEBUG(dbgs() << "Matched v_swap_b32:\n" << MovT << *MovX << MovY);
+    LLVM_DEBUG(dbgs() << "Matched v_swap_b32:\n" << MovT << *MovX << *MovY);
 
     for (unsigned I = 0; I < Size; ++I) {
       TargetInstrInfo::RegSubRegPair X1, Y1;
       X1 = getSubRegForIndex(X, Xsub, I, TRI, MRI);
       Y1 = getSubRegForIndex(Y, Ysub, I, TRI, MRI);
-      BuildMI(*MovT.getParent(), MovX->getIterator(), MovT.getDebugLoc(),
-                TII->get(AMDGPU::V_SWAP_B32))
+      MachineBasicBlock &MBB = *MovT.getParent();
+      auto MIB = BuildMI(MBB, MovX->getIterator(), MovT.getDebugLoc(),
+                         TII->get(AMDGPU::V_SWAP_B32))
         .addDef(X1.Reg, 0, X1.SubReg)
         .addDef(Y1.Reg, 0, Y1.SubReg)
         .addReg(Y1.Reg, 0, Y1.SubReg)
         .addReg(X1.Reg, 0, X1.SubReg).getInstr();
+      if (MovX->hasRegisterImplicitUseOperand(AMDGPU::EXEC)) {
+        // Drop implicit EXEC.
+        MIB->RemoveOperand(MIB->getNumExplicitOperands());
+        MIB->copyImplicitOps(*MBB.getParent(), *MovX);
+      }
     }
     MovX->eraseFromParent();
-    MovY->eraseFromParent();
+    dropInstructionKeepingImpDefs(*MovY, TII);
     MachineInstr *Next = &*std::next(MovT.getIterator());
-    if (MRI.use_nodbg_empty(T))
-      MovT.eraseFromParent();
-    else
+
+    if (MRI.use_nodbg_empty(T)) {
+      dropInstructionKeepingImpDefs(MovT, TII);
+    } else {
       Xop.setIsKill(false);
+      for (int I = MovT.getNumImplicitOperands() - 1; I >= 0; --I ) {
+        unsigned OpNo = MovT.getNumExplicitOperands() + I;
+        const MachineOperand &Op = MovT.getOperand(OpNo);
+        if (Op.isKill() && TRI.regsOverlap(X, Op.getReg()))
+          MovT.RemoveOperand(OpNo);
+      }
+    }
 
     return Next;
   }

diff  --git a/llvm/test/CodeGen/AMDGPU/v_swap_b32.mir b/llvm/test/CodeGen/AMDGPU/v_swap_b32.mir
index 3190641ae691..d557060207df 100644
--- a/llvm/test/CodeGen/AMDGPU/v_swap_b32.mir
+++ b/llvm/test/CodeGen/AMDGPU/v_swap_b32.mir
@@ -517,7 +517,9 @@ body:             |
 ...
 
 # GCN-LABEL: name: swap_virt_copy_subreg_impdef_super
-# GCN: %0.sub0:vreg_64, %1.sub0:vreg_64 = V_SWAP_B32 %1.sub0, %0.sub0, implicit $exec
+# GCN:      %2:vreg_64 = IMPLICIT_DEF
+# GCN-NEXT: %2.sub1:vreg_64 = COPY %0.sub1
+# GCN-NEXT: %0.sub0:vreg_64, %1.sub0:vreg_64 = V_SWAP_B32 %1.sub0, %0.sub0, implicit $exec
 ---
 name:            swap_virt_copy_subreg_impdef_super
 registers:
@@ -672,3 +674,222 @@ body:             |
     %1 = COPY %2
     S_ENDPGM 0
 ...
+
+# GCN-LABEL: name: swap_liveness_error_mov
+# GCN:      $vgpr6 = V_MOV_B32_e32 $vgpr1, implicit $exec
+# GCN-NEXT: $vgpr1, $vgpr5 = V_SWAP_B32 $vgpr5, $vgpr1, implicit $exec
+# GCN-NEXT: $vgpr5_vgpr6 = IMPLICIT_DEF
+# GCN-NEXT: $vgpr6 = V_MOV_B32_e32 $vgpr7, implicit $exec, implicit $vgpr6_vgpr7
+# GCN-NEXT: $vgpr5 = V_MOV_B32_e32 $vgpr6, implicit $exec
+
+---
+name: swap_liveness_error_mov
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr5, $vgpr1_vgpr2
+
+    $vgpr6 = V_MOV_B32_e32 $vgpr1, implicit $exec, implicit killed $vgpr1_vgpr2
+    $vgpr1 = V_MOV_B32_e32 killed $vgpr5, implicit $exec
+    $vgpr5 = V_MOV_B32_e32 $vgpr6, implicit $exec, implicit-def $vgpr5_vgpr6, implicit $vgpr6_vgpr7
+    $vgpr6 = V_MOV_B32_e32 $vgpr7, implicit $exec, implicit $vgpr6_vgpr7
+    $vgpr5 = V_MOV_B32_e32 $vgpr6, implicit $exec
+    S_ENDPGM 0
+...
+
+# GCN-LABEL: name: swap_liveness_error_copy
+# GCN:      $vgpr6 = COPY $vgpr1
+# GCN-NEXT: $vgpr1, $vgpr5 = V_SWAP_B32 $vgpr5, $vgpr1, implicit $exec
+# GCN-NEXT: $vgpr5_vgpr6 = IMPLICIT_DEF
+# GCN-NEXT: $vgpr6 = COPY $vgpr7, implicit $vgpr6_vgpr7
+# GCN-NEXT: $vgpr5 = COPY $vgpr6
+
+---
+name: swap_liveness_error_copy
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr5, $vgpr1_vgpr2
+
+    $vgpr6 = COPY $vgpr1, implicit killed $vgpr1_vgpr2
+    $vgpr1 = COPY killed $vgpr5
+    $vgpr5 = COPY $vgpr6, implicit-def $vgpr5_vgpr6, implicit $vgpr6_vgpr7
+    $vgpr6 = COPY $vgpr7, implicit $vgpr6_vgpr7
+    $vgpr5 = COPY $vgpr6
+    S_ENDPGM 0
+...
+
+# GCN-LABEL: name: swap_killed_t_early
+# GCN:      $vgpr2 = V_MOV_B32_e32 killed $vgpr0, implicit $exec
+# GCN-NEXT: $vgpr3 = V_MOV_B32_e32 killed $vgpr4, implicit $exec, implicit killed $vgpr2
+# GCN-NEXT: $vgpr0 = V_MOV_B32_e32 killed $vgpr1, implicit $exec
+# GCN-NEXT: $vgpr5 = V_MOV_B32_e32 killed $vgpr6, implicit $exec
+# GCN-NEXT: $vgpr1 = V_MOV_B32_e32 undef $vgpr2, implicit $exec
+
+---
+name:            swap_killed_t_early
+body:             |
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+    $vgpr2 = V_MOV_B32_e32 killed $vgpr0, implicit $exec
+    $vgpr3 = V_MOV_B32_e32 killed $vgpr4, implicit $exec, implicit killed $vgpr2
+    $vgpr0 = V_MOV_B32_e32 killed $vgpr1, implicit $exec
+    $vgpr5 = V_MOV_B32_e32 killed $vgpr6, implicit $exec
+    $vgpr1 = V_MOV_B32_e32 undef $vgpr2, implicit $exec
+    S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
+...
+
+# GCN-LABEL: name: swap_killed_t_late
+# GCN:      $vgpr2 = V_MOV_B32_e32 killed $vgpr0, implicit $exec
+# GCN-NEXT: $vgpr3 = V_MOV_B32_e32 killed $vgpr4, implicit $exec
+# GCN-NEXT: $vgpr0 = V_MOV_B32_e32 killed $vgpr1, implicit $exec
+# GCN-NEXT: $vgpr5 = V_MOV_B32_e32 killed $vgpr6, implicit $exec, implicit killed $vgpr2
+# GCN-NEXT: $vgpr1 = V_MOV_B32_e32 undef $vgpr2, implicit $exec
+
+---
+name:            swap_killed_t_late
+body:             |
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+    $vgpr2 = V_MOV_B32_e32 killed $vgpr0, implicit $exec
+    $vgpr3 = V_MOV_B32_e32 killed $vgpr4, implicit $exec
+    $vgpr0 = V_MOV_B32_e32 killed $vgpr1, implicit $exec
+    $vgpr5 = V_MOV_B32_e32 killed $vgpr6, implicit $exec, implicit killed $vgpr2
+    $vgpr1 = V_MOV_B32_e32 undef $vgpr2, implicit $exec
+    S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
+...
+
+# GCN-LABEL: name: swap_killed_x
+# GCN:      $vgpr2 = V_MOV_B32_e32 killed $vgpr0, implicit $exec
+# GCN-NEXT: $vgpr3 = V_MOV_B32_e32 killed $vgpr4, implicit $exec
+# GCN-NEXT: $vgpr0 = V_MOV_B32_e32 killed $vgpr1, implicit $exec
+# GCN-NEXT: $vgpr5 = V_MOV_B32_e32 killed $vgpr6, implicit $exec, implicit killed $vgpr0
+# GCN-NEXT: $vgpr1 = V_MOV_B32_e32 killed $vgpr2, implicit $exec
+
+---
+name:            swap_killed_x
+body:             |
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+    $vgpr2 = V_MOV_B32_e32 killed $vgpr0, implicit $exec
+    $vgpr3 = V_MOV_B32_e32 killed $vgpr4, implicit $exec
+    $vgpr0 = V_MOV_B32_e32 killed $vgpr1, implicit $exec
+    $vgpr5 = V_MOV_B32_e32 killed $vgpr6, implicit $exec, implicit killed $vgpr0
+    $vgpr1 = V_MOV_B32_e32 killed $vgpr2, implicit $exec
+    S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
+...
+
+# GCN-LABEL: name: indirect_mov_t
+# GCN:      $vgpr2 = V_MOV_B32_e32 killed $vgpr0, implicit $exec, implicit $m0
+# GCN-NEXT: $vgpr0 = V_MOV_B32_e32 killed $vgpr1, implicit $exec
+# GCN-NEXT: $vgpr1 = V_MOV_B32_e32 killed $vgpr2, implicit $exec
+# GCN-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
+
+---
+name:            indirect_mov_t
+body:             |
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+    $vgpr2 = V_MOV_B32_e32 killed $vgpr0, implicit $exec, implicit $m0
+    $vgpr0 = V_MOV_B32_e32 killed $vgpr1, implicit $exec
+    $vgpr1 = V_MOV_B32_e32 killed $vgpr2, implicit $exec
+    S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
+...
+
+# GCN-LABEL: name: indirect_mov_x
+# GCN:      $vgpr2 = V_MOV_B32_e32 killed $vgpr0, implicit $exec
+# GCN-NEXT: $vgpr0 = V_MOV_B32_e32 killed $vgpr1, implicit $exec, implicit $m0
+# GCN-NEXT: $vgpr1 = V_MOV_B32_e32 killed $vgpr2, implicit $exec
+# GCN-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
+
+---
+name:            indirect_mov_x
+body:             |
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+    $vgpr2 = V_MOV_B32_e32 killed $vgpr0, implicit $exec
+    $vgpr0 = V_MOV_B32_e32 killed $vgpr1, implicit $exec, implicit $m0
+    $vgpr1 = V_MOV_B32_e32 killed $vgpr2, implicit $exec
+    S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
+...
+
+# GCN-LABEL: name: indirect_mov_y
+# GCN:      $vgpr2 = V_MOV_B32_e32 killed $vgpr0, implicit $exec
+# GCN-NEXT: $vgpr0 = V_MOV_B32_e32 killed $vgpr1, implicit $exec
+# GCN-NEXT: $vgpr1 = V_MOV_B32_e32 killed $vgpr2, implicit $exec, implicit $m0
+# GCN-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
+
+---
+name:            indirect_mov_y
+body:             |
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+    $vgpr2 = V_MOV_B32_e32 killed $vgpr0, implicit $exec
+    $vgpr0 = V_MOV_B32_e32 killed $vgpr1, implicit $exec
+    $vgpr1 = V_MOV_B32_e32 killed $vgpr2, implicit $exec, implicit $m0
+    S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
+...
+
+# GCN-LABEL: name: implicit_ops_mov_x_swap_b32
+# GCN: $vgpr0, $vgpr1 = V_SWAP_B32 $vgpr1, $vgpr0, implicit $exec, implicit $vgpr2, implicit killed $vgpr1_vgpr2
+
+---
+name:            implicit_ops_mov_x_swap_b32
+body:             |
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+    $vgpr3 = V_MOV_B32_e32 killed $vgpr0, implicit $exec
+    $vgpr0 = V_MOV_B32_e32 $vgpr1, implicit $exec, implicit $vgpr2, implicit killed $vgpr1_vgpr2
+    $vgpr1 = V_MOV_B32_e32 killed $vgpr3, implicit $exec
+    S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
+...
+
+# GCN-LABEL: name: implict_ops_mov_x_swap_b64
+# GCN:      %2:vreg_64 = COPY %0
+# GCN-NEXT: %0:vreg_64 = COPY %1, implicit $vgpr0
+# GCN-NEXT: %1:vreg_64 = COPY %2
+
+---
+name:            implict_ops_mov_x_swap_b64
+registers:
+  - { id: 0, class: vreg_64 }
+  - { id: 1, class: vreg_64 }
+  - { id: 2, class: vreg_64 }
+body:             |
+  bb.0:
+    %0 = IMPLICIT_DEF
+    %1 = IMPLICIT_DEF
+    %2 = COPY %0
+    %0 = COPY %1, implicit $vgpr0
+    %1 = COPY %2
+...
+
+# GCN-LABEL: implicit_ops_mov_t_swap_b32
+# GCN:      $vgpr1 = IMPLICIT_DEF
+# GCN-NEXT: $vgpr0, $vgpr1 = V_SWAP_B32 $vgpr1, $vgpr0, implicit $exec
+
+---
+name:            implicit_ops_mov_t_swap_b32
+body:             |
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+    $vgpr3 = V_MOV_B32_e32 killed $vgpr0, implicit $exec, implicit $vgpr2, implicit killed $vgpr1_vgpr2, implicit-def $vgpr1
+    $vgpr0 = V_MOV_B32_e32 $vgpr1, implicit $exec
+    $vgpr1 = V_MOV_B32_e32 killed $vgpr3, implicit $exec
+    S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
+...
+
+# GCN-LABEL: implicit_ops_mov_y_swap_b32
+# GCN:      $vgpr0, $vgpr1 = V_SWAP_B32 $vgpr1, $vgpr0, implicit $exec
+# GCN-NEXT: $vgpr0_vgpr1 = IMPLICIT_DEF
+
+---
+name:            implicit_ops_mov_y_swap_b32
+body:             |
+  bb.0:
+    liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
+    $vgpr3 = V_MOV_B32_e32 killed $vgpr0, implicit $exec
+    $vgpr0 = V_MOV_B32_e32 $vgpr1, implicit $exec
+    $vgpr1 = V_MOV_B32_e32 killed $vgpr3, implicit $exec, implicit $vgpr2, implicit-def $vgpr0_vgpr1, implicit killed $vgpr3
+    S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
+...


        


More information about the llvm-commits mailing list