[llvm] r374908 - [AMDGPU] Allow DPP combiner to work with REG_SEQUENCE

Stanislav Mekhanoshin via llvm-commits llvm-commits at lists.llvm.org
Tue Oct 15 09:17:51 PDT 2019


Author: rampitec
Date: Tue Oct 15 09:17:50 2019
New Revision: 374908

URL: http://llvm.org/viewvc/llvm-project?rev=374908&view=rev
Log:
[AMDGPU] Allow DPP combiner to work with REG_SEQUENCE

Differential Revision: https://reviews.llvm.org/D68828

Modified:
    llvm/trunk/lib/Target/AMDGPU/GCNDPPCombine.cpp
    llvm/trunk/test/CodeGen/AMDGPU/dpp_combine.mir

Modified: llvm/trunk/lib/Target/AMDGPU/GCNDPPCombine.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/GCNDPPCombine.cpp?rev=374908&r1=374907&r2=374908&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/GCNDPPCombine.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/GCNDPPCombine.cpp Tue Oct 15 09:17:50 2019
@@ -41,6 +41,7 @@
 #include "AMDGPUSubtarget.h"
 #include "SIInstrInfo.h"
 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
@@ -155,8 +156,6 @@ MachineInstr *GCNDPPCombine::createDPPIn
                                            RegSubRegPair CombOldVGPR,
                                            bool CombBCZ) const {
   assert(MovMI.getOpcode() == AMDGPU::V_MOV_B32_dpp);
-  assert(TII->getNamedOperand(MovMI, AMDGPU::OpName::vdst)->getReg() ==
-         TII->getNamedOperand(OrigMI, AMDGPU::OpName::src0)->getReg());
 
   auto OrigOp = OrigMI.getOpcode();
   auto DPPOp = getDPPOp(OrigOp);
@@ -418,6 +417,7 @@ bool GCNDPPCombine::combineDPPMov(Machin
     dbgs() << ", bound_ctrl=" << CombBCZ << '\n');
 
   SmallVector<MachineInstr*, 4> OrigMIs, DPPMIs;
+  DenseMap<MachineInstr*, SmallVector<unsigned, 4>> RegSeqWithOpNos;
   auto CombOldVGPR = getRegSubRegPair(*OldOpnd);
   // try to reuse previous old reg if its undefined (IMPLICIT_DEF)
   if (CombBCZ && OldOpndValue) { // CombOldVGPR should be undef
@@ -430,13 +430,49 @@ bool GCNDPPCombine::combineDPPMov(Machin
 
   OrigMIs.push_back(&MovMI);
   bool Rollback = true;
+  SmallVector<MachineOperand*, 16> Uses;
+
   for (auto &Use : MRI->use_nodbg_operands(DPPMovReg)) {
+    Uses.push_back(&Use);
+  }
+
+  while (!Uses.empty()) {
+    MachineOperand *Use = Uses.pop_back_val();
     Rollback = true;
 
-    auto &OrigMI = *Use.getParent();
+    auto &OrigMI = *Use->getParent();
     LLVM_DEBUG(dbgs() << "  try: " << OrigMI);
 
     auto OrigOp = OrigMI.getOpcode();
+    if (OrigOp == AMDGPU::REG_SEQUENCE) {
+      Register FwdReg = OrigMI.getOperand(0).getReg();
+      unsigned FwdSubReg = 0;
+
+      if (execMayBeModifiedBeforeAnyUse(*MRI, FwdReg, OrigMI)) {
+        LLVM_DEBUG(dbgs() << "  failed: EXEC mask should remain the same"
+                             " for all uses\n");
+        break;
+      }
+
+      unsigned OpNo, E = OrigMI.getNumOperands();
+      for (OpNo = 1; OpNo < E; OpNo += 2) {
+        if (OrigMI.getOperand(OpNo).getReg() == DPPMovReg) {
+          FwdSubReg = OrigMI.getOperand(OpNo + 1).getImm();
+          break;
+        }
+      }
+
+      if (!FwdSubReg)
+        break;
+
+      for (auto &Op : MRI->use_nodbg_operands(FwdReg)) {
+        if (Op.getSubReg() == FwdSubReg)
+          Uses.push_back(&Op);
+      }
+      RegSeqWithOpNos[&OrigMI].push_back(OpNo);
+      continue;
+    }
+
     if (TII->isVOP3(OrigOp)) {
       if (!TII->hasVALU32BitEncoding(OrigOp)) {
         LLVM_DEBUG(dbgs() << "  failed: VOP3 hasn't e32 equivalent\n");
@@ -457,14 +493,14 @@ bool GCNDPPCombine::combineDPPMov(Machin
     }
 
     LLVM_DEBUG(dbgs() << "  combining: " << OrigMI);
-    if (&Use == TII->getNamedOperand(OrigMI, AMDGPU::OpName::src0)) {
+    if (Use == TII->getNamedOperand(OrigMI, AMDGPU::OpName::src0)) {
       if (auto *DPPInst = createDPPInst(OrigMI, MovMI, CombOldVGPR,
                                         OldOpndValue, CombBCZ)) {
         DPPMIs.push_back(DPPInst);
         Rollback = false;
       }
     } else if (OrigMI.isCommutable() &&
-               &Use == TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1)) {
+               Use == TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1)) {
       auto *BB = OrigMI.getParent();
       auto *NewMI = BB->getParent()->CloneMachineInstr(&OrigMI);
       BB->insert(OrigMI, NewMI);
@@ -485,9 +521,22 @@ bool GCNDPPCombine::combineDPPMov(Machin
     OrigMIs.push_back(&OrigMI);
   }
 
+  Rollback |= !Uses.empty();
+
   for (auto *MI : *(Rollback? &DPPMIs : &OrigMIs))
     MI->eraseFromParent();
 
+  if (!Rollback) {
+    for (auto &S : RegSeqWithOpNos) {
+      if (MRI->use_nodbg_empty(S.first->getOperand(0).getReg())) {
+        S.first->eraseFromParent();
+        continue;
+      }
+      while (!S.second.empty())
+        S.first->getOperand(S.second.pop_back_val()).setIsUndef(true);
+    }
+  }
+
   return !Rollback;
 }
 

Modified: llvm/trunk/test/CodeGen/AMDGPU/dpp_combine.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/dpp_combine.mir?rev=374908&r1=374907&r2=374908&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/dpp_combine.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/dpp_combine.mir Tue Oct 15 09:17:50 2019
@@ -562,3 +562,159 @@ body: |
     %2:vgpr_32 = V_MOV_B32_dpp undef %1:vgpr_32, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
     %3:vgpr_32 = V_CEIL_F32_e32 %2, implicit $exec
 ...
+
+# GCN-LABEL: name: dpp_reg_sequence_both_combined
+# GCN: %0:vreg_64 = COPY $vgpr0_vgpr1
+# GCN: %1:vreg_64 = COPY $vgpr2_vgpr3
+# GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
+# GCN: %9:vgpr_32 = IMPLICIT_DEF
+# GCN: %8:vgpr_32 = IMPLICIT_DEF
+# GCN: %6:vgpr_32 = V_ADD_I32_dpp %9, %1.sub0, %2, 1, 15, 15, 1, implicit-def $vcc, implicit $exec
+# GCN: %7:vgpr_32 = V_ADDC_U32_dpp %8, %1.sub1, %2, 1, 15, 15, 1, implicit-def $vcc, implicit $vcc, implicit $exec
+name: dpp_reg_sequence_both_combined
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+
+    %0:vreg_64 = COPY $vgpr0_vgpr1
+    %1:vreg_64 = COPY $vgpr2_vgpr3
+    %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
+    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
+    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
+    %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
+    %6:vgpr_32 = V_ADD_I32_e32 %4.sub0, %5, implicit-def $vcc, implicit $exec
+    %7:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %5, implicit-def $vcc, implicit $vcc, implicit $exec
+...
+
+# GCN-LABEL: name: dpp_reg_sequence_first_combined
+# GCN: %0:vreg_64 = COPY $vgpr0_vgpr1
+# GCN: %1:vreg_64 = COPY $vgpr2_vgpr3
+# GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
+# GCN: %8:vgpr_32 = IMPLICIT_DEF
+# GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 1, 1, 1, implicit $exec
+# GCN: %5:vreg_64 = REG_SEQUENCE undef %3:vgpr_32, %subreg.sub0, %4, %subreg.sub1
+# GCN: %6:vgpr_32 = V_ADD_I32_dpp %8, %1.sub0, %2, 1, 15, 15, 1, implicit-def $vcc, implicit $exec
+# GCN: %7:vgpr_32 = V_ADDC_U32_e32 %5.sub1, %2, implicit-def $vcc, implicit $vcc, implicit $exec
+name: dpp_reg_sequence_first_combined
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+
+    %0:vreg_64 = COPY $vgpr0_vgpr1
+    %1:vreg_64 = COPY $vgpr2_vgpr3
+    %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
+    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
+    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 1, 1, 1, implicit $exec
+    %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
+    %6:vgpr_32 = V_ADD_I32_e32 %4.sub0, %5, implicit-def $vcc, implicit $exec
+    %7:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %5, implicit-def $vcc, implicit $vcc, implicit $exec
+...
+
+# GCN-LABEL: name: dpp_reg_sequence_second_combined
+# GCN: %0:vreg_64 = COPY $vgpr0_vgpr1
+# GCN: %1:vreg_64 = COPY $vgpr2_vgpr3
+# GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
+# GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 1, 1, 1, implicit $exec
+# GCN: %8:vgpr_32 = IMPLICIT_DEF
+# GCN: %5:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, undef %4:vgpr_32, %subreg.sub1
+# GCN: %6:vgpr_32 = V_ADD_I32_e32 %5.sub0, %2, implicit-def $vcc, implicit $exec
+# GCN: %7:vgpr_32 = V_ADDC_U32_dpp %8, %1.sub1, %2, 1, 15, 15, 1, implicit-def $vcc, implicit $vcc, implicit $exec
+name: dpp_reg_sequence_second_combined
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+
+    %0:vreg_64 = COPY $vgpr0_vgpr1
+    %1:vreg_64 = COPY $vgpr2_vgpr3
+    %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
+    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 1, 1, 1, implicit $exec
+    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
+    %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
+    %6:vgpr_32 = V_ADD_I32_e32 %4.sub0, %5, implicit-def $vcc, implicit $exec
+    %7:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %5, implicit-def $vcc, implicit $vcc, implicit $exec
+...
+
+# GCN-LABEL: name: dpp_reg_sequence_none_combined
+# GCN: %0:vreg_64 = COPY $vgpr0_vgpr1
+# GCN: %1:vreg_64 = COPY $vgpr2_vgpr3
+# GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
+# GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 1, 1, 1, implicit $exec
+# GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 1, 1, 1, implicit $exec
+# GCN: %5:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, %4, %subreg.sub1
+# GCN: %6:vgpr_32 = V_ADD_I32_e32 %5.sub0, %2, implicit-def $vcc, implicit $exec
+# GCN: %7:vgpr_32 = V_ADDC_U32_e32 %5.sub1, %2, implicit-def $vcc, implicit $vcc, implicit $exec
+name: dpp_reg_sequence_none_combined
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+
+    %0:vreg_64 = COPY $vgpr0_vgpr1
+    %1:vreg_64 = COPY $vgpr2_vgpr3
+    %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
+    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 1, 1, 1, implicit $exec
+    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 1, 1, 1, implicit $exec
+    %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
+    %6:vgpr_32 = V_ADD_I32_e32 %4.sub0, %5, implicit-def $vcc, implicit $exec
+    %7:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %5, implicit-def $vcc, implicit $vcc, implicit $exec
+...
+
+# GCN-LABEL: name: dpp_reg_sequence_exec_changed
+# GCN:   %0:vreg_64 = COPY $vgpr0_vgpr1
+# GCN:   %1:vreg_64 = COPY $vgpr2_vgpr3
+# GCN:   %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
+# GCN:   %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
+# GCN:   %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
+# GCN:   %5:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, %4, %subreg.sub1
+# GCN:   S_BRANCH %bb.1
+# GCN: bb.1:
+# GCN:   %6:vgpr_32 = V_ADD_I32_e32 %5.sub0, %2, implicit-def $vcc, implicit $exec
+# GCN:   %7:vgpr_32 = V_ADDC_U32_e32 %5.sub1, %2, implicit-def $vcc, implicit $vcc, implicit $exec
+name: dpp_reg_sequence_exec_changed
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+
+    %0:vreg_64 = COPY $vgpr0_vgpr1
+    %1:vreg_64 = COPY $vgpr2_vgpr3
+    %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
+    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
+    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
+    %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
+    S_BRANCH %bb.1
+
+  bb.1:
+    %6:vgpr_32 = V_ADD_I32_e32 %4.sub0, %5, implicit-def $vcc, implicit $exec
+    %7:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %5, implicit-def $vcc, implicit $vcc, implicit $exec
+...
+
+# GCN-LABEL: name: dpp_reg_sequence_subreg
+# GCN: %0:vreg_64 = COPY $vgpr0_vgpr1
+# GCN: %1:vreg_64 = COPY $vgpr2_vgpr3
+# GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
+# GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
+# GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
+# GCN: %5:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, %4, %subreg.sub1
+# GCN: %6:vreg_64 = REG_SEQUENCE %5.sub0, %subreg.sub0, %5.sub1, %subreg.sub1
+# GCN: %7:vgpr_32 = V_ADD_I32_e32 %6.sub0, %2, implicit-def $vcc, implicit $exec
+# GCN: %8:vgpr_32 = V_ADDC_U32_e32 %6.sub1, %2, implicit-def $vcc, implicit $vcc, implicit $exec
+name: dpp_reg_sequence_subreg
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+
+    %0:vreg_64 = COPY $vgpr0_vgpr1
+    %1:vreg_64 = COPY $vgpr2_vgpr3
+    %8:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
+    %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
+    %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
+    %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
+    %5:vreg_64 = REG_SEQUENCE %4.sub0, %subreg.sub0, %4.sub1, %subreg.sub1
+    %6:vgpr_32 = V_ADD_I32_e32 %5.sub0, %8, implicit-def $vcc, implicit $exec
+    %7:vgpr_32 = V_ADDC_U32_e32 %5.sub1, %8, implicit-def $vcc, implicit $vcc, implicit $exec
+...




More information about the llvm-commits mailing list