[llvm] r374908 - [AMDGPU] Allow DPP combiner to work with REG_SEQUENCE
Stanislav Mekhanoshin via llvm-commits
llvm-commits at lists.llvm.org
Tue Oct 15 09:17:51 PDT 2019
Author: rampitec
Date: Tue Oct 15 09:17:50 2019
New Revision: 374908
URL: http://llvm.org/viewvc/llvm-project?rev=374908&view=rev
Log:
[AMDGPU] Allow DPP combiner to work with REG_SEQUENCE
Differential Revision: https://reviews.llvm.org/D68828
Modified:
llvm/trunk/lib/Target/AMDGPU/GCNDPPCombine.cpp
llvm/trunk/test/CodeGen/AMDGPU/dpp_combine.mir
Modified: llvm/trunk/lib/Target/AMDGPU/GCNDPPCombine.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/GCNDPPCombine.cpp?rev=374908&r1=374907&r2=374908&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/GCNDPPCombine.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/GCNDPPCombine.cpp Tue Oct 15 09:17:50 2019
@@ -41,6 +41,7 @@
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
@@ -155,8 +156,6 @@ MachineInstr *GCNDPPCombine::createDPPIn
RegSubRegPair CombOldVGPR,
bool CombBCZ) const {
assert(MovMI.getOpcode() == AMDGPU::V_MOV_B32_dpp);
- assert(TII->getNamedOperand(MovMI, AMDGPU::OpName::vdst)->getReg() ==
- TII->getNamedOperand(OrigMI, AMDGPU::OpName::src0)->getReg());
auto OrigOp = OrigMI.getOpcode();
auto DPPOp = getDPPOp(OrigOp);
@@ -418,6 +417,7 @@ bool GCNDPPCombine::combineDPPMov(Machin
dbgs() << ", bound_ctrl=" << CombBCZ << '\n');
SmallVector<MachineInstr*, 4> OrigMIs, DPPMIs;
+ DenseMap<MachineInstr*, SmallVector<unsigned, 4>> RegSeqWithOpNos;
auto CombOldVGPR = getRegSubRegPair(*OldOpnd);
// try to reuse previous old reg if its undefined (IMPLICIT_DEF)
if (CombBCZ && OldOpndValue) { // CombOldVGPR should be undef
@@ -430,13 +430,49 @@ bool GCNDPPCombine::combineDPPMov(Machin
OrigMIs.push_back(&MovMI);
bool Rollback = true;
+ SmallVector<MachineOperand*, 16> Uses;
+
for (auto &Use : MRI->use_nodbg_operands(DPPMovReg)) {
+ Uses.push_back(&Use);
+ }
+
+ while (!Uses.empty()) {
+ MachineOperand *Use = Uses.pop_back_val();
Rollback = true;
- auto &OrigMI = *Use.getParent();
+ auto &OrigMI = *Use->getParent();
LLVM_DEBUG(dbgs() << " try: " << OrigMI);
auto OrigOp = OrigMI.getOpcode();
+ if (OrigOp == AMDGPU::REG_SEQUENCE) {
+ Register FwdReg = OrigMI.getOperand(0).getReg();
+ unsigned FwdSubReg = 0;
+
+ if (execMayBeModifiedBeforeAnyUse(*MRI, FwdReg, OrigMI)) {
+ LLVM_DEBUG(dbgs() << " failed: EXEC mask should remain the same"
+ " for all uses\n");
+ break;
+ }
+
+ unsigned OpNo, E = OrigMI.getNumOperands();
+ for (OpNo = 1; OpNo < E; OpNo += 2) {
+ if (OrigMI.getOperand(OpNo).getReg() == DPPMovReg) {
+ FwdSubReg = OrigMI.getOperand(OpNo + 1).getImm();
+ break;
+ }
+ }
+
+ if (!FwdSubReg)
+ break;
+
+ for (auto &Op : MRI->use_nodbg_operands(FwdReg)) {
+ if (Op.getSubReg() == FwdSubReg)
+ Uses.push_back(&Op);
+ }
+ RegSeqWithOpNos[&OrigMI].push_back(OpNo);
+ continue;
+ }
+
if (TII->isVOP3(OrigOp)) {
if (!TII->hasVALU32BitEncoding(OrigOp)) {
LLVM_DEBUG(dbgs() << " failed: VOP3 hasn't e32 equivalent\n");
@@ -457,14 +493,14 @@ bool GCNDPPCombine::combineDPPMov(Machin
}
LLVM_DEBUG(dbgs() << " combining: " << OrigMI);
- if (&Use == TII->getNamedOperand(OrigMI, AMDGPU::OpName::src0)) {
+ if (Use == TII->getNamedOperand(OrigMI, AMDGPU::OpName::src0)) {
if (auto *DPPInst = createDPPInst(OrigMI, MovMI, CombOldVGPR,
OldOpndValue, CombBCZ)) {
DPPMIs.push_back(DPPInst);
Rollback = false;
}
} else if (OrigMI.isCommutable() &&
- &Use == TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1)) {
+ Use == TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1)) {
auto *BB = OrigMI.getParent();
auto *NewMI = BB->getParent()->CloneMachineInstr(&OrigMI);
BB->insert(OrigMI, NewMI);
@@ -485,9 +521,22 @@ bool GCNDPPCombine::combineDPPMov(Machin
OrigMIs.push_back(&OrigMI);
}
+ Rollback |= !Uses.empty();
+
for (auto *MI : *(Rollback? &DPPMIs : &OrigMIs))
MI->eraseFromParent();
+ if (!Rollback) {
+ for (auto &S : RegSeqWithOpNos) {
+ if (MRI->use_nodbg_empty(S.first->getOperand(0).getReg())) {
+ S.first->eraseFromParent();
+ continue;
+ }
+ while (!S.second.empty())
+ S.first->getOperand(S.second.pop_back_val()).setIsUndef(true);
+ }
+ }
+
return !Rollback;
}
Modified: llvm/trunk/test/CodeGen/AMDGPU/dpp_combine.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/dpp_combine.mir?rev=374908&r1=374907&r2=374908&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AMDGPU/dpp_combine.mir (original)
+++ llvm/trunk/test/CodeGen/AMDGPU/dpp_combine.mir Tue Oct 15 09:17:50 2019
@@ -562,3 +562,159 @@ body: |
%2:vgpr_32 = V_MOV_B32_dpp undef %1:vgpr_32, undef %0:vgpr_32, 1, 15, 15, 1, implicit $exec
%3:vgpr_32 = V_CEIL_F32_e32 %2, implicit $exec
...
+
+# GCN-LABEL: name: dpp_reg_sequence_both_combined
+# GCN: %0:vreg_64 = COPY $vgpr0_vgpr1
+# GCN: %1:vreg_64 = COPY $vgpr2_vgpr3
+# GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
+# GCN: %9:vgpr_32 = IMPLICIT_DEF
+# GCN: %8:vgpr_32 = IMPLICIT_DEF
+# GCN: %6:vgpr_32 = V_ADD_I32_dpp %9, %1.sub0, %2, 1, 15, 15, 1, implicit-def $vcc, implicit $exec
+# GCN: %7:vgpr_32 = V_ADDC_U32_dpp %8, %1.sub1, %2, 1, 15, 15, 1, implicit-def $vcc, implicit $vcc, implicit $exec
+name: dpp_reg_sequence_both_combined
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+
+ %0:vreg_64 = COPY $vgpr0_vgpr1
+ %1:vreg_64 = COPY $vgpr2_vgpr3
+ %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
+ %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
+ %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
+ %6:vgpr_32 = V_ADD_I32_e32 %4.sub0, %5, implicit-def $vcc, implicit $exec
+ %7:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %5, implicit-def $vcc, implicit $vcc, implicit $exec
+...
+
+# GCN-LABEL: name: dpp_reg_sequence_first_combined
+# GCN: %0:vreg_64 = COPY $vgpr0_vgpr1
+# GCN: %1:vreg_64 = COPY $vgpr2_vgpr3
+# GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
+# GCN: %8:vgpr_32 = IMPLICIT_DEF
+# GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 1, 1, 1, implicit $exec
+# GCN: %5:vreg_64 = REG_SEQUENCE undef %3:vgpr_32, %subreg.sub0, %4, %subreg.sub1
+# GCN: %6:vgpr_32 = V_ADD_I32_dpp %8, %1.sub0, %2, 1, 15, 15, 1, implicit-def $vcc, implicit $exec
+# GCN: %7:vgpr_32 = V_ADDC_U32_e32 %5.sub1, %2, implicit-def $vcc, implicit $vcc, implicit $exec
+name: dpp_reg_sequence_first_combined
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+
+ %0:vreg_64 = COPY $vgpr0_vgpr1
+ %1:vreg_64 = COPY $vgpr2_vgpr3
+ %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
+ %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 1, 1, 1, implicit $exec
+ %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
+ %6:vgpr_32 = V_ADD_I32_e32 %4.sub0, %5, implicit-def $vcc, implicit $exec
+ %7:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %5, implicit-def $vcc, implicit $vcc, implicit $exec
+...
+
+# GCN-LABEL: name: dpp_reg_sequence_second_combined
+# GCN: %0:vreg_64 = COPY $vgpr0_vgpr1
+# GCN: %1:vreg_64 = COPY $vgpr2_vgpr3
+# GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
+# GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 1, 1, 1, implicit $exec
+# GCN: %8:vgpr_32 = IMPLICIT_DEF
+# GCN: %5:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, undef %4:vgpr_32, %subreg.sub1
+# GCN: %6:vgpr_32 = V_ADD_I32_e32 %5.sub0, %2, implicit-def $vcc, implicit $exec
+# GCN: %7:vgpr_32 = V_ADDC_U32_dpp %8, %1.sub1, %2, 1, 15, 15, 1, implicit-def $vcc, implicit $vcc, implicit $exec
+name: dpp_reg_sequence_second_combined
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+
+ %0:vreg_64 = COPY $vgpr0_vgpr1
+ %1:vreg_64 = COPY $vgpr2_vgpr3
+ %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
+ %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 1, 1, 1, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
+ %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
+ %6:vgpr_32 = V_ADD_I32_e32 %4.sub0, %5, implicit-def $vcc, implicit $exec
+ %7:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %5, implicit-def $vcc, implicit $vcc, implicit $exec
+...
+
+# GCN-LABEL: name: dpp_reg_sequence_none_combined
+# GCN: %0:vreg_64 = COPY $vgpr0_vgpr1
+# GCN: %1:vreg_64 = COPY $vgpr2_vgpr3
+# GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
+# GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 1, 1, 1, implicit $exec
+# GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 1, 1, 1, implicit $exec
+# GCN: %5:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, %4, %subreg.sub1
+# GCN: %6:vgpr_32 = V_ADD_I32_e32 %5.sub0, %2, implicit-def $vcc, implicit $exec
+# GCN: %7:vgpr_32 = V_ADDC_U32_e32 %5.sub1, %2, implicit-def $vcc, implicit $vcc, implicit $exec
+name: dpp_reg_sequence_none_combined
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+
+ %0:vreg_64 = COPY $vgpr0_vgpr1
+ %1:vreg_64 = COPY $vgpr2_vgpr3
+ %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
+ %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 1, 1, 1, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 1, 1, 1, implicit $exec
+ %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
+ %6:vgpr_32 = V_ADD_I32_e32 %4.sub0, %5, implicit-def $vcc, implicit $exec
+ %7:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %5, implicit-def $vcc, implicit $vcc, implicit $exec
+...
+
+# GCN-LABEL: name: dpp_reg_sequence_exec_changed
+# GCN: %0:vreg_64 = COPY $vgpr0_vgpr1
+# GCN: %1:vreg_64 = COPY $vgpr2_vgpr3
+# GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
+# GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
+# GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
+# GCN: %5:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, %4, %subreg.sub1
+# GCN: S_BRANCH %bb.1
+# GCN: bb.1:
+# GCN: %6:vgpr_32 = V_ADD_I32_e32 %5.sub0, %2, implicit-def $vcc, implicit $exec
+# GCN: %7:vgpr_32 = V_ADDC_U32_e32 %5.sub1, %2, implicit-def $vcc, implicit $vcc, implicit $exec
+name: dpp_reg_sequence_exec_changed
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+
+ %0:vreg_64 = COPY $vgpr0_vgpr1
+ %1:vreg_64 = COPY $vgpr2_vgpr3
+ %5:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
+ %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
+ %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
+ S_BRANCH %bb.1
+
+ bb.1:
+ %6:vgpr_32 = V_ADD_I32_e32 %4.sub0, %5, implicit-def $vcc, implicit $exec
+ %7:vgpr_32 = V_ADDC_U32_e32 %4.sub1, %5, implicit-def $vcc, implicit $vcc, implicit $exec
+...
+
+# GCN-LABEL: name: dpp_reg_sequence_subreg
+# GCN: %0:vreg_64 = COPY $vgpr0_vgpr1
+# GCN: %1:vreg_64 = COPY $vgpr2_vgpr3
+# GCN: %2:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
+# GCN: %3:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
+# GCN: %4:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
+# GCN: %5:vreg_64 = REG_SEQUENCE %3, %subreg.sub0, %4, %subreg.sub1
+# GCN: %6:vreg_64 = REG_SEQUENCE %5.sub0, %subreg.sub0, %5.sub1, %subreg.sub1
+# GCN: %7:vgpr_32 = V_ADD_I32_e32 %6.sub0, %2, implicit-def $vcc, implicit $exec
+# GCN: %8:vgpr_32 = V_ADDC_U32_e32 %6.sub1, %2, implicit-def $vcc, implicit $vcc, implicit $exec
+name: dpp_reg_sequence_subreg
+tracksRegLiveness: true
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1, $vgpr2_vgpr3
+
+ %0:vreg_64 = COPY $vgpr0_vgpr1
+ %1:vreg_64 = COPY $vgpr2_vgpr3
+ %8:vgpr_32 = V_MOV_B32_e32 5, implicit $exec
+ %2:vgpr_32 = V_MOV_B32_dpp %0.sub0, %1.sub0, 1, 15, 15, 1, implicit $exec
+ %3:vgpr_32 = V_MOV_B32_dpp %0.sub1, %1.sub1, 1, 15, 15, 1, implicit $exec
+ %4:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
+ %5:vreg_64 = REG_SEQUENCE %4.sub0, %subreg.sub0, %4.sub1, %subreg.sub1
+ %6:vgpr_32 = V_ADD_I32_e32 %5.sub0, %8, implicit-def $vcc, implicit $exec
+ %7:vgpr_32 = V_ADDC_U32_e32 %5.sub1, %8, implicit-def $vcc, implicit $vcc, implicit $exec
+...
More information about the llvm-commits mailing list