[llvm] 432720f - [GlobalISel] Combine sext([sz]ext) -> [sz]ext, zext(zext) -> zext
Dominik Montada via llvm-commits
llvm-commits at lists.llvm.org
Wed Apr 8 02:24:35 PDT 2020
Author: Dominik Montada
Date: 2020-04-08T11:24:29+02:00
New Revision: 432720f1c4c6b47edfb475f8616e471d14c26974
URL: https://github.com/llvm/llvm-project/commit/432720f1c4c6b47edfb475f8616e471d14c26974
DIFF: https://github.com/llvm/llvm-project/commit/432720f1c4c6b47edfb475f8616e471d14c26974.diff
LOG: [GlobalISel] Combine sext([sz]ext) -> [sz]ext, zext(zext) -> zext
Summary:
Combine sext(zext x) to (zext x) since the sign-bit is 0
after the zero-extension.
Combine sext(sext x) to (sext x) and zext(zext x) to (zext x)
since the intermediate step is not needed.
Reviewers: arsenm, volkan, aemerson, aditya_nandakumar
Reviewed By: arsenm
Subscribers: jvesely, wdng, nhaehnle, rovka, kerbowa, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D77210
Added:
Modified:
llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
llvm/test/CodeGen/AMDGPU/GlobalISel/combine-ext-legalizer.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddo.mir
llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubo.mir
Removed:
################################################################################
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
index ae82e98f5326..4c12bf70e1b7 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
@@ -96,7 +96,8 @@ class LegalizationArtifactCombiner {
bool tryCombineZExt(MachineInstr &MI,
SmallVectorImpl<MachineInstr *> &DeadInsts,
- SmallVectorImpl<Register> &UpdatedDefs) {
+ SmallVectorImpl<Register> &UpdatedDefs,
+ GISelObserverWrapper &Observer) {
assert(MI.getOpcode() == TargetOpcode::G_ZEXT);
Builder.setInstr(MI);
@@ -121,6 +122,18 @@ class LegalizationArtifactCombiner {
return true;
}
+ // zext(zext x) -> (zext x)
+ Register ZextSrc;
+ if (mi_match(SrcReg, MRI, m_GZExt(m_Reg(ZextSrc)))) {
+ LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI);
+ Observer.changingInstr(MI);
+ MI.getOperand(1).setReg(ZextSrc);
+ Observer.changedInstr(MI);
+ UpdatedDefs.push_back(DstReg);
+ markDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts);
+ return true;
+ }
+
// Try to fold zext(g_constant) when the larger constant type is legal.
// Can't use MIPattern because we don't have a specific constant in mind.
auto *SrcMI = MRI.getVRegDef(SrcReg);
@@ -162,6 +175,21 @@ class LegalizationArtifactCombiner {
markInstAndDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts);
return true;
}
+
+ // sext(zext x) -> (zext x)
+ // sext(sext x) -> (sext x)
+ Register ExtSrc;
+ MachineInstr *ExtMI;
+ if (mi_match(SrcReg, MRI,
+ m_all_of(m_MInstr(ExtMI), m_any_of(m_GZExt(m_Reg(ExtSrc)),
+ m_GSExt(m_Reg(ExtSrc)))))) {
+ LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI);
+ Builder.buildInstr(ExtMI->getOpcode(), {DstReg}, {ExtSrc});
+ UpdatedDefs.push_back(DstReg);
+ markInstAndDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts);
+ return true;
+ }
+
return tryFoldImplicitDef(MI, DeadInsts, UpdatedDefs);
}
@@ -582,7 +610,7 @@ class LegalizationArtifactCombiner {
Changed = tryCombineAnyExt(MI, DeadInsts, UpdatedDefs);
break;
case TargetOpcode::G_ZEXT:
- Changed = tryCombineZExt(MI, DeadInsts, UpdatedDefs);
+ Changed = tryCombineZExt(MI, DeadInsts, UpdatedDefs, WrapperObserver);
break;
case TargetOpcode::G_SEXT:
Changed = tryCombineSExt(MI, DeadInsts, UpdatedDefs);
@@ -657,15 +685,13 @@ class LegalizationArtifactCombiner {
}
}
- /// Mark MI as dead. If a def of one of MI's operands, DefMI, would also be
- /// dead due to MI being killed, then mark DefMI as dead too.
- /// Some of the combines (extends(trunc)), try to walk through redundant
- /// copies in between the extends and the truncs, and this attempts to collect
- /// the in between copies if they're dead.
- void markInstAndDefDead(MachineInstr &MI, MachineInstr &DefMI,
- SmallVectorImpl<MachineInstr *> &DeadInsts) {
- DeadInsts.push_back(&MI);
-
+ /// Mark a def of one of MI's original operands, DefMI, as dead if changing MI
+ /// (either by killing it or changing operands) results in DefMI being dead
+ /// too. In-between COPYs or artifact-casts are also collected if they are
+ /// dead.
+ /// MI is not marked dead.
+ void markDefDead(MachineInstr &MI, MachineInstr &DefMI,
+ SmallVectorImpl<MachineInstr *> &DeadInsts) {
// Collect all the copy instructions that are made dead, due to deleting
// this instruction. Collect all of them until the Trunc(DefMI).
// Eg,
@@ -696,6 +722,17 @@ class LegalizationArtifactCombiner {
DeadInsts.push_back(&DefMI);
}
+ /// Mark MI as dead. If a def of one of MI's operands, DefMI, would also be
+ /// dead due to MI being killed, then mark DefMI as dead too.
+ /// Some of the combines (extends(trunc)), try to walk through redundant
+ /// copies in between the extends and the truncs, and this attempts to collect
+ /// the in between copies if they're dead.
+ void markInstAndDefDead(MachineInstr &MI, MachineInstr &DefMI,
+ SmallVectorImpl<MachineInstr *> &DeadInsts) {
+ DeadInsts.push_back(&MI);
+ markDefDead(MI, DefMI, DeadInsts);
+ }
+
/// Erase the dead instructions in the list and call the observer hooks.
/// Normally the Legalizer will deal with erasing instructions that have been
/// marked dead. However, for the trunc(ext(x)) cases we can end up trying to
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-ext-legalizer.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-ext-legalizer.mir
index a4468ad32f59..2c9b239f1d54 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-ext-legalizer.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-ext-legalizer.mir
@@ -35,3 +35,51 @@ body: |
%2:_(s64) = G_ZEXT %1
$vgpr0_vgpr1 = COPY %2
...
+
+---
+name: test_zext_zext_i32_i48_i64
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1
+
+ ; CHECK-LABEL: name: test_zext_zext_i32_i48_i64
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[COPY]](s32)
+ ; CHECK: $vgpr0_vgpr1 = COPY [[ZEXT]](s64)
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s48) = G_ZEXT %0
+ %2:_(s64) = G_ZEXT %1
+ $vgpr0_vgpr1 = COPY %2
+...
+
+---
+name: test_sext_zext_i32_i48_i64
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1
+
+ ; CHECK-LABEL: name: test_sext_zext_i32_i48_i64
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[COPY]](s32)
+ ; CHECK: $vgpr0_vgpr1 = COPY [[ZEXT]](s64)
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s48) = G_ZEXT %0
+ %2:_(s64) = G_SEXT %1
+ $vgpr0_vgpr1 = COPY %2
+...
+
+---
+name: test_sext_sext_i32_i48_i64
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1
+
+ ; CHECK-LABEL: name: test_sext_sext_i32_i48_i64
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; CHECK: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[COPY]](s32)
+ ; CHECK: $vgpr0_vgpr1 = COPY [[SEXT]](s64)
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s48) = G_SEXT %0
+ %2:_(s64) = G_SEXT %1
+ $vgpr0_vgpr1 = COPY %2
+...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddo.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddo.mir
index 70956fac27d0..5b90d467a731 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddo.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddo.mir
@@ -18,24 +18,16 @@ body: |
; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY4]], 7
; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY5]], 7
- ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[SEXT_INREG]](s32)
- ; CHECK: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY6]], 8
- ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SEXT_INREG1]](s32)
- ; CHECK: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY7]], 8
- ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG2]](s32), [[SEXT_INREG3]]
- ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
- ; CHECK: [[SEXT_INREG4:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY8]], 7
- ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK: [[SEXT_INREG5:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY9]], 7
- ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY [[SEXT_INREG4]](s32)
- ; CHECK: [[SEXT_INREG6:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY10]], 8
- ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY [[SEXT_INREG5]](s32)
- ; CHECK: [[SEXT_INREG7:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY11]], 8
- ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG6]](s32), [[SEXT_INREG7]]
+ ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG]](s32), [[SEXT_INREG1]]
+ ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; CHECK: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY6]], 7
+ ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+ ; CHECK: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY7]], 7
+ ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG2]](s32), [[SEXT_INREG3]]
; CHECK: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]]
; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 127
- ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY [[ADD]](s32)
- ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C1]]
+ ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY [[ADD]](s32)
+ ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C1]]
; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[XOR]](s1)
; CHECK: $vgpr0 = COPY [[AND]](s32)
; CHECK: $vgpr1 = COPY [[ZEXT]](s32)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubo.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubo.mir
index 803ca6819db1..a911ac506d29 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubo.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubo.mir
@@ -18,24 +18,16 @@ body: |
; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY4]], 7
; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
; CHECK: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY5]], 7
- ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[SEXT_INREG]](s32)
- ; CHECK: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY6]], 8
- ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SEXT_INREG1]](s32)
- ; CHECK: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY7]], 8
- ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG2]](s32), [[SEXT_INREG3]]
- ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
- ; CHECK: [[SEXT_INREG4:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY8]], 7
- ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK: [[SEXT_INREG5:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY9]], 7
- ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY [[SEXT_INREG4]](s32)
- ; CHECK: [[SEXT_INREG6:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY10]], 8
- ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY [[SEXT_INREG5]](s32)
- ; CHECK: [[SEXT_INREG7:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY11]], 8
- ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SEXT_INREG6]](s32), [[SEXT_INREG7]]
+ ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG]](s32), [[SEXT_INREG1]]
+ ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+ ; CHECK: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY6]], 7
+ ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+ ; CHECK: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY7]], 7
+ ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SEXT_INREG2]](s32), [[SEXT_INREG3]]
; CHECK: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]]
; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 127
- ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY [[SUB]](s32)
- ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C1]]
+ ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY [[SUB]](s32)
+ ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C1]]
; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[XOR]](s1)
; CHECK: $vgpr0 = COPY [[AND]](s32)
; CHECK: $vgpr1 = COPY [[ZEXT]](s32)
More information about the llvm-commits
mailing list