[llvm] 432720f - [GlobalISel] Combine sext([sz]ext) -> [sz]ext, zext(zext) -> zext

Dominik Montada via llvm-commits llvm-commits at lists.llvm.org
Wed Apr 8 02:24:35 PDT 2020


Author: Dominik Montada
Date: 2020-04-08T11:24:29+02:00
New Revision: 432720f1c4c6b47edfb475f8616e471d14c26974

URL: https://github.com/llvm/llvm-project/commit/432720f1c4c6b47edfb475f8616e471d14c26974
DIFF: https://github.com/llvm/llvm-project/commit/432720f1c4c6b47edfb475f8616e471d14c26974.diff

LOG: [GlobalISel] Combine sext([sz]ext) -> [sz]ext, zext(zext) -> zext

Summary:
Combine sext(zext x) to (zext x) since the sign-bit is 0
after the zero-extension.

Combine sext(sext x) to (sext x) and zext(zext x) to (zext x)
since the intermediate step is not needed.

Reviewers: arsenm, volkan, aemerson, aditya_nandakumar

Reviewed By: arsenm

Subscribers: jvesely, wdng, nhaehnle, rovka, kerbowa, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D77210

Added: 
    

Modified: 
    llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
    llvm/test/CodeGen/AMDGPU/GlobalISel/combine-ext-legalizer.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddo.mir
    llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubo.mir

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
index ae82e98f5326..4c12bf70e1b7 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h
@@ -96,7 +96,8 @@ class LegalizationArtifactCombiner {
 
   bool tryCombineZExt(MachineInstr &MI,
                       SmallVectorImpl<MachineInstr *> &DeadInsts,
-                      SmallVectorImpl<Register> &UpdatedDefs) {
+                      SmallVectorImpl<Register> &UpdatedDefs,
+                      GISelObserverWrapper &Observer) {
     assert(MI.getOpcode() == TargetOpcode::G_ZEXT);
 
     Builder.setInstr(MI);
@@ -121,6 +122,18 @@ class LegalizationArtifactCombiner {
       return true;
     }
 
+    // zext(zext x) -> (zext x)
+    Register ZextSrc;
+    if (mi_match(SrcReg, MRI, m_GZExt(m_Reg(ZextSrc)))) {
+      LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI);
+      Observer.changingInstr(MI);
+      MI.getOperand(1).setReg(ZextSrc);
+      Observer.changedInstr(MI);
+      UpdatedDefs.push_back(DstReg);
+      markDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts);
+      return true;
+    }
+
     // Try to fold zext(g_constant) when the larger constant type is legal.
     // Can't use MIPattern because we don't have a specific constant in mind.
     auto *SrcMI = MRI.getVRegDef(SrcReg);
@@ -162,6 +175,21 @@ class LegalizationArtifactCombiner {
       markInstAndDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts);
       return true;
     }
+
+    // sext(zext x) -> (zext x)
+    // sext(sext x) -> (sext x)
+    Register ExtSrc;
+    MachineInstr *ExtMI;
+    if (mi_match(SrcReg, MRI,
+                 m_all_of(m_MInstr(ExtMI), m_any_of(m_GZExt(m_Reg(ExtSrc)),
+                                                    m_GSExt(m_Reg(ExtSrc)))))) {
+      LLVM_DEBUG(dbgs() << ".. Combine MI: " << MI);
+      Builder.buildInstr(ExtMI->getOpcode(), {DstReg}, {ExtSrc});
+      UpdatedDefs.push_back(DstReg);
+      markInstAndDefDead(MI, *MRI.getVRegDef(SrcReg), DeadInsts);
+      return true;
+    }
+
     return tryFoldImplicitDef(MI, DeadInsts, UpdatedDefs);
   }
 
@@ -582,7 +610,7 @@ class LegalizationArtifactCombiner {
       Changed = tryCombineAnyExt(MI, DeadInsts, UpdatedDefs);
       break;
     case TargetOpcode::G_ZEXT:
-      Changed = tryCombineZExt(MI, DeadInsts, UpdatedDefs);
+      Changed = tryCombineZExt(MI, DeadInsts, UpdatedDefs, WrapperObserver);
       break;
     case TargetOpcode::G_SEXT:
       Changed = tryCombineSExt(MI, DeadInsts, UpdatedDefs);
@@ -657,15 +685,13 @@ class LegalizationArtifactCombiner {
     }
   }
 
-  /// Mark MI as dead. If a def of one of MI's operands, DefMI, would also be
-  /// dead due to MI being killed, then mark DefMI as dead too.
-  /// Some of the combines (extends(trunc)), try to walk through redundant
-  /// copies in between the extends and the truncs, and this attempts to collect
-  /// the in between copies if they're dead.
-  void markInstAndDefDead(MachineInstr &MI, MachineInstr &DefMI,
-                          SmallVectorImpl<MachineInstr *> &DeadInsts) {
-    DeadInsts.push_back(&MI);
-
+  /// Mark a def of one of MI's original operands, DefMI, as dead if changing MI
+  /// (either by killing it or changing operands) results in DefMI being dead
+  /// too. In-between COPYs or artifact-casts are also collected if they are
+  /// dead.
+  /// MI is not marked dead.
+  void markDefDead(MachineInstr &MI, MachineInstr &DefMI,
+                   SmallVectorImpl<MachineInstr *> &DeadInsts) {
     // Collect all the copy instructions that are made dead, due to deleting
     // this instruction. Collect all of them until the Trunc(DefMI).
     // Eg,
@@ -696,6 +722,17 @@ class LegalizationArtifactCombiner {
       DeadInsts.push_back(&DefMI);
   }
 
+  /// Mark MI as dead. If a def of one of MI's operands, DefMI, would also be
+  /// dead due to MI being killed, then mark DefMI as dead too.
+  /// Some of the combines (extends(trunc)), try to walk through redundant
+  /// copies in between the extends and the truncs, and this attempts to collect
+  /// the in between copies if they're dead.
+  void markInstAndDefDead(MachineInstr &MI, MachineInstr &DefMI,
+                          SmallVectorImpl<MachineInstr *> &DeadInsts) {
+    DeadInsts.push_back(&MI);
+    markDefDead(MI, DefMI, DeadInsts);
+  }
+
   /// Erase the dead instructions in the list and call the observer hooks.
   /// Normally the Legalizer will deal with erasing instructions that have been
   /// marked dead. However, for the trunc(ext(x)) cases we can end up trying to

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-ext-legalizer.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-ext-legalizer.mir
index a4468ad32f59..2c9b239f1d54 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-ext-legalizer.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-ext-legalizer.mir
@@ -35,3 +35,51 @@ body: |
     %2:_(s64) = G_ZEXT %1
     $vgpr0_vgpr1 = COPY %2
 ...
+
+---
+name: test_zext_zext_i32_i48_i64
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1
+
+    ; CHECK-LABEL: name: test_zext_zext_i32_i48_i64
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[COPY]](s32)
+    ; CHECK: $vgpr0_vgpr1 = COPY [[ZEXT]](s64)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s48) = G_ZEXT %0
+    %2:_(s64) = G_ZEXT %1
+    $vgpr0_vgpr1 = COPY %2
+...
+
+---
+name: test_sext_zext_i32_i48_i64
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1
+
+    ; CHECK-LABEL: name: test_sext_zext_i32_i48_i64
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[COPY]](s32)
+    ; CHECK: $vgpr0_vgpr1 = COPY [[ZEXT]](s64)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s48) = G_ZEXT %0
+    %2:_(s64) = G_SEXT %1
+    $vgpr0_vgpr1 = COPY %2
+...
+
+---
+name: test_sext_sext_i32_i48_i64
+body: |
+  bb.0:
+    liveins: $vgpr0_vgpr1
+
+    ; CHECK-LABEL: name: test_sext_sext_i32_i48_i64
+    ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; CHECK: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[COPY]](s32)
+    ; CHECK: $vgpr0_vgpr1 = COPY [[SEXT]](s64)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s48) = G_SEXT %0
+    %2:_(s64) = G_SEXT %1
+    $vgpr0_vgpr1 = COPY %2
+...

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddo.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddo.mir
index 70956fac27d0..5b90d467a731 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddo.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddo.mir
@@ -18,24 +18,16 @@ body: |
     ; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY4]], 7
     ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
     ; CHECK: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY5]], 7
-    ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[SEXT_INREG]](s32)
-    ; CHECK: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY6]], 8
-    ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SEXT_INREG1]](s32)
-    ; CHECK: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY7]], 8
-    ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG2]](s32), [[SEXT_INREG3]]
-    ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-    ; CHECK: [[SEXT_INREG4:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY8]], 7
-    ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK: [[SEXT_INREG5:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY9]], 7
-    ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY [[SEXT_INREG4]](s32)
-    ; CHECK: [[SEXT_INREG6:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY10]], 8
-    ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY [[SEXT_INREG5]](s32)
-    ; CHECK: [[SEXT_INREG7:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY11]], 8
-    ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG6]](s32), [[SEXT_INREG7]]
+    ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG]](s32), [[SEXT_INREG1]]
+    ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+    ; CHECK: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY6]], 7
+    ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CHECK: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY7]], 7
+    ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG2]](s32), [[SEXT_INREG3]]
     ; CHECK: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]]
     ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 127
-    ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY [[ADD]](s32)
-    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C1]]
+    ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY [[ADD]](s32)
+    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C1]]
     ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[XOR]](s1)
     ; CHECK: $vgpr0 = COPY [[AND]](s32)
     ; CHECK: $vgpr1 = COPY [[ZEXT]](s32)

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubo.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubo.mir
index 803ca6819db1..a911ac506d29 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubo.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubo.mir
@@ -18,24 +18,16 @@ body: |
     ; CHECK: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY4]], 7
     ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[COPY]](s32)
     ; CHECK: [[SEXT_INREG1:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY5]], 7
-    ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[SEXT_INREG]](s32)
-    ; CHECK: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY6]], 8
-    ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY [[SEXT_INREG1]](s32)
-    ; CHECK: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY7]], 8
-    ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG2]](s32), [[SEXT_INREG3]]
-    ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
-    ; CHECK: [[SEXT_INREG4:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY8]], 7
-    ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C]](s32)
-    ; CHECK: [[SEXT_INREG5:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY9]], 7
-    ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY [[SEXT_INREG4]](s32)
-    ; CHECK: [[SEXT_INREG6:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY10]], 8
-    ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY [[SEXT_INREG5]](s32)
-    ; CHECK: [[SEXT_INREG7:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY11]], 8
-    ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SEXT_INREG6]](s32), [[SEXT_INREG7]]
+    ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[SEXT_INREG]](s32), [[SEXT_INREG1]]
+    ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32)
+    ; CHECK: [[SEXT_INREG2:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY6]], 7
+    ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+    ; CHECK: [[SEXT_INREG3:%[0-9]+]]:_(s32) = G_SEXT_INREG [[COPY7]], 7
+    ; CHECK: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[SEXT_INREG2]](s32), [[SEXT_INREG3]]
     ; CHECK: [[XOR:%[0-9]+]]:_(s1) = G_XOR [[ICMP1]], [[ICMP]]
     ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 127
-    ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY [[SUB]](s32)
-    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY12]], [[C1]]
+    ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY [[SUB]](s32)
+    ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY8]], [[C1]]
     ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[XOR]](s1)
     ; CHECK: $vgpr0 = COPY [[AND]](s32)
     ; CHECK: $vgpr1 = COPY [[ZEXT]](s32)


        


More information about the llvm-commits mailing list