[llvm] 5d6d9b6 - [GlobalISel] Propagate extends through G_PHIs into the incoming value blocks.

Amara Emerson via llvm-commits llvm-commits at lists.llvm.org
Fri Feb 12 11:52:58 PST 2021


Author: Amara Emerson
Date: 2021-02-12T11:52:52-08:00
New Revision: 5d6d9b63a30843457a6139ff07ea9d664bebc988

URL: https://github.com/llvm/llvm-project/commit/5d6d9b63a30843457a6139ff07ea9d664bebc988
DIFF: https://github.com/llvm/llvm-project/commit/5d6d9b63a30843457a6139ff07ea9d664bebc988.diff

LOG: [GlobalISel] Propagate extends through G_PHIs into the incoming value blocks.

This combine tries to do inter-block hoisting of extends of G_PHIs into the
blocks that define the phi's incoming values. The idea is to expose further
optimization opportunities that are normally obscured by the PHI.
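
As a rough illustration (modeled on the sext_icst_through_phi test added
below; register names are made up), the combine rewrites MIR like this:

  bb.1:
    %a:_(s32) = G_CONSTANT i32 4
    G_BR %bb.3
  bb.2:
    %b:_(s32) = G_CONSTANT i32 10
  bb.3:
    %phi:_(s32) = G_PHI %a(s32), %bb.1, %b(s32), %bb.2
    %ext:_(s64) = G_SEXT %phi(s32)

into:

  bb.1:
    %a:_(s32) = G_CONSTANT i32 4
    %ea:_(s64) = G_SEXT %a(s32)
    G_BR %bb.3
  bb.2:
    %b:_(s32) = G_CONSTANT i32 10
    %eb:_(s64) = G_SEXT %b(s32)
  bb.3:
    %ext:_(s64) = G_PHI %ea(s64), %bb.1, %eb(s64), %bb.2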

Some basic heuristics and a target hook for AArch64 are added to allow tuning.
E.g. if the extend is used by a G_PTR_ADD, the combine is not performed, since
the extend may be folded into the addressing mode during selection.
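
For example, a pattern like the following (sketched from the
sext_icst_through_phi_used_by_ptradd test added below; names are
illustrative) is left alone, because on AArch64 the sign-extend is likely to
fold into a register-offset (sxtw) addressing mode:

  %phi:_(s32) = G_PHI %cst32_4(s32), %bb.1, %cst32_10(s32), %bb.2
  %ext:_(s64) = G_SEXT %phi(s32)
  %ptr:_(p0) = G_PTR_ADD %base, %ext(s64)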

There are very minor code size improvements with -Os on AArch64, but the real
benefit is that the combine unlocks other optimizations, like AArch64
conditional compares, on some benchmarks.

Differential Revision: https://reviews.llvm.org/D95703

Added: 
    llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-prop-extends-phi.mir

Modified: 
    llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
    llvm/include/llvm/CodeGen/TargetInstrInfo.h
    llvm/include/llvm/Target/GlobalISel/Combine.td
    llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
    llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
    llvm/lib/Target/AArch64/AArch64InstrInfo.h

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 9c72b90522a0..3ae7cb8ea67c 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -486,6 +486,9 @@ class CombinerHelper {
   bool applyLoadOrCombine(MachineInstr &MI,
                           std::function<void(MachineIRBuilder &)> &MatchInfo);
 
+  bool matchExtendThroughPhis(MachineInstr &MI, MachineInstr *&ExtMI);
+  bool applyExtendThroughPhis(MachineInstr &MI, MachineInstr *&ExtMI);
+
   /// Try to transform \p MI by using all of the above
   /// combine functions. Returns true if changed.
   bool tryCombine(MachineInstr &MI);

diff  --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
index 8db4e2c5b787..bbd1ca35af3c 100644
--- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
@@ -1929,6 +1929,14 @@ class TargetInstrInfo : public MCInstrInfo {
   virtual Optional<ParamLoadedValue> describeLoadedValue(const MachineInstr &MI,
                                                          Register Reg) const;
 
+  /// Given the generic extension instruction \p ExtMI, returns true if this
+  /// extension is a likely candidate for being folded into another
+  /// instruction.
+  virtual bool isExtendLikelyToBeFolded(MachineInstr &ExtMI,
+                                        MachineRegisterInfo &MRI) const {
+    return false;
+  }
+
   /// Return MIR formatter to format/parse MIR operands.  Target can override
   /// this virtual function and return target specific MIR formatter.
   virtual const MIRFormatter *getMIRFormatter() const {

diff  --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index e2c7a90a1b16..07b331d71357 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -553,6 +553,13 @@ def load_or_combine : GICombineRule<
     [{ return Helper.matchLoadOrCombine(*${root}, ${info}); }]),
   (apply [{ return Helper.applyLoadOrCombine(*${root}, ${info}); }])>;
 
+def extend_through_phis_matchdata: GIDefMatchData<"MachineInstr*">;
+def extend_through_phis : GICombineRule<
+  (defs root:$root, extend_through_phis_matchdata:$matchinfo),
+  (match (wip_match_opcode G_PHI):$root,
+    [{ return Helper.matchExtendThroughPhis(*${root}, ${matchinfo}); }]),
+  (apply [{ return Helper.applyExtendThroughPhis(*${root}, ${matchinfo}); }])>;
+
 // Currently only the one combine above.
 def insert_vec_elt_combines : GICombineGroup<
                             [combine_insert_vec_elts_build_vector]>;
@@ -579,6 +586,8 @@ def known_bits_simplifications : GICombineGroup<[
 
 def width_reduction_combines : GICombineGroup<[reduce_shl_of_extend]>;
 
+def phi_combines : GICombineGroup<[extend_through_phis]>;
+
 def select_combines : GICombineGroup<[select_undef_cmp, select_constant_cmp]>;
 
 def trivial_combines : GICombineGroup<[copy_prop, mul_to_shl, add_p2i_to_ptradd,
@@ -586,7 +595,7 @@ def trivial_combines : GICombineGroup<[copy_prop, mul_to_shl, add_p2i_to_ptradd,
 
 def all_combines : GICombineGroup<[trivial_combines, insert_vec_elt_combines,
     ptr_add_immed_chain, combines_for_extload, combine_indexed_load_store,
-    undef_combines, identity_combines, simplify_add_to_sub,
+    undef_combines, identity_combines, phi_combines, simplify_add_to_sub,
     hoist_logic_op_with_same_opcode_hands,
     shl_ashr_to_sext_inreg, sext_inreg_of_load,
     width_reduction_combines, select_combines,

diff  --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 29b617d027a3..2a2c505fdfed 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -6,6 +6,7 @@
 //
 //===----------------------------------------------------------------------===//
 #include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
+#include "llvm/ADT/SetVector.h"
 #include "llvm/CodeGen/GlobalISel/Combiner.h"
 #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
 #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
@@ -13,6 +14,7 @@
 #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
 #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
 #include "llvm/CodeGen/GlobalISel/Utils.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineInstr.h"
@@ -3560,6 +3562,108 @@ bool CombinerHelper::matchLoadOrCombine(
   return true;
 }
 
+bool CombinerHelper::matchExtendThroughPhis(MachineInstr &MI,
+                                            MachineInstr *&ExtMI) {
+  assert(MI.getOpcode() == TargetOpcode::G_PHI);
+
+  Register DstReg = MI.getOperand(0).getReg();
+
+  // TODO: Extending a vector may be expensive, don't do this until heuristics
+  // are better.
+  if (MRI.getType(DstReg).isVector())
+    return false;
+
+  // Try to match a phi, whose only use is an extend.
+  if (!MRI.hasOneNonDBGUse(DstReg))
+    return false;
+  ExtMI = &*MRI.use_instr_nodbg_begin(DstReg);
+  switch (ExtMI->getOpcode()) {
+  case TargetOpcode::G_ANYEXT:
+    return true; // G_ANYEXT is usually free.
+  case TargetOpcode::G_ZEXT:
+  case TargetOpcode::G_SEXT:
+    break;
+  default:
+    return false;
+  }
+
+  // If the target is likely to fold this extend away, don't propagate.
+  if (Builder.getTII().isExtendLikelyToBeFolded(*ExtMI, MRI))
+    return false;
+
+  // We don't want to propagate the extends unless there's a good chance that
+  // they'll be optimized in some way.
+  // Collect the unique incoming values.
+  SmallPtrSet<MachineInstr *, 4> InSrcs;
+  for (unsigned Idx = 1; Idx < MI.getNumOperands(); Idx += 2) {
+    auto *DefMI = getDefIgnoringCopies(MI.getOperand(Idx).getReg(), MRI);
+    switch (DefMI->getOpcode()) {
+    case TargetOpcode::G_LOAD:
+    case TargetOpcode::G_TRUNC:
+    case TargetOpcode::G_SEXT:
+    case TargetOpcode::G_ZEXT:
+    case TargetOpcode::G_ANYEXT:
+    case TargetOpcode::G_CONSTANT:
+      InSrcs.insert(getDefIgnoringCopies(MI.getOperand(Idx).getReg(), MRI));
+      // Don't try to propagate if there are too many places to create new
+      // extends, chances are it'll increase code size.
+      if (InSrcs.size() > 2)
+        return false;
+      break;
+    default:
+      return false;
+    }
+  }
+  return true;
+}
+
+bool CombinerHelper::applyExtendThroughPhis(MachineInstr &MI,
+                                            MachineInstr *&ExtMI) {
+  assert(MI.getOpcode() == TargetOpcode::G_PHI);
+  Register DstReg = ExtMI->getOperand(0).getReg();
+  LLT ExtTy = MRI.getType(DstReg);
+
+  // Propagate the extension into each incoming register's defining block.
+  // Use a SetVector here because PHIs can have duplicate edges, and we want
+  // deterministic iteration order.
+  SmallSetVector<MachineInstr *, 8> SrcMIs;
+  SmallDenseMap<MachineInstr *, MachineInstr *, 8> OldToNewSrcMap;
+  for (unsigned SrcIdx = 1; SrcIdx < MI.getNumOperands(); SrcIdx += 2) {
+    auto *SrcMI = MRI.getVRegDef(MI.getOperand(SrcIdx).getReg());
+    if (!SrcMIs.insert(SrcMI))
+      continue;
+
+    // Build an extend after each src inst.
+    auto *MBB = SrcMI->getParent();
+    MachineBasicBlock::iterator InsertPt = ++SrcMI->getIterator();
+    if (InsertPt != MBB->end() && InsertPt->isPHI())
+      InsertPt = MBB->getFirstNonPHI();
+
+    Builder.setInsertPt(*SrcMI->getParent(), InsertPt);
+    Builder.setDebugLoc(MI.getDebugLoc());
+    auto NewExt = Builder.buildExtOrTrunc(ExtMI->getOpcode(), ExtTy,
+                                          SrcMI->getOperand(0).getReg());
+    OldToNewSrcMap[SrcMI] = NewExt;
+  }
+
+  // Create a new phi with the extended inputs.
+  Builder.setInstrAndDebugLoc(MI);
+  auto NewPhi = Builder.buildInstrNoInsert(TargetOpcode::G_PHI);
+  NewPhi.addDef(DstReg);
+  for (unsigned SrcIdx = 1; SrcIdx < MI.getNumOperands(); ++SrcIdx) {
+    auto &MO = MI.getOperand(SrcIdx);
+    if (!MO.isReg()) {
+      NewPhi.addMBB(MO.getMBB());
+      continue;
+    }
+    auto *NewSrc = OldToNewSrcMap[MRI.getVRegDef(MO.getReg())];
+    NewPhi.addUse(NewSrc->getOperand(0).getReg());
+  }
+  Builder.insertInstr(NewPhi);
+  ExtMI->eraseFromParent();
+  return true;
+}
+
 bool CombinerHelper::applyLoadOrCombine(
     MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
   Builder.setInstrAndDebugLoc(MI);

diff  --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 0e60795caf47..6fd0dc58a470 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -7171,6 +7171,26 @@ AArch64InstrInfo::describeLoadedValue(const MachineInstr &MI,
   return TargetInstrInfo::describeLoadedValue(MI, Reg);
 }
 
+bool AArch64InstrInfo::isExtendLikelyToBeFolded(
+    MachineInstr &ExtMI, MachineRegisterInfo &MRI) const {
+  assert(ExtMI.getOpcode() == TargetOpcode::G_SEXT ||
+         ExtMI.getOpcode() == TargetOpcode::G_ZEXT ||
+         ExtMI.getOpcode() == TargetOpcode::G_ANYEXT);
+
+  // Anyexts are nops.
+  if (ExtMI.getOpcode() == TargetOpcode::G_ANYEXT)
+    return true;
+
+  Register DefReg = ExtMI.getOperand(0).getReg();
+  if (!MRI.hasOneNonDBGUse(DefReg))
+    return false;
+
+  // It's likely that a sext/zext as a G_PTR_ADD offset will be folded into an
+  // addressing mode.
+  auto *UserMI = &*MRI.use_instr_nodbg_begin(DefReg);
+  return UserMI->getOpcode() == TargetOpcode::G_PTR_ADD;
+}
+
 uint64_t AArch64InstrInfo::getElementSizeForOpcode(unsigned Opc) const {
   return get(Opc).TSFlags & AArch64::ElementSizeMask;
 }

diff  --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/llvm/lib/Target/AArch64/AArch64InstrInfo.h
index 3406bc175132..ed38dda208c8 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.h
@@ -301,6 +301,9 @@ class AArch64InstrInfo final : public AArch64GenInstrInfo {
 
   unsigned int getTailDuplicateSize(CodeGenOpt::Level OptLevel) const override;
 
+  bool isExtendLikelyToBeFolded(MachineInstr &ExtMI,
+                                MachineRegisterInfo &MRI) const override;
+
   static void decomposeStackOffsetForFrameOffsets(const StackOffset &Offset,
                                                   int64_t &NumBytes,
                                                   int64_t &NumPredicateVectors,

diff  --git a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-prop-extends-phi.mir b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-prop-extends-phi.mir
new file mode 100644
index 000000000000..1653d9157732
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-prop-extends-phi.mir
@@ -0,0 +1,448 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple aarch64-apple-darwin -debugify-and-strip-all-safe -run-pass=aarch64-prelegalizer-combiner -global-isel -verify-machineinstrs %s -o - | FileCheck %s
+
+# Check that we propagate the G_SEXT to the sources of the phi operand.
+---
+name:            sext_icst_through_phi
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: name: sext_icst_through_phi
+  ; CHECK: bb.0.entry:
+  ; CHECK:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; CHECK:   liveins: $w0, $w1
+  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+  ; CHECK:   %one:_(s32) = G_CONSTANT i32 2
+  ; CHECK:   %cmp:_(s1) = G_ICMP intpred(sle), [[COPY]](s32), %one
+  ; CHECK:   G_BRCOND %cmp(s1), %bb.2
+  ; CHECK:   G_BR %bb.1
+  ; CHECK: bb.1:
+  ; CHECK:   successors: %bb.3(0x80000000)
+  ; CHECK:   %cst32_4:_(s32) = G_CONSTANT i32 4
+  ; CHECK:   [[SEXT:%[0-9]+]]:_(s64) = G_SEXT %cst32_4(s32)
+  ; CHECK:   G_BR %bb.3
+  ; CHECK: bb.2:
+  ; CHECK:   successors: %bb.3(0x80000000)
+  ; CHECK:   %cst32_10:_(s32) = G_CONSTANT i32 10
+  ; CHECK:   [[SEXT1:%[0-9]+]]:_(s64) = G_SEXT %cst32_10(s32)
+  ; CHECK: bb.3:
+  ; CHECK:   %ext:_(s64) = G_PHI [[SEXT]](s64), %bb.1, [[SEXT1]](s64), %bb.2
+  ; CHECK:   $x0 = COPY %ext(s64)
+  ; CHECK:   RET_ReallyLR implicit $x0
+  bb.1.entry:
+    liveins: $w0, $w1
+
+    %0:_(s32) = COPY $w0
+    %1:_(s32) = COPY $w1
+    %zero:_(s32) = G_CONSTANT i32 0
+    %one:_(s32) = G_CONSTANT i32 2
+    %cmp:_(s1) = G_ICMP intpred(sgt), %0(s32), %one
+    G_BRCOND %cmp(s1), %bb.2
+    G_BR %bb.3
+
+  bb.2:
+    %cst32_4:_(s32) = G_CONSTANT i32 4
+    G_BR %bb.4
+
+  bb.3:
+    %cst32_10:_(s32) = G_CONSTANT i32 10
+
+  bb.4:
+    %phi:_(s32) = G_PHI %cst32_4(s32), %bb.2, %cst32_10(s32), %bb.3
+    %ext:_(s64) = G_SEXT %phi
+    $x0 = COPY %ext(s64)
+    RET_ReallyLR implicit $x0
+
+...
+
+# Check that we propagate the G_ZEXT to the sources of the phi operand.
+---
+name:            zext_icst_through_phi
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: name: zext_icst_through_phi
+  ; CHECK: bb.0.entry:
+  ; CHECK:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; CHECK:   liveins: $w0, $w1
+  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+  ; CHECK:   %one:_(s32) = G_CONSTANT i32 2
+  ; CHECK:   %cmp:_(s1) = G_ICMP intpred(sle), [[COPY]](s32), %one
+  ; CHECK:   G_BRCOND %cmp(s1), %bb.2
+  ; CHECK:   G_BR %bb.1
+  ; CHECK: bb.1:
+  ; CHECK:   successors: %bb.3(0x80000000)
+  ; CHECK:   %cst32_4:_(s32) = G_CONSTANT i32 4
+  ; CHECK:   [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT %cst32_4(s32)
+  ; CHECK:   G_BR %bb.3
+  ; CHECK: bb.2:
+  ; CHECK:   successors: %bb.3(0x80000000)
+  ; CHECK:   %cst32_10:_(s32) = G_CONSTANT i32 10
+  ; CHECK:   [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT %cst32_10(s32)
+  ; CHECK: bb.3:
+  ; CHECK:   %ext:_(s64) = G_PHI [[ZEXT]](s64), %bb.1, [[ZEXT1]](s64), %bb.2
+  ; CHECK:   $x0 = COPY %ext(s64)
+  ; CHECK:   RET_ReallyLR implicit $x0
+  bb.1.entry:
+    liveins: $w0, $w1
+
+    %0:_(s32) = COPY $w0
+    %1:_(s32) = COPY $w1
+    %zero:_(s32) = G_CONSTANT i32 0
+    %one:_(s32) = G_CONSTANT i32 2
+    %cmp:_(s1) = G_ICMP intpred(sgt), %0(s32), %one
+    G_BRCOND %cmp(s1), %bb.2
+    G_BR %bb.3
+
+  bb.2:
+    %cst32_4:_(s32) = G_CONSTANT i32 4
+    G_BR %bb.4
+
+  bb.3:
+    %cst32_10:_(s32) = G_CONSTANT i32 10
+
+  bb.4:
+    %phi:_(s32) = G_PHI %cst32_4(s32), %bb.2, %cst32_10(s32), %bb.3
+    %ext:_(s64) = G_ZEXT %phi
+    $x0 = COPY %ext(s64)
+    RET_ReallyLR implicit $x0
+
+...
+
+# Don't handle vectors because of potential cost issues.
+---
+name:            sext_load_through_phi_vector
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: name: sext_load_through_phi_vector
+  ; CHECK: bb.0.entry:
+  ; CHECK:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; CHECK:   liveins: $x0, $q0, $q1
+  ; CHECK:   %ptr:_(p0) = COPY $x0
+  ; CHECK:   %cmp:_(s1) = G_IMPLICIT_DEF
+  ; CHECK:   G_BRCOND %cmp(s1), %bb.2
+  ; CHECK:   G_BR %bb.1
+  ; CHECK: bb.1:
+  ; CHECK:   successors: %bb.3(0x80000000)
+  ; CHECK:   %ld1:_(<4 x s32>) = G_LOAD %ptr(p0) :: (load 16)
+  ; CHECK:   G_BR %bb.3
+  ; CHECK: bb.2:
+  ; CHECK:   successors: %bb.3(0x80000000)
+  ; CHECK:   %ld2:_(<4 x s32>) = G_LOAD %ptr(p0) :: (load 16)
+  ; CHECK: bb.3:
+  ; CHECK:   %phi:_(<4 x s32>) = G_PHI %ld1(<4 x s32>), %bb.1, %ld2(<4 x s32>), %bb.2
+  ; CHECK:   %ext:_(<4 x s64>) = G_SEXT %phi(<4 x s32>)
+  ; CHECK:   G_STORE %ext(<4 x s64>), %ptr(p0) :: (store 16)
+  ; CHECK:   RET_ReallyLR
+  bb.1.entry:
+    liveins: $x0, $q0, $q1
+
+    %0:_(<4 x s32>) = COPY $q0
+    %1:_(<4 x s32>) = COPY $q1
+    %ptr:_(p0) = COPY $x0
+    %cmp:_(s1) = G_IMPLICIT_DEF
+    G_BRCOND %cmp(s1), %bb.2
+    G_BR %bb.3
+
+  bb.2:
+    %ld1:_(<4 x s32>) = G_LOAD %ptr(p0) :: (load 16)
+    G_BR %bb.4
+
+  bb.3:
+    %ld2:_(<4 x s32>) = G_LOAD %ptr(p0) :: (load 16)
+
+  bb.4:
+    %phi:_(<4 x s32>) = G_PHI %ld1(<4 x s32>), %bb.2, %ld2(<4 x s32>), %bb.3
+    %ext:_(<4 x s64>) = G_SEXT %phi
+    G_STORE %ext(<4 x s64>), %ptr(p0) :: (store 16)
+    RET_ReallyLR
+
+...
+
+
+# Check that we don't propagate if the extend is used by a G_PTR_ADD, which on
+# AArch64 has a good chance of folding in the extend.
+---
+name:            sext_icst_through_phi_used_by_ptradd
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: name: sext_icst_through_phi_used_by_ptradd
+  ; CHECK: bb.0.entry:
+  ; CHECK:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; CHECK:   liveins: $w0, $w1, $x2
+  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+  ; CHECK:   %base:_(p0) = COPY $x2
+  ; CHECK:   %one:_(s32) = G_CONSTANT i32 2
+  ; CHECK:   %cmp:_(s1) = G_ICMP intpred(sle), [[COPY]](s32), %one
+  ; CHECK:   G_BRCOND %cmp(s1), %bb.2
+  ; CHECK:   G_BR %bb.1
+  ; CHECK: bb.1:
+  ; CHECK:   successors: %bb.3(0x80000000)
+  ; CHECK:   %cst32_4:_(s32) = G_CONSTANT i32 4
+  ; CHECK:   G_BR %bb.3
+  ; CHECK: bb.2:
+  ; CHECK:   successors: %bb.3(0x80000000)
+  ; CHECK:   %cst32_10:_(s32) = G_CONSTANT i32 10
+  ; CHECK: bb.3:
+  ; CHECK:   %phi:_(s32) = G_PHI %cst32_4(s32), %bb.1, %cst32_10(s32), %bb.2
+  ; CHECK:   %ext:_(s64) = G_SEXT %phi(s32)
+  ; CHECK:   %ptr:_(p0) = G_PTR_ADD %base, %ext(s64)
+  ; CHECK:   $x0 = COPY %ptr(p0)
+  ; CHECK:   RET_ReallyLR implicit $x0
+  bb.1.entry:
+    liveins: $w0, $w1, $x2
+
+    %0:_(s32) = COPY $w0
+    %1:_(s32) = COPY $w1
+    %base:_(p0) = COPY $x2
+    %zero:_(s32) = G_CONSTANT i32 0
+    %one:_(s32) = G_CONSTANT i32 2
+    %cmp:_(s1) = G_ICMP intpred(sgt), %0(s32), %one
+    G_BRCOND %cmp(s1), %bb.2
+    G_BR %bb.3
+
+  bb.2:
+    %cst32_4:_(s32) = G_CONSTANT i32 4
+    G_BR %bb.4
+
+  bb.3:
+    %cst32_10:_(s32) = G_CONSTANT i32 10
+
+  bb.4:
+    %phi:_(s32) = G_PHI %cst32_4(s32), %bb.2, %cst32_10(s32), %bb.3
+    %ext:_(s64) = G_SEXT %phi
+    %ptr:_(p0) = G_PTR_ADD %base, %ext
+    $x0 = COPY %ptr(p0)
+    RET_ReallyLR implicit $x0
+
+...
+
+# Same as above, but here we do perform the combine: the extend has multiple
+# users, so removing it probably won't cost extra instructions.
+---
+name:            sext_icst_through_phi_used_by_ptradd_multiuse
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: name: sext_icst_through_phi_used_by_ptradd_multiuse
+  ; CHECK: bb.0.entry:
+  ; CHECK:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; CHECK:   liveins: $w0, $w1, $x2
+  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+  ; CHECK:   %base:_(p0) = COPY $x2
+  ; CHECK:   %one:_(s32) = G_CONSTANT i32 2
+  ; CHECK:   %cmp:_(s1) = G_ICMP intpred(sle), [[COPY]](s32), %one
+  ; CHECK:   G_BRCOND %cmp(s1), %bb.2
+  ; CHECK:   G_BR %bb.1
+  ; CHECK: bb.1:
+  ; CHECK:   successors: %bb.3(0x80000000)
+  ; CHECK:   %cst32_4:_(s32) = G_CONSTANT i32 4
+  ; CHECK:   [[SEXT:%[0-9]+]]:_(s64) = G_SEXT %cst32_4(s32)
+  ; CHECK:   G_BR %bb.3
+  ; CHECK: bb.2:
+  ; CHECK:   successors: %bb.3(0x80000000)
+  ; CHECK:   %cst32_10:_(s32) = G_CONSTANT i32 10
+  ; CHECK:   [[SEXT1:%[0-9]+]]:_(s64) = G_SEXT %cst32_10(s32)
+  ; CHECK: bb.3:
+  ; CHECK:   %ext:_(s64) = G_PHI [[SEXT]](s64), %bb.1, [[SEXT1]](s64), %bb.2
+  ; CHECK:   %ptr:_(p0) = G_PTR_ADD %base, %ext(s64)
+  ; CHECK:   $x0 = COPY %ptr(p0)
+  ; CHECK:   $x1 = COPY %ext(s64)
+  ; CHECK:   RET_ReallyLR implicit $x0
+  bb.1.entry:
+    liveins: $w0, $w1, $x2
+
+    %0:_(s32) = COPY $w0
+    %1:_(s32) = COPY $w1
+    %base:_(p0) = COPY $x2
+    %zero:_(s32) = G_CONSTANT i32 0
+    %one:_(s32) = G_CONSTANT i32 2
+    %cmp:_(s1) = G_ICMP intpred(sgt), %0(s32), %one
+    G_BRCOND %cmp(s1), %bb.2
+    G_BR %bb.3
+
+  bb.2:
+    %cst32_4:_(s32) = G_CONSTANT i32 4
+    G_BR %bb.4
+
+  bb.3:
+    %cst32_10:_(s32) = G_CONSTANT i32 10
+
+  bb.4:
+    %phi:_(s32) = G_PHI %cst32_4(s32), %bb.2, %cst32_10(s32), %bb.3
+    %ext:_(s64) = G_SEXT %phi
+    %ptr:_(p0) = G_PTR_ADD %base, %ext
+    $x0 = COPY %ptr(p0)
+    $x1 = COPY %ext(s64)
+    RET_ReallyLR implicit $x0
+
+...
+
+# Check we don't propagate if there are more than 2 unique incoming values in the phi.
+# Doing so might cause too much code bloat.
+---
+name:            zext_icst_through_phi_too_many_incoming
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: name: zext_icst_through_phi_too_many_incoming
+  ; CHECK: bb.0.entry:
+  ; CHECK:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; CHECK:   liveins: $w0, $w1
+  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+  ; CHECK:   %one:_(s32) = G_CONSTANT i32 2
+  ; CHECK:   %cmp:_(s1) = G_ICMP intpred(sle), [[COPY]](s32), %one
+  ; CHECK:   G_BRCOND %cmp(s1), %bb.2
+  ; CHECK:   G_BR %bb.1
+  ; CHECK: bb.1:
+  ; CHECK:   successors: %bb.3(0x40000000), %bb.4(0x40000000)
+  ; CHECK:   %cst32_4:_(s32) = G_CONSTANT i32 4
+  ; CHECK:   %cond:_(s1) = G_IMPLICIT_DEF
+  ; CHECK:   G_BRCOND %cond(s1), %bb.3
+  ; CHECK:   G_BR %bb.4
+  ; CHECK: bb.2:
+  ; CHECK:   successors: %bb.4(0x80000000)
+  ; CHECK:   %cst32_10:_(s32) = G_CONSTANT i32 10
+  ; CHECK:   G_BR %bb.4
+  ; CHECK: bb.3:
+  ; CHECK:   successors: %bb.4(0x80000000)
+  ; CHECK:   %cst32_42:_(s32) = G_CONSTANT i32 42
+  ; CHECK: bb.4:
+  ; CHECK:   %phi:_(s32) = G_PHI %cst32_4(s32), %bb.1, %cst32_10(s32), %bb.2, %cst32_42(s32), %bb.3
+  ; CHECK:   %ext:_(s64) = G_ZEXT %phi(s32)
+  ; CHECK:   $x0 = COPY %ext(s64)
+  ; CHECK:   RET_ReallyLR implicit $x0
+  bb.1.entry:
+    liveins: $w0, $w1
+
+    %0:_(s32) = COPY $w0
+    %1:_(s32) = COPY $w1
+    %zero:_(s32) = G_CONSTANT i32 0
+    %one:_(s32) = G_CONSTANT i32 2
+    %cmp:_(s1) = G_ICMP intpred(sgt), %0(s32), %one
+    G_BRCOND %cmp(s1), %bb.2
+    G_BR %bb.3
+
+  bb.2:
+    %cst32_4:_(s32) = G_CONSTANT i32 4
+    %cond:_(s1) = G_IMPLICIT_DEF
+    G_BRCOND %cond, %bb.5
+    G_BR %bb.4
+
+  bb.3:
+    %cst32_10:_(s32) = G_CONSTANT i32 10
+    G_BR %bb.4
+
+  bb.5:
+    %cst32_42:_(s32) = G_CONSTANT i32 42
+
+  bb.4:
+    %phi:_(s32) = G_PHI %cst32_4(s32), %bb.2, %cst32_10(s32), %bb.3, %cst32_42(s32), %bb.5
+    %ext:_(s64) = G_ZEXT %phi
+    $x0 = COPY %ext(s64)
+    RET_ReallyLR implicit $x0
+
+...
+
+# Check that we don't propagate if the extension would be of a non-allowed inst.
+---
+name:            sext_add_through_phi
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: name: sext_add_through_phi
+  ; CHECK: bb.0.entry:
+  ; CHECK:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; CHECK:   liveins: $w0, $w1
+  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+  ; CHECK:   %one:_(s32) = G_CONSTANT i32 2
+  ; CHECK:   %cmp:_(s1) = G_ICMP intpred(sle), [[COPY]](s32), %one
+  ; CHECK:   G_BRCOND %cmp(s1), %bb.2
+  ; CHECK:   G_BR %bb.1
+  ; CHECK: bb.1:
+  ; CHECK:   successors: %bb.3(0x80000000)
+  ; CHECK:   %add:_(s32) = G_ADD [[COPY]], [[COPY1]]
+  ; CHECK:   G_BR %bb.3
+  ; CHECK: bb.2:
+  ; CHECK:   successors: %bb.3(0x80000000)
+  ; CHECK:   %cst32_10:_(s32) = G_CONSTANT i32 10
+  ; CHECK: bb.3:
+  ; CHECK:   %phi:_(s32) = G_PHI %add(s32), %bb.1, %cst32_10(s32), %bb.2
+  ; CHECK:   %ext:_(s64) = G_SEXT %phi(s32)
+  ; CHECK:   $x0 = COPY %ext(s64)
+  ; CHECK:   RET_ReallyLR implicit $x0
+  bb.1.entry:
+    liveins: $w0, $w1
+
+    %0:_(s32) = COPY $w0
+    %1:_(s32) = COPY $w1
+    %zero:_(s32) = G_CONSTANT i32 0
+    %one:_(s32) = G_CONSTANT i32 2
+    %cmp:_(s1) = G_ICMP intpred(sgt), %0(s32), %one
+    G_BRCOND %cmp(s1), %bb.2
+    G_BR %bb.3
+
+  bb.2:
+    %add:_(s32) = G_ADD %0, %1
+    G_BR %bb.4
+
+  bb.3:
+    %cst32_10:_(s32) = G_CONSTANT i32 10
+
+  bb.4:
+    %phi:_(s32) = G_PHI %add(s32), %bb.2, %cst32_10(s32), %bb.3
+    %ext:_(s64) = G_SEXT %phi
+    $x0 = COPY %ext(s64)
+    RET_ReallyLR implicit $x0
+
+...
+
+# Same as above but allowed with a G_ANYEXT.
+---
+name:            anyext_add_through_phi
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: name: anyext_add_through_phi
+  ; CHECK: bb.0.entry:
+  ; CHECK:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; CHECK:   liveins: $w0, $w1
+  ; CHECK:   [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+  ; CHECK:   %one:_(s32) = G_CONSTANT i32 2
+  ; CHECK:   %cmp:_(s1) = G_ICMP intpred(sle), [[COPY]](s32), %one
+  ; CHECK:   G_BRCOND %cmp(s1), %bb.2
+  ; CHECK:   G_BR %bb.1
+  ; CHECK: bb.1:
+  ; CHECK:   successors: %bb.3(0x80000000)
+  ; CHECK:   %add:_(s32) = G_ADD [[COPY]], [[COPY1]]
+  ; CHECK:   [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT %add(s32)
+  ; CHECK:   G_BR %bb.3
+  ; CHECK: bb.2:
+  ; CHECK:   successors: %bb.3(0x80000000)
+  ; CHECK:   %cst32_10:_(s32) = G_CONSTANT i32 10
+  ; CHECK:   [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT %cst32_10(s32)
+  ; CHECK: bb.3:
+  ; CHECK:   %ext:_(s64) = G_PHI [[ANYEXT]](s64), %bb.1, [[ANYEXT1]](s64), %bb.2
+  ; CHECK:   $x0 = COPY %ext(s64)
+  ; CHECK:   RET_ReallyLR implicit $x0
+  bb.1.entry:
+    liveins: $w0, $w1
+
+    %0:_(s32) = COPY $w0
+    %1:_(s32) = COPY $w1
+    %zero:_(s32) = G_CONSTANT i32 0
+    %one:_(s32) = G_CONSTANT i32 2
+    %cmp:_(s1) = G_ICMP intpred(sgt), %0(s32), %one
+    G_BRCOND %cmp(s1), %bb.2
+    G_BR %bb.3
+
+  bb.2:
+    %add:_(s32) = G_ADD %0, %1
+    G_BR %bb.4
+
+  bb.3:
+    %cst32_10:_(s32) = G_CONSTANT i32 10
+
+  bb.4:
+    %phi:_(s32) = G_PHI %add(s32), %bb.2, %cst32_10(s32), %bb.3
+    %ext:_(s64) = G_ANYEXT %phi
+    $x0 = COPY %ext(s64)
+    RET_ReallyLR implicit $x0
+
+...


        

