[llvm] 5d6d9b6 - [GlobalISel] Propagate extends through G_PHIs into the incoming value blocks.
Amara Emerson via llvm-commits
llvm-commits at lists.llvm.org
Fri Feb 12 11:52:58 PST 2021
Author: Amara Emerson
Date: 2021-02-12T11:52:52-08:00
New Revision: 5d6d9b63a30843457a6139ff07ea9d664bebc988
URL: https://github.com/llvm/llvm-project/commit/5d6d9b63a30843457a6139ff07ea9d664bebc988
DIFF: https://github.com/llvm/llvm-project/commit/5d6d9b63a30843457a6139ff07ea9d664bebc988.diff
LOG: [GlobalISel] Propagate extends through G_PHIs into the incoming value blocks.
This combine tries to hoist extends of G_PHIs across blocks, into the blocks
that define the phi's incoming values. The idea is to expose further
optimization opportunities that are normally obscured by the PHI.
Some basic heuristics and a target hook for AArch64 are added to allow tuning.
E.g. if the extend is used by a G_PTR_ADD, the combine is not performed, since
the extend may be folded into the addressing mode during selection.
There are very minor code size improvements on AArch64 at -Os, but the real
benefit is that it unlocks further optimizations, such as AArch64 conditional
compares, on some benchmarks.
Differential Revision: https://reviews.llvm.org/D95703
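As a simplified illustration (adapted from the sext_icst_through_phi test added
below; the block and register names here are illustrative only), the combine
rewrites
  bb.1:
    %a:_(s32) = G_CONSTANT i32 4
    G_BR %bb.3
  bb.2:
    %b:_(s32) = G_CONSTANT i32 10
  bb.3:
    %phi:_(s32) = G_PHI %a(s32), %bb.1, %b(s32), %bb.2
    %ext:_(s64) = G_SEXT %phi(s32)
into
  bb.1:
    %a:_(s32) = G_CONSTANT i32 4
    %exta:_(s64) = G_SEXT %a(s32)
    G_BR %bb.3
  bb.2:
    %b:_(s32) = G_CONSTANT i32 10
    %extb:_(s64) = G_SEXT %b(s32)
  bb.3:
    %ext:_(s64) = G_PHI %exta(s64), %bb.1, %extb(s64), %bb.2
so that the extends sit next to their source definitions, where later combines
(e.g. constant folding, or the conditional-compare formation mentioned above)
can see through them.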
Added:
llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-prop-extends-phi.mir
Modified:
llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
llvm/include/llvm/CodeGen/TargetInstrInfo.h
llvm/include/llvm/Target/GlobalISel/Combine.td
llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
llvm/lib/Target/AArch64/AArch64InstrInfo.h
Removed:
################################################################################
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 9c72b90522a0..3ae7cb8ea67c 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -486,6 +486,9 @@ class CombinerHelper {
bool applyLoadOrCombine(MachineInstr &MI,
std::function<void(MachineIRBuilder &)> &MatchInfo);
+ bool matchExtendThroughPhis(MachineInstr &MI, MachineInstr *&ExtMI);
+ bool applyExtendThroughPhis(MachineInstr &MI, MachineInstr *&ExtMI);
+
/// Try to transform \p MI by using all of the above
/// combine functions. Returns true if changed.
bool tryCombine(MachineInstr &MI);
diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
index 8db4e2c5b787..bbd1ca35af3c 100644
--- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
@@ -1929,6 +1929,14 @@ class TargetInstrInfo : public MCInstrInfo {
virtual Optional<ParamLoadedValue> describeLoadedValue(const MachineInstr &MI,
Register Reg) const;
+ /// Given the generic extension instruction \p ExtMI, returns true if this
+ /// extension is a likely candidate for being folded into another
+ /// instruction.
+ virtual bool isExtendLikelyToBeFolded(MachineInstr &ExtMI,
+ MachineRegisterInfo &MRI) const {
+ return false;
+ }
+
/// Return MIR formatter to format/parse MIR operands. Target can override
/// this virtual function and return target specific MIR formatter.
virtual const MIRFormatter *getMIRFormatter() const {
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index e2c7a90a1b16..07b331d71357 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -553,6 +553,13 @@ def load_or_combine : GICombineRule<
[{ return Helper.matchLoadOrCombine(*${root}, ${info}); }]),
(apply [{ return Helper.applyLoadOrCombine(*${root}, ${info}); }])>;
+def extend_through_phis_matchdata: GIDefMatchData<"MachineInstr*">;
+def extend_through_phis : GICombineRule<
+ (defs root:$root, extend_through_phis_matchdata:$matchinfo),
+ (match (wip_match_opcode G_PHI):$root,
+ [{ return Helper.matchExtendThroughPhis(*${root}, ${matchinfo}); }]),
+ (apply [{ return Helper.applyExtendThroughPhis(*${root}, ${matchinfo}); }])>;
+
// Currently only the one combine above.
def insert_vec_elt_combines : GICombineGroup<
[combine_insert_vec_elts_build_vector]>;
@@ -579,6 +586,8 @@ def known_bits_simplifications : GICombineGroup<[
def width_reduction_combines : GICombineGroup<[reduce_shl_of_extend]>;
+def phi_combines : GICombineGroup<[extend_through_phis]>;
+
def select_combines : GICombineGroup<[select_undef_cmp, select_constant_cmp]>;
def trivial_combines : GICombineGroup<[copy_prop, mul_to_shl, add_p2i_to_ptradd,
@@ -586,7 +595,7 @@ def trivial_combines : GICombineGroup<[copy_prop, mul_to_shl, add_p2i_to_ptradd,
def all_combines : GICombineGroup<[trivial_combines, insert_vec_elt_combines,
ptr_add_immed_chain, combines_for_extload, combine_indexed_load_store,
- undef_combines, identity_combines, simplify_add_to_sub,
+ undef_combines, identity_combines, phi_combines, simplify_add_to_sub,
hoist_logic_op_with_same_opcode_hands,
shl_ashr_to_sext_inreg, sext_inreg_of_load,
width_reduction_combines, select_combines,
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 29b617d027a3..2a2c505fdfed 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -6,6 +6,7 @@
//
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/CodeGen/GlobalISel/Combiner.h"
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
@@ -13,6 +14,7 @@
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstr.h"
@@ -3560,6 +3562,108 @@ bool CombinerHelper::matchLoadOrCombine(
return true;
}
+bool CombinerHelper::matchExtendThroughPhis(MachineInstr &MI,
+ MachineInstr *&ExtMI) {
+ assert(MI.getOpcode() == TargetOpcode::G_PHI);
+
+ Register DstReg = MI.getOperand(0).getReg();
+
+ // TODO: Extending a vector may be expensive, don't do this until heuristics
+ // are better.
+ if (MRI.getType(DstReg).isVector())
+ return false;
+
+ // Try to match a phi, whose only use is an extend.
+ if (!MRI.hasOneNonDBGUse(DstReg))
+ return false;
+ ExtMI = &*MRI.use_instr_nodbg_begin(DstReg);
+ switch (ExtMI->getOpcode()) {
+ case TargetOpcode::G_ANYEXT:
+ return true; // G_ANYEXT is usually free.
+ case TargetOpcode::G_ZEXT:
+ case TargetOpcode::G_SEXT:
+ break;
+ default:
+ return false;
+ }
+
+ // If the target is likely to fold this extend away, don't propagate.
+ if (Builder.getTII().isExtendLikelyToBeFolded(*ExtMI, MRI))
+ return false;
+
+ // We don't want to propagate the extends unless there's a good chance that
+ // they'll be optimized in some way.
+ // Collect the unique incoming values.
+ SmallPtrSet<MachineInstr *, 4> InSrcs;
+ for (unsigned Idx = 1; Idx < MI.getNumOperands(); Idx += 2) {
+ auto *DefMI = getDefIgnoringCopies(MI.getOperand(Idx).getReg(), MRI);
+ switch (DefMI->getOpcode()) {
+ case TargetOpcode::G_LOAD:
+ case TargetOpcode::G_TRUNC:
+ case TargetOpcode::G_SEXT:
+ case TargetOpcode::G_ZEXT:
+ case TargetOpcode::G_ANYEXT:
+ case TargetOpcode::G_CONSTANT:
+ InSrcs.insert(getDefIgnoringCopies(MI.getOperand(Idx).getReg(), MRI));
+ // Don't try to propagate if there are too many places to create new
+ // extends, chances are it'll increase code size.
+ if (InSrcs.size() > 2)
+ return false;
+ break;
+ default:
+ return false;
+ }
+ }
+ return true;
+}
+
+bool CombinerHelper::applyExtendThroughPhis(MachineInstr &MI,
+ MachineInstr *&ExtMI) {
+ assert(MI.getOpcode() == TargetOpcode::G_PHI);
+ Register DstReg = ExtMI->getOperand(0).getReg();
+ LLT ExtTy = MRI.getType(DstReg);
+
+ // Propagate the extension into each incoming register's defining block.
+ // Use a SetVector here because PHIs can have duplicate edges, and we want
+ // deterministic iteration order.
+ SmallSetVector<MachineInstr *, 8> SrcMIs;
+ SmallDenseMap<MachineInstr *, MachineInstr *, 8> OldToNewSrcMap;
+ for (unsigned SrcIdx = 1; SrcIdx < MI.getNumOperands(); SrcIdx += 2) {
+ auto *SrcMI = MRI.getVRegDef(MI.getOperand(SrcIdx).getReg());
+ if (!SrcMIs.insert(SrcMI))
+ continue;
+
+ // Build an extend after each src inst.
+ auto *MBB = SrcMI->getParent();
+ MachineBasicBlock::iterator InsertPt = ++SrcMI->getIterator();
+ if (InsertPt != MBB->end() && InsertPt->isPHI())
+ InsertPt = MBB->getFirstNonPHI();
+
+ Builder.setInsertPt(*SrcMI->getParent(), InsertPt);
+ Builder.setDebugLoc(MI.getDebugLoc());
+ auto NewExt = Builder.buildExtOrTrunc(ExtMI->getOpcode(), ExtTy,
+ SrcMI->getOperand(0).getReg());
+ OldToNewSrcMap[SrcMI] = NewExt;
+ }
+
+ // Create a new phi with the extended inputs.
+ Builder.setInstrAndDebugLoc(MI);
+ auto NewPhi = Builder.buildInstrNoInsert(TargetOpcode::G_PHI);
+ NewPhi.addDef(DstReg);
+ for (unsigned SrcIdx = 1; SrcIdx < MI.getNumOperands(); ++SrcIdx) {
+ auto &MO = MI.getOperand(SrcIdx);
+ if (!MO.isReg()) {
+ NewPhi.addMBB(MO.getMBB());
+ continue;
+ }
+ auto *NewSrc = OldToNewSrcMap[MRI.getVRegDef(MO.getReg())];
+ NewPhi.addUse(NewSrc->getOperand(0).getReg());
+ }
+ Builder.insertInstr(NewPhi);
+ ExtMI->eraseFromParent();
+ return true;
+}
+
bool CombinerHelper::applyLoadOrCombine(
MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
Builder.setInstrAndDebugLoc(MI);
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index 0e60795caf47..6fd0dc58a470 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -7171,6 +7171,26 @@ AArch64InstrInfo::describeLoadedValue(const MachineInstr &MI,
return TargetInstrInfo::describeLoadedValue(MI, Reg);
}
+bool AArch64InstrInfo::isExtendLikelyToBeFolded(
+ MachineInstr &ExtMI, MachineRegisterInfo &MRI) const {
+ assert(ExtMI.getOpcode() == TargetOpcode::G_SEXT ||
+ ExtMI.getOpcode() == TargetOpcode::G_ZEXT ||
+ ExtMI.getOpcode() == TargetOpcode::G_ANYEXT);
+
+ // Anyexts are nops.
+ if (ExtMI.getOpcode() == TargetOpcode::G_ANYEXT)
+ return true;
+
+ Register DefReg = ExtMI.getOperand(0).getReg();
+ if (!MRI.hasOneNonDBGUse(DefReg))
+ return false;
+
+ // It's likely that a sext/zext as a G_PTR_ADD offset will be folded into an
+ // addressing mode.
+ auto *UserMI = &*MRI.use_instr_nodbg_begin(DefReg);
+ return UserMI->getOpcode() == TargetOpcode::G_PTR_ADD;
+}
+
uint64_t AArch64InstrInfo::getElementSizeForOpcode(unsigned Opc) const {
return get(Opc).TSFlags & AArch64::ElementSizeMask;
}
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/llvm/lib/Target/AArch64/AArch64InstrInfo.h
index 3406bc175132..ed38dda208c8 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.h
@@ -301,6 +301,9 @@ class AArch64InstrInfo final : public AArch64GenInstrInfo {
unsigned int getTailDuplicateSize(CodeGenOpt::Level OptLevel) const override;
+ bool isExtendLikelyToBeFolded(MachineInstr &ExtMI,
+ MachineRegisterInfo &MRI) const override;
+
static void decomposeStackOffsetForFrameOffsets(const StackOffset &Offset,
int64_t &NumBytes,
int64_t &NumPredicateVectors,
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-prop-extends-phi.mir b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-prop-extends-phi.mir
new file mode 100644
index 000000000000..1653d9157732
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/prelegalizercombiner-prop-extends-phi.mir
@@ -0,0 +1,448 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple aarch64-apple-darwin -debugify-and-strip-all-safe -run-pass=aarch64-prelegalizer-combiner -global-isel -verify-machineinstrs %s -o - | FileCheck %s
+
+# Check that we propagate the G_SEXT to the sources of the phi operand.
+---
+name: sext_icst_through_phi
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: sext_icst_through_phi
+ ; CHECK: bb.0.entry:
+ ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK: liveins: $w0, $w1
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+ ; CHECK: %one:_(s32) = G_CONSTANT i32 2
+ ; CHECK: %cmp:_(s1) = G_ICMP intpred(sle), [[COPY]](s32), %one
+ ; CHECK: G_BRCOND %cmp(s1), %bb.2
+ ; CHECK: G_BR %bb.1
+ ; CHECK: bb.1:
+ ; CHECK: successors: %bb.3(0x80000000)
+ ; CHECK: %cst32_4:_(s32) = G_CONSTANT i32 4
+ ; CHECK: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT %cst32_4(s32)
+ ; CHECK: G_BR %bb.3
+ ; CHECK: bb.2:
+ ; CHECK: successors: %bb.3(0x80000000)
+ ; CHECK: %cst32_10:_(s32) = G_CONSTANT i32 10
+ ; CHECK: [[SEXT1:%[0-9]+]]:_(s64) = G_SEXT %cst32_10(s32)
+ ; CHECK: bb.3:
+ ; CHECK: %ext:_(s64) = G_PHI [[SEXT]](s64), %bb.1, [[SEXT1]](s64), %bb.2
+ ; CHECK: $x0 = COPY %ext(s64)
+ ; CHECK: RET_ReallyLR implicit $x0
+ bb.1.entry:
+ liveins: $w0, $w1
+
+ %0:_(s32) = COPY $w0
+ %1:_(s32) = COPY $w1
+ %zero:_(s32) = G_CONSTANT i32 0
+ %one:_(s32) = G_CONSTANT i32 2
+ %cmp:_(s1) = G_ICMP intpred(sgt), %0(s32), %one
+ G_BRCOND %cmp(s1), %bb.2
+ G_BR %bb.3
+
+ bb.2:
+ %cst32_4:_(s32) = G_CONSTANT i32 4
+ G_BR %bb.4
+
+ bb.3:
+ %cst32_10:_(s32) = G_CONSTANT i32 10
+
+ bb.4:
+ %phi:_(s32) = G_PHI %cst32_4(s32), %bb.2, %cst32_10(s32), %bb.3
+ %ext:_(s64) = G_SEXT %phi
+ $x0 = COPY %ext(s64)
+ RET_ReallyLR implicit $x0
+
+...
+
+# Check that we propagate the G_ZEXT to the sources of the phi operand.
+---
+name: zext_icst_through_phi
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: zext_icst_through_phi
+ ; CHECK: bb.0.entry:
+ ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK: liveins: $w0, $w1
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+ ; CHECK: %one:_(s32) = G_CONSTANT i32 2
+ ; CHECK: %cmp:_(s1) = G_ICMP intpred(sle), [[COPY]](s32), %one
+ ; CHECK: G_BRCOND %cmp(s1), %bb.2
+ ; CHECK: G_BR %bb.1
+ ; CHECK: bb.1:
+ ; CHECK: successors: %bb.3(0x80000000)
+ ; CHECK: %cst32_4:_(s32) = G_CONSTANT i32 4
+ ; CHECK: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT %cst32_4(s32)
+ ; CHECK: G_BR %bb.3
+ ; CHECK: bb.2:
+ ; CHECK: successors: %bb.3(0x80000000)
+ ; CHECK: %cst32_10:_(s32) = G_CONSTANT i32 10
+ ; CHECK: [[ZEXT1:%[0-9]+]]:_(s64) = G_ZEXT %cst32_10(s32)
+ ; CHECK: bb.3:
+ ; CHECK: %ext:_(s64) = G_PHI [[ZEXT]](s64), %bb.1, [[ZEXT1]](s64), %bb.2
+ ; CHECK: $x0 = COPY %ext(s64)
+ ; CHECK: RET_ReallyLR implicit $x0
+ bb.1.entry:
+ liveins: $w0, $w1
+
+ %0:_(s32) = COPY $w0
+ %1:_(s32) = COPY $w1
+ %zero:_(s32) = G_CONSTANT i32 0
+ %one:_(s32) = G_CONSTANT i32 2
+ %cmp:_(s1) = G_ICMP intpred(sgt), %0(s32), %one
+ G_BRCOND %cmp(s1), %bb.2
+ G_BR %bb.3
+
+ bb.2:
+ %cst32_4:_(s32) = G_CONSTANT i32 4
+ G_BR %bb.4
+
+ bb.3:
+ %cst32_10:_(s32) = G_CONSTANT i32 10
+
+ bb.4:
+ %phi:_(s32) = G_PHI %cst32_4(s32), %bb.2, %cst32_10(s32), %bb.3
+ %ext:_(s64) = G_ZEXT %phi
+ $x0 = COPY %ext(s64)
+ RET_ReallyLR implicit $x0
+
+...
+
+# Don't handle vectors because of potential cost issues.
+---
+name: sext_load_through_phi_vector
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: sext_load_through_phi_vector
+ ; CHECK: bb.0.entry:
+ ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK: liveins: $x0, $q0, $q1
+ ; CHECK: %ptr:_(p0) = COPY $x0
+ ; CHECK: %cmp:_(s1) = G_IMPLICIT_DEF
+ ; CHECK: G_BRCOND %cmp(s1), %bb.2
+ ; CHECK: G_BR %bb.1
+ ; CHECK: bb.1:
+ ; CHECK: successors: %bb.3(0x80000000)
+ ; CHECK: %ld1:_(<4 x s32>) = G_LOAD %ptr(p0) :: (load 16)
+ ; CHECK: G_BR %bb.3
+ ; CHECK: bb.2:
+ ; CHECK: successors: %bb.3(0x80000000)
+ ; CHECK: %ld2:_(<4 x s32>) = G_LOAD %ptr(p0) :: (load 16)
+ ; CHECK: bb.3:
+ ; CHECK: %phi:_(<4 x s32>) = G_PHI %ld1(<4 x s32>), %bb.1, %ld2(<4 x s32>), %bb.2
+ ; CHECK: %ext:_(<4 x s64>) = G_SEXT %phi(<4 x s32>)
+ ; CHECK: G_STORE %ext(<4 x s64>), %ptr(p0) :: (store 16)
+ ; CHECK: RET_ReallyLR
+ bb.1.entry:
+ liveins: $x0, $q0, $q1
+
+ %0:_(<4 x s32>) = COPY $q0
+ %1:_(<4 x s32>) = COPY $q1
+ %ptr:_(p0) = COPY $x0
+ %cmp:_(s1) = G_IMPLICIT_DEF
+ G_BRCOND %cmp(s1), %bb.2
+ G_BR %bb.3
+
+ bb.2:
+ %ld1:_(<4 x s32>) = G_LOAD %ptr(p0) :: (load 16)
+ G_BR %bb.4
+
+ bb.3:
+ %ld2:_(<4 x s32>) = G_LOAD %ptr(p0) :: (load 16)
+
+ bb.4:
+ %phi:_(<4 x s32>) = G_PHI %ld1(<4 x s32>), %bb.2, %ld2(<4 x s32>), %bb.3
+ %ext:_(<4 x s64>) = G_SEXT %phi
+ G_STORE %ext(<4 x s64>), %ptr(p0) :: (store 16)
+ RET_ReallyLR
+
+...
+
+
+# Check that we don't propagate if the extend is used by a G_PTR_ADD, which on
+# AArch64 has a good chance of folding in the extend.
+---
+name: sext_icst_through_phi_used_by_ptradd
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: sext_icst_through_phi_used_by_ptradd
+ ; CHECK: bb.0.entry:
+ ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK: liveins: $w0, $w1, $x2
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+ ; CHECK: %base:_(p0) = COPY $x2
+ ; CHECK: %one:_(s32) = G_CONSTANT i32 2
+ ; CHECK: %cmp:_(s1) = G_ICMP intpred(sle), [[COPY]](s32), %one
+ ; CHECK: G_BRCOND %cmp(s1), %bb.2
+ ; CHECK: G_BR %bb.1
+ ; CHECK: bb.1:
+ ; CHECK: successors: %bb.3(0x80000000)
+ ; CHECK: %cst32_4:_(s32) = G_CONSTANT i32 4
+ ; CHECK: G_BR %bb.3
+ ; CHECK: bb.2:
+ ; CHECK: successors: %bb.3(0x80000000)
+ ; CHECK: %cst32_10:_(s32) = G_CONSTANT i32 10
+ ; CHECK: bb.3:
+ ; CHECK: %phi:_(s32) = G_PHI %cst32_4(s32), %bb.1, %cst32_10(s32), %bb.2
+ ; CHECK: %ext:_(s64) = G_SEXT %phi(s32)
+ ; CHECK: %ptr:_(p0) = G_PTR_ADD %base, %ext(s64)
+ ; CHECK: $x0 = COPY %ptr(p0)
+ ; CHECK: RET_ReallyLR implicit $x0
+ bb.1.entry:
+ liveins: $w0, $w1, $x2
+
+ %0:_(s32) = COPY $w0
+ %1:_(s32) = COPY $w1
+ %base:_(p0) = COPY $x2
+ %zero:_(s32) = G_CONSTANT i32 0
+ %one:_(s32) = G_CONSTANT i32 2
+ %cmp:_(s1) = G_ICMP intpred(sgt), %0(s32), %one
+ G_BRCOND %cmp(s1), %bb.2
+ G_BR %bb.3
+
+ bb.2:
+ %cst32_4:_(s32) = G_CONSTANT i32 4
+ G_BR %bb.4
+
+ bb.3:
+ %cst32_10:_(s32) = G_CONSTANT i32 10
+
+ bb.4:
+ %phi:_(s32) = G_PHI %cst32_4(s32), %bb.2, %cst32_10(s32), %bb.3
+ %ext:_(s64) = G_SEXT %phi
+ %ptr:_(p0) = G_PTR_ADD %base, %ext
+ $x0 = COPY %ptr(p0)
+ RET_ReallyLR implicit $x0
+
+...
+
+# Same as above but we do the combine here because the extend has multiple users,
+# so it probably won't cost extra instructions if we remove it.
+---
+name: sext_icst_through_phi_used_by_ptradd_multiuse
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: sext_icst_through_phi_used_by_ptradd_multiuse
+ ; CHECK: bb.0.entry:
+ ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK: liveins: $w0, $w1, $x2
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+ ; CHECK: %base:_(p0) = COPY $x2
+ ; CHECK: %one:_(s32) = G_CONSTANT i32 2
+ ; CHECK: %cmp:_(s1) = G_ICMP intpred(sle), [[COPY]](s32), %one
+ ; CHECK: G_BRCOND %cmp(s1), %bb.2
+ ; CHECK: G_BR %bb.1
+ ; CHECK: bb.1:
+ ; CHECK: successors: %bb.3(0x80000000)
+ ; CHECK: %cst32_4:_(s32) = G_CONSTANT i32 4
+ ; CHECK: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT %cst32_4(s32)
+ ; CHECK: G_BR %bb.3
+ ; CHECK: bb.2:
+ ; CHECK: successors: %bb.3(0x80000000)
+ ; CHECK: %cst32_10:_(s32) = G_CONSTANT i32 10
+ ; CHECK: [[SEXT1:%[0-9]+]]:_(s64) = G_SEXT %cst32_10(s32)
+ ; CHECK: bb.3:
+ ; CHECK: %ext:_(s64) = G_PHI [[SEXT]](s64), %bb.1, [[SEXT1]](s64), %bb.2
+ ; CHECK: %ptr:_(p0) = G_PTR_ADD %base, %ext(s64)
+ ; CHECK: $x0 = COPY %ptr(p0)
+ ; CHECK: $x1 = COPY %ext(s64)
+ ; CHECK: RET_ReallyLR implicit $x0
+ bb.1.entry:
+ liveins: $w0, $w1, $x2
+
+ %0:_(s32) = COPY $w0
+ %1:_(s32) = COPY $w1
+ %base:_(p0) = COPY $x2
+ %zero:_(s32) = G_CONSTANT i32 0
+ %one:_(s32) = G_CONSTANT i32 2
+ %cmp:_(s1) = G_ICMP intpred(sgt), %0(s32), %one
+ G_BRCOND %cmp(s1), %bb.2
+ G_BR %bb.3
+
+ bb.2:
+ %cst32_4:_(s32) = G_CONSTANT i32 4
+ G_BR %bb.4
+
+ bb.3:
+ %cst32_10:_(s32) = G_CONSTANT i32 10
+
+ bb.4:
+ %phi:_(s32) = G_PHI %cst32_4(s32), %bb.2, %cst32_10(s32), %bb.3
+ %ext:_(s64) = G_SEXT %phi
+ %ptr:_(p0) = G_PTR_ADD %base, %ext
+ $x0 = COPY %ptr(p0)
+ $x1 = COPY %ext(s64)
+ RET_ReallyLR implicit $x0
+
+...
+
+# Check we don't propagate if there are more than 2 unique incoming values in the phi.
+# Doing so might cause too much code bloat.
+---
+name: zext_icst_through_phi_too_many_incoming
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: zext_icst_through_phi_too_many_incoming
+ ; CHECK: bb.0.entry:
+ ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK: liveins: $w0, $w1
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+ ; CHECK: %one:_(s32) = G_CONSTANT i32 2
+ ; CHECK: %cmp:_(s1) = G_ICMP intpred(sle), [[COPY]](s32), %one
+ ; CHECK: G_BRCOND %cmp(s1), %bb.2
+ ; CHECK: G_BR %bb.1
+ ; CHECK: bb.1:
+ ; CHECK: successors: %bb.3(0x40000000), %bb.4(0x40000000)
+ ; CHECK: %cst32_4:_(s32) = G_CONSTANT i32 4
+ ; CHECK: %cond:_(s1) = G_IMPLICIT_DEF
+ ; CHECK: G_BRCOND %cond(s1), %bb.3
+ ; CHECK: G_BR %bb.4
+ ; CHECK: bb.2:
+ ; CHECK: successors: %bb.4(0x80000000)
+ ; CHECK: %cst32_10:_(s32) = G_CONSTANT i32 10
+ ; CHECK: G_BR %bb.4
+ ; CHECK: bb.3:
+ ; CHECK: successors: %bb.4(0x80000000)
+ ; CHECK: %cst32_42:_(s32) = G_CONSTANT i32 42
+ ; CHECK: bb.4:
+ ; CHECK: %phi:_(s32) = G_PHI %cst32_4(s32), %bb.1, %cst32_10(s32), %bb.2, %cst32_42(s32), %bb.3
+ ; CHECK: %ext:_(s64) = G_ZEXT %phi(s32)
+ ; CHECK: $x0 = COPY %ext(s64)
+ ; CHECK: RET_ReallyLR implicit $x0
+ bb.1.entry:
+ liveins: $w0, $w1
+
+ %0:_(s32) = COPY $w0
+ %1:_(s32) = COPY $w1
+ %zero:_(s32) = G_CONSTANT i32 0
+ %one:_(s32) = G_CONSTANT i32 2
+ %cmp:_(s1) = G_ICMP intpred(sgt), %0(s32), %one
+ G_BRCOND %cmp(s1), %bb.2
+ G_BR %bb.3
+
+ bb.2:
+ %cst32_4:_(s32) = G_CONSTANT i32 4
+ %cond:_(s1) = G_IMPLICIT_DEF
+ G_BRCOND %cond, %bb.5
+ G_BR %bb.4
+
+ bb.3:
+ %cst32_10:_(s32) = G_CONSTANT i32 10
+ G_BR %bb.4
+
+ bb.5:
+ %cst32_42:_(s32) = G_CONSTANT i32 42
+
+ bb.4:
+ %phi:_(s32) = G_PHI %cst32_4(s32), %bb.2, %cst32_10(s32), %bb.3, %cst32_42(s32), %bb.5
+ %ext:_(s64) = G_ZEXT %phi
+ $x0 = COPY %ext(s64)
+ RET_ReallyLR implicit $x0
+
+...
+
+# Check that we don't propagate if the extension would be of a non-allowed inst.
+---
+name: sext_add_through_phi
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: sext_add_through_phi
+ ; CHECK: bb.0.entry:
+ ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK: liveins: $w0, $w1
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+ ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+ ; CHECK: %one:_(s32) = G_CONSTANT i32 2
+ ; CHECK: %cmp:_(s1) = G_ICMP intpred(sle), [[COPY]](s32), %one
+ ; CHECK: G_BRCOND %cmp(s1), %bb.2
+ ; CHECK: G_BR %bb.1
+ ; CHECK: bb.1:
+ ; CHECK: successors: %bb.3(0x80000000)
+ ; CHECK: %add:_(s32) = G_ADD [[COPY]], [[COPY1]]
+ ; CHECK: G_BR %bb.3
+ ; CHECK: bb.2:
+ ; CHECK: successors: %bb.3(0x80000000)
+ ; CHECK: %cst32_10:_(s32) = G_CONSTANT i32 10
+ ; CHECK: bb.3:
+ ; CHECK: %phi:_(s32) = G_PHI %add(s32), %bb.1, %cst32_10(s32), %bb.2
+ ; CHECK: %ext:_(s64) = G_SEXT %phi(s32)
+ ; CHECK: $x0 = COPY %ext(s64)
+ ; CHECK: RET_ReallyLR implicit $x0
+ bb.1.entry:
+ liveins: $w0, $w1
+
+ %0:_(s32) = COPY $w0
+ %1:_(s32) = COPY $w1
+ %zero:_(s32) = G_CONSTANT i32 0
+ %one:_(s32) = G_CONSTANT i32 2
+ %cmp:_(s1) = G_ICMP intpred(sgt), %0(s32), %one
+ G_BRCOND %cmp(s1), %bb.2
+ G_BR %bb.3
+
+ bb.2:
+ %add:_(s32) = G_ADD %0, %1
+ G_BR %bb.4
+
+ bb.3:
+ %cst32_10:_(s32) = G_CONSTANT i32 10
+
+ bb.4:
+ %phi:_(s32) = G_PHI %add(s32), %bb.2, %cst32_10(s32), %bb.3
+ %ext:_(s64) = G_SEXT %phi
+ $x0 = COPY %ext(s64)
+ RET_ReallyLR implicit $x0
+
+...
+
+# Same as above but allowed with a G_ANYEXT.
+---
+name: anyext_add_through_phi
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: anyext_add_through_phi
+ ; CHECK: bb.0.entry:
+ ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000)
+ ; CHECK: liveins: $w0, $w1
+ ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
+ ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $w1
+ ; CHECK: %one:_(s32) = G_CONSTANT i32 2
+ ; CHECK: %cmp:_(s1) = G_ICMP intpred(sle), [[COPY]](s32), %one
+ ; CHECK: G_BRCOND %cmp(s1), %bb.2
+ ; CHECK: G_BR %bb.1
+ ; CHECK: bb.1:
+ ; CHECK: successors: %bb.3(0x80000000)
+ ; CHECK: %add:_(s32) = G_ADD [[COPY]], [[COPY1]]
+ ; CHECK: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT %add(s32)
+ ; CHECK: G_BR %bb.3
+ ; CHECK: bb.2:
+ ; CHECK: successors: %bb.3(0x80000000)
+ ; CHECK: %cst32_10:_(s32) = G_CONSTANT i32 10
+ ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT %cst32_10(s32)
+ ; CHECK: bb.3:
+ ; CHECK: %ext:_(s64) = G_PHI [[ANYEXT]](s64), %bb.1, [[ANYEXT1]](s64), %bb.2
+ ; CHECK: $x0 = COPY %ext(s64)
+ ; CHECK: RET_ReallyLR implicit $x0
+ bb.1.entry:
+ liveins: $w0, $w1
+
+ %0:_(s32) = COPY $w0
+ %1:_(s32) = COPY $w1
+ %zero:_(s32) = G_CONSTANT i32 0
+ %one:_(s32) = G_CONSTANT i32 2
+ %cmp:_(s1) = G_ICMP intpred(sgt), %0(s32), %one
+ G_BRCOND %cmp(s1), %bb.2
+ G_BR %bb.3
+
+ bb.2:
+ %add:_(s32) = G_ADD %0, %1
+ G_BR %bb.4
+
+ bb.3:
+ %cst32_10:_(s32) = G_CONSTANT i32 10
+
+ bb.4:
+ %phi:_(s32) = G_PHI %add(s32), %bb.2, %cst32_10(s32), %bb.3
+ %ext:_(s64) = G_ANYEXT %phi
+ $x0 = COPY %ext(s64)
+ RET_ReallyLR implicit $x0
+
+...