[llvm] r371384 - GlobalISel: add combiner to form indexed loads.
Tim Northover via llvm-commits
llvm-commits at lists.llvm.org
Mon Sep 9 03:04:23 PDT 2019
Author: tnorthover
Date: Mon Sep 9 03:04:23 2019
New Revision: 371384
URL: http://llvm.org/viewvc/llvm-project?rev=371384&view=rev
Log:
GlobalISel: add combiner to form indexed loads.
Loosely based on the DAGCombiner version, but this part is slightly simpler in
GlobalISel because all address calculation is performed by G_GEP. That makes
the inc/dec distinction moot, so there's just pre- vs post-indexing to think about.
No targets can handle it yet, so testing is via a special flag
(-force-legal-indexing) that overrides the target hooks.
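
For reference, a target opts in through its CombinerInfo. Below is a minimal
sketch, loosely mirroring the AArch64 wiring further down in this patch; the
MyTargetPreLegalizerCombinerInfo class and its KB/MDT members are hypothetical
stand-ins, not part of this commit.

bool MyTargetPreLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
                                               MachineInstr &MI,
                                               MachineIRBuilder &B) const {
  // MDT may be null (e.g. at -O0); the helper then falls back to a
  // conservative single-block dominance check.
  CombinerHelper Helper(Observer, B, KB, MDT);
  switch (MI.getOpcode()) {
  case TargetOpcode::G_LOAD:
  case TargetOpcode::G_SEXTLOAD:
  case TargetOpcode::G_ZEXTLOAD:
  case TargetOpcode::G_STORE:
    // Try to fold a surrounding G_GEP into a pre-/post-indexed access.
    return Helper.tryCombineIndexedLoadStore(MI);
  default:
    return false;
  }
}

At -O0 the MachineDominatorTree can simply be left null, as the AArch64 pass
changes below do.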
Added:
llvm/trunk/test/CodeGen/AArch64/GlobalISel/combiner-load-store-indexing.ll
Modified:
llvm/trunk/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
llvm/trunk/include/llvm/CodeGen/TargetLowering.h
llvm/trunk/include/llvm/Support/TargetOpcodes.def
llvm/trunk/include/llvm/Target/GenericOpcodes.td
llvm/trunk/lib/CodeGen/GlobalISel/CombinerHelper.cpp
llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp
llvm/trunk/lib/Target/AArch64/AArch64.h
llvm/trunk/lib/Target/AArch64/AArch64PreLegalizerCombiner.cpp
llvm/trunk/lib/Target/AArch64/AArch64TargetMachine.cpp
llvm/trunk/test/CodeGen/AArch64/GlobalISel/gisel-commandline-option.ll
llvm/trunk/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
Modified: llvm/trunk/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/GlobalISel/CombinerHelper.h?rev=371384&r1=371383&r2=371384&view=diff
==============================================================================
--- llvm/trunk/include/llvm/CodeGen/GlobalISel/CombinerHelper.h (original)
+++ llvm/trunk/include/llvm/CodeGen/GlobalISel/CombinerHelper.h Mon Sep 9 03:04:23 2019
@@ -28,6 +28,7 @@ class MachineRegisterInfo;
class MachineInstr;
class MachineOperand;
class GISelKnownBits;
+class MachineDominatorTree;
struct PreferredTuple {
LLT Ty; // The result type of the extend.
@@ -41,10 +42,12 @@ protected:
MachineRegisterInfo &MRI;
GISelChangeObserver &Observer;
GISelKnownBits *KB;
+ MachineDominatorTree *MDT;
public:
CombinerHelper(GISelChangeObserver &Observer, MachineIRBuilder &B,
- GISelKnownBits *KB = nullptr);
+ GISelKnownBits *KB = nullptr,
+ MachineDominatorTree *MDT = nullptr);
/// MachineRegisterInfo::replaceRegWith() and inform the observer of the changes
void replaceRegWith(MachineRegisterInfo &MRI, Register FromReg, Register ToReg) const;
@@ -60,17 +63,61 @@ public:
bool matchCombineCopy(MachineInstr &MI);
void applyCombineCopy(MachineInstr &MI);
+ /// \brief \returns true if \p DefMI precedes \p UseMI or they are the same
+ /// instruction. Both must be in the same basic block.
+ bool isPredecessor(MachineInstr &DefMI, MachineInstr &UseMI);
+
+ /// \brief \returns true if \p DefMI dominates \p UseMI. By definition an
+ /// instruction dominates itself.
+ ///
+ /// If we haven't been provided with a MachineDominatorTree during
+ /// construction, this function returns a conservative result that tracks just
+ /// a single basic block.
+ bool dominates(MachineInstr &DefMI, MachineInstr &UseMI);
+
/// If \p MI is extend that consumes the result of a load, try to combine it.
/// Returns true if MI changed.
bool tryCombineExtendingLoads(MachineInstr &MI);
bool matchCombineExtendingLoads(MachineInstr &MI, PreferredTuple &MatchInfo);
void applyCombineExtendingLoads(MachineInstr &MI, PreferredTuple &MatchInfo);
+ /// Combine \p MI into a pre-indexed or post-indexed load/store operation if
+ /// legal and the surrounding code makes it useful.
+ bool tryCombineIndexedLoadStore(MachineInstr &MI);
+
bool matchCombineBr(MachineInstr &MI);
bool tryCombineBr(MachineInstr &MI);
/// Optimize memcpy intrinsics et al, e.g. constant len calls.
/// /p MaxLen if non-zero specifies the max length of a mem libcall to inline.
+ ///
+ /// For example (pre-indexed):
+ ///
+ /// $addr = G_GEP $base, $offset
+ /// [...]
+ /// $val = G_LOAD $addr
+ /// [...]
+ /// $whatever = COPY $addr
+ ///
+ /// -->
+ ///
+ /// $val, $addr = G_INDEXED_LOAD $base, $offset, 1 (IsPre)
+ /// [...]
+ /// $whatever = COPY $addr
+ ///
+ /// or (post-indexed):
+ ///
+ /// G_STORE $val, $base
+ /// [...]
+ /// $addr = G_GEP $base, $offset
+ /// [...]
+ /// $whatever = COPY $addr
+ ///
+ /// -->
+ ///
+ /// $addr = G_INDEXED_STORE $val, $base, $offset
+ /// [...]
+ /// $whatever = COPY $addr
bool tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen = 0);
/// Try to transform \p MI by using all of the above
@@ -87,6 +134,20 @@ private:
bool IsVolatile);
bool optimizeMemset(MachineInstr &MI, Register Dst, Register Val,
unsigned KnownLen, unsigned DstAlign, bool IsVolatile);
+
+ /// Given a non-indexed load or store instruction \p MI, find an offset that
+ /// can be usefully and legally folded into it as a post-indexing operation.
+ ///
+ /// \returns true if a candidate is found.
+ bool findPostIndexCandidate(MachineInstr &MI, Register &Addr, Register &Base,
+ Register &Offset);
+
+ /// Given a non-indexed load or store instruction \p MI, find an offset that
+ /// can be usefully and legally folded into it as a pre-indexing operation.
+ ///
+ /// \returns true if a candidate is found.
+ bool findPreIndexCandidate(MachineInstr &MI, Register &Addr, Register &Base,
+ Register &Offset);
};
} // namespace llvm
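
A note on the new dominance helpers: without a MachineDominatorTree the query
stays usable but conservative. A short sketch of the intended use, with
Observer, B, DefMI and UseMI assumed to be in scope (illustrative names only):

// With an MDT, dominates() answers across basic blocks; without one it only
// proves dominance inside a single block by scanning from the block start
// (isPredecessor), and returns false for cross-block queries.
CombinerHelper Helper(Observer, B, /*KB=*/nullptr, /*MDT=*/nullptr);
if (Helper.dominates(DefMI, UseMI)) {
  // DefMI executes before UseMI on every path, so folding at UseMI is safe.
}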
Modified: llvm/trunk/include/llvm/CodeGen/TargetLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/TargetLowering.h?rev=371384&r1=371383&r2=371384&view=diff
==============================================================================
--- llvm/trunk/include/llvm/CodeGen/TargetLowering.h (original)
+++ llvm/trunk/include/llvm/CodeGen/TargetLowering.h Mon Sep 9 03:04:23 2019
@@ -2959,6 +2959,14 @@ public:
return false;
}
+ /// Returns true if the specified base+offset is a legal indexed addressing
+ /// mode for this target. \p MI is the load or store instruction that is being
+ /// considered for transformation.
+ virtual bool isIndexingLegal(MachineInstr &MI, Register Base, Register Offset,
+ bool IsPre, MachineRegisterInfo &MRI) const {
+ return false;
+ }
+
/// Return the entry encoding for a jump table in the current function. The
/// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum.
virtual unsigned getJumpTableEncoding() const;
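
No in-tree target implements this hook yet (hence the -force-legal-indexing
flag used by the new test). A hedged sketch of what an override might look
like for a hypothetical MyTargetLowering; the signed 9-bit offset range is an
arbitrary illustration, not a real target's rule:

bool MyTargetLowering::isIndexingLegal(MachineInstr &MI, Register Base,
                                       Register Offset, bool IsPre,
                                       MachineRegisterInfo &MRI) const {
  // Claim legality only when the offset is a known constant small enough to
  // be encoded by the target's pre/post-indexed forms; anything else keeps
  // the default "not legal" answer, so the combiner leaves the code alone.
  if (auto OffsetCst = getConstantVRegVal(Offset, MRI))
    return *OffsetCst >= -256 && *OffsetCst < 256;
  return false;
}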
Modified: llvm/trunk/include/llvm/Support/TargetOpcodes.def
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Support/TargetOpcodes.def?rev=371384&r1=371383&r2=371384&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Support/TargetOpcodes.def (original)
+++ llvm/trunk/include/llvm/Support/TargetOpcodes.def Mon Sep 9 03:04:23 2019
@@ -294,9 +294,21 @@ HANDLE_TARGET_OPCODE(G_SEXTLOAD)
/// Generic zeroext load
HANDLE_TARGET_OPCODE(G_ZEXTLOAD)
+/// Generic indexed load (including anyext load)
+HANDLE_TARGET_OPCODE(G_INDEXED_LOAD)
+
+/// Generic indexed signext load
+HANDLE_TARGET_OPCODE(G_INDEXED_SEXTLOAD)
+
+/// Generic indexed zeroext load
+HANDLE_TARGET_OPCODE(G_INDEXED_ZEXTLOAD)
+
/// Generic store.
HANDLE_TARGET_OPCODE(G_STORE)
+/// Generic indexed store.
+HANDLE_TARGET_OPCODE(G_INDEXED_STORE)
+
/// Generic atomic cmpxchg with internal success check.
HANDLE_TARGET_OPCODE(G_ATOMIC_CMPXCHG_WITH_SUCCESS)
Modified: llvm/trunk/include/llvm/Target/GenericOpcodes.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Target/GenericOpcodes.td?rev=371384&r1=371383&r2=371384&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Target/GenericOpcodes.td (original)
+++ llvm/trunk/include/llvm/Target/GenericOpcodes.td Mon Sep 9 03:04:23 2019
@@ -788,6 +788,32 @@ def G_ZEXTLOAD : GenericInstruction {
let mayLoad = 1;
}
+// Generic indexed load. Combines a GEP with a load. $newaddr is set to $base + $offset.
+// If $am is 0 (post-indexed), then the value is loaded from $base; if $am is 1 (pre-indexed)
+// then the value is loaded from $newaddr.
+def G_INDEXED_LOAD : GenericInstruction {
+ let OutOperandList = (outs type0:$dst, ptype1:$newaddr);
+ let InOperandList = (ins ptype1:$base, type2:$offset, unknown:$am);
+ let hasSideEffects = 0;
+ let mayLoad = 1;
+}
+
+// Same as G_INDEXED_LOAD except that the load performed is sign-extending, as with G_SEXTLOAD.
+def G_INDEXED_SEXTLOAD : GenericInstruction {
+ let OutOperandList = (outs type0:$dst, ptype1:$newaddr);
+ let InOperandList = (ins ptype1:$base, type2:$offset, unknown:$am);
+ let hasSideEffects = 0;
+ let mayLoad = 1;
+}
+
+// Same as G_INDEXED_LOAD except that the load performed is zero-extending, as with G_ZEXTLOAD.
+def G_INDEXED_ZEXTLOAD : GenericInstruction {
+ let OutOperandList = (outs type0:$dst, ptype1:$newaddr);
+ let InOperandList = (ins ptype1:$base, type2:$offset, unknown:$am);
+ let hasSideEffects = 0;
+ let mayLoad = 1;
+}
+
// Generic store. Expects a MachineMemOperand in addition to explicit operands.
def G_STORE : GenericInstruction {
let OutOperandList = (outs);
@@ -795,6 +821,15 @@ def G_STORE : GenericInstruction {
let hasSideEffects = 0;
let mayStore = 1;
}
+
+// Combines a store with a GEP. See description of G_INDEXED_LOAD for indexing behaviour.
+def G_INDEXED_STORE : GenericInstruction {
+ let OutOperandList = (outs ptype0:$newaddr);
+ let InOperandList = (ins type1:$src, ptype0:$base, ptype2:$offset,
+ unknown:$am);
+ let hasSideEffects = 0;
+ let mayStore = 1;
+}
// Generic atomic cmpxchg with internal success check. Expects a
// MachineMemOperand in addition to explicit operands.
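
The operand order implied by these definitions (value def, then the updated
address def, then base, offset and the addressing-mode immediate) matches how
the combiner builds the instruction in CombinerHelper.cpp below. A small
sketch using MachineIRBuilder, with the register variables invented purely
for illustration:

// $dst, $newaddr = G_INDEXED_LOAD $base, $offset, $am
MachineIRBuilder MIRBuilder(MI);
auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_INDEXED_LOAD);
MIB.addDef(Dst);     // type0:  loaded value
MIB.addDef(NewAddr); // ptype1: base plus offset
MIB.addUse(Base);    // ptype1: incoming pointer
MIB.addUse(Offset);  // type2:  offset to add
MIB.addImm(IsPre);   // $am: 1 = pre-indexed, 0 = post-indexed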
Modified: llvm/trunk/lib/CodeGen/GlobalISel/CombinerHelper.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/GlobalISel/CombinerHelper.cpp?rev=371384&r1=371383&r2=371384&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/GlobalISel/CombinerHelper.cpp (original)
+++ llvm/trunk/lib/CodeGen/GlobalISel/CombinerHelper.cpp Mon Sep 9 03:04:23 2019
@@ -11,6 +11,7 @@
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
+#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -22,10 +23,19 @@
using namespace llvm;
+// Option to allow testing of the combiner while no targets know about indexed
+// addressing.
+static cl::opt<bool>
+ ForceLegalIndexing("force-legal-indexing", cl::Hidden, cl::init(false),
+ cl::desc("Force all indexed operations to be "
+ "legal for the GlobalISel combiner"));
+
+
CombinerHelper::CombinerHelper(GISelChangeObserver &Observer,
- MachineIRBuilder &B, GISelKnownBits *KB)
+ MachineIRBuilder &B, GISelKnownBits *KB,
+ MachineDominatorTree *MDT)
: Builder(B), MRI(Builder.getMF().getRegInfo()), Observer(Observer),
- KB(KB) {
+ KB(KB), MDT(MDT) {
(void)this->KB;
}
@@ -349,6 +359,204 @@ void CombinerHelper::applyCombineExtendi
Observer.changedInstr(MI);
}
+bool CombinerHelper::isPredecessor(MachineInstr &DefMI, MachineInstr &UseMI) {
+ assert(DefMI.getParent() == UseMI.getParent());
+ if (&DefMI == &UseMI)
+ return false;
+
+  // Loop through the basic block until we find one of the instructions.
+  for (const MachineInstr &CurMI : *DefMI.getParent()) {
+    if (&CurMI == &DefMI)
+      return true;
+    if (&CurMI == &UseMI)
+      return false;
+  }
+
+  llvm_unreachable("Block must contain instructions");
+}
+
+bool CombinerHelper::dominates(MachineInstr &DefMI, MachineInstr &UseMI) {
+ if (MDT)
+ return MDT->dominates(&DefMI, &UseMI);
+ else if (DefMI.getParent() != UseMI.getParent())
+ return false;
+
+ return isPredecessor(DefMI, UseMI);
+}
+
+bool CombinerHelper::findPostIndexCandidate(MachineInstr &MI, Register &Addr,
+ Register &Base, Register &Offset) {
+ auto &MF = *MI.getParent()->getParent();
+ const auto &TLI = *MF.getSubtarget().getTargetLowering();
+
+ unsigned Opcode = MI.getOpcode();
+ assert(Opcode == TargetOpcode::G_LOAD || Opcode == TargetOpcode::G_SEXTLOAD ||
+ Opcode == TargetOpcode::G_ZEXTLOAD || Opcode == TargetOpcode::G_STORE);
+
+ Base = MI.getOperand(1).getReg();
+ MachineInstr *BaseDef = MRI.getUniqueVRegDef(Base);
+ if (BaseDef && BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
+ return false;
+
+ LLVM_DEBUG(dbgs() << "Searching for post-indexing opportunity for: " << MI);
+
+ for (auto &Use : MRI.use_instructions(Base)) {
+ if (Use.getOpcode() != TargetOpcode::G_GEP)
+ continue;
+
+ Offset = Use.getOperand(2).getReg();
+ if (!ForceLegalIndexing &&
+ !TLI.isIndexingLegal(MI, Base, Offset, /*IsPre*/ false, MRI)) {
+ LLVM_DEBUG(dbgs() << " Ignoring candidate with illegal addrmode: "
+ << Use);
+ continue;
+ }
+
+ // Make sure the offset calculation is before the potentially indexed op.
+ // FIXME: we really care about dependency here. The offset calculation might
+ // be movable.
+ MachineInstr *OffsetDef = MRI.getUniqueVRegDef(Offset);
+ if (!OffsetDef || !dominates(*OffsetDef, MI)) {
+ LLVM_DEBUG(dbgs() << " Ignoring candidate with offset after mem-op: "
+ << Use);
+ continue;
+ }
+
+ // FIXME: check whether all uses of Base are load/store with foldable
+ // addressing modes. If so, using the normal addr-modes is better than
+ // forming an indexed one.
+
+ bool MemOpDominatesAddrUses = true;
+ for (auto &GEPUse : MRI.use_instructions(Use.getOperand(0).getReg())) {
+ if (!dominates(MI, GEPUse)) {
+ MemOpDominatesAddrUses = false;
+ break;
+ }
+ }
+
+ if (!MemOpDominatesAddrUses) {
+ LLVM_DEBUG(
+ dbgs() << " Ignoring candidate as memop does not dominate uses: "
+ << Use);
+ continue;
+ }
+
+ LLVM_DEBUG(dbgs() << " Found match: " << Use);
+ Addr = Use.getOperand(0).getReg();
+ return true;
+ }
+
+ return false;
+}
+
+bool CombinerHelper::findPreIndexCandidate(MachineInstr &MI, Register &Addr,
+ Register &Base, Register &Offset) {
+ auto &MF = *MI.getParent()->getParent();
+ const auto &TLI = *MF.getSubtarget().getTargetLowering();
+
+ unsigned Opcode = MI.getOpcode();
+ assert(Opcode == TargetOpcode::G_LOAD || Opcode == TargetOpcode::G_SEXTLOAD ||
+ Opcode == TargetOpcode::G_ZEXTLOAD || Opcode == TargetOpcode::G_STORE);
+
+ Addr = MI.getOperand(1).getReg();
+ MachineInstr *AddrDef = getOpcodeDef(TargetOpcode::G_GEP, Addr, MRI);
+ if (!AddrDef || MRI.hasOneUse(Addr))
+ return false;
+
+ Base = AddrDef->getOperand(1).getReg();
+ Offset = AddrDef->getOperand(2).getReg();
+
+ LLVM_DEBUG(dbgs() << "Found potential pre-indexed load_store: " << MI);
+
+ if (!ForceLegalIndexing &&
+ !TLI.isIndexingLegal(MI, Base, Offset, /*IsPre*/ true, MRI)) {
+ LLVM_DEBUG(dbgs() << " Skipping, not legal for target");
+ return false;
+ }
+
+ MachineInstr *BaseDef = getDefIgnoringCopies(Base, MRI);
+ if (BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
+ LLVM_DEBUG(dbgs() << " Skipping, frame index would need copy anyway.");
+ return false;
+ }
+
+ if (MI.getOpcode() == TargetOpcode::G_STORE) {
+ // Would require a copy.
+ if (Base == MI.getOperand(0).getReg()) {
+ LLVM_DEBUG(dbgs() << " Skipping, storing base so need copy anyway.");
+ return false;
+ }
+
+ // We're expecting one use of Addr in MI, but it could also be the
+ // value stored, which isn't actually dominated by the instruction.
+ if (MI.getOperand(0).getReg() == Addr) {
+ LLVM_DEBUG(dbgs() << " Skipping, does not dominate all addr uses");
+ return false;
+ }
+ }
+
+ // FIXME: check whether all uses of the base pointer are constant GEPs. That
+ // might allow us to end base's liveness here by adjusting the constant.
+
+ for (auto &UseMI : MRI.use_instructions(Addr)) {
+ if (!dominates(MI, UseMI)) {
+ LLVM_DEBUG(dbgs() << " Skipping, does not dominate all addr uses.");
+ return false;
+ }
+ }
+
+ return true;
+}
+
+bool CombinerHelper::tryCombineIndexedLoadStore(MachineInstr &MI) {
+ unsigned Opcode = MI.getOpcode();
+ if (Opcode != TargetOpcode::G_LOAD && Opcode != TargetOpcode::G_SEXTLOAD &&
+ Opcode != TargetOpcode::G_ZEXTLOAD && Opcode != TargetOpcode::G_STORE)
+ return false;
+
+ bool IsStore = Opcode == TargetOpcode::G_STORE;
+ Register Addr, Base, Offset;
+ bool IsPre = findPreIndexCandidate(MI, Addr, Base, Offset);
+ if (!IsPre && !findPostIndexCandidate(MI, Addr, Base, Offset))
+ return false;
+
+
+ unsigned NewOpcode;
+ switch (Opcode) {
+ case TargetOpcode::G_LOAD:
+ NewOpcode = TargetOpcode::G_INDEXED_LOAD;
+ break;
+ case TargetOpcode::G_SEXTLOAD:
+ NewOpcode = TargetOpcode::G_INDEXED_SEXTLOAD;
+ break;
+ case TargetOpcode::G_ZEXTLOAD:
+ NewOpcode = TargetOpcode::G_INDEXED_ZEXTLOAD;
+ break;
+ case TargetOpcode::G_STORE:
+ NewOpcode = TargetOpcode::G_INDEXED_STORE;
+ break;
+ default:
+ llvm_unreachable("Unknown load/store opcode");
+ }
+
+ MachineInstr &AddrDef = *MRI.getUniqueVRegDef(Addr);
+ MachineIRBuilder MIRBuilder(MI);
+ auto MIB = MIRBuilder.buildInstr(NewOpcode);
+ if (IsStore) {
+ MIB.addDef(Addr);
+ MIB.addUse(MI.getOperand(0).getReg());
+ } else {
+ MIB.addDef(MI.getOperand(0).getReg());
+ MIB.addDef(Addr);
+ }
+
+ MIB.addUse(Base);
+ MIB.addUse(Offset);
+ MIB.addImm(IsPre);
+ MI.eraseFromParent();
+ AddrDef.eraseFromParent();
+
+ LLVM_DEBUG(dbgs() << " Combined to indexed operation");
+ return true;
+}
+
bool CombinerHelper::matchCombineBr(MachineInstr &MI) {
assert(MI.getOpcode() == TargetOpcode::G_BR && "Expected a G_BR");
// Try to match the following:
@@ -909,5 +1117,9 @@ bool CombinerHelper::tryCombineMemCpyFam
bool CombinerHelper::tryCombine(MachineInstr &MI) {
if (tryCombineCopy(MI))
return true;
- return tryCombineExtendingLoads(MI);
+ if (tryCombineExtendingLoads(MI))
+ return true;
+ if (tryCombineIndexedLoadStore(MI))
+ return true;
+ return false;
}
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp?rev=371384&r1=371383&r2=371384&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp Mon Sep 9 03:04:23 2019
@@ -36,7 +36,7 @@ using namespace llvm;
/// NOTE: The TargetMachine owns TLOF.
TargetLowering::TargetLowering(const TargetMachine &tm)
- : TargetLoweringBase(tm) {}
+ : TargetLoweringBase(tm) {}
const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
return nullptr;
Modified: llvm/trunk/lib/Target/AArch64/AArch64.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64.h?rev=371384&r1=371383&r2=371384&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64.h (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64.h Mon Sep 9 03:04:23 2019
@@ -55,7 +55,7 @@ FunctionPass *createAArch64CollectLOHPas
InstructionSelector *
createAArch64InstructionSelector(const AArch64TargetMachine &,
AArch64Subtarget &, AArch64RegisterBankInfo &);
-FunctionPass *createAArch64PreLegalizeCombiner();
+FunctionPass *createAArch64PreLegalizeCombiner(bool IsOptNone);
FunctionPass *createAArch64StackTaggingPass(bool MergeInit);
FunctionPass *createAArch64StackTaggingPreRAPass();
Modified: llvm/trunk/lib/Target/AArch64/AArch64PreLegalizerCombiner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64PreLegalizerCombiner.cpp?rev=371384&r1=371383&r2=371384&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64PreLegalizerCombiner.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64PreLegalizerCombiner.cpp Mon Sep 9 03:04:23 2019
@@ -17,6 +17,7 @@
#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
+#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/Support/Debug.h"
@@ -29,13 +30,14 @@ using namespace MIPatternMatch;
namespace {
class AArch64PreLegalizerCombinerInfo : public CombinerInfo {
GISelKnownBits *KB;
+ MachineDominatorTree *MDT;
public:
AArch64PreLegalizerCombinerInfo(bool EnableOpt, bool OptSize, bool MinSize,
- GISelKnownBits *KB)
+ GISelKnownBits *KB, MachineDominatorTree *MDT)
: CombinerInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false,
/*LegalizerInfo*/ nullptr, EnableOpt, OptSize, MinSize),
- KB(KB) {}
+ KB(KB), MDT(MDT) {}
virtual bool combine(GISelChangeObserver &Observer, MachineInstr &MI,
MachineIRBuilder &B) const override;
};
@@ -43,7 +45,7 @@ public:
bool AArch64PreLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
MachineInstr &MI,
MachineIRBuilder &B) const {
- CombinerHelper Helper(Observer, B, KB);
+ CombinerHelper Helper(Observer, B, KB, MDT);
switch (MI.getOpcode()) {
default:
@@ -54,8 +56,14 @@ bool AArch64PreLegalizerCombinerInfo::co
return Helper.tryCombineBr(MI);
case TargetOpcode::G_LOAD:
case TargetOpcode::G_SEXTLOAD:
- case TargetOpcode::G_ZEXTLOAD:
- return Helper.tryCombineExtendingLoads(MI);
+ case TargetOpcode::G_ZEXTLOAD: {
+ bool Changed = false;
+ Changed |= Helper.tryCombineExtendingLoads(MI);
+ Changed |= Helper.tryCombineIndexedLoadStore(MI);
+ return Changed;
+ }
+ case TargetOpcode::G_STORE:
+ return Helper.tryCombineIndexedLoadStore(MI);
case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS:
switch (MI.getIntrinsicID()) {
case Intrinsic::memcpy:
@@ -83,13 +91,15 @@ class AArch64PreLegalizerCombiner : publ
public:
static char ID;
- AArch64PreLegalizerCombiner();
+ AArch64PreLegalizerCombiner(bool IsOptNone = false);
StringRef getPassName() const override { return "AArch64PreLegalizerCombiner"; }
bool runOnMachineFunction(MachineFunction &MF) override;
void getAnalysisUsage(AnalysisUsage &AU) const override;
+private:
+ bool IsOptNone;
};
}
@@ -99,10 +109,15 @@ void AArch64PreLegalizerCombiner::getAna
getSelectionDAGFallbackAnalysisUsage(AU);
AU.addRequired<GISelKnownBitsAnalysis>();
AU.addPreserved<GISelKnownBitsAnalysis>();
+ if (!IsOptNone) {
+ AU.addRequired<MachineDominatorTree>();
+ AU.addPreserved<MachineDominatorTree>();
+ }
MachineFunctionPass::getAnalysisUsage(AU);
}
-AArch64PreLegalizerCombiner::AArch64PreLegalizerCombiner() : MachineFunctionPass(ID) {
+AArch64PreLegalizerCombiner::AArch64PreLegalizerCombiner(bool IsOptNone)
+ : MachineFunctionPass(ID), IsOptNone(IsOptNone) {
initializeAArch64PreLegalizerCombinerPass(*PassRegistry::getPassRegistry());
}
@@ -115,8 +130,10 @@ bool AArch64PreLegalizerCombiner::runOnM
bool EnableOpt =
MF.getTarget().getOptLevel() != CodeGenOpt::None && !skipFunction(F);
GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
+ MachineDominatorTree *MDT =
+ IsOptNone ? nullptr : &getAnalysis<MachineDominatorTree>();
AArch64PreLegalizerCombinerInfo PCInfo(EnableOpt, F.hasOptSize(),
- F.hasMinSize(), KB);
+ F.hasMinSize(), KB, MDT);
Combiner C(PCInfo, TPC);
return C.combineMachineInstrs(MF, /*CSEInfo*/ nullptr);
}
@@ -133,7 +150,7 @@ INITIALIZE_PASS_END(AArch64PreLegalizerC
namespace llvm {
-FunctionPass *createAArch64PreLegalizeCombiner() {
- return new AArch64PreLegalizerCombiner();
+FunctionPass *createAArch64PreLegalizeCombiner(bool IsOptNone) {
+ return new AArch64PreLegalizerCombiner(IsOptNone);
}
} // end namespace llvm
Modified: llvm/trunk/lib/Target/AArch64/AArch64TargetMachine.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64TargetMachine.cpp?rev=371384&r1=371383&r2=371384&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AArch64/AArch64TargetMachine.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64TargetMachine.cpp Mon Sep 9 03:04:23 2019
@@ -504,7 +504,8 @@ bool AArch64PassConfig::addIRTranslator(
}
void AArch64PassConfig::addPreLegalizeMachineIR() {
- addPass(createAArch64PreLegalizeCombiner());
+ bool IsOptNone = getOptLevel() == CodeGenOpt::None;
+ addPass(createAArch64PreLegalizeCombiner(IsOptNone));
}
bool AArch64PassConfig::addLegalizeMachineIR() {
Added: llvm/trunk/test/CodeGen/AArch64/GlobalISel/combiner-load-store-indexing.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/GlobalISel/combiner-load-store-indexing.ll?rev=371384&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/GlobalISel/combiner-load-store-indexing.ll (added)
+++ llvm/trunk/test/CodeGen/AArch64/GlobalISel/combiner-load-store-indexing.ll Mon Sep 9 03:04:23 2019
@@ -0,0 +1,182 @@
+; RUN: llc -mtriple=arm64-apple-ios -global-isel -global-isel-abort=1 -verify-machineinstrs -stop-after=aarch64-prelegalizer-combiner -force-legal-indexing %s -o - | FileCheck %s
+
+define i8* @test_simple_load_pre(i8* %ptr) {
+; CHECK-LABEL: name: test_simple_load_pre
+; CHECK: [[BASE:%.*]]:_(p0) = COPY $x0
+; CHECK: [[OFFSET:%.*]]:_(s64) = G_CONSTANT i64 42
+; CHECK-NOT: G_GEP
+; CHECK: {{%.*}}:_(s8), [[NEXT:%.*]]:_(p0) = G_INDEXED_LOAD [[BASE]], [[OFFSET]](s64), 1
+; CHECK: $x0 = COPY [[NEXT]](p0)
+
+ %next = getelementptr i8, i8* %ptr, i32 42
+ load volatile i8, i8* %next
+ ret i8* %next
+}
+
+define void @test_load_multiple_dominated(i8* %ptr, i1 %tst, i1 %tst2) {
+; CHECK-LABEL: name: test_load_multiple_dominated
+; CHECK: [[BASE:%.*]]:_(p0) = COPY $x0
+; CHECK: [[OFFSET:%.*]]:_(s64) = G_CONSTANT i64 42
+; CHECK-NOT: G_GEP
+; CHECK: {{%.*}}:_(s8), [[NEXT:%.*]]:_(p0) = G_INDEXED_LOAD [[BASE]], [[OFFSET]](s64), 1
+; CHECK: $x0 = COPY [[NEXT]](p0)
+ %next = getelementptr i8, i8* %ptr, i32 42
+ br i1 %tst, label %do_load, label %end
+
+do_load:
+ load volatile i8, i8* %next
+ br i1 %tst2, label %bb1, label %bb2
+
+bb1:
+ store volatile i8* %next, i8** undef
+ ret void
+
+bb2:
+ call void @bar(i8* %next)
+ ret void
+
+end:
+ ret void
+}
+
+define i8* @test_simple_store_pre(i8* %ptr) {
+; CHECK-LABEL: name: test_simple_store_pre
+; CHECK: [[BASE:%.*]]:_(p0) = COPY $x0
+; CHECK: [[VAL:%.*]]:_(s8) = G_CONSTANT i8 0
+; CHECK: [[OFFSET:%.*]]:_(s64) = G_CONSTANT i64 42
+; CHECK-NOT: G_GEP
+; CHECK: [[NEXT:%.*]]:_(p0) = G_INDEXED_STORE [[VAL]](s8), [[BASE]], [[OFFSET]](s64), 1
+; CHECK: $x0 = COPY [[NEXT]](p0)
+
+ %next = getelementptr i8, i8* %ptr, i32 42
+ store volatile i8 0, i8* %next
+ ret i8* %next
+}
+
+; The potentially pre-indexed address is used as the value stored. Converting
+; would produce the value too late but only by one instruction.
+define i64** @test_store_pre_val_loop(i64** %ptr) {
+; CHECK-LABEL: name: test_store_pre_val_loop
+; CHECK: G_GEP
+; CHECK: G_STORE %
+
+ %next = getelementptr i64*, i64** %ptr, i32 42
+ %next.p0 = bitcast i64** %next to i64*
+ store volatile i64* %next.p0, i64** %next
+ ret i64** %next
+}
+
+; Potentially pre-indexed address is used between GEP computing it and load.
+define i8* @test_load_pre_before(i8* %ptr) {
+; CHECK-LABEL: name: test_load_pre_before
+; CHECK: G_GEP
+; CHECK: BL @bar
+; CHECK: G_LOAD %
+
+ %next = getelementptr i8, i8* %ptr, i32 42
+ call void @bar(i8* %next)
+ load volatile i8, i8* %next
+ ret i8* %next
+}
+
+; Materializing the base into a writable register (from sp/fp) would be just as
+; bad as the original GEP.
+define i8* @test_alloca_load_pre() {
+; CHECK-LABEL: name: test_alloca_load_pre
+; CHECK: G_GEP
+; CHECK: G_LOAD %
+
+ %ptr = alloca i8, i32 128
+ %next = getelementptr i8, i8* %ptr, i32 42
+ load volatile i8, i8* %next
+ ret i8* %next
+}
+
+; Load does not dominate use of its address. No indexing.
+define i8* @test_pre_nodom(i8* %in, i1 %tst) {
+; CHECK-LABEL: name: test_pre_nodom
+; CHECK: G_GEP
+; CHECK: G_LOAD %
+
+ %next = getelementptr i8, i8* %in, i32 16
+ br i1 %tst, label %do_indexed, label %use_addr
+
+do_indexed:
+ %val = load i8, i8* %next
+ store i8 %val, i8* @var
+ store i8* %next, i8** @varp8
+ br label %use_addr
+
+use_addr:
+ ret i8* %next
+}
+
+define i8* @test_simple_load_post(i8* %ptr) {
+; CHECK-LABEL: name: test_simple_load_post
+; CHECK: [[BASE:%.*]]:_(p0) = COPY $x0
+; CHECK: [[OFFSET:%.*]]:_(s64) = G_CONSTANT i64 42
+; CHECK-NOT: G_GEP
+; CHECK: {{%.*}}:_(s8), [[NEXT:%.*]]:_(p0) = G_INDEXED_LOAD [[BASE]], [[OFFSET]](s64), 0
+; CHECK: $x0 = COPY [[NEXT]](p0)
+
+ %next = getelementptr i8, i8* %ptr, i32 42
+ load volatile i8, i8* %ptr
+ ret i8* %next
+}
+
+define i8* @test_simple_load_post_gep_after(i8* %ptr) {
+; CHECK-LABEL: name: test_simple_load_post_gep_after
+; CHECK: [[BASE:%.*]]:_(p0) = COPY $x0
+; CHECK: BL @get_offset
+; CHECK: [[OFFSET:%.*]]:_(s64) = COPY $x0
+; CHECK: {{%.*}}:_(s8), [[ADDR:%.*]]:_(p0) = G_INDEXED_LOAD [[BASE]], [[OFFSET]](s64), 0
+; CHECK: $x0 = COPY [[ADDR]](p0)
+
+ %offset = call i64 @get_offset()
+ load volatile i8, i8* %ptr
+ %next = getelementptr i8, i8* %ptr, i64 %offset
+ ret i8* %next
+}
+
+define i8* @test_load_post_keep_looking(i8* %ptr) {
+; CHECK: name: test_load_post_keep_looking
+; CHECK: G_INDEXED_LOAD
+
+ %offset = call i64 @get_offset()
+ load volatile i8, i8* %ptr
+ %intval = ptrtoint i8* %ptr to i8
+ store i8 %intval, i8* @var
+
+ %next = getelementptr i8, i8* %ptr, i64 %offset
+ ret i8* %next
+}
+
+; Base is frame index. Using indexing would need copy anyway.
+define i8* @test_load_post_alloca() {
+; CHECK-LABEL: name: test_load_post_alloca
+; CHECK: G_GEP
+; CHECK: G_LOAD %
+
+ %ptr = alloca i8, i32 128
+ %next = getelementptr i8, i8* %ptr, i32 42
+ load volatile i8, i8* %ptr
+ ret i8* %next
+}
+
+; Offset computation does not dominate the load we might be indexing.
+define i8* @test_load_post_gep_offset_after(i8* %ptr) {
+; CHECK-LABEL: name: test_load_post_gep_offset_after
+; CHECK: G_LOAD %
+; CHECK: BL @get_offset
+; CHECK: G_GEP
+
+ load volatile i8, i8* %ptr
+ %offset = call i64 @get_offset()
+ %next = getelementptr i8, i8* %ptr, i64 %offset
+ ret i8* %next
+}
+
+declare void @bar(i8*)
+declare i64 @get_offset()
+@var = global i8 0
+@varp8 = global i8* null
Modified: llvm/trunk/test/CodeGen/AArch64/GlobalISel/gisel-commandline-option.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/GlobalISel/gisel-commandline-option.ll?rev=371384&r1=371383&r2=371384&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/GlobalISel/gisel-commandline-option.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/GlobalISel/gisel-commandline-option.ll Mon Sep 9 03:04:23 2019
@@ -16,15 +16,15 @@
; RUN: llc -mtriple=aarch64-- -debug-pass=Structure %s -o /dev/null 2>&1 \
; RUN: -verify-machineinstrs=0 -global-isel \
-; RUN: | FileCheck %s --check-prefix ENABLED --check-prefix NOFALLBACK
+; RUN: | FileCheck %s --check-prefix ENABLED --check-prefix NOFALLBACK --check-prefix ENABLED-O1
; RUN: llc -mtriple=aarch64-- -debug-pass=Structure %s -o /dev/null 2>&1 \
; RUN: -verify-machineinstrs=0 -global-isel -global-isel-abort=2 \
-; RUN: | FileCheck %s --check-prefix ENABLED --check-prefix FALLBACK
+; RUN: | FileCheck %s --check-prefix ENABLED --check-prefix FALLBACK --check-prefix ENABLED-O1
; RUN: llc -mtriple=aarch64-- -debug-pass=Structure %s -o /dev/null 2>&1 \
; RUN: -verify-machineinstrs=0 -O1 -aarch64-enable-global-isel-at-O=3 \
-; RUN: | FileCheck %s --check-prefix ENABLED
+; RUN: | FileCheck %s --check-prefix ENABLED --check-prefix ENABLED-O1
; RUN: llc -mtriple=aarch64-- -debug-pass=Structure %s -o /dev/null 2>&1 \
; RUN: -verify-machineinstrs=0 -O1 -aarch64-enable-global-isel-at-O=0 \
@@ -44,6 +44,7 @@
; ENABLED: IRTranslator
; VERIFY-NEXT: Verify generated machine code
; ENABLED-NEXT: Analysis for ComputingKnownBits
+; ENABLED-O1-NEXT: MachineDominator Tree Construction
; ENABLED-NEXT: PreLegalizerCombiner
; VERIFY-NEXT: Verify generated machine code
; ENABLED-NEXT: Analysis containing CSE Info
Modified: llvm/trunk/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir?rev=371384&r1=371383&r2=371384&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir (original)
+++ llvm/trunk/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir Mon Sep 9 03:04:23 2019
@@ -134,9 +134,21 @@
# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}}
# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
+# DEBUG-NEXT: G_INDEXED_LOAD (opcode 64): 3 type indices, 0 imm indices
+# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
+# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
+# DEBUG-NEXT: G_INDEXED_SEXTLOAD (opcode 65): 3 type indices, 0 imm indices
+# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
+# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
+# DEBUG-NEXT: G_INDEXED_ZEXTLOAD (opcode 66): 3 type indices, 0 imm indices
+# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
+# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
# DEBUG-NEXT: G_STORE (opcode {{[0-9]+}}): 2 type indices, 0 imm indices
# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected
+# DEBUG-NEXT: G_INDEXED_STORE (opcode 68): 3 type indices, 0 imm indices
+# DEBUG-NEXT: .. type index coverage check SKIPPED: no rules defined
+# DEBUG-NEXT: .. imm index coverage check SKIPPED: no rules defined
# DEBUG-NEXT: G_ATOMIC_CMPXCHG_WITH_SUCCESS (opcode {{[0-9]+}}): 3 type indices, 0 imm indices
# DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected
# DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected