[llvm] r313376 - Revert r313343 "[X86] PR32755 : Improvement in CodeGen instruction selection for LEAs."
Hans Wennborg via llvm-commits
llvm-commits at lists.llvm.org
Fri Sep 15 11:40:26 PDT 2017
Author: hans
Date: Fri Sep 15 11:40:26 2017
New Revision: 313376
URL: http://llvm.org/viewvc/llvm-project?rev=313376&view=rev
Log:
Revert r313343 "[X86] PR32755 : Improvement in CodeGen instruction selection for LEAs."
This caused PR34629: asserts firing when building Chromium. It also broke some
buildbots building test-suite as reported on the commit thread.
> Summary:
> 1/ Operand folding during complex pattern matching for LEAs has been
> extended, such that it promotes Scale to accommodate similar operands
> appearing in the DAG.
> e.g.
> T1 = A + B
> T2 = T1 + 10
> T3 = T2 + A
> For the above DAG rooted at T3, X86AddressMode will now look like
> Base = B , Index = A , Scale = 2 , Disp = 10
>
> 2/ Later in the pipeline, OptimizeLEAPass now performs factorization over LEAs
> so that, where there is an opportunity, complex LEAs (having 3 operands)
> can be factored out.
> e.g.
> leal 1(%rax,%rcx,1), %rdx
> leal 1(%rax,%rcx,2), %rcx
> will be factored as follows
> leal 1(%rax,%rcx,1), %rdx
> leal (%rdx,%rcx) , %edx
>
> 3/ Aggressive operand folding for AM based selection for LEAs is now sensitive to loops,
> so that it avoids creating any complex LEAs within a loop.
>
> Reviewers: lsaba, RKSimon, craig.topper, qcolombet
>
> Reviewed By: lsaba
>
> Subscribers: spatel, igorb, llvm-commits
>
> Differential Revision: https://reviews.llvm.org/D35014
Modified:
llvm/trunk/include/llvm/CodeGen/MachineInstr.h
llvm/trunk/include/llvm/CodeGen/SelectionDAG.h
llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp
llvm/trunk/lib/Target/X86/X86OptimizeLEAs.cpp
llvm/trunk/test/CodeGen/X86/GlobalISel/callingconv.ll
llvm/trunk/test/CodeGen/X86/GlobalISel/gep.ll
llvm/trunk/test/CodeGen/X86/GlobalISel/memop-scalar.ll
llvm/trunk/test/CodeGen/X86/lea-opt-cse1.ll
llvm/trunk/test/CodeGen/X86/lea-opt-cse2.ll
llvm/trunk/test/CodeGen/X86/lea-opt-cse3.ll
llvm/trunk/test/CodeGen/X86/lea-opt-cse4.ll
llvm/trunk/test/CodeGen/X86/mul-constant-i16.ll
llvm/trunk/test/CodeGen/X86/mul-constant-i32.ll
llvm/trunk/test/CodeGen/X86/mul-constant-i64.ll
llvm/trunk/test/CodeGen/X86/mul-constant-result.ll
llvm/trunk/test/CodeGen/X86/umul-with-overflow.ll
llvm/trunk/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll
Modified: llvm/trunk/include/llvm/CodeGen/MachineInstr.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/MachineInstr.h?rev=313376&r1=313375&r2=313376&view=diff
==============================================================================
--- llvm/trunk/include/llvm/CodeGen/MachineInstr.h (original)
+++ llvm/trunk/include/llvm/CodeGen/MachineInstr.h Fri Sep 15 11:40:26 2017
@@ -1289,13 +1289,12 @@ public:
/// Add all implicit def and use operands to this instruction.
void addImplicitDefUseOperands(MachineFunction &MF);
+private:
/// If this instruction is embedded into a MachineFunction, return the
/// MachineRegisterInfo object for the current function, otherwise
/// return null.
MachineRegisterInfo *getRegInfo();
-private:
-
/// Unlink all of the register operands in this instruction from their
/// respective use lists. This requires that the operands already be on their
/// use lists.
Modified: llvm/trunk/include/llvm/CodeGen/SelectionDAG.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/SelectionDAG.h?rev=313376&r1=313375&r2=313376&view=diff
==============================================================================
--- llvm/trunk/include/llvm/CodeGen/SelectionDAG.h (original)
+++ llvm/trunk/include/llvm/CodeGen/SelectionDAG.h Fri Sep 15 11:40:26 2017
@@ -300,9 +300,6 @@ public:
/// type legalization.
bool NewNodesMustHaveLegalTypes = false;
- /// Set to true for DAG of BasicBlock contained inside a loop.
- bool IsDAGPartOfLoop = false;
-
private:
/// DAGUpdateListener is a friend so it can manipulate the listener stack.
friend struct DAGUpdateListener;
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp?rev=313376&r1=313375&r2=313376&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp Fri Sep 15 11:40:26 2017
@@ -26,7 +26,6 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/CFG.h"
-#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/OptimizationDiagnosticInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/CodeGen/FastISel.h"
@@ -326,8 +325,6 @@ void SelectionDAGISel::getAnalysisUsage(
if (OptLevel != CodeGenOpt::None)
AU.addRequired<AAResultsWrapperPass>();
AU.addRequired<GCModuleInfo>();
- if (OptLevel != CodeGenOpt::None)
- AU.addRequired<LoopInfoWrapperPass>();
AU.addRequired<StackProtector>();
AU.addPreserved<StackProtector>();
AU.addPreserved<GCModuleInfo>();
@@ -1422,7 +1419,6 @@ void SelectionDAGISel::SelectAllBasicBlo
// Iterate over all basic blocks in the function.
for (const BasicBlock *LLVMBB : RPOT) {
- CurDAG->IsDAGPartOfLoop = false;
if (OptLevel != CodeGenOpt::None) {
bool AllPredsVisited = true;
for (const_pred_iterator PI = pred_begin(LLVMBB), PE = pred_end(LLVMBB);
@@ -1600,13 +1596,6 @@ void SelectionDAGISel::SelectAllBasicBlo
FunctionBasedInstrumentation);
}
- if (OptLevel != CodeGenOpt::None) {
- auto &LIWP = getAnalysis<LoopInfoWrapperPass>();
- LoopInfo &LI = LIWP.getLoopInfo();
- if (LI.getLoopFor(LLVMBB))
- CurDAG->IsDAGPartOfLoop = true;
- }
-
if (Begin != BI)
++NumDAGBlocks;
else
Modified: llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp?rev=313376&r1=313375&r2=313376&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp Fri Sep 15 11:40:26 2017
@@ -88,11 +88,6 @@ namespace {
IndexReg.getNode() != nullptr || Base_Reg.getNode() != nullptr;
}
- bool hasComplexAddressingMode() const {
- return Disp && IndexReg.getNode() != nullptr &&
- Base_Reg.getNode() != nullptr;
- }
-
/// Return true if this addressing mode is already RIP-relative.
bool isRIPRelative() const {
if (BaseType != RegBase) return false;
@@ -102,10 +97,6 @@ namespace {
return false;
}
- bool isLegalScale() {
- return (Scale == 1 || Scale == 2 || Scale == 4 || Scale == 8);
- }
-
void setBaseReg(SDValue Reg) {
BaseType = RegBase;
Base_Reg = Reg;
@@ -171,13 +162,10 @@ namespace {
/// If true, selector should try to optimize for minimum code size.
bool OptForMinSize;
- /// If true, selector should try to aggresively fold operands into AM.
- bool OptForAggressingFolding;
-
public:
explicit X86DAGToDAGISel(X86TargetMachine &tm, CodeGenOpt::Level OptLevel)
: SelectionDAGISel(tm, OptLevel), OptForSize(false),
- OptForMinSize(false), OptForAggressingFolding(false) {}
+ OptForMinSize(false) {}
StringRef getPassName() const override {
return "X86 DAG->DAG Instruction Selection";
@@ -196,12 +184,6 @@ namespace {
void PreprocessISelDAG() override;
- void setAggressiveOperandFolding(bool val = false) {
- OptForAggressingFolding = val;
- }
-
- bool getAggressiveOperandFolding() { return OptForAggressingFolding; }
-
// Include the pieces autogenerated from the target description.
#include "X86GenDAGISel.inc"
@@ -215,7 +197,6 @@ namespace {
bool matchAdd(SDValue N, X86ISelAddressMode &AM, unsigned Depth);
bool matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
unsigned Depth);
- bool matchAddressLEA(SDValue N, X86ISelAddressMode &AM);
bool matchAddressBase(SDValue N, X86ISelAddressMode &AM);
bool selectAddr(SDNode *Parent, SDValue N, SDValue &Base,
SDValue &Scale, SDValue &Index, SDValue &Disp,
@@ -444,20 +425,6 @@ namespace {
bool matchBEXTRFromAnd(SDNode *Node);
};
-
- class X86AggressiveOperandFolding {
- public:
- explicit X86AggressiveOperandFolding(X86DAGToDAGISel &ISel, bool val)
- : Selector(&ISel) {
- Selector->setAggressiveOperandFolding(val);
- }
- ~X86AggressiveOperandFolding() {
- Selector->setAggressiveOperandFolding(false);
- }
-
- private:
- X86DAGToDAGISel *Selector;
- };
}
@@ -1171,7 +1138,7 @@ static bool foldMaskAndShiftToScale(Sele
AM.IndexReg = NewSRL;
return false;
}
-
+
bool X86DAGToDAGISel::matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
unsigned Depth) {
SDLoc dl(N);
@@ -1179,11 +1146,8 @@ bool X86DAGToDAGISel::matchAddressRecurs
dbgs() << "MatchAddress: ";
AM.dump();
});
-
- // Limit recursion. For aggressive operand folding recurse
- // till depth 8 which is the maximum legal scale value.
- unsigned MaxDepth = getAggressiveOperandFolding() ? 8 : 5;
- if (Depth > MaxDepth)
+ // Limit recursion.
+ if (Depth > 5)
return matchAddressBase(N, AM);
// If this is already a %rip relative address, we can only merge immediates
@@ -1474,20 +1438,6 @@ bool X86DAGToDAGISel::matchAddressBase(S
return false;
}
- if (OptLevel != CodeGenOpt::None && getAggressiveOperandFolding() &&
- AM.BaseType == X86ISelAddressMode::RegBase) {
- if (AM.Base_Reg == N) {
- SDValue Base_Reg = AM.Base_Reg;
- AM.Base_Reg = AM.IndexReg;
- AM.IndexReg = Base_Reg;
- AM.Scale++;
- return false;
- } else if (AM.IndexReg == N) {
- AM.Scale++;
- return false;
- }
- }
-
// Otherwise, we cannot select it.
return true;
}
@@ -1718,7 +1668,7 @@ bool X86DAGToDAGISel::selectLEA64_32Addr
SDValue &Disp, SDValue &Segment) {
// Save the debug loc before calling selectLEAAddr, in case it invalidates N.
SDLoc DL(N);
-
+
if (!selectLEAAddr(N, Base, Scale, Index, Disp, Segment))
return false;
@@ -1753,29 +1703,6 @@ bool X86DAGToDAGISel::selectLEA64_32Addr
return true;
}
-bool X86DAGToDAGISel::matchAddressLEA(SDValue N, X86ISelAddressMode &AM) {
- // Avoid enabling aggressive operand folding when node N is a part of loop.
- X86AggressiveOperandFolding Enable(*this, !CurDAG->IsDAGPartOfLoop);
-
- bool matchRes = matchAddress(N, AM);
-
- // Check for legality of scale when recursion unwinds back to the top.
- if (!matchRes) {
- if (!AM.isLegalScale())
- return true;
-
- // Avoid creating costly complex LEAs having scale less than 2
- // within loop.
- if(CurDAG->IsDAGPartOfLoop && Subtarget->slow3OpsLEA() &&
- AM.Scale <= 2 && AM.hasComplexAddressingMode() &&
- (!AM.hasSymbolicDisplacement() && N.getOpcode() < ISD::BUILTIN_OP_END))
- return true;
- }
-
- return matchRes;
-}
-
-
/// Calls SelectAddr and determines if the maximal addressing
/// mode it matches can be cost effectively emitted as an LEA instruction.
bool X86DAGToDAGISel::selectLEAAddr(SDValue N,
@@ -1793,7 +1720,7 @@ bool X86DAGToDAGISel::selectLEAAddr(SDVa
SDValue Copy = AM.Segment;
SDValue T = CurDAG->getRegister(0, MVT::i32);
AM.Segment = T;
- if (matchAddressLEA(N, AM))
+ if (matchAddress(N, AM))
return false;
assert (T == AM.Segment);
AM.Segment = Copy;
Modified: llvm/trunk/lib/Target/X86/X86OptimizeLEAs.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86OptimizeLEAs.cpp?rev=313376&r1=313375&r2=313376&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86OptimizeLEAs.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86OptimizeLEAs.cpp Fri Sep 15 11:40:26 2017
@@ -22,7 +22,6 @@
#include "X86Subtarget.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LiveVariables.h"
-#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
@@ -45,7 +44,6 @@ static cl::opt<bool>
cl::init(false));
STATISTIC(NumSubstLEAs, "Number of LEA instruction substitutions");
-STATISTIC(NumFactoredLEAs, "Number of LEAs factorized");
STATISTIC(NumRedundantLEAs, "Number of redundant LEA instructions removed");
/// \brief Returns true if two machine operands are identical and they are not
@@ -53,10 +51,6 @@ STATISTIC(NumRedundantLEAs, "Number of r
static inline bool isIdenticalOp(const MachineOperand &MO1,
const MachineOperand &MO2);
-/// \brief Returns true if two machine instructions have identical operands.
-static bool isIdenticalMI(MachineRegisterInfo *MRI, const MachineOperand &MO1,
- const MachineOperand &MO2);
-
/// \brief Returns true if two address displacement operands are of the same
/// type and use the same symbol/index/address regardless of the offset.
static bool isSimilarDispOp(const MachineOperand &MO1,
@@ -65,44 +59,21 @@ static bool isSimilarDispOp(const Machin
/// \brief Returns true if the instruction is LEA.
static inline bool isLEA(const MachineInstr &MI);
-/// \brief Returns true if Definition of Operand is a copylike instruction.
-static bool isDefCopyLike(MachineRegisterInfo *MRI, const MachineOperand &Opr);
-
namespace {
/// A key based on instruction's memory operands.
class MemOpKey {
public:
MemOpKey(const MachineOperand *Base, const MachineOperand *Scale,
const MachineOperand *Index, const MachineOperand *Segment,
- const MachineOperand *Disp, bool DispCheck = false)
- : Disp(Disp), DeepCheck(DispCheck) {
+ const MachineOperand *Disp)
+ : Disp(Disp) {
Operands[0] = Base;
Operands[1] = Scale;
Operands[2] = Index;
Operands[3] = Segment;
}
- /// Checks operands of MemOpKey are identical, if Base or Index
- /// operand definitions are of kind SUBREG_TO_REG then compare
- /// operands of defining MI.
- bool performDeepCheck(const MemOpKey &Other) const {
- MachineInstr *MI = const_cast<MachineInstr *>(Operands[0]->getParent());
- MachineRegisterInfo *MRI = MI->getRegInfo();
-
- for (int i = 0; i < 4; i++) {
- bool copyLike = isDefCopyLike(MRI, *Operands[i]);
- if (copyLike && !isIdenticalMI(MRI, *Operands[i], *Other.Operands[i]))
- return false;
- else if (!copyLike && !isIdenticalOp(*Operands[i], *Other.Operands[i]))
- return false;
- }
- return isIdenticalOp(*Disp, *Other.Disp);
- }
-
bool operator==(const MemOpKey &Other) const {
- if (DeepCheck)
- return performDeepCheck(Other);
-
// Addresses' bases, scales, indices and segments must be identical.
for (int i = 0; i < 4; ++i)
if (!isIdenticalOp(*Operands[i], *Other.Operands[i]))
@@ -120,12 +91,6 @@ public:
// Address' displacement operand.
const MachineOperand *Disp;
-
- // If true checks Address' base, index, segment and
- // displacement are identical, in additions if base/index
- // are defined by copylike instruction then futher
- // compare the operands of the defining instruction.
- bool DeepCheck;
};
} // end anonymous namespace
@@ -149,34 +114,12 @@ template <> struct DenseMapInfo<MemOpKey
static unsigned getHashValue(const MemOpKey &Val) {
// Checking any field of MemOpKey is enough to determine if the key is
// empty or tombstone.
- hash_code Hash(0);
assert(Val.Disp != PtrInfo::getEmptyKey() && "Cannot hash the empty key");
assert(Val.Disp != PtrInfo::getTombstoneKey() &&
"Cannot hash the tombstone key");
- auto getMIHash = [](MachineInstr *MI) -> hash_code {
- hash_code h(0);
- for (unsigned i = 1, e = MI->getNumOperands(); i < e; i++)
- h = hash_combine(h, MI->getOperand(i));
- return h;
- };
-
- const MachineOperand &Base = *Val.Operands[0];
- const MachineOperand &Index = *Val.Operands[2];
- MachineInstr *MI = const_cast<MachineInstr *>(Base.getParent());
- MachineRegisterInfo *MRI = MI->getRegInfo();
-
- if (isDefCopyLike(MRI, Base))
- Hash = getMIHash(MRI->getVRegDef(Base.getReg()));
- else
- Hash = hash_combine(Hash, Base);
-
- if (isDefCopyLike(MRI, Index))
- Hash = getMIHash(MRI->getVRegDef(Index.getReg()));
- else
- Hash = hash_combine(Hash, Index);
-
- Hash = hash_combine(Hash, *Val.Operands[1], *Val.Operands[3]);
+ hash_code Hash = hash_combine(*Val.Operands[0], *Val.Operands[1],
+ *Val.Operands[2], *Val.Operands[3]);
// If the address displacement is an immediate, it should not affect the
// hash so that memory operands which differ only be immediate displacement
@@ -235,16 +178,6 @@ static inline MemOpKey getMemOpKey(const
&MI.getOperand(N + X86::AddrDisp));
}
-static inline MemOpKey getMemOpCSEKey(const MachineInstr &MI, unsigned N) {
- static MachineOperand DummyScale = MachineOperand::CreateImm(1);
- assert((isLEA(MI) || MI.mayLoadOrStore()) &&
- "The instruction must be a LEA, a load or a store");
- return MemOpKey(&MI.getOperand(N + X86::AddrBaseReg), &DummyScale,
- &MI.getOperand(N + X86::AddrIndexReg),
- &MI.getOperand(N + X86::AddrSegmentReg),
- &MI.getOperand(N + X86::AddrDisp), true);
-}
-
static inline bool isIdenticalOp(const MachineOperand &MO1,
const MachineOperand &MO2) {
return MO1.isIdenticalTo(MO2) &&
@@ -252,27 +185,6 @@ static inline bool isIdenticalOp(const M
!TargetRegisterInfo::isPhysicalRegister(MO1.getReg()));
}
-static bool isIdenticalMI(MachineRegisterInfo *MRI, const MachineOperand &MO1,
- const MachineOperand &MO2) {
- MachineInstr *MI1 = nullptr;
- MachineInstr *MI2 = nullptr;
- if (!MO1.isReg() || !MO2.isReg())
- return false;
-
- MI1 = MRI->getVRegDef(MO1.getReg());
- MI2 = MRI->getVRegDef(MO2.getReg());
- if (!MI1 || !MI2)
- return false;
- if (MI1->getOpcode() != MI2->getOpcode())
- return false;
- if (MI1->getNumOperands() != MI2->getNumOperands())
- return false;
- for (unsigned i = 1, e = MI1->getNumOperands(); i < e; ++i)
- if (!isIdenticalOp(MI1->getOperand(i), MI2->getOperand(i)))
- return false;
- return true;
-}
-
#ifndef NDEBUG
static bool isValidDispOp(const MachineOperand &MO) {
return MO.isImm() || MO.isCPI() || MO.isJTI() || MO.isSymbol() ||
@@ -304,140 +216,7 @@ static inline bool isLEA(const MachineIn
Opcode == X86::LEA64r || Opcode == X86::LEA64_32r;
}
-static bool isDefCopyLike(MachineRegisterInfo *MRI, const MachineOperand &Opr) {
- if (!Opr.isReg() || TargetRegisterInfo::isPhysicalRegister(Opr.getReg()))
- return false;
- MachineInstr *MI = MRI->getVRegDef(Opr.getReg());
- return MI && MI->isCopyLike();
-}
-
namespace {
-
-/// This class captures the functions and attributes
-/// needed to factorize LEA within and across basic
-/// blocks.LEA instruction with same BASE,OFFSET and
-/// INDEX are the candidates for factorization.
-class FactorizeLEAOpt {
-public:
- using LEAListT = std::list<MachineInstr *>;
- using LEAMapT = DenseMap<MemOpKey, LEAListT>;
- using ValueT = DenseMap<MemOpKey, unsigned>;
- using ScopeEntryT = std::pair<MachineBasicBlock *, ValueT>;
- using ScopeStackT = std::vector<ScopeEntryT>;
-
- FactorizeLEAOpt() = default;
- FactorizeLEAOpt(const FactorizeLEAOpt &) = delete;
- FactorizeLEAOpt &operator=(const FactorizeLEAOpt &) = delete;
-
- void performCleanup() {
- for (auto LEA : removedLEAs)
- LEA->eraseFromParent();
- LEAs.clear();
- Stack.clear();
- removedLEAs.clear();
- }
-
- LEAMapT &getLEAMap() { return LEAs; }
- ScopeEntryT *getTopScope() { return &Stack.back(); }
-
- void addForLazyRemoval(MachineInstr *Instr) { removedLEAs.insert(Instr); }
-
- bool checkIfScheduledForRemoval(MachineInstr *Instr) {
- return removedLEAs.find(Instr) != removedLEAs.end();
- }
-
- /// Push the ScopeEntry for the BasicBlock over Stack.
- /// Also traverses over list of instruction and update
- /// LEAs Map and ScopeEntry for each LEA instruction
- /// found using insertLEA().
- void pushScope(MachineBasicBlock *MBB);
-
- /// Stores the size of MachineInstr list corrosponding
- /// to key K from LEAs MAP into the ScopeEntry of
- /// the basic block, then insert the LEA at the beginning
- /// of the list.
- void insertLEA(MachineInstr *MI);
-
- /// Pops out ScopeEntry of top most BasicBlock from the stack
- /// and remove the LEA instructions contained in the scope
- /// from the LEAs Map.
- void popScope();
-
- /// If LEA contains Physical Registers then its not a candidate
- /// for factorizations since physical registers may violate SSA
- /// semantics of MI.
- bool containsPhyReg(MachineInstr *MI, unsigned RecLevel);
-
-private:
- ScopeStackT Stack;
- LEAMapT LEAs;
- std::set<MachineInstr *> removedLEAs;
-};
-
-void FactorizeLEAOpt::pushScope(MachineBasicBlock *MBB) {
- ValueT EmptyMap;
- ScopeEntryT SE = std::make_pair(MBB, EmptyMap);
- Stack.push_back(SE);
- for (auto &MI : *MBB) {
- if (isLEA(MI))
- insertLEA(&MI);
- }
-}
-
-void FactorizeLEAOpt::popScope() {
- ScopeEntryT &SE = Stack.back();
- for (auto MapEntry : SE.second) {
- LEAMapT::iterator Itr = LEAs.find(MapEntry.first);
- assert((Itr != LEAs.end()) &&
- "LEAs map must have a node corresponding to ScopeEntry's Key.");
-
- while (((*Itr).second.size() > MapEntry.second))
- (*Itr).second.pop_front();
- // If list goes empty remove entry from LEAs Map.
- if ((*Itr).second.empty())
- LEAs.erase(Itr);
- }
- Stack.pop_back();
-}
-
-bool FactorizeLEAOpt::containsPhyReg(MachineInstr *MI, unsigned RecLevel) {
- if (!MI || !RecLevel)
- return false;
-
- MachineRegisterInfo *MRI = MI->getRegInfo();
- for (auto Operand : MI->operands()) {
- if (!Operand.isReg())
- continue;
- if (TargetRegisterInfo::isPhysicalRegister(Operand.getReg()))
- return true;
- MachineInstr *OperDefMI = MRI->getVRegDef(Operand.getReg());
- if (OperDefMI && (MI != OperDefMI) && OperDefMI->isCopyLike() &&
- containsPhyReg(OperDefMI, RecLevel - 1))
- return true;
- }
- return false;
-}
-
-void FactorizeLEAOpt::insertLEA(MachineInstr *MI) {
- unsigned lsize;
- if (containsPhyReg(MI, 2))
- return;
-
- MemOpKey Key = getMemOpCSEKey(*MI, 1);
- ScopeEntryT *TopScope = getTopScope();
-
- LEAMapT::iterator Itr = LEAs.find(Key);
- if (Itr == LEAs.end()) {
- lsize = 0;
- LEAs[Key].push_front(MI);
- } else {
- lsize = (*Itr).second.size();
- (*Itr).second.push_front(MI);
- }
- if (TopScope->second.find(Key) == TopScope->second.end())
- TopScope->second[Key] = lsize;
-}
-
class OptimizeLEAPass : public MachineFunctionPass {
public:
OptimizeLEAPass() : MachineFunctionPass(ID) {}
@@ -449,12 +228,6 @@ public:
/// been calculated by LEA. Also, remove redundant LEAs.
bool runOnMachineFunction(MachineFunction &MF) override;
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesCFG();
- MachineFunctionPass::getAnalysisUsage(AU);
- AU.addRequired<MachineDominatorTree>();
- }
-
private:
typedef DenseMap<MemOpKey, SmallVector<MachineInstr *, 16>> MemOpMap;
@@ -500,24 +273,8 @@ private:
/// \brief Removes LEAs which calculate similar addresses.
bool removeRedundantLEAs(MemOpMap &LEAs);
- /// \brief Visit over basic blocks, collect LEAs in a scoped
- /// hash map (FactorizeLEAOpt::LEAs) and try to factor them out.
- bool FactorizeLEAsAllBasicBlocks(MachineFunction &MF);
-
- bool FactorizeLEAsBasicBlock(MachineDomTreeNode *DN);
-
- /// \brief Factor out LEAs which share Base,Index,Offset and Segment.
- bool processBasicBlock(const MachineBasicBlock &MBB);
-
- /// \brief Try to replace LEA with a lower strength instruction
- /// to improves latency and throughput.
- bool strengthReduceLEAs(MemOpMap &LEAs, const MachineBasicBlock &MBB);
-
DenseMap<const MachineInstr *, unsigned> InstrPos;
- FactorizeLEAOpt FactorOpt;
-
- MachineDominatorTree *DT;
MachineRegisterInfo *MRI;
const X86InstrInfo *TII;
const X86RegisterInfo *TRI;
@@ -890,152 +647,6 @@ bool OptimizeLEAPass::removeRedundantLEA
return Changed;
}
-static inline int getADDrrFromLEA(int LEAOpcode) {
- switch (LEAOpcode) {
- default:
- llvm_unreachable("Unexpected LEA instruction");
- case X86::LEA16r:
- return X86::ADD16rr;
- case X86::LEA32r:
- return X86::ADD32rr;
- case X86::LEA64_32r:
- case X86::LEA64r:
- return X86::ADD64rr;
- }
-}
-
-bool OptimizeLEAPass::strengthReduceLEAs(MemOpMap &LEAs,
- const MachineBasicBlock &BB) {
- bool Changed = false;
-
- // Loop over all entries in the table.
- for (auto &E : LEAs) {
- auto &List = E.second;
-
- // Loop over all LEA pairs.
- for (auto I1 = List.begin(); I1 != List.end(); I1++) {
- MachineInstrBuilder NewMI;
- MachineInstr &First = **I1;
- MachineOperand &Res = First.getOperand(0);
- MachineOperand &Base = First.getOperand(1);
- MachineOperand &Scale = First.getOperand(2);
- MachineOperand &Index = First.getOperand(3);
- MachineOperand &Offset = First.getOperand(4);
-
- const MCInstrDesc &ADDrr = TII->get(getADDrrFromLEA(First.getOpcode()));
- const DebugLoc DL = First.getDebugLoc();
-
- if (!Base.isReg() || !Index.isReg())
- continue;
- if (TargetRegisterInfo::isPhysicalRegister(Res.getReg()) ||
- TargetRegisterInfo::isPhysicalRegister(Base.getReg()) ||
- TargetRegisterInfo::isPhysicalRegister(Index.getReg()))
- continue;
-
- MachineBasicBlock &MBB = *(const_cast<MachineBasicBlock *>(&BB));
- if (Scale.isImm() && Scale.getImm() == 1) {
- // R = B + I
- if (Offset.isImm() && !Offset.getImm()) {
- NewMI = BuildMI(MBB, &First, DL, ADDrr)
- .addDef(Res.getReg())
- .addUse(Base.getReg())
- .addUse(Index.getReg());
- Changed = NewMI.getInstr() != nullptr;
- First.eraseFromParent();
- }
- }
- }
- }
- return Changed;
-}
-
-bool OptimizeLEAPass::processBasicBlock(const MachineBasicBlock &MBB) {
- bool cseDone = false;
-
- // Legal scale value (1,2,4 & 8) vector.
- int LegalScale[9] = {0, 1, 1, 0, 1, 0, 0, 0, 1};
-
- auto CompareFn = [](const MachineInstr *Arg1,
- const MachineInstr *Arg2) -> bool {
- if (Arg1->getOperand(2).getImm() < Arg2->getOperand(2).getImm())
- return false;
- return true;
- };
-
- // Loop over all entries in the table.
- for (auto &E : FactorOpt.getLEAMap()) {
- auto &List = E.second;
- if (List.size() > 1)
- List.sort(CompareFn);
-
- // Loop over all LEA pairs.
- for (auto Iter1 = List.begin(); Iter1 != List.end(); Iter1++) {
- for (auto Iter2 = std::next(Iter1); Iter2 != List.end(); Iter2++) {
- MachineInstr &LI1 = **Iter1;
- MachineInstr &LI2 = **Iter2;
-
- if (!DT->dominates(&LI2, &LI1))
- continue;
-
- int Scale1 = LI1.getOperand(2).getImm();
- int Scale2 = LI2.getOperand(2).getImm();
- assert(LI2.getOperand(0).isReg() && "Result is a VirtualReg");
- DebugLoc DL = LI1.getDebugLoc();
-
- if (FactorOpt.checkIfScheduledForRemoval(&LI1))
- continue;
-
- int Factor = Scale1 - Scale2;
- if (Factor > 0 && LegalScale[Factor]) {
- DEBUG(dbgs() << "CSE LEAs: Candidate to replace: "; LI1.dump(););
- MachineInstrBuilder NewMI =
- BuildMI(*(const_cast<MachineBasicBlock *>(&MBB)), &LI1, DL,
- TII->get(LI1.getOpcode()))
- .addDef(LI1.getOperand(0).getReg()) // Dst = Dst of LI1.
- .addUse(LI2.getOperand(0).getReg()) // Base = Dst of LI2.
- .addImm(Factor) // Scale = Diff b/w scales.
- .addUse(LI1.getOperand(3).getReg()) // Index = Index of LI1.
- .addImm(0) // Disp = 0
- .addUse(
- LI1.getOperand(5).getReg()); // Segment = Segmant of LI1.
-
- cseDone = NewMI.getInstr() != nullptr;
-
- /// Lazy removal shall ensure that replaced LEA remains
- /// till we finish processing all the basic block. This shall
- /// provide opportunity for further factorization based on
- /// the replaced LEA which will be legal since it has same
- /// destination as newly formed LEA.
- FactorOpt.addForLazyRemoval(&LI1);
-
- NumFactoredLEAs++;
- DEBUG(dbgs() << "CSE LEAs: Replaced by: "; NewMI->dump(););
- }
- }
- }
- }
- return cseDone;
-}
-
-bool OptimizeLEAPass::FactorizeLEAsBasicBlock(MachineDomTreeNode *DN) {
- bool Changed = false;
- MachineBasicBlock *MBB = DN->getBlock();
- FactorOpt.pushScope(MBB);
-
- Changed |= processBasicBlock(*MBB);
- for (auto Child : DN->getChildren())
- FactorizeLEAsBasicBlock(Child);
-
- FactorOpt.popScope();
- return Changed;
-}
-
-bool OptimizeLEAPass::FactorizeLEAsAllBasicBlocks(MachineFunction &MF) {
- bool Changed = FactorizeLEAsBasicBlock(DT->getRootNode());
- FactorOpt.performCleanup();
- return Changed;
-}
-
bool OptimizeLEAPass::runOnMachineFunction(MachineFunction &MF) {
bool Changed = false;
@@ -1045,10 +656,6 @@ bool OptimizeLEAPass::runOnMachineFuncti
MRI = &MF.getRegInfo();
TII = MF.getSubtarget<X86Subtarget>().getInstrInfo();
TRI = MF.getSubtarget<X86Subtarget>().getRegisterInfo();
- DT = &getAnalysis<MachineDominatorTree>();
-
- // Attempt factorizing LEAs.
- Changed |= FactorizeLEAsAllBasicBlocks(MF);
// Process all basic blocks.
for (auto &MBB : MF) {
@@ -1065,9 +672,6 @@ bool OptimizeLEAPass::runOnMachineFuncti
// Remove redundant LEA instructions.
Changed |= removeRedundantLEAs(LEAs);
- // Strength reduce LEA instructions.
- Changed |= strengthReduceLEAs(LEAs, MBB);
-
// Remove redundant address calculations. Do it only for -Os/-Oz since only
// a code size gain is expected from this part of the pass.
if (MF.getFunction()->optForSize())
Modified: llvm/trunk/test/CodeGen/X86/GlobalISel/callingconv.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/GlobalISel/callingconv.ll?rev=313376&r1=313375&r2=313376&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/GlobalISel/callingconv.ll (original)
+++ llvm/trunk/test/CodeGen/X86/GlobalISel/callingconv.ll Fri Sep 15 11:40:26 2017
@@ -405,7 +405,7 @@ define void @test_variadic_call_2(i8** %
; X32-NEXT: movl 4(%ecx), %ecx
; X32-NEXT: movl %eax, (%esp)
; X32-NEXT: movl $4, %eax
-; X32-NEXT: addl %esp, %eax
+; X32-NEXT: leal (%esp,%eax), %eax
; X32-NEXT: movl %edx, 4(%esp)
; X32-NEXT: movl %ecx, 4(%eax)
; X32-NEXT: calll variadic_callee
Modified: llvm/trunk/test/CodeGen/X86/GlobalISel/gep.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/GlobalISel/gep.ll?rev=313376&r1=313375&r2=313376&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/GlobalISel/gep.ll (original)
+++ llvm/trunk/test/CodeGen/X86/GlobalISel/gep.ll Fri Sep 15 11:40:26 2017
@@ -5,10 +5,10 @@
define i32* @test_gep_i8(i32 *%arr, i8 %ind) {
; X64_GISEL-LABEL: test_gep_i8:
; X64_GISEL: # BB#0:
-; X64_GISEL-NEXT: movq $4, %rcx
-; X64_GISEL-NEXT: movsbq %sil, %rax
-; X64_GISEL-NEXT: imulq %rcx, %rax
-; X64_GISEL-NEXT: addq %rdi, %rax
+; X64_GISEL-NEXT: movq $4, %rax
+; X64_GISEL-NEXT: movsbq %sil, %rcx
+; X64_GISEL-NEXT: imulq %rax, %rcx
+; X64_GISEL-NEXT: leaq (%rdi,%rcx), %rax
; X64_GISEL-NEXT: retq
;
; X64-LABEL: test_gep_i8:
@@ -25,7 +25,7 @@ define i32* @test_gep_i8_const(i32 *%arr
; X64_GISEL-LABEL: test_gep_i8_const:
; X64_GISEL: # BB#0:
; X64_GISEL-NEXT: movq $80, %rax
-; X64_GISEL-NEXT: addq %rdi, %rax
+; X64_GISEL-NEXT: leaq (%rdi,%rax), %rax
; X64_GISEL-NEXT: retq
;
; X64-LABEL: test_gep_i8_const:
@@ -39,10 +39,10 @@ define i32* @test_gep_i8_const(i32 *%arr
define i32* @test_gep_i16(i32 *%arr, i16 %ind) {
; X64_GISEL-LABEL: test_gep_i16:
; X64_GISEL: # BB#0:
-; X64_GISEL-NEXT: movq $4, %rcx
-; X64_GISEL-NEXT: movswq %si, %rax
-; X64_GISEL-NEXT: imulq %rcx, %rax
-; X64_GISEL-NEXT: addq %rdi, %rax
+; X64_GISEL-NEXT: movq $4, %rax
+; X64_GISEL-NEXT: movswq %si, %rcx
+; X64_GISEL-NEXT: imulq %rax, %rcx
+; X64_GISEL-NEXT: leaq (%rdi,%rcx), %rax
; X64_GISEL-NEXT: retq
;
; X64-LABEL: test_gep_i16:
@@ -59,7 +59,7 @@ define i32* @test_gep_i16_const(i32 *%ar
; X64_GISEL-LABEL: test_gep_i16_const:
; X64_GISEL: # BB#0:
; X64_GISEL-NEXT: movq $80, %rax
-; X64_GISEL-NEXT: addq %rdi, %rax
+; X64_GISEL-NEXT: leaq (%rdi,%rax), %rax
; X64_GISEL-NEXT: retq
;
; X64-LABEL: test_gep_i16_const:
@@ -73,10 +73,10 @@ define i32* @test_gep_i16_const(i32 *%ar
define i32* @test_gep_i32(i32 *%arr, i32 %ind) {
; X64_GISEL-LABEL: test_gep_i32:
; X64_GISEL: # BB#0:
-; X64_GISEL-NEXT: movq $4, %rcx
-; X64_GISEL-NEXT: movslq %esi, %rax
-; X64_GISEL-NEXT: imulq %rcx, %rax
-; X64_GISEL-NEXT: addq %rdi, %rax
+; X64_GISEL-NEXT: movq $4, %rax
+; X64_GISEL-NEXT: movslq %esi, %rcx
+; X64_GISEL-NEXT: imulq %rax, %rcx
+; X64_GISEL-NEXT: leaq (%rdi,%rcx), %rax
; X64_GISEL-NEXT: retq
;
; X64-LABEL: test_gep_i32:
@@ -92,7 +92,7 @@ define i32* @test_gep_i32_const(i32 *%ar
; X64_GISEL-LABEL: test_gep_i32_const:
; X64_GISEL: # BB#0:
; X64_GISEL-NEXT: movq $20, %rax
-; X64_GISEL-NEXT: addq %rdi, %rax
+; X64_GISEL-NEXT: leaq (%rdi,%rax), %rax
; X64_GISEL-NEXT: retq
;
; X64-LABEL: test_gep_i32_const:
@@ -108,7 +108,7 @@ define i32* @test_gep_i64(i32 *%arr, i64
; X64_GISEL: # BB#0:
; X64_GISEL-NEXT: movq $4, %rax
; X64_GISEL-NEXT: imulq %rsi, %rax
-; X64_GISEL-NEXT: addq %rdi, %rax
+; X64_GISEL-NEXT: leaq (%rdi,%rax), %rax
; X64_GISEL-NEXT: retq
;
; X64-LABEL: test_gep_i64:
@@ -123,7 +123,7 @@ define i32* @test_gep_i64_const(i32 *%ar
; X64_GISEL-LABEL: test_gep_i64_const:
; X64_GISEL: # BB#0:
; X64_GISEL-NEXT: movq $20, %rax
-; X64_GISEL-NEXT: addq %rdi, %rax
+; X64_GISEL-NEXT: leaq (%rdi,%rax), %rax
; X64_GISEL-NEXT: retq
;
; X64-LABEL: test_gep_i64_const:
Modified: llvm/trunk/test/CodeGen/X86/GlobalISel/memop-scalar.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/GlobalISel/memop-scalar.ll?rev=313376&r1=313375&r2=313376&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/GlobalISel/memop-scalar.ll (original)
+++ llvm/trunk/test/CodeGen/X86/GlobalISel/memop-scalar.ll Fri Sep 15 11:40:26 2017
@@ -181,7 +181,7 @@ define i32 @test_gep_folding_largeGepInd
; ALL-LABEL: test_gep_folding_largeGepIndex:
; ALL: # BB#0:
; ALL-NEXT: movabsq $228719476720, %rax # imm = 0x3540BE3FF0
-; ALL-NEXT: addq %rdi, %rax
+; ALL-NEXT: leaq (%rdi,%rax), %rax
; ALL-NEXT: movl %esi, (%rax)
; ALL-NEXT: movl (%rax), %eax
; ALL-NEXT: retq
Modified: llvm/trunk/test/CodeGen/X86/lea-opt-cse1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/lea-opt-cse1.ll?rev=313376&r1=313375&r2=313376&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/lea-opt-cse1.ll (original)
+++ llvm/trunk/test/CodeGen/X86/lea-opt-cse1.ll Fri Sep 15 11:40:26 2017
@@ -9,21 +9,29 @@ define void @test_func(%struct.SA* nocap
; X64: # BB#0: # %entry
; X64-NEXT: movl (%rdi), %eax
; X64-NEXT: movl 16(%rdi), %ecx
+; X64-NEXT: leal (%rax,%rcx), %edx
; X64-NEXT: leal 1(%rax,%rcx), %eax
; X64-NEXT: movl %eax, 12(%rdi)
-; X64-NEXT: addl %ecx, %eax
+; X64-NEXT: leal 1(%rcx,%rdx), %eax
; X64-NEXT: movl %eax, 16(%rdi)
; X64-NEXT: retq
;
; X86-LABEL: test_func:
; X86: # BB#0: # %entry
+; X86-NEXT: pushl %esi
+; X86-NEXT: .Lcfi0:
+; X86-NEXT: .cfi_def_cfa_offset 8
+; X86-NEXT: .Lcfi1:
+; X86-NEXT: .cfi_offset %esi, -8
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl (%eax), %ecx
; X86-NEXT: movl 16(%eax), %edx
-; X86-NEXT: leal 1(%ecx,%edx), %ecx
-; X86-NEXT: movl %ecx, 12(%eax)
+; X86-NEXT: leal 1(%ecx,%edx), %esi
; X86-NEXT: addl %edx, %ecx
+; X86-NEXT: movl %esi, 12(%eax)
+; X86-NEXT: leal 1(%edx,%ecx), %ecx
; X86-NEXT: movl %ecx, 16(%eax)
+; X86-NEXT: popl %esi
; X86-NEXT: retl
entry:
%h0 = getelementptr inbounds %struct.SA, %struct.SA* %ctx, i64 0, i32 0
Modified: llvm/trunk/test/CodeGen/X86/lea-opt-cse2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/lea-opt-cse2.ll?rev=313376&r1=313375&r2=313376&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/lea-opt-cse2.ll (original)
+++ llvm/trunk/test/CodeGen/X86/lea-opt-cse2.ll Fri Sep 15 11:40:26 2017
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+slow-3ops-lea | FileCheck %s -check-prefix=X64
-; RUN: llc < %s -mtriple=i686-unknown -mattr=+slow-3ops-lea | FileCheck %s -check-prefix=X86
+; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s -check-prefix=X86
%struct.SA = type { i32 , i32 , i32 , i32 , i32};
@@ -10,41 +10,47 @@ define void @foo(%struct.SA* nocapture %
; X64-NEXT: .p2align 4, 0x90
; X64-NEXT: .LBB0_1: # %loop
; X64-NEXT: # =>This Inner Loop Header: Depth=1
-; X64-NEXT: movl 16(%rdi), %eax
-; X64-NEXT: movl (%rdi), %ecx
-; X64-NEXT: addl %eax, %ecx
-; X64-NEXT: incl %ecx
-; X64-NEXT: movl %ecx, 12(%rdi)
+; X64-NEXT: movl (%rdi), %eax
+; X64-NEXT: movl 16(%rdi), %ecx
+; X64-NEXT: leal 1(%rax,%rcx), %edx
+; X64-NEXT: movl %edx, 12(%rdi)
; X64-NEXT: decl %esi
; X64-NEXT: jne .LBB0_1
; X64-NEXT: # BB#2: # %exit
-; X64-NEXT: addl %eax, %ecx
-; X64-NEXT: movl %ecx, 16(%rdi)
+; X64-NEXT: addl %ecx, %eax
+; X64-NEXT: leal 1(%rcx,%rax), %eax
+; X64-NEXT: movl %eax, 16(%rdi)
; X64-NEXT: retq
;
; X86-LABEL: foo:
; X86: # BB#0: # %entry
-; X86-NEXT: pushl %esi
+; X86-NEXT: pushl %edi
; X86-NEXT: .Lcfi0:
; X86-NEXT: .cfi_def_cfa_offset 8
+; X86-NEXT: pushl %esi
; X86-NEXT: .Lcfi1:
-; X86-NEXT: .cfi_offset %esi, -8
+; X86-NEXT: .cfi_def_cfa_offset 12
+; X86-NEXT: .Lcfi2:
+; X86-NEXT: .cfi_offset %esi, -12
+; X86-NEXT: .Lcfi3:
+; X86-NEXT: .cfi_offset %edi, -8
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: .p2align 4, 0x90
; X86-NEXT: .LBB0_1: # %loop
; X86-NEXT: # =>This Inner Loop Header: Depth=1
-; X86-NEXT: movl 16(%eax), %edx
-; X86-NEXT: movl (%eax), %esi
-; X86-NEXT: addl %edx, %esi
-; X86-NEXT: incl %esi
-; X86-NEXT: movl %esi, 12(%eax)
+; X86-NEXT: movl (%eax), %edx
+; X86-NEXT: movl 16(%eax), %esi
+; X86-NEXT: leal 1(%edx,%esi), %edi
+; X86-NEXT: movl %edi, 12(%eax)
; X86-NEXT: decl %ecx
; X86-NEXT: jne .LBB0_1
; X86-NEXT: # BB#2: # %exit
-; X86-NEXT: addl %edx, %esi
-; X86-NEXT: movl %esi, 16(%eax)
+; X86-NEXT: addl %esi, %edx
+; X86-NEXT: leal 1(%esi,%edx), %ecx
+; X86-NEXT: movl %ecx, 16(%eax)
; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
; X86-NEXT: retl
entry:
br label %loop
Modified: llvm/trunk/test/CodeGen/X86/lea-opt-cse3.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/lea-opt-cse3.ll?rev=313376&r1=313375&r2=313376&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/lea-opt-cse3.ll (original)
+++ llvm/trunk/test/CodeGen/X86/lea-opt-cse3.ll Fri Sep 15 11:40:26 2017
@@ -8,7 +8,7 @@ define i32 @foo(i32 %a, i32 %b) local_un
; X64-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def>
; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; X64-NEXT: leal 4(%rdi,%rsi,2), %ecx
-; X64-NEXT: leal (%ecx,%rsi,2), %eax
+; X64-NEXT: leal 4(%rdi,%rsi,4), %eax
; X64-NEXT: imull %ecx, %eax
; X64-NEXT: retq
;
@@ -16,9 +16,9 @@ define i32 @foo(i32 %a, i32 %b) local_un
; X86: # BB#0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: leal 4(%ecx,%eax,2), %ecx
-; X86-NEXT: leal (%ecx,%eax,2), %eax
-; X86-NEXT: imull %ecx, %eax
+; X86-NEXT: leal 4(%ecx,%eax,2), %edx
+; X86-NEXT: leal 4(%ecx,%eax,4), %eax
+; X86-NEXT: imull %edx, %eax
; X86-NEXT: retl
entry:
%mul = shl i32 %b, 1
@@ -36,7 +36,7 @@ define i32 @foo1(i32 %a, i32 %b) local_u
; X64-NEXT: # kill: %ESI<def> %ESI<kill> %RSI<def>
; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; X64-NEXT: leal 4(%rdi,%rsi,4), %ecx
-; X64-NEXT: leal (%ecx,%rsi,4), %eax
+; X64-NEXT: leal 4(%rdi,%rsi,8), %eax
; X64-NEXT: imull %ecx, %eax
; X64-NEXT: retq
;
@@ -44,9 +44,9 @@ define i32 @foo1(i32 %a, i32 %b) local_u
; X86: # BB#0: # %entry
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: leal 4(%ecx,%eax,4), %ecx
-; X86-NEXT: leal (%ecx,%eax,4), %eax
-; X86-NEXT: imull %ecx, %eax
+; X86-NEXT: leal 4(%ecx,%eax,4), %edx
+; X86-NEXT: leal 4(%ecx,%eax,8), %eax
+; X86-NEXT: imull %edx, %eax
; X86-NEXT: retl
entry:
%mul = shl i32 %b, 2
@@ -68,23 +68,31 @@ define i32 @foo1_mult_basic_blocks(i32 %
; X64-NEXT: cmpl $10, %ecx
; X64-NEXT: je .LBB2_2
; X64-NEXT: # BB#1: # %mid
-; X64-NEXT: leal (%ecx,%rsi,4), %eax
-; X64-NEXT: imull %ecx, %eax
+; X64-NEXT: leal 4(%rdi,%rsi,8), %eax
+; X64-NEXT: imull %eax, %ecx
+; X64-NEXT: movl %ecx, %eax
; X64-NEXT: .LBB2_2: # %exit
; X64-NEXT: retq
;
; X86-LABEL: foo1_mult_basic_blocks:
; X86: # BB#0: # %entry
+; X86-NEXT: pushl %esi
+; X86-NEXT: .Lcfi0:
+; X86-NEXT: .cfi_def_cfa_offset 8
+; X86-NEXT: .Lcfi1:
+; X86-NEXT: .cfi_offset %esi, -8
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: leal 4(%eax,%edx,4), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: leal 4(%esi,%edx,4), %ecx
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: cmpl $10, %ecx
; X86-NEXT: je .LBB2_2
; X86-NEXT: # BB#1: # %mid
-; X86-NEXT: leal (%ecx,%edx,4), %eax
-; X86-NEXT: imull %ecx, %eax
+; X86-NEXT: leal 4(%esi,%edx,8), %eax
+; X86-NEXT: imull %eax, %ecx
+; X86-NEXT: movl %ecx, %eax
; X86-NEXT: .LBB2_2: # %exit
+; X86-NEXT: popl %esi
; X86-NEXT: retl
entry:
%mul = shl i32 %b, 2
@@ -123,9 +131,9 @@ define i32 @foo1_mult_basic_blocks_illeg
; X86-LABEL: foo1_mult_basic_blocks_illegal_scale:
; X86: # BB#0: # %entry
; X86-NEXT: pushl %esi
-; X86-NEXT: .Lcfi0:
+; X86-NEXT: .Lcfi2:
; X86-NEXT: .cfi_def_cfa_offset 8
-; X86-NEXT: .Lcfi1:
+; X86-NEXT: .Lcfi3:
; X86-NEXT: .cfi_offset %esi, -8
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
Modified: llvm/trunk/test/CodeGen/X86/lea-opt-cse4.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/lea-opt-cse4.ll?rev=313376&r1=313375&r2=313376&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/lea-opt-cse4.ll (original)
+++ llvm/trunk/test/CodeGen/X86/lea-opt-cse4.ll Fri Sep 15 11:40:26 2017
@@ -1,31 +1,43 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+slow-3ops-lea | FileCheck %s -check-prefix=X64
-; RUN: llc < %s -mtriple=i686-unknown -mattr=+slow-3ops-lea | FileCheck %s -check-prefix=X86
+; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s -check-prefix=X86
%struct.SA = type { i32 , i32 , i32 , i32 , i32};
define void @foo(%struct.SA* nocapture %ctx, i32 %n) local_unnamed_addr #0 {
; X64-LABEL: foo:
; X64: # BB#0: # %entry
-; X64-NEXT: movl (%rdi), %eax
-; X64-NEXT: movl 16(%rdi), %ecx
-; X64-NEXT: leal (%rax,%rcx,4), %eax
-; X64-NEXT: addl $1, %eax
-; X64-NEXT: movl %eax, 12(%rdi)
-; X64-NEXT: addl %ecx, %eax
+; X64-NEXT: movl 16(%rdi), %eax
+; X64-NEXT: movl (%rdi), %ecx
+; X64-NEXT: addl %eax, %ecx
+; X64-NEXT: addl %eax, %ecx
+; X64-NEXT: addl %eax, %ecx
+; X64-NEXT: leal (%rcx,%rax), %edx
+; X64-NEXT: leal 1(%rax,%rcx), %ecx
+; X64-NEXT: movl %ecx, 12(%rdi)
+; X64-NEXT: leal 1(%rax,%rdx), %eax
; X64-NEXT: movl %eax, 16(%rdi)
; X64-NEXT: retq
;
; X86-LABEL: foo:
; X86: # BB#0: # %entry
+; X86-NEXT: pushl %esi
+; X86-NEXT: .Lcfi0:
+; X86-NEXT: .cfi_def_cfa_offset 8
+; X86-NEXT: .Lcfi1:
+; X86-NEXT: .cfi_offset %esi, -8
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl (%eax), %ecx
-; X86-NEXT: movl 16(%eax), %edx
-; X86-NEXT: leal (%ecx,%edx,4), %ecx
-; X86-NEXT: addl $1, %ecx
-; X86-NEXT: movl %ecx, 12(%eax)
-; X86-NEXT: addl %edx, %ecx
+; X86-NEXT: movl 16(%eax), %ecx
+; X86-NEXT: movl (%eax), %edx
+; X86-NEXT: addl %ecx, %edx
+; X86-NEXT: addl %ecx, %edx
+; X86-NEXT: addl %ecx, %edx
+; X86-NEXT: leal 1(%ecx,%edx), %esi
+; X86-NEXT: addl %ecx, %edx
+; X86-NEXT: movl %esi, 12(%eax)
+; X86-NEXT: leal 1(%ecx,%edx), %ecx
; X86-NEXT: movl %ecx, 16(%eax)
+; X86-NEXT: popl %esi
; X86-NEXT: retl
entry:
%h0 = getelementptr inbounds %struct.SA, %struct.SA* %ctx, i64 0, i32 0
@@ -52,15 +64,15 @@ define void @foo_loop(%struct.SA* nocapt
; X64-NEXT: .p2align 4, 0x90
; X64-NEXT: .LBB1_1: # %loop
; X64-NEXT: # =>This Inner Loop Header: Depth=1
-; X64-NEXT: movl 16(%rdi), %eax
; X64-NEXT: movl (%rdi), %ecx
-; X64-NEXT: addl %eax, %ecx
-; X64-NEXT: incl %ecx
-; X64-NEXT: movl %ecx, 12(%rdi)
+; X64-NEXT: movl 16(%rdi), %eax
+; X64-NEXT: leal 1(%rcx,%rax), %edx
+; X64-NEXT: movl %edx, 12(%rdi)
; X64-NEXT: decl %esi
; X64-NEXT: jne .LBB1_1
; X64-NEXT: # BB#2: # %exit
; X64-NEXT: addl %eax, %ecx
+; X64-NEXT: leal 1(%rax,%rcx), %ecx
; X64-NEXT: addl %eax, %ecx
; X64-NEXT: addl %eax, %ecx
; X64-NEXT: addl %eax, %ecx
@@ -72,25 +84,30 @@ define void @foo_loop(%struct.SA* nocapt
;
; X86-LABEL: foo_loop:
; X86: # BB#0: # %entry
-; X86-NEXT: pushl %esi
-; X86-NEXT: .Lcfi0:
+; X86-NEXT: pushl %edi
+; X86-NEXT: .Lcfi2:
; X86-NEXT: .cfi_def_cfa_offset 8
-; X86-NEXT: .Lcfi1:
-; X86-NEXT: .cfi_offset %esi, -8
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: pushl %esi
+; X86-NEXT: .Lcfi3:
+; X86-NEXT: .cfi_def_cfa_offset 12
+; X86-NEXT: .Lcfi4:
+; X86-NEXT: .cfi_offset %esi, -12
+; X86-NEXT: .Lcfi5:
+; X86-NEXT: .cfi_offset %edi, -8
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: .p2align 4, 0x90
; X86-NEXT: .LBB1_1: # %loop
; X86-NEXT: # =>This Inner Loop Header: Depth=1
+; X86-NEXT: movl (%eax), %esi
; X86-NEXT: movl 16(%eax), %ecx
-; X86-NEXT: movl (%eax), %edx
-; X86-NEXT: addl %ecx, %edx
-; X86-NEXT: incl %edx
-; X86-NEXT: movl %edx, 12(%eax)
-; X86-NEXT: decl %esi
+; X86-NEXT: leal 1(%esi,%ecx), %edi
+; X86-NEXT: movl %edi, 12(%eax)
+; X86-NEXT: decl %edx
; X86-NEXT: jne .LBB1_1
; X86-NEXT: # BB#2: # %exit
-; X86-NEXT: addl %ecx, %edx
+; X86-NEXT: addl %ecx, %esi
+; X86-NEXT: leal 1(%ecx,%esi), %edx
; X86-NEXT: addl %ecx, %edx
; X86-NEXT: addl %ecx, %edx
; X86-NEXT: addl %ecx, %edx
@@ -99,6 +116,7 @@ define void @foo_loop(%struct.SA* nocapt
; X86-NEXT: addl %ecx, %edx
; X86-NEXT: movl %edx, 16(%eax)
; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
; X86-NEXT: retl
entry:
br label %loop
Modified: llvm/trunk/test/CodeGen/X86/mul-constant-i16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/mul-constant-i16.ll?rev=313376&r1=313375&r2=313376&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/mul-constant-i16.ll (original)
+++ llvm/trunk/test/CodeGen/X86/mul-constant-i16.ll Fri Sep 15 11:40:26 2017
@@ -558,10 +558,11 @@ define i16 @test_mul_by_28(i16 %x) {
define i16 @test_mul_by_29(i16 %x) {
; X86-LABEL: test_mul_by_29:
; X86: # BB#0:
-; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: leal (%eax,%eax,8), %ecx
-; X86-NEXT: leal (%ecx,%ecx,2), %ecx
-; X86-NEXT: leal (%ecx,%eax,2), %eax
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: leal (%ecx,%ecx,8), %eax
+; X86-NEXT: leal (%eax,%eax,2), %eax
+; X86-NEXT: addl %ecx, %eax
+; X86-NEXT: addl %ecx, %eax
; X86-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; X86-NEXT: retl
;
@@ -570,7 +571,8 @@ define i16 @test_mul_by_29(i16 %x) {
; X64-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; X64-NEXT: leal (%rdi,%rdi,8), %eax
; X64-NEXT: leal (%rax,%rax,2), %eax
-; X64-NEXT: leal (%rax,%rdi,2), %eax
+; X64-NEXT: addl %edi, %eax
+; X64-NEXT: addl %edi, %eax
; X64-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
; X64-NEXT: retq
%mul = mul nsw i16 %x, 29
Modified: llvm/trunk/test/CodeGen/X86/mul-constant-i32.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/mul-constant-i32.ll?rev=313376&r1=313375&r2=313376&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/mul-constant-i32.ll (original)
+++ llvm/trunk/test/CodeGen/X86/mul-constant-i32.ll Fri Sep 15 11:40:26 2017
@@ -1457,10 +1457,11 @@ define i32 @test_mul_by_28(i32 %x) {
define i32 @test_mul_by_29(i32 %x) {
; X86-LABEL: test_mul_by_29:
; X86: # BB#0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: leal (%eax,%eax,8), %ecx
-; X86-NEXT: leal (%ecx,%ecx,2), %ecx
-; X86-NEXT: leal (%ecx,%eax,2), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: leal (%ecx,%ecx,8), %eax
+; X86-NEXT: leal (%eax,%eax,2), %eax
+; X86-NEXT: addl %ecx, %eax
+; X86-NEXT: addl %ecx, %eax
; X86-NEXT: retl
;
; X64-HSW-LABEL: test_mul_by_29:
@@ -1468,7 +1469,8 @@ define i32 @test_mul_by_29(i32 %x) {
; X64-HSW-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; X64-HSW-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
; X64-HSW-NEXT: leal (%rax,%rax,2), %eax # sched: [1:0.50]
-; X64-HSW-NEXT: leal (%rax,%rdi,2), %eax # sched: [1:0.50]
+; X64-HSW-NEXT: addl %edi, %eax # sched: [1:0.25]
+; X64-HSW-NEXT: addl %edi, %eax # sched: [1:0.25]
; X64-HSW-NEXT: retq # sched: [2:1.00]
;
; X64-JAG-LABEL: test_mul_by_29:
@@ -1476,7 +1478,8 @@ define i32 @test_mul_by_29(i32 %x) {
; X64-JAG-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<def>
; X64-JAG-NEXT: leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
; X64-JAG-NEXT: leal (%rax,%rax,2), %eax # sched: [1:0.50]
-; X64-JAG-NEXT: leal (%rax,%rdi,2), %eax # sched: [1:0.50]
+; X64-JAG-NEXT: addl %edi, %eax # sched: [1:0.50]
+; X64-JAG-NEXT: addl %edi, %eax # sched: [1:0.50]
; X64-JAG-NEXT: retq # sched: [4:1.00]
;
; X86-NOOPT-LABEL: test_mul_by_29:
Modified: llvm/trunk/test/CodeGen/X86/mul-constant-i64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/mul-constant-i64.ll?rev=313376&r1=313375&r2=313376&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/mul-constant-i64.ll (original)
+++ llvm/trunk/test/CodeGen/X86/mul-constant-i64.ll Fri Sep 15 11:40:26 2017
@@ -1523,7 +1523,8 @@ define i64 @test_mul_by_29(i64 %x) {
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: leal (%eax,%eax,8), %ecx
; X86-NEXT: leal (%ecx,%ecx,2), %ecx
-; X86-NEXT: leal (%ecx,%eax,2), %ecx
+; X86-NEXT: addl %eax, %ecx
+; X86-NEXT: addl %eax, %ecx
; X86-NEXT: movl $29, %eax
; X86-NEXT: mull {{[0-9]+}}(%esp)
; X86-NEXT: addl %ecx, %edx
@@ -1533,14 +1534,16 @@ define i64 @test_mul_by_29(i64 %x) {
; X64-HSW: # BB#0:
; X64-HSW-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
; X64-HSW-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:0.50]
-; X64-HSW-NEXT: leaq (%rax,%rdi,2), %rax # sched: [1:0.50]
+; X64-HSW-NEXT: addq %rdi, %rax # sched: [1:0.25]
+; X64-HSW-NEXT: addq %rdi, %rax # sched: [1:0.25]
; X64-HSW-NEXT: retq # sched: [2:1.00]
;
; X64-JAG-LABEL: test_mul_by_29:
; X64-JAG: # BB#0:
; X64-JAG-NEXT: leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
; X64-JAG-NEXT: leaq (%rax,%rax,2), %rax # sched: [1:0.50]
-; X64-JAG-NEXT: leaq (%rax,%rdi,2), %rax # sched: [1:0.50]
+; X64-JAG-NEXT: addq %rdi, %rax # sched: [1:0.50]
+; X64-JAG-NEXT: addq %rdi, %rax # sched: [1:0.50]
; X64-JAG-NEXT: retq # sched: [4:1.00]
;
; X86-NOOPT-LABEL: test_mul_by_29:
Modified: llvm/trunk/test/CodeGen/X86/mul-constant-result.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/mul-constant-result.ll?rev=313376&r1=313375&r2=313376&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/mul-constant-result.ll (original)
+++ llvm/trunk/test/CodeGen/X86/mul-constant-result.ll Fri Sep 15 11:40:26 2017
@@ -163,7 +163,8 @@ define i32 @mult(i32, i32) local_unnamed
; X86-NEXT: .LBB0_35:
; X86-NEXT: leal (%eax,%eax,8), %ecx
; X86-NEXT: leal (%ecx,%ecx,2), %ecx
-; X86-NEXT: leal (%ecx,%eax,2), %eax
+; X86-NEXT: addl %eax, %ecx
+; X86-NEXT: addl %ecx, %eax
; X86-NEXT: popl %esi
; X86-NEXT: retl
; X86-NEXT: .LBB0_36:
@@ -321,15 +322,14 @@ define i32 @mult(i32, i32) local_unnamed
; X64-HSW-NEXT: .LBB0_31:
; X64-HSW-NEXT: leal (%rax,%rax,8), %ecx
; X64-HSW-NEXT: leal (%rcx,%rcx,2), %ecx
-; X64-HSW-NEXT: .LBB0_17:
-; X64-HSW-NEXT: addl %eax, %ecx
-; X64-HSW-NEXT: movl %ecx, %eax
-; X64-HSW-NEXT: # kill: %EAX<def> %EAX<kill> %RAX<kill>
-; X64-HSW-NEXT: retq
+; X64-HSW-NEXT: jmp .LBB0_17
; X64-HSW-NEXT: .LBB0_32:
; X64-HSW-NEXT: leal (%rax,%rax,8), %ecx
; X64-HSW-NEXT: leal (%rcx,%rcx,2), %ecx
-; X64-HSW-NEXT: leal (%rcx,%rax,2), %eax
+; X64-HSW-NEXT: addl %eax, %ecx
+; X64-HSW-NEXT: .LBB0_17:
+; X64-HSW-NEXT: addl %eax, %ecx
+; X64-HSW-NEXT: movl %ecx, %eax
; X64-HSW-NEXT: # kill: %EAX<def> %EAX<kill> %RAX<kill>
; X64-HSW-NEXT: retq
; X64-HSW-NEXT: .LBB0_33:
Modified: llvm/trunk/test/CodeGen/X86/umul-with-overflow.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/umul-with-overflow.ll?rev=313376&r1=313375&r2=313376&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/umul-with-overflow.ll (original)
+++ llvm/trunk/test/CodeGen/X86/umul-with-overflow.ll Fri Sep 15 11:40:26 2017
@@ -40,10 +40,10 @@ define i32 @test2(i32 %a, i32 %b) nounwi
; X64-NEXT: leal (%rdi,%rdi), %eax
; X64-NEXT: retq
entry:
- %tmp0 = add i32 %b, %a
- %tmp1 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %tmp0, i32 2)
- %tmp2 = extractvalue { i32, i1 } %tmp1, 0
- ret i32 %tmp2
+ %tmp0 = add i32 %b, %a
+ %tmp1 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %tmp0, i32 2)
+ %tmp2 = extractvalue { i32, i1 } %tmp1, 0
+ ret i32 %tmp2
}
define i32 @test3(i32 %a, i32 %b) nounwind readnone {
@@ -64,8 +64,8 @@ define i32 @test3(i32 %a, i32 %b) nounwi
; X64-NEXT: mull %ecx
; X64-NEXT: retq
entry:
- %tmp0 = add i32 %b, %a
- %tmp1 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %tmp0, i32 4)
- %tmp2 = extractvalue { i32, i1 } %tmp1, 0
- ret i32 %tmp2
+ %tmp0 = add i32 %b, %a
+ %tmp1 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %tmp0, i32 4)
+ %tmp2 = extractvalue { i32, i1 } %tmp1, 0
+ ret i32 %tmp2
}
Modified: llvm/trunk/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll?rev=313376&r1=313375&r2=313376&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll (original)
+++ llvm/trunk/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll Fri Sep 15 11:40:26 2017
@@ -13,14 +13,14 @@
; X64-NEXT: .p2align
; X64: %loop
; no complex address modes
-; X64-NOT: [1-9]+(%{{[^)]+}},%{{[^)]+}},
+; X64-NOT: (%{{[^)]+}},%{{[^)]+}},
;
; X32: @simple
; no expensive address computation in the preheader
; X32-NOT: imul
; X32: %loop
; no complex address modes
-; X32-NOT: [1-9]+(%{{[^)]+}},%{{[^)]+}},
+; X32-NOT: (%{{[^)]+}},%{{[^)]+}},
define i32 @simple(i32* %a, i32* %b, i32 %x) nounwind {
entry:
br label %loop
@@ -103,7 +103,7 @@ exit:
; X32-NOT: mov{{.*}}(%esp){{$}}
; X32: %for.body{{$}}
; no complex address modes
-; X32-NOT: [1-9]+(%{{[^)]+}},%{{[^)]+}},
+; X32-NOT: (%{{[^)]+}},%{{[^)]+}},
; no reloads
; X32-NOT: (%esp)
define void @extrastride(i8* nocapture %main, i32 %main_stride, i32* nocapture %res, i32 %x, i32 %y, i32 %z) nounwind {
More information about the llvm-commits
mailing list