[llvm] r314886 - [X86] Improvement in CodeGen instruction selection for LEAs (re-applying post required revision changes.)

Wed Oct 4 10:56:04 PDT 2017

I've reverted this in r314919 as it broke the Chromium / SQLite build.
See PR34830.

On Wed, Oct 4, 2017 at 2:02 AM, Jatin Bhateja via llvm-commits
<llvm-commits at lists.llvm.org> wrote:
> Author: jbhateja
> Date: Wed Oct  4 02:02:10 2017
> New Revision: 314886
>
> URL: http://llvm.org/viewvc/llvm-project?rev=314886&view=rev
> Log:
> [X86] Improvement in CodeGen instruction selection for LEAs (re-applying post required revision changes.)
>
> Summary:
>    1/  Operand folding during complex pattern matching for LEAs has been
>        extended, such that it promotes Scale to accommodate similar operand
>        appearing in the DAG.
>        e.g.
>          T1 = A + B
>          T2 = T1 + 10
>          T3 = T2 + A
>        For the above DAG rooted at T3, X86AddressMode will now look like
>          Base = B , Index = A , Scale = 2 , Disp = 10
>
>    2/  During OptimizeLEAPass down the pipeline factorization is now performed over LEAs
>        so that if there is an opportunity then complex LEAs (having 3 operands)
>        could be factored out.
>        e.g.
>          leal 1(%rax,%rcx,1), %rdx
>          leal 1(%rax,%rcx,2), %rcx
>        will be factored as follows
>          leal 1(%rax,%rcx,1), %rdx
>          leal (%rdx,%rcx)   , %rcx
>
>    3/ Aggressive operand folding for AM based selection for LEAs is sensitive to loops,
>       thus avoiding creation of any complex LEAs within a loop.
>
> Reviewers: lsaba, RKSimon, craig.topper, qcolombet, jmolloy
>
> Reviewed By: lsaba
>
> Subscribers: jmolloy, spatel, igorb, llvm-commits
>
>     Differential Revision: https://reviews.llvm.org/D35014
>
> Modified:
>     llvm/trunk/include/llvm/CodeGen/MachineInstr.h
>     llvm/trunk/include/llvm/CodeGen/SelectionDAG.h
>     llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
>     llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp
>     llvm/trunk/lib/Target/X86/X86OptimizeLEAs.cpp
>     llvm/trunk/test/CodeGen/X86/GlobalISel/callingconv.ll
>     llvm/trunk/test/CodeGen/X86/GlobalISel/gep.ll
>     llvm/trunk/test/CodeGen/X86/GlobalISel/memop-scalar.ll
>     llvm/trunk/test/CodeGen/X86/lea-opt-cse1.ll
>     llvm/trunk/test/CodeGen/X86/lea-opt-cse2.ll
>     llvm/trunk/test/CodeGen/X86/lea-opt-cse3.ll
>     llvm/trunk/test/CodeGen/X86/lea-opt-cse4.ll
>     llvm/trunk/test/CodeGen/X86/mul-constant-i16.ll
>     llvm/trunk/test/CodeGen/X86/mul-constant-i32.ll
>     llvm/trunk/test/CodeGen/X86/mul-constant-i64.ll
>     llvm/trunk/test/CodeGen/X86/mul-constant-result.ll
>     llvm/trunk/test/CodeGen/X86/pr34629.ll
>     llvm/trunk/test/CodeGen/X86/pr34634.ll
>     llvm/trunk/test/CodeGen/X86/umul-with-overflow.ll
>     llvm/trunk/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll
>
> Modified: llvm/trunk/include/llvm/CodeGen/MachineInstr.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/MachineInstr.h?rev=314886&r1=314885&r2=314886&view=diff
> ==============================================================================
> --- llvm/trunk/include/llvm/CodeGen/MachineInstr.h (original)
> +++ llvm/trunk/include/llvm/CodeGen/MachineInstr.h Wed Oct  4 02:02:10 2017
> @@ -1289,12 +1289,13 @@ public:
>    /// Add all implicit def and use operands to this instruction.
>    void addImplicitDefUseOperands(MachineFunction &MF);
>
> -private:
>    /// If this instruction is embedded into a MachineFunction, return the
>    /// MachineRegisterInfo object for the current function, otherwise
>    /// return null.
>    MachineRegisterInfo *getRegInfo();
>
> +private:
> +
>    /// Unlink all of the register operands in this instruction from their
>    /// respective use lists.  This requires that the operands already be on their
>    /// use lists.
>
> Modified: llvm/trunk/include/llvm/CodeGen/SelectionDAG.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/CodeGen/SelectionDAG.h?rev=314886&r1=314885&r2=314886&view=diff
> ==============================================================================
> --- llvm/trunk/include/llvm/CodeGen/SelectionDAG.h (original)
> +++ llvm/trunk/include/llvm/CodeGen/SelectionDAG.h Wed Oct  4 02:02:10 2017
> @@ -300,6 +300,9 @@ public:
>    /// type legalization.
>    bool NewNodesMustHaveLegalTypes = false;
>
> +  /// Set to true for DAG of BasicBlock contained inside a loop.
> +  bool IsDAGPartOfLoop = false;
> +
>  private:
>    /// DAGUpdateListener is a friend so it can manipulate the listener stack.
>    friend struct DAGUpdateListener;
>
> Modified: llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp?rev=314886&r1=314885&r2=314886&view=diff
> ==============================================================================
> --- llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp (original)
> +++ llvm/trunk/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp Wed Oct  4 02:02:10 2017
> @@ -26,6 +26,7 @@
>  #include "llvm/Analysis/AliasAnalysis.h"
>  #include "llvm/Analysis/BranchProbabilityInfo.h"
>  #include "llvm/Analysis/CFG.h"
> +#include "llvm/Analysis/LoopInfo.h"
>  #include "llvm/Analysis/OptimizationDiagnosticInfo.h"
>  #include "llvm/Analysis/TargetLibraryInfo.h"
>  #include "llvm/CodeGen/FastISel.h"
> @@ -325,6 +326,8 @@ void SelectionDAGISel::getAnalysisUsage(
>    if (OptLevel != CodeGenOpt::None)
>      AU.addRequired<AAResultsWrapperPass>();
>    AU.addRequired<GCModuleInfo>();
> +  if (OptLevel != CodeGenOpt::None)
> +    AU.addRequired<LoopInfoWrapperPass>();
>    AU.addRequired<StackProtector>();
>    AU.addPreserved<StackProtector>();
>    AU.addPreserved<GCModuleInfo>();
> @@ -1416,6 +1419,7 @@ void SelectionDAGISel::SelectAllBasicBlo
>
>    // Iterate over all basic blocks in the function.
>    for (const BasicBlock *LLVMBB : RPOT) {
> +    CurDAG->IsDAGPartOfLoop = false;
>      if (OptLevel != CodeGenOpt::None) {
>        bool AllPredsVisited = true;
>        for (const_pred_iterator PI = pred_begin(LLVMBB), PE = pred_end(LLVMBB);
> @@ -1593,6 +1597,13 @@ void SelectionDAGISel::SelectAllBasicBlo
>                                     FunctionBasedInstrumentation);
>      }
>
> +    if (OptLevel != CodeGenOpt::None) {
> +      auto &LIWP = getAnalysis<LoopInfoWrapperPass>();
> +      LoopInfo &LI = LIWP.getLoopInfo();
> +      if (LI.getLoopFor(LLVMBB))
> +        CurDAG->IsDAGPartOfLoop = true;
> +    }
> +
>      if (Begin != BI)
>        ++NumDAGBlocks;
>      else
>
> Modified: llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp?rev=314886&r1=314885&r2=314886&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp (original)
> +++ llvm/trunk/lib/Target/X86/X86ISelDAGToDAG.cpp Wed Oct  4 02:02:10 2017
> @@ -88,6 +88,11 @@ namespace {
>               IndexReg.getNode() != nullptr || Base_Reg.getNode() != nullptr;
>      }
>
> +    bool hasComplexAddressingMode() const {
> +      return Disp && IndexReg.getNode() != nullptr &&
> +             Base_Reg.getNode() != nullptr;
> +    }
> +
>      /// Return true if this addressing mode is already RIP-relative.
>      bool isRIPRelative() const {
>        if (BaseType != RegBase) return false;
> @@ -97,6 +102,10 @@ namespace {
>        return false;
>      }
>
> +    bool isLegalScale() {
> +      return (Scale == 1 || Scale == 2 || Scale == 4 || Scale == 8);
> +    }
> +
>      void setBaseReg(SDValue Reg) {
>        BaseType = RegBase;
>        Base_Reg = Reg;
> @@ -162,10 +171,13 @@ namespace {
>      /// If true, selector should try to optimize for minimum code size.
>      bool OptForMinSize;
>
> +    /// If true, selector should try to aggresively fold operands into AM.
> +    bool OptForAggressingFolding;
> +
>    public:
>      explicit X86DAGToDAGISel(X86TargetMachine &tm, CodeGenOpt::Level OptLevel)
>          : SelectionDAGISel(tm, OptLevel), OptForSize(false),
> -          OptForMinSize(false) {}
> +          OptForMinSize(false), OptForAggressingFolding(false) {}
>
>      StringRef getPassName() const override {
>        return "X86 DAG->DAG Instruction Selection";
> @@ -184,6 +196,12 @@ namespace {
>
>      void PreprocessISelDAG() override;
>
> +    void setAggressiveOperandFolding(bool val = false) {
> +      OptForAggressingFolding = val;
> +    }
> +
> +    bool getAggressiveOperandFolding() { return OptForAggressingFolding; }
> +
>  // Include the pieces autogenerated from the target description.
>  #include "X86GenDAGISel.inc"
>
> @@ -197,6 +215,7 @@ namespace {
>      bool matchAdd(SDValue N, X86ISelAddressMode &AM, unsigned Depth);
>      bool matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
>                                   unsigned Depth);
> +    bool matchAddressLEA(SDValue N, X86ISelAddressMode &AM);
>      bool matchAddressBase(SDValue N, X86ISelAddressMode &AM);
>      bool selectAddr(SDNode *Parent, SDValue N, SDValue &Base,
>                      SDValue &Scale, SDValue &Index, SDValue &Disp,
> @@ -445,6 +464,20 @@ namespace {
>
>      bool isMaskZeroExtended(SDNode *N) const;
>    };
> +
> +  class X86AggressiveOperandFolding {
> +  public:
> +    explicit X86AggressiveOperandFolding(X86DAGToDAGISel &ISel, bool val)
> +        : Selector(&ISel) {
> +      Selector->setAggressiveOperandFolding(val);
> +    }
> +    ~X86AggressiveOperandFolding() {
> +      Selector->setAggressiveOperandFolding(false);
> +    }
> +
> +  private:
> +    X86DAGToDAGISel *Selector;
> +  };
>  }
>
>
> @@ -1191,7 +1224,7 @@ static bool foldMaskAndShiftToScale(Sele
>    AM.IndexReg = NewSRL;
>    return false;
>  }
> -
> +
>  bool X86DAGToDAGISel::matchAddressRecursively(SDValue N, X86ISelAddressMode &AM,
>                                                unsigned Depth) {
>    SDLoc dl(N);
> @@ -1199,8 +1232,11 @@ bool X86DAGToDAGISel::matchAddressRecurs
>        dbgs() << "MatchAddress: ";
>        AM.dump();
>      });
> -  // Limit recursion.
> -  if (Depth > 5)
> +
> +  // Limit recursion. For aggressive operand folding recurse
> +  // till depth 8 which is the maximum legal scale value.
> +  unsigned MaxDepth = getAggressiveOperandFolding() ? 8 : 5;
> +  if (Depth > MaxDepth)
>      return matchAddressBase(N, AM);
>
>    // If this is already a %rip relative address, we can only merge immediates
> @@ -1491,6 +1527,20 @@ bool X86DAGToDAGISel::matchAddressBase(S
>        return false;
>      }
>
> +    if (OptLevel != CodeGenOpt::None && getAggressiveOperandFolding() &&
> +        AM.BaseType == X86ISelAddressMode::RegBase) {
> +      if (AM.Base_Reg == N) {
> +        SDValue Base_Reg = AM.Base_Reg;
> +        AM.Base_Reg = AM.IndexReg;
> +        AM.IndexReg = Base_Reg;
> +        AM.Scale++;
> +        return false;
> +      } else if (AM.IndexReg == N) {
> +        AM.Scale++;
> +        return false;
> +      }
> +    }
> +
>      // Otherwise, we cannot select it.
>      return true;
>    }
> @@ -1721,7 +1771,7 @@ bool X86DAGToDAGISel::selectLEA64_32Addr
>                                           SDValue &Disp, SDValue &Segment) {
>    // Save the debug loc before calling selectLEAAddr, in case it invalidates N.
>    SDLoc DL(N);
> -
> +
>    if (!selectLEAAddr(N, Base, Scale, Index, Disp, Segment))
>      return false;
>
> @@ -1756,6 +1806,29 @@ bool X86DAGToDAGISel::selectLEA64_32Addr
>    return true;
>  }
>
> +bool X86DAGToDAGISel::matchAddressLEA(SDValue N, X86ISelAddressMode &AM) {
> +  // Avoid enabling aggressive operand folding when node N is a part of loop.
> +  X86AggressiveOperandFolding Enable(*this, !CurDAG->IsDAGPartOfLoop);
> +
> +  bool matchRes = matchAddress(N, AM);
> +
> +  // Check for legality of scale when recursion unwinds back to the top.
> +  if (!matchRes) {
> +    if (!AM.isLegalScale())
> +      return true;
> +
> +    // Avoid creating costly complex LEAs having scale less than 2
> +    // within loop.
> +    if(CurDAG->IsDAGPartOfLoop && Subtarget->slow3OpsLEA() &&
> +        AM.Scale <= 2 && AM.hasComplexAddressingMode() &&
> +         (!AM.hasSymbolicDisplacement() && N.getOpcode() < ISD::BUILTIN_OP_END))
> +     return true;
> +  }
> +
> +  return matchRes;
> +}
> +
> +
>  /// Calls SelectAddr and determines if the maximal addressing
>  /// mode it matches can be cost effectively emitted as an LEA instruction.
>  bool X86DAGToDAGISel::selectLEAAddr(SDValue N,
> @@ -1773,7 +1846,7 @@ bool X86DAGToDAGISel::selectLEAAddr(SDVa
>    SDValue Copy = AM.Segment;
>    SDValue T = CurDAG->getRegister(0, MVT::i32);
>    AM.Segment = T;
> -  if (matchAddress(N, AM))
> +  if (matchAddressLEA(N, AM))
>      return false;
>    assert (T == AM.Segment);
>    AM.Segment = Copy;
>
> Modified: llvm/trunk/lib/Target/X86/X86OptimizeLEAs.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86OptimizeLEAs.cpp?rev=314886&r1=314885&r2=314886&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86OptimizeLEAs.cpp (original)
> +++ llvm/trunk/lib/Target/X86/X86OptimizeLEAs.cpp Wed Oct  4 02:02:10 2017
> @@ -22,6 +22,7 @@
>  #include "X86Subtarget.h"
>  #include "llvm/ADT/Statistic.h"
>  #include "llvm/CodeGen/LiveVariables.h"
> +#include "llvm/CodeGen/MachineDominators.h"
>  #include "llvm/CodeGen/MachineFunctionPass.h"
>  #include "llvm/CodeGen/MachineInstrBuilder.h"
>  #include "llvm/CodeGen/MachineOperand.h"
> @@ -44,6 +45,7 @@ static cl::opt<bool>
>                       cl::init(false));
>
>  STATISTIC(NumSubstLEAs, "Number of LEA instruction substitutions");
> +STATISTIC(NumFactoredLEAs, "Number of LEAs factorized");
>  STATISTIC(NumRedundantLEAs, "Number of redundant LEA instructions removed");
>
>  /// \brief Returns true if two machine operands are identical and they are not
> @@ -51,6 +53,10 @@ STATISTIC(NumRedundantLEAs, "Number of r
>  static inline bool isIdenticalOp(const MachineOperand &MO1,
>                                   const MachineOperand &MO2);
>
> +/// \brief Returns true if two machine instructions have identical operands.
> +static bool isIdenticalMI(MachineRegisterInfo *MRI, const MachineOperand &MO1,
> +                          const MachineOperand &MO2);
> +
>  /// \brief Returns true if two address displacement operands are of the same
>  /// type and use the same symbol/index/address regardless of the offset.
>  static bool isSimilarDispOp(const MachineOperand &MO1,
> @@ -59,21 +65,44 @@ static bool isSimilarDispOp(const Machin
>  /// \brief Returns true if the instruction is LEA.
>  static inline bool isLEA(const MachineInstr &MI);
>
> +/// \brief Returns true if Definition of Operand is a copylike instruction.
> +static bool isDefCopyLike(MachineRegisterInfo *MRI, const MachineOperand &Opr);
> +
>  namespace {
>  /// A key based on instruction's memory operands.
>  class MemOpKey {
>  public:
>    MemOpKey(const MachineOperand *Base, const MachineOperand *Scale,
>             const MachineOperand *Index, const MachineOperand *Segment,
> -           const MachineOperand *Disp)
> -      : Disp(Disp) {
> +           const MachineOperand *Disp, bool DispCheck = false)
> +      : Disp(Disp), DeepCheck(DispCheck) {
>      Operands[0] = Base;
>      Operands[1] = Scale;
>      Operands[2] = Index;
>      Operands[3] = Segment;
>    }
>
> +  /// Checks operands of MemOpKey are identical, if Base or Index
> +  /// operand definitions are of kind SUBREG_TO_REG then compare
> +  /// operands of defining MI.
> +  bool performDeepCheck(const MemOpKey &Other) const {
> +    MachineInstr *MI = const_cast<MachineInstr *>(Operands[0]->getParent());
> +    MachineRegisterInfo *MRI = MI->getRegInfo();
> +
> +    for (int i = 0; i < 4; i++) {
> +      bool copyLike = isDefCopyLike(MRI, *Operands[i]);
> +      if (copyLike && !isIdenticalMI(MRI, *Operands[i], *Other.Operands[i]))
> +        return false;
> +      else if (!copyLike && !isIdenticalOp(*Operands[i], *Other.Operands[i]))
> +        return false;
> +    }
> +    return isIdenticalOp(*Disp, *Other.Disp);
> +  }
> +
>    bool operator==(const MemOpKey &Other) const {
> +    if (DeepCheck)
> +      return performDeepCheck(Other);
> +
>      // Addresses' bases, scales, indices and segments must be identical.
>      for (int i = 0; i < 4; ++i)
>        if (!isIdenticalOp(*Operands[i], *Other.Operands[i]))
> @@ -91,6 +120,12 @@ public:
>
>    // Address' displacement operand.
>    const MachineOperand *Disp;
> +
> +  // If true checks Address' base, index, segment and
> +  // displacement are identical, in additions if base/index
> +  // are defined by copylike instruction then futher
> +  // compare the operands of the defining instruction.
> +  bool DeepCheck;
>  };
>  } // end anonymous namespace
>
> @@ -114,12 +149,34 @@ template <> struct DenseMapInfo<MemOpKey
>    static unsigned getHashValue(const MemOpKey &Val) {
>      // Checking any field of MemOpKey is enough to determine if the key is
>      // empty or tombstone.
> +    hash_code Hash(0);
>      assert(Val.Disp != PtrInfo::getEmptyKey() && "Cannot hash the empty key");
>      assert(Val.Disp != PtrInfo::getTombstoneKey() &&
>             "Cannot hash the tombstone key");
>
> -    hash_code Hash = hash_combine(*Val.Operands[0], *Val.Operands[1],
> -                                  *Val.Operands[2], *Val.Operands[3]);
> +    auto getMIHash = [](MachineInstr *MI) -> hash_code {
> +      hash_code h(0);
> +      for (unsigned i = 1, e = MI->getNumOperands(); i < e; i++)
> +        h = hash_combine(h, MI->getOperand(i));
> +      return h;
> +    };
> +
> +    const MachineOperand &Base = *Val.Operands[0];
> +    const MachineOperand &Index = *Val.Operands[2];
> +    MachineInstr *MI = const_cast<MachineInstr *>(Base.getParent());
> +    MachineRegisterInfo *MRI = MI->getRegInfo();
> +
> +    if (isDefCopyLike(MRI, Base))
> +      Hash = getMIHash(MRI->getVRegDef(Base.getReg()));
> +    else
> +      Hash = hash_combine(Hash, Base);
> +
> +    if (isDefCopyLike(MRI, Index))
> +      Hash = getMIHash(MRI->getVRegDef(Index.getReg()));
> +    else
> +      Hash = hash_combine(Hash, Index);
> +
> +    Hash = hash_combine(Hash, *Val.Operands[1], *Val.Operands[3]);
>
>      // If the address displacement is an immediate, it should not affect the
>      // hash so that memory operands which differ only be immediate displacement
> @@ -178,6 +235,16 @@ static inline MemOpKey getMemOpKey(const
>                    &MI.getOperand(N + X86::AddrDisp));
>  }
>
> +static inline MemOpKey getMemOpCSEKey(const MachineInstr &MI, unsigned N) {
> +  static MachineOperand DummyScale = MachineOperand::CreateImm(1);
> +  assert((isLEA(MI) || MI.mayLoadOrStore()) &&
> +         "The instruction must be a LEA, a load or a store");
> +  return MemOpKey(&MI.getOperand(N + X86::AddrBaseReg), &DummyScale,
> +                  &MI.getOperand(N + X86::AddrIndexReg),
> +                  &MI.getOperand(N + X86::AddrSegmentReg),
> +                  &MI.getOperand(N + X86::AddrDisp), true);
> +}
> +
>  static inline bool isIdenticalOp(const MachineOperand &MO1,
>                                   const MachineOperand &MO2) {
>    return MO1.isIdenticalTo(MO2) &&
> @@ -185,6 +252,27 @@ static inline bool isIdenticalOp(const M
>            !TargetRegisterInfo::isPhysicalRegister(MO1.getReg()));
>  }
>
> +static bool isIdenticalMI(MachineRegisterInfo *MRI, const MachineOperand &MO1,
> +                          const MachineOperand &MO2) {
> +  MachineInstr *MI1 = nullptr;
> +  MachineInstr *MI2 = nullptr;
> +  if (!MO1.isReg() || !MO2.isReg())
> +    return false;
> +
> +  MI1 = MRI->getVRegDef(MO1.getReg());
> +  MI2 = MRI->getVRegDef(MO2.getReg());
> +  if (!MI1 || !MI2)
> +    return false;
> +  if (MI1->getOpcode() != MI2->getOpcode())
> +    return false;
> +  if (MI1->getNumOperands() != MI2->getNumOperands())
> +    return false;
> +  for (unsigned i = 1, e = MI1->getNumOperands(); i < e; ++i)
> +    if (!isIdenticalOp(MI1->getOperand(i), MI2->getOperand(i)))
> +      return false;
> +  return true;
> +}
> +
>  #ifndef NDEBUG
>  static bool isValidDispOp(const MachineOperand &MO) {
>    return MO.isImm() || MO.isCPI() || MO.isJTI() || MO.isSymbol() ||
> @@ -216,7 +304,140 @@ static inline bool isLEA(const MachineIn
>           Opcode == X86::LEA64r || Opcode == X86::LEA64_32r;
>  }
>
> +static bool isDefCopyLike(MachineRegisterInfo *MRI, const MachineOperand &Opr) {
> +  if (!Opr.isReg() || TargetRegisterInfo::isPhysicalRegister(Opr.getReg()))
> +    return false;
> +  MachineInstr *MI = MRI->getVRegDef(Opr.getReg());
> +  return MI && MI->isCopyLike();
> +}
> +
>  namespace {
> +
> +/// This class captures the functions and attributes
> +/// needed to factorize LEA within and across basic
> +/// blocks.LEA instruction with same BASE,OFFSET and
> +/// INDEX are the candidates for factorization.
> +class FactorizeLEAOpt {
> +public:
> +  using LEAListT = std::list<MachineInstr *>;
> +  using LEAMapT = DenseMap<MemOpKey, LEAListT>;
> +  using ValueT = DenseMap<MemOpKey, unsigned>;
> +  using ScopeEntryT = std::pair<MachineBasicBlock *, ValueT>;
> +  using ScopeStackT = std::vector<ScopeEntryT>;
> +
> +  FactorizeLEAOpt() = default;
> +  FactorizeLEAOpt(const FactorizeLEAOpt &) = delete;
> +  FactorizeLEAOpt &operator=(const FactorizeLEAOpt &) = delete;
> +
> +  void performCleanup() {
> +    for (auto LEA : removedLEAs)
> +      LEA->eraseFromParent();
> +    LEAs.clear();
> +    Stack.clear();
> +    removedLEAs.clear();
> +  }
> +
> +  LEAMapT &getLEAMap() { return LEAs; }
> +  ScopeEntryT *getTopScope() { return &Stack.back(); }
> +
> +  void addForLazyRemoval(MachineInstr *Instr) { removedLEAs.insert(Instr); }
> +
> +  bool checkIfScheduledForRemoval(MachineInstr *Instr) {
> +    return removedLEAs.find(Instr) != removedLEAs.end();
> +  }
> +
> +  /// Push the ScopeEntry for the BasicBlock over Stack.
> +  /// Also traverses over list of instruction and update
> +  /// LEAs Map and ScopeEntry for each LEA instruction
> +  /// found using insertLEA().
> +  void pushScope(MachineBasicBlock *MBB);
> +
> +  /// Stores the size of MachineInstr list corrosponding
> +  /// to key K from LEAs MAP into the ScopeEntry of
> +  /// the basic block, then insert the LEA at the beginning
> +  /// of the list.
> +  void insertLEA(MachineInstr *MI);
> +
> +  /// Pops out ScopeEntry of top most BasicBlock from the stack
> +  /// and remove the LEA instructions contained in the scope
> +  /// from the LEAs Map.
> +  void popScope();
> +
> +  /// If LEA contains Physical Registers then its not a candidate
> +  /// for factorizations since physical registers may violate SSA
> +  /// semantics of MI.
> +  bool containsPhyReg(MachineInstr *MI, unsigned RecLevel);
> +
> +private:
> +  ScopeStackT Stack;
> +  LEAMapT LEAs;
> +  std::set<MachineInstr *> removedLEAs;
> +};
> +
> +void FactorizeLEAOpt::pushScope(MachineBasicBlock *MBB) {
> +  ValueT EmptyMap;
> +  ScopeEntryT SE = std::make_pair(MBB, EmptyMap);
> +  Stack.push_back(SE);
> +  for (auto &MI : *MBB) {
> +    if (isLEA(MI))
> +      insertLEA(&MI);
> +  }
> +}
> +
> +void FactorizeLEAOpt::popScope() {
> +  ScopeEntryT &SE = Stack.back();
> +  for (auto MapEntry : SE.second) {
> +    LEAMapT::iterator Itr = LEAs.find(MapEntry.first);
> +    assert((Itr != LEAs.end()) &&
> +           "LEAs map must have a node corresponding to ScopeEntry's Key.");
> +
> +    while (((*Itr).second.size() > MapEntry.second))
> +      (*Itr).second.pop_front();
> +    // If list goes empty remove entry from LEAs Map.
> +    if ((*Itr).second.empty())
> +      LEAs.erase(Itr);
> +  }
> +  Stack.pop_back();
> +}
> +
> +bool FactorizeLEAOpt::containsPhyReg(MachineInstr *MI, unsigned RecLevel) {
> +  if (!MI || !RecLevel)
> +    return false;
> +
> +  MachineRegisterInfo *MRI = MI->getRegInfo();
> +  for (auto Operand : MI->operands()) {
> +    if (!Operand.isReg())
> +      continue;
> +    if (TargetRegisterInfo::isPhysicalRegister(Operand.getReg()))
> +      return true;
> +    MachineInstr *OperDefMI = MRI->getVRegDef(Operand.getReg());
> +    if (OperDefMI && (MI != OperDefMI) && OperDefMI->isCopyLike() &&
> +        containsPhyReg(OperDefMI, RecLevel - 1))
> +      return true;
> +  }
> +  return false;
> +}
> +
> +void FactorizeLEAOpt::insertLEA(MachineInstr *MI) {
> +  unsigned lsize;
> +  if (containsPhyReg(MI, 2))
> +    return;
> +
> +  MemOpKey Key = getMemOpCSEKey(*MI, 1);
> +  ScopeEntryT *TopScope = getTopScope();
> +
> +  LEAMapT::iterator Itr = LEAs.find(Key);
> +  if (Itr == LEAs.end()) {
> +    lsize = 0;
> +    LEAs[Key].push_front(MI);
> +  } else {
> +    lsize = (*Itr).second.size();
> +    (*Itr).second.push_front(MI);
> +  }
> +  if (TopScope->second.find(Key) == TopScope->second.end())
> +    TopScope->second[Key] = lsize;
> +}
> +
>  class OptimizeLEAPass : public MachineFunctionPass {
>  public:
>    OptimizeLEAPass() : MachineFunctionPass(ID) {}
> @@ -228,6 +449,12 @@ public:
>    /// been calculated by LEA. Also, remove redundant LEAs.
>    bool runOnMachineFunction(MachineFunction &MF) override;
>
> +  void getAnalysisUsage(AnalysisUsage &AU) const override {
> +    AU.setPreservesCFG();
> +    MachineFunctionPass::getAnalysisUsage(AU);
> +    AU.addRequired<MachineDominatorTree>();
> +  }
> +
>  private:
>    typedef DenseMap<MemOpKey, SmallVector<MachineInstr *, 16>> MemOpMap;
>
> @@ -273,8 +500,24 @@ private:
>    /// \brief Removes LEAs which calculate similar addresses.
>    bool removeRedundantLEAs(MemOpMap &LEAs);
>
> +  /// \brief Visit over basic blocks, collect LEAs in a scoped
> +  ///  hash map (FactorizeLEAOpt::LEAs) and try to factor them out.
> +  bool FactorizeLEAsAllBasicBlocks(MachineFunction &MF);
> +
> +  bool FactorizeLEAsBasicBlock(MachineDomTreeNode *DN);
> +
> +  /// \brief Factor out LEAs which share Base,Index,Offset and Segment.
> +  bool processBasicBlock(const MachineBasicBlock &MBB);
> +
> +  /// \brief Try to replace LEA with a lower strength instruction
> +  /// to improves latency and throughput.
> +  bool strengthReduceLEAs(MemOpMap &LEAs, const MachineBasicBlock &MBB);
> +
>    DenseMap<const MachineInstr *, unsigned> InstrPos;
>
> +  FactorizeLEAOpt FactorOpt;
> +
> +  MachineDominatorTree *DT;
>    MachineRegisterInfo *MRI;
>    const X86InstrInfo *TII;
>    const X86RegisterInfo *TRI;
> @@ -647,6 +890,168 @@ bool OptimizeLEAPass::removeRedundantLEA
>    return Changed;
>  }
>
> +static inline int getADDrrFromLEA(int LEAOpcode) {
> +  switch (LEAOpcode) {
> +  default:
> +    llvm_unreachable("Unexpected LEA instruction");
> +  case X86::LEA16r:
> +    return X86::ADD16rr;
> +  case X86::LEA32r:
> +    return X86::ADD32rr;
> +  case X86::LEA64_32r:
> +  case X86::LEA64r:
> +    return X86::ADD64rr;
> +  }
> +}
> +
> +bool OptimizeLEAPass::strengthReduceLEAs(MemOpMap &LEAs,
> +                                         const MachineBasicBlock &BB) {
> +  bool Changed = false;
> +
> +  // Loop over all entries in the table.
> +  for (auto &E : LEAs) {
> +    auto &List = E.second;
> +
> +    // Loop over all LEA pairs.
> +    for (auto I1 = List.begin(); I1 != List.end(); I1++) {
> +      MachineInstrBuilder NewMI;
> +      MachineInstr &First = **I1;
> +      MachineOperand &Res = First.getOperand(0);
> +      MachineOperand &Base = First.getOperand(1);
> +      MachineOperand &Scale = First.getOperand(2);
> +      MachineOperand &Index = First.getOperand(3);
> +      MachineOperand &Offset = First.getOperand(4);
> +
> +      const MCInstrDesc &ADDrr = TII->get(getADDrrFromLEA(First.getOpcode()));
> +      const DebugLoc DL = First.getDebugLoc();
> +
> +      if (!Base.isReg() || !Index.isReg())
> +        continue;
> +      if (TargetRegisterInfo::isPhysicalRegister(Res.getReg()) ||
> +          TargetRegisterInfo::isPhysicalRegister(Base.getReg()) ||
> +          TargetRegisterInfo::isPhysicalRegister(Index.getReg()))
> +        continue;
> +
> +      MachineBasicBlock &MBB = *(const_cast<MachineBasicBlock *>(&BB));
> +      if (Scale.isImm() && Scale.getImm() == 1) {
> +        // R = B + I
> +        if (Offset.isImm() && !Offset.getImm()) {
> +          NewMI = BuildMI(MBB, &First, DL, ADDrr)
> +                      .addDef(Res.getReg())
> +                      .addUse(Base.getReg())
> +                      .addUse(Index.getReg());
> +          Changed = NewMI.getInstr() != nullptr;
> +          First.eraseFromParent();
> +        }
> +      }
> +    }
> +  }
> +  return Changed;
> +}
> +
> +bool OptimizeLEAPass::processBasicBlock(const MachineBasicBlock &MBB) {
> +  bool cseDone = false;
> +
> +  // Legal scale value (1,2,4 & 8) vector.
> +  int LegalScale[9] = {0, 1, 1, 0, 1, 0, 0, 0, 1};
> +
> +  auto CompareFn = [](const MachineInstr *Arg1,
> +                      const MachineInstr *Arg2) -> bool {
> +    if (Arg1->getOperand(2).getImm() < Arg2->getOperand(2).getImm())
> +      return false;
> +    return true;
> +  };
> +
> +  // Loop over all entries in the table.
> +  for (auto &E : FactorOpt.getLEAMap()) {
> +    auto &List = E.second;
> +    if (List.size() > 1)
> +      List.sort(CompareFn);
> +
> +    // Loop over all LEA pairs.
> +    for (auto Iter1 = List.begin(); Iter1 != List.end(); Iter1++) {
> +      for (auto Iter2 = std::next(Iter1); Iter2 != List.end(); Iter2++) {
> +        MachineInstr &LI1 = **Iter1;
> +        MachineInstr &LI2 = **Iter2;
> +
> +        if (!DT->dominates(&LI2, &LI1))
> +          continue;
> +
> +        int Scale1 = LI1.getOperand(2).getImm();
> +        int Scale2 = LI2.getOperand(2).getImm();
> +        assert(LI2.getOperand(0).isReg() && "Result is a VirtualReg");
> +        DebugLoc DL = LI1.getDebugLoc();
> +
> +        if (FactorOpt.checkIfScheduledForRemoval(&LI1))
> +          continue;
> +
> +        int Factor = Scale1 - Scale2;
> +        if (Factor > 0 && LegalScale[Factor]) {
> +          DEBUG(dbgs() << "CSE LEAs: Candidate to replace: "; LI1.dump(););
> +          MachineInstrBuilder NewMI =
> +              BuildMI(*(const_cast<MachineBasicBlock *>(&MBB)), &LI1, DL,
> +                      TII->get(LI1.getOpcode()))
> +                  .addDef(LI1.getOperand(0).getReg()) // Dst   = Dst of LI1.
> +                  .addUse(LI2.getOperand(0).getReg()) // Base  = Dst of LI2.
> +                  .addImm(Factor) // Scale = Diff b/w scales.
> +                  .addUse(LI1.getOperand(3).getReg()) // Index = Index of LI1.
> +                  .addImm(0)                          // Disp  = 0
> +                  .addUse(
> +                      LI1.getOperand(5).getReg()); // Segment = Segmant of LI1.
> +
> +          cseDone = NewMI.getInstr() != nullptr;
> +
> +          LI1.getOperand(0).setIsDef(false);
> +
> +          /// Lazy removal shall ensure that replaced LEA remains
> +          /// till we finish processing all the basic block. This shall
> +          /// provide opportunity for further factorization based on
> +          /// the replaced LEA which will be legal since it has same
> +          /// destination as newly formed LEA.
> +          FactorOpt.addForLazyRemoval(&LI1);
> +
> +          NumFactoredLEAs++;
> +          DEBUG(dbgs() << "CSE LEAs: Replaced by: "; NewMI->dump(););
> +        }
> +      }
> +    }
> +  }
> +  return cseDone;
> +}
> +
> +bool OptimizeLEAPass::FactorizeLEAsBasicBlock(MachineDomTreeNode *DN) {
> +  bool Changed = false;
> +  using StackT = SmallVector<MachineDomTreeNode* , 16>;
> +  using ProcessedMapT = DenseMap<MachineDomTreeNode* , bool>;
> +
> +  StackT WorkList;
> +  ProcessedMapT ProcessesMap;
> +
> +  WorkList.push_back(DN);
> +  while(!WorkList.empty()) {
> +    MachineDomTreeNode * MDN = WorkList.back();
> +    if (ProcessesMap.find(MDN) == ProcessesMap.end()) {
> +       ProcessesMap[MDN] = true;
> +       FactorOpt.pushScope(MDN->getBlock());
> +       Changed |= processBasicBlock(*MDN->getBlock());
> +       for (auto Child : MDN->getChildren())
> +          WorkList.push_back(Child);
> +    }
> +    MachineDomTreeNode *TDM = WorkList.back();
> +    if (MDN->getLevel() == TDM->getLevel()) {
> +       FactorOpt.popScope();
> +       WorkList.pop_back();
> +    }
> +  }
> +  return Changed;
> +}
> +
> +bool OptimizeLEAPass::FactorizeLEAsAllBasicBlocks(MachineFunction &MF) {
> +  bool Changed = FactorizeLEAsBasicBlock(DT->getRootNode());
> +  FactorOpt.performCleanup();
> +  return Changed;
> +}
> +
>  bool OptimizeLEAPass::runOnMachineFunction(MachineFunction &MF) {
>    bool Changed = false;
>
> @@ -656,6 +1061,10 @@ bool OptimizeLEAPass::runOnMachineFuncti
>    MRI = &MF.getRegInfo();
>    TII = MF.getSubtarget<X86Subtarget>().getInstrInfo();
>    TRI = MF.getSubtarget<X86Subtarget>().getRegisterInfo();
> +  DT = &getAnalysis<MachineDominatorTree>();
> +
> +  // Attempt factorizing LEAs.
> +  Changed |= FactorizeLEAsAllBasicBlocks(MF);
>
>    // Process all basic blocks.
>    for (auto &MBB : MF) {
> @@ -672,6 +1081,9 @@ bool OptimizeLEAPass::runOnMachineFuncti
>      // Remove redundant LEA instructions.
>      Changed |= removeRedundantLEAs(LEAs);
>
> +    // Strength reduce LEA instructions.
> +    Changed |= strengthReduceLEAs(LEAs, MBB);
> +
>      // Remove redundant address calculations. Do it only for -Os/-Oz since only
>      // a code size gain is expected from this part of the pass.
>      if (MF.getFunction()->optForSize())
>
> Modified: llvm/trunk/test/CodeGen/X86/GlobalISel/callingconv.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/GlobalISel/callingconv.ll?rev=314886&r1=314885&r2=314886&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/GlobalISel/callingconv.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/GlobalISel/callingconv.ll Wed Oct  4 02:02:10 2017
> @@ -405,7 +405,7 @@ define void @test_variadic_call_2(i8** %
>  ; X32-NEXT:    movl 4(%ecx), %ecx
>  ; X32-NEXT:    movl %eax, (%esp)
>  ; X32-NEXT:    movl $4, %eax
> -; X32-NEXT:    leal (%esp,%eax), %eax
> +; X32-NEXT:    addl %esp, %eax
>  ; X32-NEXT:    movl %edx, 4(%esp)
>  ; X32-NEXT:    movl %ecx, 4(%eax)
>  ; X32-NEXT:    calll variadic_callee
>
> Modified: llvm/trunk/test/CodeGen/X86/GlobalISel/gep.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/GlobalISel/gep.ll?rev=314886&r1=314885&r2=314886&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/GlobalISel/gep.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/GlobalISel/gep.ll Wed Oct  4 02:02:10 2017
> @@ -5,10 +5,10 @@
>  define i32* @test_gep_i8(i32 *%arr, i8 %ind) {
>  ; X64_GISEL-LABEL: test_gep_i8:
>  ; X64_GISEL:       # BB#0:
> -; X64_GISEL-NEXT:    movq $4, %rax
> -; X64_GISEL-NEXT:    movsbq %sil, %rcx
> -; X64_GISEL-NEXT:    imulq %rax, %rcx
> -; X64_GISEL-NEXT:    leaq (%rdi,%rcx), %rax
> +; X64_GISEL-NEXT:    movq $4, %rcx
> +; X64_GISEL-NEXT:    movsbq %sil, %rax
> +; X64_GISEL-NEXT:    imulq %rcx, %rax
> +; X64_GISEL-NEXT:    addq %rdi, %rax
>  ; X64_GISEL-NEXT:    retq
>  ;
>  ; X64-LABEL: test_gep_i8:
> @@ -25,7 +25,7 @@ define i32* @test_gep_i8_const(i32 *%arr
>  ; X64_GISEL-LABEL: test_gep_i8_const:
>  ; X64_GISEL:       # BB#0:
>  ; X64_GISEL-NEXT:    movq $80, %rax
> -; X64_GISEL-NEXT:    leaq (%rdi,%rax), %rax
> +; X64_GISEL-NEXT:    addq %rdi, %rax
>  ; X64_GISEL-NEXT:    retq
>  ;
>  ; X64-LABEL: test_gep_i8_const:
> @@ -39,10 +39,10 @@ define i32* @test_gep_i8_const(i32 *%arr
>  define i32* @test_gep_i16(i32 *%arr, i16 %ind) {
>  ; X64_GISEL-LABEL: test_gep_i16:
>  ; X64_GISEL:       # BB#0:
> -; X64_GISEL-NEXT:    movq $4, %rax
> -; X64_GISEL-NEXT:    movswq %si, %rcx
> -; X64_GISEL-NEXT:    imulq %rax, %rcx
> -; X64_GISEL-NEXT:    leaq (%rdi,%rcx), %rax
> +; X64_GISEL-NEXT:    movq $4, %rcx
> +; X64_GISEL-NEXT:    movswq %si, %rax
> +; X64_GISEL-NEXT:    imulq %rcx, %rax
> +; X64_GISEL-NEXT:    addq %rdi, %rax
>  ; X64_GISEL-NEXT:    retq
>  ;
>  ; X64-LABEL: test_gep_i16:
> @@ -59,7 +59,7 @@ define i32* @test_gep_i16_const(i32 *%ar
>  ; X64_GISEL-LABEL: test_gep_i16_const:
>  ; X64_GISEL:       # BB#0:
>  ; X64_GISEL-NEXT:    movq $80, %rax
> -; X64_GISEL-NEXT:    leaq (%rdi,%rax), %rax
> +; X64_GISEL-NEXT:    addq %rdi, %rax
>  ; X64_GISEL-NEXT:    retq
>  ;
>  ; X64-LABEL: test_gep_i16_const:
> @@ -73,10 +73,10 @@ define i32* @test_gep_i16_const(i32 *%ar
>  define i32* @test_gep_i32(i32 *%arr, i32 %ind) {
>  ; X64_GISEL-LABEL: test_gep_i32:
>  ; X64_GISEL:       # BB#0:
> -; X64_GISEL-NEXT:    movq $4, %rax
> -; X64_GISEL-NEXT:    movslq %esi, %rcx
> -; X64_GISEL-NEXT:    imulq %rax, %rcx
> -; X64_GISEL-NEXT:    leaq (%rdi,%rcx), %rax
> +; X64_GISEL-NEXT:    movq $4, %rcx
> +; X64_GISEL-NEXT:    movslq %esi, %rax
> +; X64_GISEL-NEXT:    imulq %rcx, %rax
> +; X64_GISEL-NEXT:    addq %rdi, %rax
>  ; X64_GISEL-NEXT:    retq
>  ;
>  ; X64-LABEL: test_gep_i32:
> @@ -92,7 +92,7 @@ define i32* @test_gep_i32_const(i32 *%ar
>  ; X64_GISEL-LABEL: test_gep_i32_const:
>  ; X64_GISEL:       # BB#0:
>  ; X64_GISEL-NEXT:    movq $20, %rax
> -; X64_GISEL-NEXT:    leaq (%rdi,%rax), %rax
> +; X64_GISEL-NEXT:    addq %rdi, %rax
>  ; X64_GISEL-NEXT:    retq
>  ;
>  ; X64-LABEL: test_gep_i32_const:
> @@ -108,7 +108,7 @@ define i32* @test_gep_i64(i32 *%arr, i64
>  ; X64_GISEL:       # BB#0:
>  ; X64_GISEL-NEXT:    movq $4, %rax
>  ; X64_GISEL-NEXT:    imulq %rsi, %rax
> -; X64_GISEL-NEXT:    leaq (%rdi,%rax), %rax
> +; X64_GISEL-NEXT:    addq %rdi, %rax
>  ; X64_GISEL-NEXT:    retq
>  ;
>  ; X64-LABEL: test_gep_i64:
> @@ -123,7 +123,7 @@ define i32* @test_gep_i64_const(i32 *%ar
>  ; X64_GISEL-LABEL: test_gep_i64_const:
>  ; X64_GISEL:       # BB#0:
>  ; X64_GISEL-NEXT:    movq $20, %rax
> -; X64_GISEL-NEXT:    leaq (%rdi,%rax), %rax
> +; X64_GISEL-NEXT:    addq %rdi, %rax
>  ; X64_GISEL-NEXT:    retq
>  ;
>  ; X64-LABEL: test_gep_i64_const:
>
> Modified: llvm/trunk/test/CodeGen/X86/GlobalISel/memop-scalar.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/GlobalISel/memop-scalar.ll?rev=314886&r1=314885&r2=314886&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/GlobalISel/memop-scalar.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/GlobalISel/memop-scalar.ll Wed Oct  4 02:02:10 2017
> @@ -181,7 +181,7 @@ define i32 @test_gep_folding_largeGepInd
>  ; ALL-LABEL: test_gep_folding_largeGepIndex:
>  ; ALL:       # BB#0:
>  ; ALL-NEXT:    movabsq $228719476720, %rax # imm = 0x3540BE3FF0
> -; ALL-NEXT:    leaq (%rdi,%rax), %rax
> +; ALL-NEXT:    addq %rdi, %rax
>  ; ALL-NEXT:    movl %esi, (%rax)
>  ; ALL-NEXT:    movl (%rax), %eax
>  ; ALL-NEXT:    retq
>
> Modified: llvm/trunk/test/CodeGen/X86/lea-opt-cse1.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/lea-opt-cse1.ll?rev=314886&r1=314885&r2=314886&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/lea-opt-cse1.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/lea-opt-cse1.ll Wed Oct  4 02:02:10 2017
> @@ -9,29 +9,21 @@ define void @test_func(%struct.SA* nocap
>  ; X64:       # BB#0: # %entry
>  ; X64-NEXT:    movl (%rdi), %eax
>  ; X64-NEXT:    movl 16(%rdi), %ecx
> -; X64-NEXT:    leal (%rax,%rcx), %edx
>  ; X64-NEXT:    leal 1(%rax,%rcx), %eax
>  ; X64-NEXT:    movl %eax, 12(%rdi)
> -; X64-NEXT:    leal 1(%rcx,%rdx), %eax
> +; X64-NEXT:    addq %rcx, %eax
>  ; X64-NEXT:    movl %eax, 16(%rdi)
>  ; X64-NEXT:    retq
>  ;
>  ; X86-LABEL: test_func:
>  ; X86:       # BB#0: # %entry
> -; X86-NEXT:    pushl %esi
> -; X86-NEXT:  .Lcfi0:
> -; X86-NEXT:    .cfi_def_cfa_offset 8
> -; X86-NEXT:  .Lcfi1:
> -; X86-NEXT:    .cfi_offset %esi, -8
>  ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
>  ; X86-NEXT:    movl (%eax), %ecx
>  ; X86-NEXT:    movl 16(%eax), %edx
> -; X86-NEXT:    leal 1(%ecx,%edx), %esi
> +; X86-NEXT:    leal 1(%ecx,%edx), %ecx
> +; X86-NEXT:    movl %ecx, 12(%eax)
>  ; X86-NEXT:    addl %edx, %ecx
> -; X86-NEXT:    movl %esi, 12(%eax)
> -; X86-NEXT:    leal 1(%edx,%ecx), %ecx
>  ; X86-NEXT:    movl %ecx, 16(%eax)
> -; X86-NEXT:    popl %esi
>  ; X86-NEXT:    retl
>   entry:
>     %h0 = getelementptr inbounds %struct.SA, %struct.SA* %ctx, i64 0, i32 0
>
> Modified: llvm/trunk/test/CodeGen/X86/lea-opt-cse2.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/lea-opt-cse2.ll?rev=314886&r1=314885&r2=314886&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/lea-opt-cse2.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/lea-opt-cse2.ll Wed Oct  4 02:02:10 2017
> @@ -1,6 +1,6 @@
>  ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
> -; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s -check-prefix=X64
> -; RUN: llc < %s -mtriple=i686-unknown   | FileCheck %s -check-prefix=X86
> +; RUN: llc < %s -mtriple=x86_64-unknown  -mattr=+slow-3ops-lea | FileCheck %s -check-prefix=X64
> +; RUN: llc < %s -mtriple=i686-unknown  -mattr=+slow-3ops-lea  | FileCheck %s -check-prefix=X86
>
>  %struct.SA = type { i32 , i32 , i32 , i32 , i32};
>
> @@ -10,47 +10,41 @@ define void @foo(%struct.SA* nocapture %
>  ; X64-NEXT:    .p2align 4, 0x90
>  ; X64-NEXT:  .LBB0_1: # %loop
>  ; X64-NEXT:    # =>This Inner Loop Header: Depth=1
> -; X64-NEXT:    movl (%rdi), %eax
> -; X64-NEXT:    movl 16(%rdi), %ecx
> -; X64-NEXT:    leal 1(%rax,%rcx), %edx
> -; X64-NEXT:    movl %edx, 12(%rdi)
> +; X64-NEXT:    movl 16(%rdi), %eax
> +; X64-NEXT:    movl (%rdi), %ecx
> +; X64-NEXT:    addl %eax, %ecx
> +; X64-NEXT:    incl %ecx
> +; X64-NEXT:    movl %ecx, 12(%rdi)
>  ; X64-NEXT:    decl %esi
>  ; X64-NEXT:    jne .LBB0_1
>  ; X64-NEXT:  # BB#2: # %exit
> -; X64-NEXT:    addl %ecx, %eax
> -; X64-NEXT:    leal 1(%rcx,%rax), %eax
> -; X64-NEXT:    movl %eax, 16(%rdi)
> +; X64-NEXT:    addl %eax, %ecx
> +; X64-NEXT:    movl %ecx, 16(%rdi)
>  ; X64-NEXT:    retq
>  ;
>  ; X86-LABEL: foo:
>  ; X86:       # BB#0: # %entry
> -; X86-NEXT:    pushl %edi
> +; X86-NEXT:    pushl %esi
>  ; X86-NEXT:  .Lcfi0:
>  ; X86-NEXT:    .cfi_def_cfa_offset 8
> -; X86-NEXT:    pushl %esi
>  ; X86-NEXT:  .Lcfi1:
> -; X86-NEXT:    .cfi_def_cfa_offset 12
> -; X86-NEXT:  .Lcfi2:
> -; X86-NEXT:    .cfi_offset %esi, -12
> -; X86-NEXT:  .Lcfi3:
> -; X86-NEXT:    .cfi_offset %edi, -8
> +; X86-NEXT:    .cfi_offset %esi, -8
>  ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
>  ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
>  ; X86-NEXT:    .p2align 4, 0x90
>  ; X86-NEXT:  .LBB0_1: # %loop
>  ; X86-NEXT:    # =>This Inner Loop Header: Depth=1
> -; X86-NEXT:    movl (%eax), %edx
> -; X86-NEXT:    movl 16(%eax), %esi
> -; X86-NEXT:    leal 1(%edx,%esi), %edi
> -; X86-NEXT:    movl %edi, 12(%eax)
> +; X86-NEXT:    movl 16(%eax), %edx
> +; X86-NEXT:    movl (%eax), %esi
> +; X86-NEXT:    addl %edx, %esi
> +; X86-NEXT:    incl %esi
> +; X86-NEXT:    movl %esi, 12(%eax)
>  ; X86-NEXT:    decl %ecx
>  ; X86-NEXT:    jne .LBB0_1
>  ; X86-NEXT:  # BB#2: # %exit
> -; X86-NEXT:    addl %esi, %edx
> -; X86-NEXT:    leal 1(%esi,%edx), %ecx
> -; X86-NEXT:    movl %ecx, 16(%eax)
> +; X86-NEXT:    addl %edx, %esi
> +; X86-NEXT:    movl %esi, 16(%eax)
>  ; X86-NEXT:    popl %esi
> -; X86-NEXT:    popl %edi
>  ; X86-NEXT:    retl
>   entry:
>     br label %loop
>
> Modified: llvm/trunk/test/CodeGen/X86/lea-opt-cse3.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/lea-opt-cse3.ll?rev=314886&r1=314885&r2=314886&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/lea-opt-cse3.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/lea-opt-cse3.ll Wed Oct  4 02:02:10 2017
> @@ -8,7 +8,7 @@ define i32 @foo(i32 %a, i32 %b) local_un
>  ; X64-NEXT:    # kill: %ESI<def> %ESI<kill> %RSI<def>
>  ; X64-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
>  ; X64-NEXT:    leal 4(%rdi,%rsi,2), %ecx
> -; X64-NEXT:    leal 4(%rdi,%rsi,4), %eax
> +; X64-NEXT:    leal (%ecx,%rsi,2), %eax
>  ; X64-NEXT:    imull %ecx, %eax
>  ; X64-NEXT:    retq
>  ;
> @@ -16,9 +16,9 @@ define i32 @foo(i32 %a, i32 %b) local_un
>  ; X86:       # BB#0: # %entry
>  ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
>  ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
> -; X86-NEXT:    leal 4(%ecx,%eax,2), %edx
> -; X86-NEXT:    leal 4(%ecx,%eax,4), %eax
> -; X86-NEXT:    imull %edx, %eax
> +; X86-NEXT:    leal 4(%ecx,%eax,2), %ecx
> +; X86-NEXT:    leal (%ecx,%eax,2), %eax
> +; X86-NEXT:    imull %ecx, %eax
>  ; X86-NEXT:    retl
>  entry:
>    %mul = shl i32 %b, 1
> @@ -36,7 +36,7 @@ define i32 @foo1(i32 %a, i32 %b) local_u
>  ; X64-NEXT:    # kill: %ESI<def> %ESI<kill> %RSI<def>
>  ; X64-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
>  ; X64-NEXT:    leal 4(%rdi,%rsi,4), %ecx
> -; X64-NEXT:    leal 4(%rdi,%rsi,8), %eax
> +; X64-NEXT:    leal (%ecx,%rsi,4), %eax
>  ; X64-NEXT:    imull %ecx, %eax
>  ; X64-NEXT:    retq
>  ;
> @@ -44,9 +44,9 @@ define i32 @foo1(i32 %a, i32 %b) local_u
>  ; X86:       # BB#0: # %entry
>  ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
>  ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
> -; X86-NEXT:    leal 4(%ecx,%eax,4), %edx
> -; X86-NEXT:    leal 4(%ecx,%eax,8), %eax
> -; X86-NEXT:    imull %edx, %eax
> +; X86-NEXT:    leal 4(%ecx,%eax,4), %ecx
> +; X86-NEXT:    leal (%ecx,%eax,4), %eax
> +; X86-NEXT:    imull %ecx, %eax
>  ; X86-NEXT:    retl
>  entry:
>    %mul = shl i32 %b, 2
> @@ -68,31 +68,23 @@ define i32 @foo1_mult_basic_blocks(i32 %
>  ; X64-NEXT:    cmpl $10, %ecx
>  ; X64-NEXT:    je .LBB2_2
>  ; X64-NEXT:  # BB#1: # %mid
> -; X64-NEXT:    leal 4(%rdi,%rsi,8), %eax
> -; X64-NEXT:    imull %eax, %ecx
> -; X64-NEXT:    movl %ecx, %eax
> +; X64-NEXT:    leal (%ecx,%rsi,4), %eax
> +; X64-NEXT:    imull %ecx, %eax
>  ; X64-NEXT:  .LBB2_2: # %exit
>  ; X64-NEXT:    retq
>  ;
>  ; X86-LABEL: foo1_mult_basic_blocks:
>  ; X86:       # BB#0: # %entry
> -; X86-NEXT:    pushl %esi
> -; X86-NEXT:  .Lcfi0:
> -; X86-NEXT:    .cfi_def_cfa_offset 8
> -; X86-NEXT:  .Lcfi1:
> -; X86-NEXT:    .cfi_offset %esi, -8
>  ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
> -; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
> -; X86-NEXT:    leal 4(%esi,%edx,4), %ecx
> +; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
> +; X86-NEXT:    leal 4(%eax,%edx,4), %ecx
>  ; X86-NEXT:    xorl %eax, %eax
>  ; X86-NEXT:    cmpl $10, %ecx
>  ; X86-NEXT:    je .LBB2_2
>  ; X86-NEXT:  # BB#1: # %mid
> -; X86-NEXT:    leal 4(%esi,%edx,8), %eax
> -; X86-NEXT:    imull %eax, %ecx
> -; X86-NEXT:    movl %ecx, %eax
> +; X86-NEXT:    leal (%ecx,%edx,4), %eax
> +; X86-NEXT:    imull %ecx, %eax
>  ; X86-NEXT:  .LBB2_2: # %exit
> -; X86-NEXT:    popl %esi
>  ; X86-NEXT:    retl
>  entry:
>    %mul = shl i32 %b, 2
> @@ -131,9 +123,9 @@ define i32 @foo1_mult_basic_blocks_illeg
>  ; X86-LABEL: foo1_mult_basic_blocks_illegal_scale:
>  ; X86:       # BB#0: # %entry
>  ; X86-NEXT:    pushl %esi
> -; X86-NEXT:  .Lcfi2:
> +; X86-NEXT:  .Lcfi0:
>  ; X86-NEXT:    .cfi_def_cfa_offset 8
> -; X86-NEXT:  .Lcfi3:
> +; X86-NEXT:  .Lcfi1:
>  ; X86-NEXT:    .cfi_offset %esi, -8
>  ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
>  ; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
>
> Modified: llvm/trunk/test/CodeGen/X86/lea-opt-cse4.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/lea-opt-cse4.ll?rev=314886&r1=314885&r2=314886&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/lea-opt-cse4.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/lea-opt-cse4.ll Wed Oct  4 02:02:10 2017
> @@ -1,43 +1,31 @@
>  ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
> -; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s -check-prefix=X64
> -; RUN: llc < %s -mtriple=i686-unknown   | FileCheck %s -check-prefix=X86
> +; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+slow-3ops-lea | FileCheck %s -check-prefix=X64
> +; RUN: llc < %s -mtriple=i686-unknown -mattr=+slow-3ops-lea | FileCheck %s -check-prefix=X86
>
>  %struct.SA = type { i32 , i32 , i32 , i32 , i32};
>
>  define void @foo(%struct.SA* nocapture %ctx, i32 %n) local_unnamed_addr #0 {
>  ; X64-LABEL: foo:
>  ; X64:       # BB#0: # %entry
> -; X64-NEXT:    movl 16(%rdi), %eax
> -; X64-NEXT:    movl (%rdi), %ecx
> -; X64-NEXT:    addl %eax, %ecx
> -; X64-NEXT:    addl %eax, %ecx
> -; X64-NEXT:    addl %eax, %ecx
> -; X64-NEXT:    leal (%rcx,%rax), %edx
> -; X64-NEXT:    leal 1(%rax,%rcx), %ecx
> -; X64-NEXT:    movl %ecx, 12(%rdi)
> -; X64-NEXT:    leal 1(%rax,%rdx), %eax
> +; X64-NEXT:    movl (%rdi), %eax
> +; X64-NEXT:    movl 16(%rdi), %ecx
> +; X64-NEXT:    leal (%rax,%rcx,4), %eax
> +; X64-NEXT:    addl $1, %eax
> +; X64-NEXT:    movl %eax, 12(%rdi)
> +; X64-NEXT:    addl %ecx, %eax
>  ; X64-NEXT:    movl %eax, 16(%rdi)
>  ; X64-NEXT:    retq
>  ;
>  ; X86-LABEL: foo:
>  ; X86:       # BB#0: # %entry
> -; X86-NEXT:    pushl %esi
> -; X86-NEXT:  .Lcfi0:
> -; X86-NEXT:    .cfi_def_cfa_offset 8
> -; X86-NEXT:  .Lcfi1:
> -; X86-NEXT:    .cfi_offset %esi, -8
>  ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
> -; X86-NEXT:    movl 16(%eax), %ecx
> -; X86-NEXT:    movl (%eax), %edx
> -; X86-NEXT:    addl %ecx, %edx
> -; X86-NEXT:    addl %ecx, %edx
> -; X86-NEXT:    addl %ecx, %edx
> -; X86-NEXT:    leal 1(%ecx,%edx), %esi
> -; X86-NEXT:    addl %ecx, %edx
> -; X86-NEXT:    movl %esi, 12(%eax)
> -; X86-NEXT:    leal 1(%ecx,%edx), %ecx
> +; X86-NEXT:    movl (%eax), %ecx
> +; X86-NEXT:    movl 16(%eax), %edx
> +; X86-NEXT:    leal (%ecx,%edx,4), %ecx
> +; X86-NEXT:    addl $1, %ecx
> +; X86-NEXT:    movl %ecx, 12(%eax)
> +; X86-NEXT:    addl %edx, %ecx
>  ; X86-NEXT:    movl %ecx, 16(%eax)
> -; X86-NEXT:    popl %esi
>  ; X86-NEXT:    retl
>   entry:
>     %h0 = getelementptr inbounds %struct.SA, %struct.SA* %ctx, i64 0, i32 0
> @@ -64,15 +52,15 @@ define void @foo_loop(%struct.SA* nocapt
>  ; X64-NEXT:    .p2align 4, 0x90
>  ; X64-NEXT:  .LBB1_1: # %loop
>  ; X64-NEXT:    # =>This Inner Loop Header: Depth=1
> -; X64-NEXT:    movl (%rdi), %ecx
>  ; X64-NEXT:    movl 16(%rdi), %eax
> -; X64-NEXT:    leal 1(%rcx,%rax), %edx
> -; X64-NEXT:    movl %edx, 12(%rdi)
> +; X64-NEXT:    movl (%rdi), %ecx
> +; X64-NEXT:    addl %eax, %ecx
> +; X64-NEXT:    incl %ecx
> +; X64-NEXT:    movl %ecx, 12(%rdi)
>  ; X64-NEXT:    decl %esi
>  ; X64-NEXT:    jne .LBB1_1
>  ; X64-NEXT:  # BB#2: # %exit
>  ; X64-NEXT:    addl %eax, %ecx
> -; X64-NEXT:    leal 1(%rax,%rcx), %ecx
>  ; X64-NEXT:    addl %eax, %ecx
>  ; X64-NEXT:    addl %eax, %ecx
>  ; X64-NEXT:    addl %eax, %ecx
> @@ -84,30 +72,25 @@ define void @foo_loop(%struct.SA* nocapt
>  ;
>  ; X86-LABEL: foo_loop:
>  ; X86:       # BB#0: # %entry
> -; X86-NEXT:    pushl %edi
> -; X86-NEXT:  .Lcfi2:
> -; X86-NEXT:    .cfi_def_cfa_offset 8
>  ; X86-NEXT:    pushl %esi
> -; X86-NEXT:  .Lcfi3:
> -; X86-NEXT:    .cfi_def_cfa_offset 12
> -; X86-NEXT:  .Lcfi4:
> -; X86-NEXT:    .cfi_offset %esi, -12
> -; X86-NEXT:  .Lcfi5:
> -; X86-NEXT:    .cfi_offset %edi, -8
> -; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
> +; X86-NEXT:  .Lcfi0:
> +; X86-NEXT:    .cfi_def_cfa_offset 8
> +; X86-NEXT:  .Lcfi1:
> +; X86-NEXT:    .cfi_offset %esi, -8
> +; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
>  ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
>  ; X86-NEXT:    .p2align 4, 0x90
>  ; X86-NEXT:  .LBB1_1: # %loop
>  ; X86-NEXT:    # =>This Inner Loop Header: Depth=1
> -; X86-NEXT:    movl (%eax), %esi
>  ; X86-NEXT:    movl 16(%eax), %ecx
> -; X86-NEXT:    leal 1(%esi,%ecx), %edi
> -; X86-NEXT:    movl %edi, 12(%eax)
> -; X86-NEXT:    decl %edx
> +; X86-NEXT:    movl (%eax), %edx
> +; X86-NEXT:    addl %ecx, %edx
> +; X86-NEXT:    incl %edx
> +; X86-NEXT:    movl %edx, 12(%eax)
> +; X86-NEXT:    decl %esi
>  ; X86-NEXT:    jne .LBB1_1
>  ; X86-NEXT:  # BB#2: # %exit
> -; X86-NEXT:    addl %ecx, %esi
> -; X86-NEXT:    leal 1(%ecx,%esi), %edx
> +; X86-NEXT:    addl %ecx, %edx
>  ; X86-NEXT:    addl %ecx, %edx
>  ; X86-NEXT:    addl %ecx, %edx
>  ; X86-NEXT:    addl %ecx, %edx
> @@ -116,7 +99,6 @@ define void @foo_loop(%struct.SA* nocapt
>  ; X86-NEXT:    addl %ecx, %edx
>  ; X86-NEXT:    movl %edx, 16(%eax)
>  ; X86-NEXT:    popl %esi
> -; X86-NEXT:    popl %edi
>  ; X86-NEXT:    retl
>   entry:
>     br label %loop
>
> Modified: llvm/trunk/test/CodeGen/X86/mul-constant-i16.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/mul-constant-i16.ll?rev=314886&r1=314885&r2=314886&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/mul-constant-i16.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/mul-constant-i16.ll Wed Oct  4 02:02:10 2017
> @@ -558,11 +558,10 @@ define i16 @test_mul_by_28(i16 %x) {
>  define i16 @test_mul_by_29(i16 %x) {
>  ; X86-LABEL: test_mul_by_29:
>  ; X86:       # BB#0:
> -; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
> -; X86-NEXT:    leal (%ecx,%ecx,8), %eax
> -; X86-NEXT:    leal (%eax,%eax,2), %eax
> -; X86-NEXT:    addl %ecx, %eax
> -; X86-NEXT:    addl %ecx, %eax
> +; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
> +; X86-NEXT:    leal (%eax,%eax,8), %ecx
> +; X86-NEXT:    leal (%ecx,%ecx,2), %ecx
> +; X86-NEXT:    leal (%ecx,%eax,2), %eax
>  ; X86-NEXT:    # kill: %AX<def> %AX<kill> %EAX<kill>
>  ; X86-NEXT:    retl
>  ;
> @@ -571,8 +570,7 @@ define i16 @test_mul_by_29(i16 %x) {
>  ; X64-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
>  ; X64-NEXT:    leal (%rdi,%rdi,8), %eax
>  ; X64-NEXT:    leal (%rax,%rax,2), %eax
> -; X64-NEXT:    addl %edi, %eax
> -; X64-NEXT:    addl %edi, %eax
> +; X64-NEXT:    leal (%rax,%rdi,2), %eax
>  ; X64-NEXT:    # kill: %AX<def> %AX<kill> %EAX<kill>
>  ; X64-NEXT:    retq
>    %mul = mul nsw i16 %x, 29
>
> Modified: llvm/trunk/test/CodeGen/X86/mul-constant-i32.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/mul-constant-i32.ll?rev=314886&r1=314885&r2=314886&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/mul-constant-i32.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/mul-constant-i32.ll Wed Oct  4 02:02:10 2017
> @@ -1457,11 +1457,10 @@ define i32 @test_mul_by_28(i32 %x) {
>  define i32 @test_mul_by_29(i32 %x) {
>  ; X86-LABEL: test_mul_by_29:
>  ; X86:       # BB#0:
> -; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
> -; X86-NEXT:    leal (%ecx,%ecx,8), %eax
> -; X86-NEXT:    leal (%eax,%eax,2), %eax
> -; X86-NEXT:    addl %ecx, %eax
> -; X86-NEXT:    addl %ecx, %eax
> +; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
> +; X86-NEXT:    leal (%eax,%eax,8), %ecx
> +; X86-NEXT:    leal (%ecx,%ecx,2), %ecx
> +; X86-NEXT:    leal (%ecx,%eax,2), %eax
>  ; X86-NEXT:    retl
>  ;
>  ; X64-HSW-LABEL: test_mul_by_29:
> @@ -1469,8 +1468,7 @@ define i32 @test_mul_by_29(i32 %x) {
>  ; X64-HSW-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
>  ; X64-HSW-NEXT:    leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
>  ; X64-HSW-NEXT:    leal (%rax,%rax,2), %eax # sched: [1:0.50]
> -; X64-HSW-NEXT:    addl %edi, %eax # sched: [1:0.25]
> -; X64-HSW-NEXT:    addl %edi, %eax # sched: [1:0.25]
> +; X64-HSW-NEXT:    leal (%rax,%rdi,2), %eax # sched: [1:0.50]
>  ; X64-HSW-NEXT:    retq # sched: [2:1.00]
>  ;
>  ; X64-JAG-LABEL: test_mul_by_29:
> @@ -1478,8 +1476,7 @@ define i32 @test_mul_by_29(i32 %x) {
>  ; X64-JAG-NEXT:    # kill: %EDI<def> %EDI<kill> %RDI<def>
>  ; X64-JAG-NEXT:    leal (%rdi,%rdi,8), %eax # sched: [1:0.50]
>  ; X64-JAG-NEXT:    leal (%rax,%rax,2), %eax # sched: [1:0.50]
> -; X64-JAG-NEXT:    addl %edi, %eax # sched: [1:0.50]
> -; X64-JAG-NEXT:    addl %edi, %eax # sched: [1:0.50]
> +; X64-JAG-NEXT:    leal (%rax,%rdi,2), %eax # sched: [1:0.50]
>  ; X64-JAG-NEXT:    retq # sched: [4:1.00]
>  ;
>  ; X86-NOOPT-LABEL: test_mul_by_29:
>
> Modified: llvm/trunk/test/CodeGen/X86/mul-constant-i64.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/mul-constant-i64.ll?rev=314886&r1=314885&r2=314886&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/mul-constant-i64.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/mul-constant-i64.ll Wed Oct  4 02:02:10 2017
> @@ -1523,8 +1523,7 @@ define i64 @test_mul_by_29(i64 %x) {
>  ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
>  ; X86-NEXT:    leal (%eax,%eax,8), %ecx
>  ; X86-NEXT:    leal (%ecx,%ecx,2), %ecx
> -; X86-NEXT:    addl %eax, %ecx
> -; X86-NEXT:    addl %eax, %ecx
> +; X86-NEXT:    leal (%ecx,%eax,2), %ecx
>  ; X86-NEXT:    movl $29, %eax
>  ; X86-NEXT:    mull {{[0-9]+}}(%esp)
>  ; X86-NEXT:    addl %ecx, %edx
> @@ -1534,16 +1533,14 @@ define i64 @test_mul_by_29(i64 %x) {
>  ; X64-HSW:       # BB#0:
>  ; X64-HSW-NEXT:    leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
>  ; X64-HSW-NEXT:    leaq (%rax,%rax,2), %rax # sched: [1:0.50]
> -; X64-HSW-NEXT:    addq %rdi, %rax # sched: [1:0.25]
> -; X64-HSW-NEXT:    addq %rdi, %rax # sched: [1:0.25]
> +; X64-HSW-NEXT:    leaq (%rax,%rdi,2), %rax # sched: [1:0.50]
>  ; X64-HSW-NEXT:    retq # sched: [2:1.00]
>  ;
>  ; X64-JAG-LABEL: test_mul_by_29:
>  ; X64-JAG:       # BB#0:
>  ; X64-JAG-NEXT:    leaq (%rdi,%rdi,8), %rax # sched: [1:0.50]
>  ; X64-JAG-NEXT:    leaq (%rax,%rax,2), %rax # sched: [1:0.50]
> -; X64-JAG-NEXT:    addq %rdi, %rax # sched: [1:0.50]
> -; X64-JAG-NEXT:    addq %rdi, %rax # sched: [1:0.50]
> +; X64-JAG-NEXT:    leaq (%rax,%rdi,2), %rax # sched: [1:0.50]
>  ; X64-JAG-NEXT:    retq # sched: [4:1.00]
>  ;
>  ; X86-NOOPT-LABEL: test_mul_by_29:
>
> Modified: llvm/trunk/test/CodeGen/X86/mul-constant-result.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/mul-constant-result.ll?rev=314886&r1=314885&r2=314886&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/mul-constant-result.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/mul-constant-result.ll Wed Oct  4 02:02:10 2017
> @@ -166,8 +166,7 @@ define i32 @mult(i32, i32) local_unnamed
>  ; X86-NEXT:  .LBB0_35:
>  ; X86-NEXT:    leal (%eax,%eax,8), %ecx
>  ; X86-NEXT:    leal (%ecx,%ecx,2), %ecx
> -; X86-NEXT:    addl %eax, %ecx
> -; X86-NEXT:    addl %ecx, %eax
> +; X86-NEXT:    leal (%ecx,%eax,2), %eax
>  ; X86-NEXT:    popl %esi
>  ; X86-NEXT:    retl
>  ; X86-NEXT:  .LBB0_36:
> @@ -325,16 +324,17 @@ define i32 @mult(i32, i32) local_unnamed
>  ; X64-HSW-NEXT:  .LBB0_31:
>  ; X64-HSW-NEXT:    leal (%rax,%rax,8), %ecx
>  ; X64-HSW-NEXT:    leal (%rcx,%rcx,2), %ecx
> -; X64-HSW-NEXT:    jmp .LBB0_17
> -; X64-HSW-NEXT:  .LBB0_32:
> -; X64-HSW-NEXT:    leal (%rax,%rax,8), %ecx
> -; X64-HSW-NEXT:    leal (%rcx,%rcx,2), %ecx
> -; X64-HSW-NEXT:    addl %eax, %ecx
>  ; X64-HSW-NEXT:  .LBB0_17:
>  ; X64-HSW-NEXT:    addl %eax, %ecx
>  ; X64-HSW-NEXT:    movl %ecx, %eax
>  ; X64-HSW-NEXT:    # kill: %EAX<def> %EAX<kill> %RAX<kill>
>  ; X64-HSW-NEXT:    retq
> +; X64-HSW-NEXT:  .LBB0_32:
> +; X64-HSW-NEXT:    leal (%rax,%rax,8), %ecx
> +; X64-HSW-NEXT:    leal (%rcx,%rcx,2), %ecx
> +; X64-HSW-NEXT:    leal (%rcx,%rax,2), %eax
> +; X64-HSW-NEXT:    # kill: %EAX<def> %EAX<kill> %RAX<kill>
> +; X64-HSW-NEXT:    retq
>  ; X64-HSW-NEXT:  .LBB0_33:
>  ; X64-HSW-NEXT:    movl %eax, %ecx
>  ; X64-HSW-NEXT:    shll $5, %ecx
>
> Modified: llvm/trunk/test/CodeGen/X86/pr34629.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/pr34629.ll?rev=314886&r1=314885&r2=314886&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/pr34629.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/pr34629.ll Wed Oct  4 02:02:10 2017
> @@ -13,8 +13,8 @@ define void @c() local_unnamed_addr #0 {
>  ; CHECK:       # BB#0: # %entry
>  ; CHECK-NEXT:    movq {{.*}}(%rip), %rax
>  ; CHECK-NEXT:    leaq (%rax,%rax,4), %rcx
> +; CHECK-NEXT:    leaq (%rcx,%rax,4), %rax
>  ; CHECK-NEXT:    negq %rcx
> -; CHECK-NEXT:    leaq (%rax,%rax,8), %rax
>  ; CHECK-NEXT:    leaq (%rax,%rax,4), %rax
>  ; CHECK-NEXT:    testq %rax, %rcx
>  ; CHECK-NEXT:    je .LBB0_2
>
> Modified: llvm/trunk/test/CodeGen/X86/pr34634.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/pr34634.ll?rev=314886&r1=314885&r2=314886&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/pr34634.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/pr34634.ll Wed Oct  4 02:02:10 2017
> @@ -14,11 +14,11 @@ define void @fn1() local_unnamed_addr #0
>  ; CHECK-NEXT:    movslq {{.*}}(%rip), %rax
>  ; CHECK-NEXT:    leaq (%rax,%rax,4), %rcx
>  ; CHECK-NEXT:    leaq (,%rax,4), %rdx
> -; CHECK-NEXT:    movl a(%rdx,%rcx,8), %ecx
> -; CHECK-NEXT:    leaq (%rax,%rax,8), %rdx
> -; CHECK-NEXT:    leaq (%rdx,%rdx,2), %rdx
> -; CHECK-NEXT:    addq %rax, %rdx
> -; CHECK-NEXT:    movl %ecx, b(%rdx,%rax,4)
> +; CHECK-NEXT:    movl a(%rdx,%rcx,8), %edx
> +; CHECK-NEXT:    leaq (%rcx,%rax,4), %rcx
> +; CHECK-NEXT:    leaq (%rcx,%rcx,2), %rcx
> +; CHECK-NEXT:    addq %rax, %rcx
> +; CHECK-NEXT:    movl %edx, b(%rcx,%rax,4)
>  ; CHECK-NEXT:    retq
>  entry:
>    %0 = load i32, i32* @c, align 4, !tbaa !2
> @@ -37,11 +37,11 @@ define i32 @main() local_unnamed_addr #0
>  ; CHECK-NEXT:    movslq {{.*}}(%rip), %rax
>  ; CHECK-NEXT:    leaq (%rax,%rax,4), %rcx
>  ; CHECK-NEXT:    leaq (,%rax,4), %rdx
> -; CHECK-NEXT:    movl a(%rdx,%rcx,8), %ecx
> -; CHECK-NEXT:    leaq (%rax,%rax,8), %rdx
> -; CHECK-NEXT:    leaq (%rdx,%rdx,2), %rdx
> -; CHECK-NEXT:    addq %rax, %rdx
> -; CHECK-NEXT:    movl %ecx, b(%rdx,%rax,4)
> +; CHECK-NEXT:    movl a(%rdx,%rcx,8), %edx
> +; CHECK-NEXT:    leaq (%rcx,%rax,4), %rcx
> +; CHECK-NEXT:    leaq (%rcx,%rcx,2), %rcx
> +; CHECK-NEXT:    addq %rax, %rcx
> +; CHECK-NEXT:    movl %edx, b(%rcx,%rax,4)
>  ; CHECK-NEXT:    xorl %eax, %eax
>  ; CHECK-NEXT:    retq
>  entry:
>
> Modified: llvm/trunk/test/CodeGen/X86/umul-with-overflow.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/umul-with-overflow.ll?rev=314886&r1=314885&r2=314886&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/umul-with-overflow.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/umul-with-overflow.ll Wed Oct  4 02:02:10 2017
> @@ -40,10 +40,10 @@ define i32 @test2(i32 %a, i32 %b) nounwi
>  ; X64-NEXT:    leal (%rdi,%rdi), %eax
>  ; X64-NEXT:    retq
>  entry:
> -       %tmp0 = add i32 %b, %a
> -       %tmp1 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %tmp0, i32 2)
> -       %tmp2 = extractvalue { i32, i1 } %tmp1, 0
> -       ret i32 %tmp2
> +        %tmp0 = add i32 %b, %a
> +        %tmp1 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %tmp0, i32 2)
> +        %tmp2 = extractvalue { i32, i1 } %tmp1, 0
> +        ret i32 %tmp2
>  }
>
>  define i32 @test3(i32 %a, i32 %b) nounwind readnone {
> @@ -64,8 +64,8 @@ define i32 @test3(i32 %a, i32 %b) nounwi
>  ; X64-NEXT:    mull %ecx
>  ; X64-NEXT:    retq
>  entry:
> -       %tmp0 = add i32 %b, %a
> -       %tmp1 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %tmp0, i32 4)
> -       %tmp2 = extractvalue { i32, i1 } %tmp1, 0
> -       ret i32 %tmp2
> +        %tmp0 = add i32 %b, %a
> +        %tmp1 = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %tmp0, i32 4)
> +        %tmp2 = extractvalue { i32, i1 } %tmp1, 0
> +        ret i32 %tmp2
>  }
>
> Modified: llvm/trunk/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll?rev=314886&r1=314885&r2=314886&view=diff
> ==============================================================================
> --- llvm/trunk/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll (original)
> +++ llvm/trunk/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll Wed Oct  4 02:02:10 2017
> @@ -13,14 +13,14 @@
>  ; X64-NEXT: .p2align
>  ; X64: %loop
>  ; no complex address modes
> -; X64-NOT: (%{{[^)]+}},%{{[^)]+}},
> +; X64-NOT: [1-9]+(%{{[^)]+}},%{{[^)]+}},
>  ;
>  ; X32: @simple
>  ; no expensive address computation in the preheader
>  ; X32-NOT: imul
>  ; X32: %loop
>  ; no complex address modes
> -; X32-NOT: (%{{[^)]+}},%{{[^)]+}},
> +; X32-NOT: [1-9]+(%{{[^)]+}},%{{[^)]+}},
>  define i32 @simple(i32* %a, i32* %b, i32 %x) nounwind {
>  entry:
>    br label %loop
> @@ -103,7 +103,7 @@ exit:
>  ; X32-NOT: mov{{.*}}(%esp){{$}}
>  ; X32: %for.body{{$}}
>  ; no complex address modes
> -; X32-NOT: (%{{[^)]+}},%{{[^)]+}},
> +; X32-NOT: [1-9]+(%{{[^)]+}},%{{[^)]+}},
>  ; no reloads
>  ; X32-NOT: (%esp)
>  define void @extrastride(i8* nocapture %main, i32 %main_stride, i32* nocapture %res, i32 %x, i32 %y, i32 %z) nounwind {
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits