[llvm] 8ce0c7b - Revert "[SLP]Improve analysis of same/alternate code ops and scheduling."

Alexey Bataev via llvm-commits llvm-commits at lists.llvm.org
Thu Oct 27 15:22:14 PDT 2022


Author: Alexey Bataev
Date: 2022-10-27T15:21:35-07:00
New Revision: 8ce0c7b1c96de176a2a68b85f972a4917be963a8

URL: https://github.com/llvm/llvm-project/commit/8ce0c7b1c96de176a2a68b85f972a4917be963a8
DIFF: https://github.com/llvm/llvm-project/commit/8ce0c7b1c96de176a2a68b85f972a4917be963a8.diff

LOG: Revert "[SLP]Improve analysis of same/alternate code ops and scheduling."

This reverts commit dad64448c66975054d3d968232652a56eb93b451 to fix
a crash observed on the buildbot at https://lab.llvm.org/buildbot/#/builders/74/builds/14584
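
For readers skimming the diff, the bulk of the revert is mechanical: the reverted
patch had threaded a TargetLibraryInfo reference through the SLP same-opcode
analysis, and the revert un-threads it everywhere. A condensed before/after sketch
of the central declaration, taken from the first hunk below:

    // With the reverted patch applied:
    static InstructionsState getSameOpcode(ArrayRef<Value *> VL,
                                           const TargetLibraryInfo &TLI,
                                           unsigned BaseIndex = 0);

    // After this revert (back to the pre-patch form):
    static InstructionsState getSameOpcode(ArrayRef<Value *> VL,
                                           unsigned BaseIndex = 0);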

Added: 
    

Modified: 
    llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
    llvm/test/Transforms/SLPVectorizer/X86/simplebb.ll
    llvm/test/Transforms/SLPVectorizer/X86/sin-sqrt.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index e7b2787d15db8..583ba283b07ed 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -536,26 +536,24 @@ static bool isValidForAlternation(unsigned Opcode) {
 }
 
 static InstructionsState getSameOpcode(ArrayRef<Value *> VL,
-                                       const TargetLibraryInfo &TLI,
                                        unsigned BaseIndex = 0);
 
 /// Checks if the provided operands of 2 cmp instructions are compatible, i.e.
 /// compatible instructions or constants, or just some other regular values.
 static bool areCompatibleCmpOps(Value *BaseOp0, Value *BaseOp1, Value *Op0,
-                                Value *Op1, const TargetLibraryInfo &TLI) {
+                                Value *Op1) {
   return (isConstant(BaseOp0) && isConstant(Op0)) ||
          (isConstant(BaseOp1) && isConstant(Op1)) ||
          (!isa<Instruction>(BaseOp0) && !isa<Instruction>(Op0) &&
           !isa<Instruction>(BaseOp1) && !isa<Instruction>(Op1)) ||
-         getSameOpcode({BaseOp0, Op0}, TLI).getOpcode() ||
-         getSameOpcode({BaseOp1, Op1}, TLI).getOpcode();
+         getSameOpcode({BaseOp0, Op0}).getOpcode() ||
+         getSameOpcode({BaseOp1, Op1}).getOpcode();
 }
 
 /// \returns true if a compare instruction \p CI has similar "look" and
 /// same predicate as \p BaseCI, "as is" or with its operands and predicate
 /// swapped, false otherwise.
-static bool isCmpSameOrSwapped(const CmpInst *BaseCI, const CmpInst *CI,
-                               const TargetLibraryInfo &TLI) {
+static bool isCmpSameOrSwapped(const CmpInst *BaseCI, const CmpInst *CI) {
   assert(BaseCI->getOperand(0)->getType() == CI->getOperand(0)->getType() &&
          "Assessing comparisons of 
diff erent types?");
   CmpInst::Predicate BasePred = BaseCI->getPredicate();
@@ -568,16 +566,15 @@ static bool isCmpSameOrSwapped(const CmpInst *BaseCI, const CmpInst *CI,
   Value *Op1 = CI->getOperand(1);
 
   return (BasePred == Pred &&
-          areCompatibleCmpOps(BaseOp0, BaseOp1, Op0, Op1, TLI)) ||
+          areCompatibleCmpOps(BaseOp0, BaseOp1, Op0, Op1)) ||
          (BasePred == SwappedPred &&
-          areCompatibleCmpOps(BaseOp0, BaseOp1, Op1, Op0, TLI));
+          areCompatibleCmpOps(BaseOp0, BaseOp1, Op1, Op0));
 }
 
 /// \returns analysis of the Instructions in \p VL described in
 /// InstructionsState, the Opcode that we suppose the whole list
 /// could be vectorized even if its structure is diverse.
 static InstructionsState getSameOpcode(ArrayRef<Value *> VL,
-                                       const TargetLibraryInfo &TLI,
                                        unsigned BaseIndex) {
   // Make sure these are all Instructions.
   if (llvm::any_of(VL, [](Value *V) { return !isa<Instruction>(V); }))
@@ -595,19 +592,9 @@ static InstructionsState getSameOpcode(ArrayRef<Value *> VL,
 
   // Check for one alternate opcode from another BinaryOperator.
   // TODO - generalize to support all operators (types, calls etc.).
-  auto *IBase = cast<Instruction>(VL[BaseIndex]);
-  Intrinsic::ID BaseID = 0;
-  SmallVector<VFInfo> BaseMappings;
-  if (auto *CallBase = dyn_cast<CallInst>(IBase)) {
-    BaseID = getVectorIntrinsicIDForCall(CallBase, &TLI);
-    BaseMappings = VFDatabase(*CallBase).getMappings(*CallBase);
-    if (!isTriviallyVectorizable(BaseID) && BaseMappings.empty())
-      return InstructionsState(VL[BaseIndex], nullptr, nullptr);
-  }
   for (int Cnt = 0, E = VL.size(); Cnt < E; Cnt++) {
-    auto *I = cast<Instruction>(VL[Cnt]);
-    unsigned InstOpcode = I->getOpcode();
-    if (IsBinOp && isa<BinaryOperator>(I)) {
+    unsigned InstOpcode = cast<Instruction>(VL[Cnt])->getOpcode();
+    if (IsBinOp && isa<BinaryOperator>(VL[Cnt])) {
       if (InstOpcode == Opcode || InstOpcode == AltOpcode)
         continue;
       if (Opcode == AltOpcode && isValidForAlternation(InstOpcode) &&
@@ -616,11 +603,9 @@ static InstructionsState getSameOpcode(ArrayRef<Value *> VL,
         AltIndex = Cnt;
         continue;
       }
-    } else if (IsCastOp && isa<CastInst>(I)) {
-      Value *Op0 = IBase->getOperand(0);
-      Type *Ty0 = Op0->getType();
-      Value *Op1 = I->getOperand(0);
-      Type *Ty1 = Op1->getType();
+    } else if (IsCastOp && isa<CastInst>(VL[Cnt])) {
+      Type *Ty0 = cast<Instruction>(VL[BaseIndex])->getOperand(0)->getType();
+      Type *Ty1 = cast<Instruction>(VL[Cnt])->getOperand(0)->getType();
       if (Ty0 == Ty1) {
         if (InstOpcode == Opcode || InstOpcode == AltOpcode)
           continue;
@@ -649,11 +634,11 @@ static InstructionsState getSameOpcode(ArrayRef<Value *> VL,
             (BasePred == CurrentPred || BasePred == SwappedCurrentPred))
           continue;
 
-        if (isCmpSameOrSwapped(BaseInst, Inst, TLI))
+        if (isCmpSameOrSwapped(BaseInst, Inst))
           continue;
         auto *AltInst = cast<CmpInst>(VL[AltIndex]);
         if (AltIndex != BaseIndex) {
-          if (isCmpSameOrSwapped(AltInst, Inst, TLI))
+          if (isCmpSameOrSwapped(AltInst, Inst))
             continue;
         } else if (BasePred != CurrentPred) {
           assert(
@@ -667,45 +652,8 @@ static InstructionsState getSameOpcode(ArrayRef<Value *> VL,
             AltPred == CurrentPred || AltPred == SwappedCurrentPred)
           continue;
       }
-    } else if (InstOpcode == Opcode || InstOpcode == AltOpcode) {
-      if (auto *Gep = dyn_cast<GetElementPtrInst>(I)) {
-        if (Gep->getNumOperands() != 2 ||
-            Gep->getOperand(0)->getType() != IBase->getOperand(0)->getType())
-          return InstructionsState(VL[BaseIndex], nullptr, nullptr);
-      } else if (auto *EI = dyn_cast<ExtractElementInst>(I)) {
-        if (!isVectorLikeInstWithConstOps(EI))
-          return InstructionsState(VL[BaseIndex], nullptr, nullptr);
-      } else if (auto *LI = dyn_cast<LoadInst>(I)) {
-        auto *BaseLI = cast<LoadInst>(IBase);
-        if (!LI->isSimple() || !BaseLI->isSimple())
-          return InstructionsState(VL[BaseIndex], nullptr, nullptr);
-      } else if (auto *Call = dyn_cast<CallInst>(I)) {
-        auto *CallBase = cast<CallInst>(IBase);
-        if (Call->getCalledFunction() != CallBase->getCalledFunction())
-          return InstructionsState(VL[BaseIndex], nullptr, nullptr);
-        if (Call->hasOperandBundles() &&
-            !std::equal(Call->op_begin() + Call->getBundleOperandsStartIndex(),
-                        Call->op_begin() + Call->getBundleOperandsEndIndex(),
-                        CallBase->op_begin() +
-                            CallBase->getBundleOperandsStartIndex()))
-          return InstructionsState(VL[BaseIndex], nullptr, nullptr);
-        Intrinsic::ID ID = getVectorIntrinsicIDForCall(Call, &TLI);
-        if (ID != BaseID)
-          return InstructionsState(VL[BaseIndex], nullptr, nullptr);
-        if (!ID) {
-          SmallVector<VFInfo> Mappings = VFDatabase(*Call).getMappings(*Call);
-          if (Mappings.size() != BaseMappings.size() ||
-              Mappings.front().ISA != BaseMappings.front().ISA ||
-              Mappings.front().ScalarName != BaseMappings.front().ScalarName ||
-              Mappings.front().VectorName != BaseMappings.front().VectorName ||
-              Mappings.front().Shape.VF != BaseMappings.front().Shape.VF ||
-              Mappings.front().Shape.Parameters !=
-                  BaseMappings.front().Shape.Parameters)
-            return InstructionsState(VL[BaseIndex], nullptr, nullptr);
-        }
-      }
+    } else if (InstOpcode == Opcode || InstOpcode == AltOpcode)
       continue;
-    }
     return InstructionsState(VL[BaseIndex], nullptr, nullptr);
   }
 
@@ -1149,7 +1097,6 @@ class BoUpSLP {
 
   /// A helper class used for scoring candidates for two consecutive lanes.
   class LookAheadHeuristics {
-    const TargetLibraryInfo &TLI;
     const DataLayout &DL;
     ScalarEvolution &SE;
     const BoUpSLP &R;
@@ -1157,11 +1104,9 @@ class BoUpSLP {
     int MaxLevel; // The maximum recursion depth for accumulating score.
 
   public:
-    LookAheadHeuristics(const TargetLibraryInfo &TLI, const DataLayout &DL,
-                        ScalarEvolution &SE, const BoUpSLP &R, int NumLanes,
-                        int MaxLevel)
-        : TLI(TLI), DL(DL), SE(SE), R(R), NumLanes(NumLanes),
-          MaxLevel(MaxLevel) {}
+    LookAheadHeuristics(const DataLayout &DL, ScalarEvolution &SE,
+                        const BoUpSLP &R, int NumLanes, int MaxLevel)
+        : DL(DL), SE(SE), R(R), NumLanes(NumLanes), MaxLevel(MaxLevel) {}
 
     // The hard-coded scores listed here are not very important, though it shall
     // be higher for better matches to improve the resulting cost. When
@@ -1183,8 +1128,6 @@ class BoUpSLP {
     static const int ScoreSplatLoads = 3;
     /// Loads from reversed memory addresses, e.g. load(A[i+1]), load(A[i]).
     static const int ScoreReversedLoads = 3;
-    /// A load candidate for masked gather.
-    static const int ScoreMaskedGatherCandidate = 1;
     /// ExtractElementInst from same vector and consecutive indexes.
     static const int ScoreConsecutiveExtracts = 4;
     /// ExtractElementInst from same vector and reversed indices.
@@ -1239,26 +1182,18 @@ class BoUpSLP {
       auto *LI1 = dyn_cast<LoadInst>(V1);
       auto *LI2 = dyn_cast<LoadInst>(V2);
       if (LI1 && LI2) {
-        if (LI1->getParent() != LI2->getParent() || !LI1->isSimple() ||
-            !LI2->isSimple())
+        if (LI1->getParent() != LI2->getParent())
           return LookAheadHeuristics::ScoreFail;
 
         Optional<int> Dist = getPointersDiff(
             LI1->getType(), LI1->getPointerOperand(), LI2->getType(),
             LI2->getPointerOperand(), DL, SE, /*StrictCheck=*/true);
-        if (!Dist || *Dist == 0) {
-          if (getUnderlyingObject(LI1->getPointerOperand()) ==
-                  getUnderlyingObject(LI2->getPointerOperand()) &&
-              R.TTI->isLegalMaskedGather(
-                  FixedVectorType::get(LI1->getType(), NumLanes),
-                  LI1->getAlign()))
-            return LookAheadHeuristics::ScoreMaskedGatherCandidate;
+        if (!Dist || *Dist == 0)
           return LookAheadHeuristics::ScoreFail;
-        }
         // The distance is too large - still may be profitable to use masked
         // loads/gathers.
         if (std::abs(*Dist) > NumLanes / 2)
-          return LookAheadHeuristics::ScoreMaskedGatherCandidate;
+          return LookAheadHeuristics::ScoreAltOpcodes;
         // This still will detect consecutive loads, but we might have "holes"
         // in some cases. It is ok for non-power-2 vectorization and may produce
         // better results. It should not affect current vectorization.
@@ -1315,7 +1250,7 @@ class BoUpSLP {
         SmallVector<Value *, 4> Ops(MainAltOps.begin(), MainAltOps.end());
         Ops.push_back(I1);
         Ops.push_back(I2);
-        InstructionsState S = getSameOpcode(Ops, TLI);
+        InstructionsState S = getSameOpcode(Ops);
         // Note: Only consider instructions with <= 2 operands to avoid
         // complexity explosion.
         if (S.getOpcode() &&
@@ -1491,7 +1426,6 @@ class BoUpSLP {
     /// A vector of operand vectors.
     SmallVector<OperandDataVec, 4> OpsVec;
 
-    const TargetLibraryInfo &TLI;
     const DataLayout &DL;
     ScalarEvolution &SE;
     const BoUpSLP &R;
@@ -1593,7 +1527,7 @@ class BoUpSLP {
     int getLookAheadScore(Value *LHS, Value *RHS, ArrayRef<Value *> MainAltOps,
                           int Lane, unsigned OpIdx, unsigned Idx,
                           bool &IsUsed) {
-      LookAheadHeuristics LookAhead(TLI, DL, SE, R, getNumLanes(),
+      LookAheadHeuristics LookAhead(DL, SE, R, getNumLanes(),
                                     LookAheadMaxDepth);
       // Keep track of the instruction stack as we recurse into the operands
       // during the look-ahead score exploration.
@@ -1815,7 +1749,7 @@ class BoUpSLP {
         // Use Boyer-Moore majority voting for finding the majority opcode and
         // the number of times it occurs.
         if (auto *I = dyn_cast<Instruction>(OpData.V)) {
-          if (!OpcodeI || !getSameOpcode({OpcodeI, I}, TLI).getOpcode() ||
+          if (!OpcodeI || !getSameOpcode({OpcodeI, I}).getOpcode() ||
               I->getParent() != Parent) {
             if (NumOpsWithSameOpcodeParent == 0) {
               NumOpsWithSameOpcodeParent = 1;
@@ -1917,9 +1851,9 @@ class BoUpSLP {
 
   public:
     /// Initialize with all the operands of the instruction vector \p RootVL.
-    VLOperands(ArrayRef<Value *> RootVL, const TargetLibraryInfo &TLI,
-               const DataLayout &DL, ScalarEvolution &SE, const BoUpSLP &R)
-        : TLI(TLI), DL(DL), SE(SE), R(R) {
+    VLOperands(ArrayRef<Value *> RootVL, const DataLayout &DL,
+               ScalarEvolution &SE, const BoUpSLP &R)
+        : DL(DL), SE(SE), R(R) {
       // Append all the operands of RootVL.
       appendOperandsOfVL(RootVL);
     }
@@ -2060,7 +1994,7 @@ class BoUpSLP {
               if (MainAltOps[OpIdx].size() != 2) {
                 OperandData &AltOp = getData(OpIdx, Lane);
                 InstructionsState OpS =
-                    getSameOpcode({MainAltOps[OpIdx].front(), AltOp.V}, TLI);
+                    getSameOpcode({MainAltOps[OpIdx].front(), AltOp.V});
                 if (OpS.getOpcode() && OpS.isAltShuffle())
                   MainAltOps[OpIdx].push_back(AltOp.V);
               }
@@ -2135,7 +2069,7 @@ class BoUpSLP {
   Optional<int>
   findBestRootPair(ArrayRef<std::pair<Value *, Value *>> Candidates,
                    int Limit = LookAheadHeuristics::ScoreFail) {
-    LookAheadHeuristics LookAhead(*TLI, *DL, *SE, *this, /*NumLanes=*/2,
+    LookAheadHeuristics LookAhead(*DL, *SE, *this, /*NumLanes=*/2,
                                   RootLookAheadMaxDepth);
     int BestScore = Limit;
     Optional<int> Index;
@@ -2310,10 +2244,12 @@ class BoUpSLP {
 
   /// Reorder commutative or alt operands to get better probability of
   /// generating vectorized code.
-  static void reorderInputsAccordingToOpcode(
-      ArrayRef<Value *> VL, SmallVectorImpl<Value *> &Left,
-      SmallVectorImpl<Value *> &Right, const TargetLibraryInfo &TLI,
-      const DataLayout &DL, ScalarEvolution &SE, const BoUpSLP &R);
+  static void reorderInputsAccordingToOpcode(ArrayRef<Value *> VL,
+                                             SmallVectorImpl<Value *> &Left,
+                                             SmallVectorImpl<Value *> &Right,
+                                             const DataLayout &DL,
+                                             ScalarEvolution &SE,
+                                             const BoUpSLP &R);
 
   /// Helper for `findExternalStoreUsersReorderIndices()`. It iterates over the
   /// users of \p TE and collects the stores. It returns the map from the store
@@ -2671,7 +2607,7 @@ class BoUpSLP {
                     return UndefValue::get(VL.front()->getType());
                   return VL[Idx];
                 });
-      InstructionsState S = getSameOpcode(Last->Scalars, *TLI);
+      InstructionsState S = getSameOpcode(Last->Scalars);
       Last->setOperations(S);
       Last->ReorderIndices.append(ReorderIndices.begin(), ReorderIndices.end());
     }
@@ -3604,7 +3540,6 @@ enum class LoadsState { Gather, Vectorize, ScatterVectorize };
 } // anonymous namespace
 
 static bool arePointersCompatible(Value *Ptr1, Value *Ptr2,
-                                  const TargetLibraryInfo &TLI,
                                   bool CompareOpcodes = true) {
   if (getUnderlyingObject(Ptr1) != getUnderlyingObject(Ptr2))
     return false;
@@ -3618,7 +3553,7 @@ static bool arePointersCompatible(Value *Ptr1, Value *Ptr2,
          ((isConstant(GEP1->getOperand(1)) &&
            isConstant(GEP2->getOperand(1))) ||
           !CompareOpcodes ||
-          getSameOpcode({GEP1->getOperand(1), GEP2->getOperand(1)}, TLI)
+          getSameOpcode({GEP1->getOperand(1), GEP2->getOperand(1)})
               .getOpcode());
 }
 
@@ -3627,7 +3562,7 @@ static bool arePointersCompatible(Value *Ptr1, Value *Ptr2,
 static LoadsState canVectorizeLoads(ArrayRef<Value *> VL, const Value *VL0,
                                     const TargetTransformInfo &TTI,
                                     const DataLayout &DL, ScalarEvolution &SE,
-                                    LoopInfo &LI, const TargetLibraryInfo &TLI,
+                                    LoopInfo &LI,
                                     SmallVectorImpl<unsigned> &Order,
                                     SmallVectorImpl<Value *> &PointerOps) {
   // Check that a vectorized load would load the same memory as a scalar
@@ -3657,8 +3592,8 @@ static LoadsState canVectorizeLoads(ArrayRef<Value *> VL, const Value *VL0,
   Order.clear();
   // Check the order of pointer operands or that all pointers are the same.
   bool IsSorted = sortPtrAccesses(PointerOps, ScalarTy, DL, SE, Order);
-  if (IsSorted || all_of(PointerOps, [&](Value *P) {
-        return arePointersCompatible(P, PointerOps.front(), TLI);
+  if (IsSorted || all_of(PointerOps, [&PointerOps](Value *P) {
+        return arePointersCompatible(P, PointerOps.front());
       })) {
     if (IsSorted) {
       Value *Ptr0;
@@ -4780,8 +4715,7 @@ static std::pair<size_t, size_t> generateKeySubkey(
 /// the given \p MainOp and \p AltOp instructions.
 static bool isAlternateInstruction(const Instruction *I,
                                    const Instruction *MainOp,
-                                   const Instruction *AltOp,
-                                   const TargetLibraryInfo &TLI);
+                                   const Instruction *AltOp);
 
 void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
                             const EdgeInfo &UserTreeIdx) {
@@ -4827,7 +4761,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
     return true;
   };
 
-  InstructionsState S = getSameOpcode(VL, *TLI);
+  InstructionsState S = getSameOpcode(VL);
 
   // Gather if we hit the RecursionMaxDepth, unless this is a load (or z/sext of
   // a load), in which case peek through to include it in the tree, without
@@ -5035,7 +4969,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
     // Reset S to make it GetElementPtr kind of node.
     const auto *It = find_if(VL, [](Value *V) { return isa<GetElementPtrInst>(V); });
     assert(It != VL.end() && "Expected at least one GEP.");
-    S = getSameOpcode(*It, *TLI);
+    S = getSameOpcode(*It);
   }
 
   // Check that all of the users of the scalars that we want to vectorize are
@@ -5239,8 +5173,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
       SmallVector<Value *> PointerOps;
       OrdersType CurrentOrder;
       TreeEntry *TE = nullptr;
-      switch (canVectorizeLoads(VL, VL0, *TTI, *DL, *SE, *LI, *TLI,
-                                CurrentOrder, PointerOps)) {
+      switch (canVectorizeLoads(VL, VL0, *TTI, *DL, *SE, *LI, CurrentOrder,
+                                PointerOps)) {
       case LoadsState::Vectorize:
         if (CurrentOrder.empty()) {
           // Original loads are consecutive and does not require reordering.
@@ -5351,7 +5285,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
         // Commutative predicate - collect + sort operands of the instructions
         // so that each side is more likely to have the same opcode.
         assert(P0 == SwapP0 && "Commutative Predicate mismatch");
-        reorderInputsAccordingToOpcode(VL, Left, Right, *TLI, *DL, *SE, *this);
+        reorderInputsAccordingToOpcode(VL, Left, Right, *DL, *SE, *this);
       } else {
         // Collect operands - commute if it uses the swapped predicate.
         for (Value *V : VL) {
@@ -5398,7 +5332,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
       // have the same opcode.
       if (isa<BinaryOperator>(VL0) && VL0->isCommutative()) {
         ValueList Left, Right;
-        reorderInputsAccordingToOpcode(VL, Left, Right, *TLI, *DL, *SE, *this);
+        reorderInputsAccordingToOpcode(VL, Left, Right, *DL, *SE, *this);
         TE->setOperand(0, Left);
         TE->setOperand(1, Right);
         buildTree_rec(Left, Depth + 1, {TE, 0});
@@ -5709,8 +5643,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
         if (!CI || all_of(VL, [](Value *V) {
               return cast<CmpInst>(V)->isCommutative();
             })) {
-          reorderInputsAccordingToOpcode(VL, Left, Right, *TLI, *DL, *SE,
-                                         *this);
+          reorderInputsAccordingToOpcode(VL, Left, Right, *DL, *SE, *this);
         } else {
           auto *MainCI = cast<CmpInst>(S.MainOp);
           auto *AltCI = cast<CmpInst>(S.AltOp);
@@ -5725,7 +5658,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
             Value *LHS = Cmp->getOperand(0);
             Value *RHS = Cmp->getOperand(1);
 
-            if (isAlternateInstruction(Cmp, MainCI, AltCI, *TLI)) {
+            if (isAlternateInstruction(Cmp, MainCI, AltCI)) {
               if (AltP == CmpInst::getSwappedPredicate(Cmp->getPredicate()))
                 std::swap(LHS, RHS);
             } else {
@@ -6019,17 +5952,16 @@ buildShuffleEntryMask(ArrayRef<Value *> VL, ArrayRef<unsigned> ReorderIndices,
 
 static bool isAlternateInstruction(const Instruction *I,
                                    const Instruction *MainOp,
-                                   const Instruction *AltOp,
-                                   const TargetLibraryInfo &TLI) {
+                                   const Instruction *AltOp) {
   if (auto *MainCI = dyn_cast<CmpInst>(MainOp)) {
     auto *AltCI = cast<CmpInst>(AltOp);
     CmpInst::Predicate MainP = MainCI->getPredicate();
     CmpInst::Predicate AltP = AltCI->getPredicate();
    assert(MainP != AltP && "Expected different main/alternate predicates.");
     auto *CI = cast<CmpInst>(I);
-    if (isCmpSameOrSwapped(MainCI, CI, TLI))
+    if (isCmpSameOrSwapped(MainCI, CI))
       return false;
-    if (isCmpSameOrSwapped(AltCI, CI, TLI))
+    if (isCmpSameOrSwapped(AltCI, CI))
       return true;
     CmpInst::Predicate P = CI->getPredicate();
     CmpInst::Predicate SwappedP = CmpInst::getSwappedPredicate(P);
@@ -6299,7 +6231,7 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
             OrdersType CurrentOrder;
             LoadsState LS =
                 canVectorizeLoads(Slice, Slice.front(), *TTI, *DL, *SE, *LI,
-                                  *TLI, CurrentOrder, PointerOps);
+                                  CurrentOrder, PointerOps);
             switch (LS) {
             case LoadsState::Vectorize:
             case LoadsState::ScatterVectorize:
@@ -7734,13 +7666,15 @@ InstructionCost BoUpSLP::getGatherCost(ArrayRef<Value *> VL) const {
 
 // Perform operand reordering on the instructions in VL and return the reordered
 // operands in Left and Right.
-void BoUpSLP::reorderInputsAccordingToOpcode(
-    ArrayRef<Value *> VL, SmallVectorImpl<Value *> &Left,
-    SmallVectorImpl<Value *> &Right, const TargetLibraryInfo &TLI,
-    const DataLayout &DL, ScalarEvolution &SE, const BoUpSLP &R) {
+void BoUpSLP::reorderInputsAccordingToOpcode(ArrayRef<Value *> VL,
+                                             SmallVectorImpl<Value *> &Left,
+                                             SmallVectorImpl<Value *> &Right,
+                                             const DataLayout &DL,
+                                             ScalarEvolution &SE,
+                                             const BoUpSLP &R) {
   if (VL.empty())
     return;
-  VLOperands Ops(VL, TLI, DL, SE, R);
+  VLOperands Ops(VL, DL, SE, R);
   // Reorder the operands in place.
   Ops.reorder();
   Left = Ops.getVL(0);
@@ -8033,13 +7967,13 @@ class ShuffleInstructionBuilder {
 
 Value *BoUpSLP::vectorizeTree(ArrayRef<Value *> VL) {
   const unsigned VF = VL.size();
-  InstructionsState S = getSameOpcode(VL, *TLI);
+  InstructionsState S = getSameOpcode(VL);
   // Special processing for GEPs bundle, which may include non-gep values.
   if (!S.getOpcode() && VL.front()->getType()->isPointerTy()) {
     const auto *It =
         find_if(VL, [](Value *V) { return isa<GetElementPtrInst>(V); });
     if (It != VL.end())
-      S = getSameOpcode(*It, *TLI);
+      S = getSameOpcode(*It);
   }
   if (S.getOpcode()) {
     if (TreeEntry *E = getTreeEntry(S.OpValue))
@@ -8757,10 +8691,9 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
       SmallVector<int> Mask;
       buildShuffleEntryMask(
           E->Scalars, E->ReorderIndices, E->ReuseShuffleIndices,
-          [E, this](Instruction *I) {
+          [E](Instruction *I) {
             assert(E->isOpcodeOrAlt(I) && "Unexpected main/alternate opcode");
-            return isAlternateInstruction(I, E->getMainOp(), E->getAltOp(),
-                                          *TLI);
+            return isAlternateInstruction(I, E->getMainOp(), E->getAltOp());
           },
           Mask, &OpScalars, &AltScalars);
 
@@ -10674,7 +10607,7 @@ bool SLPVectorizerPass::tryToVectorizeList(ArrayRef<Value *> VL, BoUpSLP &R,
 
   // Check that all of the parts are instructions of the same type,
   // we permit an alternate opcode via InstructionsState.
-  InstructionsState S = getSameOpcode(VL, *TLI);
+  InstructionsState S = getSameOpcode(VL);
   if (!S.getOpcode())
     return false;
 
@@ -11319,7 +11252,7 @@ class HorizontalReduction {
                     }
                     for (LoadInst *RLI : LIt->second) {
                       if (arePointersCompatible(RLI->getPointerOperand(),
-                                                LI->getPointerOperand(), TLI)) {
+                                                LI->getPointerOperand())) {
                         hash_code SubKey = hash_value(RLI->getPointerOperand());
                         DoNotReverseVals.insert(RLI);
                         return SubKey;
@@ -11362,7 +11295,7 @@ class HorizontalReduction {
                   }
                   for (LoadInst *RLI : LIt->second) {
                     if (arePointersCompatible(RLI->getPointerOperand(),
-                                              LI->getPointerOperand(), TLI)) {
+                                              LI->getPointerOperand())) {
                       hash_code SubKey = hash_value(RLI->getPointerOperand());
                       DoNotReverseVals.insert(RLI);
                       return SubKey;
@@ -11433,8 +11366,7 @@ class HorizontalReduction {
   }
 
   /// Attempt to vectorize the tree found by matchAssociativeReduction.
-  Value *tryToReduce(BoUpSLP &V, TargetTransformInfo *TTI,
-                     const TargetLibraryInfo &TLI) {
+  Value *tryToReduce(BoUpSLP &V, TargetTransformInfo *TTI) {
     constexpr int ReductionLimit = 4;
     constexpr unsigned RegMaxNumber = 4;
     constexpr unsigned RedValsMaxNumber = 128;
@@ -11507,7 +11439,7 @@ class HorizontalReduction {
     // Try to vectorize elements based on their type.
     for (unsigned I = 0, E = ReducedVals.size(); I < E; ++I) {
       ArrayRef<Value *> OrigReducedVals = ReducedVals[I];
-      InstructionsState S = getSameOpcode(OrigReducedVals, TLI);
+      InstructionsState S = getSameOpcode(OrigReducedVals);
       SmallVector<Value *> Candidates;
       Candidates.reserve(2 * OrigReducedVals.size());
       DenseMap<Value *, Value *> TrackedToOrig(2 * OrigReducedVals.size());
@@ -11527,7 +11459,7 @@ class HorizontalReduction {
       // Try to handle shuffled extractelements.
       if (S.getOpcode() == Instruction::ExtractElement && !S.isAltShuffle() &&
           I + 1 < E) {
-        InstructionsState NextS = getSameOpcode(ReducedVals[I + 1], TLI);
+        InstructionsState NextS = getSameOpcode(ReducedVals[I + 1]);
         if (NextS.getOpcode() == Instruction::ExtractElement &&
             !NextS.isAltShuffle()) {
           SmallVector<Value *> CommonCandidates(Candidates);
@@ -12193,7 +12125,7 @@ bool SLPVectorizerPass::vectorizeHorReduction(
     if (IsBinop || IsSelect) {
       HorizontalReduction HorRdx;
       if (HorRdx.matchAssociativeReduction(P, Inst, *SE, *DL, *TLI))
-        return HorRdx.tryToReduce(R, TTI, *TLI);
+        return HorRdx.tryToReduce(R, TTI);
     }
     return nullptr;
   };
@@ -12386,7 +12318,7 @@ tryToVectorizeSequence(SmallVectorImpl<T *> &Incoming,
 /// predicate of the second or the operands IDs are less than the operands IDs
 /// of the second cmp instruction.
 template <bool IsCompatibility>
-static bool compareCmp(Value *V, Value *V2, TargetLibraryInfo &TLI,
+static bool compareCmp(Value *V, Value *V2,
                        function_ref<bool(Instruction *)> IsDeleted) {
   auto *CI1 = cast<CmpInst>(V);
   auto *CI2 = cast<CmpInst>(V2);
@@ -12422,7 +12354,7 @@ static bool compareCmp(Value *V, Value *V2, TargetLibraryInfo &TLI,
       if (auto *I2 = dyn_cast<Instruction>(Op2)) {
         if (I1->getParent() != I2->getParent())
           return false;
-        InstructionsState S = getSameOpcode({I1, I2}, TLI);
+        InstructionsState S = getSameOpcode({I1, I2});
         if (S.getOpcode())
           continue;
         return false;
@@ -12476,15 +12408,15 @@ bool SLPVectorizerPass::vectorizeSimpleInstructions(InstSetVector &Instructions,
     }
     // Try to vectorize list of compares.
     // Sort by type, compare predicate, etc.
-    auto CompareSorter = [&](Value *V, Value *V2) {
-      return compareCmp<false>(V, V2, *TLI,
+    auto &&CompareSorter = [&R](Value *V, Value *V2) {
+      return compareCmp<false>(V, V2,
                                [&R](Instruction *I) { return R.isDeleted(I); });
     };
 
-    auto AreCompatibleCompares = [&](Value *V1, Value *V2) {
+    auto &&AreCompatibleCompares = [&R](Value *V1, Value *V2) {
       if (V1 == V2)
         return true;
-      return compareCmp<true>(V1, V2, *TLI,
+      return compareCmp<true>(V1, V2,
                               [&R](Instruction *I) { return R.isDeleted(I); });
     };
     auto Limit = [&R](Value *V) {
@@ -12566,7 +12498,7 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
                  "Different nodes should have 
diff erent DFS numbers");
           if (NodeI1 != NodeI2)
             return NodeI1->getDFSNumIn() < NodeI2->getDFSNumIn();
-          InstructionsState S = getSameOpcode({I1, I2}, *TLI);
+          InstructionsState S = getSameOpcode({I1, I2});
           if (S.getOpcode())
             continue;
           return I1->getOpcode() < I2->getOpcode();
@@ -12583,7 +12515,7 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
     }
     return ConstOrder && *ConstOrder;
   };
-  auto AreCompatiblePHIs = [&PHIToOpcodes, this](Value *V1, Value *V2) {
+  auto AreCompatiblePHIs = [&PHIToOpcodes](Value *V1, Value *V2) {
     if (V1 == V2)
       return true;
     if (V1->getType() != V2->getType())
@@ -12600,7 +12532,7 @@ bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
         if (auto *I2 = dyn_cast<Instruction>(Opcodes2[I])) {
           if (I1->getParent() != I2->getParent())
             return false;
-          InstructionsState S = getSameOpcode({I1, I2}, *TLI);
+          InstructionsState S = getSameOpcode({I1, I2});
           if (S.getOpcode())
             continue;
           return false;
@@ -12903,7 +12835,7 @@ bool SLPVectorizerPass::vectorizeStoreChains(BoUpSLP &R) {
                "Different nodes should have 
diff erent DFS numbers");
         if (NodeI1 != NodeI2)
           return NodeI1->getDFSNumIn() < NodeI2->getDFSNumIn();
-        InstructionsState S = getSameOpcode({I1, I2}, *TLI);
+        InstructionsState S = getSameOpcode({I1, I2});
         if (S.getOpcode())
           return false;
         return I1->getOpcode() < I2->getOpcode();
@@ -12915,7 +12847,7 @@ bool SLPVectorizerPass::vectorizeStoreChains(BoUpSLP &R) {
            V2->getValueOperand()->getValueID();
   };
 
-  auto &&AreCompatibleStores = [this](StoreInst *V1, StoreInst *V2) {
+  auto &&AreCompatibleStores = [](StoreInst *V1, StoreInst *V2) {
     if (V1 == V2)
       return true;
     if (V1->getPointerOperandType() != V2->getPointerOperandType())
@@ -12928,7 +12860,7 @@ bool SLPVectorizerPass::vectorizeStoreChains(BoUpSLP &R) {
       if (auto *I2 = dyn_cast<Instruction>(V2->getValueOperand())) {
         if (I1->getParent() != I2->getParent())
           return false;
-        InstructionsState S = getSameOpcode({I1, I2}, *TLI);
+        InstructionsState S = getSameOpcode({I1, I2});
         return S.getOpcode() > 0;
       }
     if (isa<Constant>(V1->getValueOperand()) &&

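Most hunks above are the mechanical half of the revert: every caller of
getSameOpcode, isCmpSameOrSwapped, canVectorizeLoads and arePointersCompatible
drops its TLI argument again. A minimal sketch of the resulting call-site shape
(mirroring, e.g., the tryToVectorizeList hunk):

    // Post-revert: no TargetLibraryInfo is threaded through the query.
    InstructionsState S = getSameOpcode(VL);
    if (!S.getOpcode())
      return false; // not a same/alternate-opcode bundle, nothing to vectorize
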
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/simplebb.ll b/llvm/test/Transforms/SLPVectorizer/X86/simplebb.ll
index 06c47b2e33854..65f521411d739 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/simplebb.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/simplebb.ll
@@ -62,11 +62,11 @@ define void @test2(double* %a, double* %b, i8* %e) {
 ; Don't vectorize volatile loads.
 define void @test_volatile_load(double* %a, double* %b, double* %c) {
 ; CHECK-LABEL: @test_volatile_load(
-; CHECK-NEXT:    [[I0:%.*]] = load volatile double, double* [[A:%.*]], align 8
-; CHECK-NEXT:    [[I1:%.*]] = load volatile double, double* [[B:%.*]], align 8
-; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds double, double* [[A]], i64 1
+; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds double, double* [[A:%.*]], i64 1
+; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds double, double* [[B:%.*]], i64 1
+; CHECK-NEXT:    [[I0:%.*]] = load volatile double, double* [[A]], align 8
+; CHECK-NEXT:    [[I1:%.*]] = load volatile double, double* [[B]], align 8
 ; CHECK-NEXT:    [[I3:%.*]] = load double, double* [[ARRAYIDX3]], align 8
-; CHECK-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds double, double* [[B]], i64 1
 ; CHECK-NEXT:    [[I4:%.*]] = load double, double* [[ARRAYIDX4]], align 8
 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> poison, double [[I0]], i32 0
 ; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[I3]], i32 1

diff --git a/llvm/test/Transforms/SLPVectorizer/X86/sin-sqrt.ll b/llvm/test/Transforms/SLPVectorizer/X86/sin-sqrt.ll
index 12d2f6a541850..8137668e978bf 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/sin-sqrt.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/sin-sqrt.ll
@@ -17,22 +17,22 @@ define void @test() {
 ; CHECK-NEXT:    [[A5:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src, i32 0, i64 5), align 8
 ; CHECK-NEXT:    [[A6:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src, i32 0, i64 6), align 8
 ; CHECK-NEXT:    [[A7:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src, i32 0, i64 7), align 8
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> poison, double [[A2]], i32 0
-; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> [[TMP1]], double [[A6]], i32 1
-; CHECK-NEXT:    [[TMP3:%.*]] = call fast <2 x double> @llvm.sin.v2f64(<2 x double> [[TMP2]])
-; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x double> poison, double [[A3]], i32 0
-; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <2 x double> [[TMP4]], double [[A7]], i32 1
-; CHECK-NEXT:    [[TMP6:%.*]] = call fast <2 x double> @llvm.sin.v2f64(<2 x double> [[TMP5]])
-; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <2 x double> poison, double [[A0]], i32 0
-; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <2 x double> [[TMP7]], double [[A4]], i32 1
-; CHECK-NEXT:    [[TMP9:%.*]] = call fast <2 x double> @llvm.sqrt.v2f64(<2 x double> [[TMP8]])
-; CHECK-NEXT:    [[TMP10:%.*]] = insertelement <2 x double> poison, double [[A1]], i32 0
-; CHECK-NEXT:    [[TMP11:%.*]] = insertelement <2 x double> [[TMP10]], double [[A5]], i32 1
-; CHECK-NEXT:    [[TMP12:%.*]] = call fast <2 x double> @llvm.sqrt.v2f64(<2 x double> [[TMP11]])
-; CHECK-NEXT:    [[TMP13:%.*]] = fadd fast <2 x double> [[TMP9]], [[TMP6]]
-; CHECK-NEXT:    [[TMP14:%.*]] = fadd fast <2 x double> [[TMP3]], [[TMP12]]
-; CHECK-NEXT:    [[TMP15:%.*]] = fadd fast <2 x double> [[TMP13]], [[TMP14]]
-; CHECK-NEXT:    store <2 x double> [[TMP15]], <2 x double>* bitcast ([8 x double]* @dst to <2 x double>*), align 8
+; CHECK-NEXT:    [[SIN0:%.*]] = call fast double @llvm.sin.f64(double [[A2]])
+; CHECK-NEXT:    [[SIN1:%.*]] = call fast double @llvm.sin.f64(double [[A3]])
+; CHECK-NEXT:    [[SQRT0:%.*]] = call fast double @llvm.sqrt.f64(double [[A0]])
+; CHECK-NEXT:    [[SQRT1:%.*]] = call fast double @llvm.sqrt.f64(double [[A1]])
+; CHECK-NEXT:    [[SIN2:%.*]] = call fast double @llvm.sin.f64(double [[A6]])
+; CHECK-NEXT:    [[SIN3:%.*]] = call fast double @llvm.sin.f64(double [[A7]])
+; CHECK-NEXT:    [[SQRT2:%.*]] = call fast double @llvm.sqrt.f64(double [[A4]])
+; CHECK-NEXT:    [[SQRT3:%.*]] = call fast double @llvm.sqrt.f64(double [[A5]])
+; CHECK-NEXT:    [[RES1:%.*]] = fadd fast double [[SQRT0]], [[SIN1]]
+; CHECK-NEXT:    [[RES2:%.*]] = fadd fast double [[SIN0]], [[SQRT1]]
+; CHECK-NEXT:    [[RES00:%.*]] = fadd fast double [[RES1]], [[RES2]]
+; CHECK-NEXT:    [[RES3:%.*]] = fadd fast double [[SQRT2]], [[SIN3]]
+; CHECK-NEXT:    [[RES4:%.*]] = fadd fast double [[SIN2]], [[SQRT3]]
+; CHECK-NEXT:    [[RES01:%.*]] = fadd fast double [[RES3]], [[RES4]]
+; CHECK-NEXT:    store double [[RES00]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst, i32 0, i64 0), align 8
+; CHECK-NEXT:    store double [[RES01]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst, i32 0, i64 1), align 8
 ; CHECK-NEXT:    ret void
 ;
   %a0 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src, i32 0, i64 0), align 8

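The sin-sqrt.ll update shows the user-visible effect of the revert: pairs of
llvm.sin/llvm.sqrt calls that the reverted patch vectorized into <2 x double>
intrinsics stay scalar again, so the CHECK lines return to individual calls. The
call-eligibility guard in getSameOpcode that the revert removes (condensed from
the first C++ hunk above):

    // Removed by this revert: a base call was only a same-opcode candidate when
    // TLI mapped it to a trivially vectorizable intrinsic or VFDatabase knew a
    // vector variant of the callee.
    if (auto *CallBase = dyn_cast<CallInst>(IBase)) {
      Intrinsic::ID BaseID = getVectorIntrinsicIDForCall(CallBase, &TLI);
      SmallVector<VFInfo> BaseMappings = VFDatabase(*CallBase).getMappings(*CallBase);
      if (!isTriviallyVectorizable(BaseID) && BaseMappings.empty())
        return InstructionsState(VL[BaseIndex], nullptr, nullptr);
    }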