[llvm] 6307e4b - Revert "[SLP] NFC. Replace TreeEntry::setOperandsInOrder with VLOperands. (#113880)"

Nikita Popov via llvm-commits <llvm-commits at lists.llvm.org>
Fri Dec 6 05:27:15 PST 2024


Author: Nikita Popov
Date: 2024-12-06T14:27:03+01:00
New Revision: 6307e4b31efee4b5a396da1df2e0939ab9009f11

URL: https://github.com/llvm/llvm-project/commit/6307e4b31efee4b5a396da1df2e0939ab9009f11
DIFF: https://github.com/llvm/llvm-project/commit/6307e4b31efee4b5a396da1df2e0939ab9009f11.diff

LOG: Revert "[SLP] NFC. Replace TreeEntry::setOperandsInOrder with VLOperands. (#113880)"

This reverts commit 94fbe7e3ae7c0ce4e9a7d801e7700457a36f731d.

Causes a crash when linking mafft in ReleaseLTO-g config.

Added: 
    

Modified: 
    llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Removed: 
    


################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index eb7ab924200699..51841a842ce0b0 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -2017,9 +2017,6 @@ class BoUpSLP {
 
     /// A vector of operand vectors.
     SmallVector<OperandDataVec, 4> OpsVec;
-    /// When VL[0] is IntrinsicInst, ArgSize is CallBase::arg_size. When VL[0]
-    /// is not IntrinsicInst, ArgSize is User::getNumOperands.
-    unsigned ArgSize = 0;
 
     const TargetLibraryInfo &TLI;
     const DataLayout &DL;
@@ -2407,12 +2404,10 @@ class BoUpSLP {
       assert(!VL.empty() && "Bad VL");
       assert((empty() || VL.size() == getNumLanes()) &&
              "Expected same number of lanes");
-      // IntrinsicInst::isCommutative returns true if swapping the first "two"
-      // arguments to the intrinsic produces the same result.
       constexpr unsigned IntrinsicNumOperands = 2;
       auto *VL0 = cast<Instruction>(*find_if(VL, IsaPred<Instruction>));
-      unsigned NumOperands = VL0->getNumOperands();
-      ArgSize = isa<IntrinsicInst>(VL0) ? IntrinsicNumOperands : NumOperands;
+      unsigned NumOperands = isa<IntrinsicInst>(VL0) ? IntrinsicNumOperands
+                                                     : VL0->getNumOperands();
       OpsVec.resize(NumOperands);
       unsigned NumLanes = VL.size();
       for (unsigned OpIdx = 0; OpIdx != NumOperands; ++OpIdx) {
@@ -2445,7 +2440,7 @@ class BoUpSLP {
     }
 
     /// \returns the number of operands.
-    unsigned getNumOperands() const { return ArgSize; }
+    unsigned getNumOperands() const { return OpsVec.size(); }
 
     /// \returns the number of lanes.
     unsigned getNumLanes() const { return OpsVec[0].size(); }
@@ -2622,8 +2617,7 @@ class BoUpSLP {
         ArrayRef<OperandData> Op0 = OpsVec.front();
         for (const OperandData &Data : Op0)
           UniqueValues.insert(Data.V);
-        for (ArrayRef<OperandData> Op :
-             ArrayRef(OpsVec).slice(1, getNumOperands() - 1)) {
+        for (ArrayRef<OperandData> Op : drop_begin(OpsVec, 1)) {
           if (any_of(Op, [&UniqueValues](const OperandData &Data) {
                 return !UniqueValues.contains(Data.V);
               }))
@@ -3144,6 +3138,13 @@ class BoUpSLP {
                            SmallVector<SmallVector<std::pair<LoadInst *, int>>>,
                            8> &GatheredLoads);
 
+  /// Reorder commutative or alt operands to get better probability of
+  /// generating vectorized code.
+  static void reorderInputsAccordingToOpcode(ArrayRef<Value *> VL,
+                                             SmallVectorImpl<Value *> &Left,
+                                             SmallVectorImpl<Value *> &Right,
+                                             const BoUpSLP &R);
+
   /// Helper for `findExternalStoreUsersReorderIndices()`. It iterates over the
   /// users of \p TE and collects the stores. It returns the map from the store
   /// pointers to the collected stores.
@@ -3338,15 +3339,27 @@ class BoUpSLP {
       copy(OpVL, Operands[OpIdx].begin());
     }
 
-    /// Set this bundle's operand from \p VL.
-    void setOperand(ArrayRef<Value *> VL, const BoUpSLP &R,
-                    bool RequireReorder = false) {
-      VLOperands Ops(VL, R);
-      if (RequireReorder)
-        Ops.reorder();
-      for (unsigned I :
-           seq<unsigned>(cast<Instruction>(VL[0])->getNumOperands()))
-        setOperand(I, Ops.getVL(I));
+    /// Set the operands of this bundle in their original order.
+    void setOperandsInOrder() {
+      assert(Operands.empty() && "Already initialized?");
+      auto *I0 = cast<Instruction>(*find_if(Scalars, IsaPred<Instruction>));
+      Operands.resize(I0->getNumOperands());
+      unsigned NumLanes = Scalars.size();
+      for (unsigned OpIdx = 0, NumOperands = I0->getNumOperands();
+           OpIdx != NumOperands; ++OpIdx) {
+        Operands[OpIdx].resize(NumLanes);
+        for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
+          if (isa<PoisonValue>(Scalars[Lane])) {
+            Operands[OpIdx][Lane] =
+                PoisonValue::get(I0->getOperand(OpIdx)->getType());
+            continue;
+          }
+          auto *I = cast<Instruction>(Scalars[Lane]);
+          assert(I->getNumOperands() == NumOperands &&
+                 "Expected same number of operands");
+          Operands[OpIdx][Lane] = I->getOperand(OpIdx);
+        }
+      }
     }
 
     /// Reorders operands of the node to the given mask \p Mask.
@@ -8446,7 +8459,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
                                    {}, CurrentOrder);
       LLVM_DEBUG(dbgs() << "SLP: added inserts bundle.\n");
 
-      TE->setOperand(VL, *this);
+      TE->setOperandsInOrder();
       buildTree_rec(TE->getOperand(1), Depth + 1, {TE, 1});
       return;
     }
@@ -8467,26 +8480,27 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
           LLVM_DEBUG(dbgs() << "SLP: added a vector of loads.\n");
         else
           LLVM_DEBUG(dbgs() << "SLP: added a vector of jumbled loads.\n");
+        TE->setOperandsInOrder();
         break;
       case TreeEntry::StridedVectorize:
         // Vectorizing non-consecutive loads with `llvm.masked.gather`.
         TE = newTreeEntry(VL, TreeEntry::StridedVectorize, Bundle, S,
                           UserTreeIdx, ReuseShuffleIndices, CurrentOrder);
+        TE->setOperandsInOrder();
         LLVM_DEBUG(dbgs() << "SLP: added a vector of strided loads.\n");
         break;
       case TreeEntry::ScatterVectorize:
         // Vectorizing non-consecutive loads with `llvm.masked.gather`.
         TE = newTreeEntry(VL, TreeEntry::ScatterVectorize, Bundle, S,
                           UserTreeIdx, ReuseShuffleIndices);
+        TE->setOperandsInOrder();
+        buildTree_rec(PointerOps, Depth + 1, {TE, 0});
         LLVM_DEBUG(dbgs() << "SLP: added a vector of non-consecutive loads.\n");
         break;
       case TreeEntry::CombinedVectorize:
       case TreeEntry::NeedToGather:
         llvm_unreachable("Unexpected loads state.");
       }
-      TE->setOperand(VL, *this);
-      if (State == TreeEntry::ScatterVectorize)
-        buildTree_rec(PointerOps, Depth + 1, {TE, 0});
       return;
     }
     case Instruction::ZExt:
@@ -8524,8 +8538,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
                                    ReuseShuffleIndices);
       LLVM_DEBUG(dbgs() << "SLP: added a vector of casts.\n");
 
-      TE->setOperand(VL, *this);
-      for (unsigned I : seq<unsigned>(VL0->getNumOperands()))
+      TE->setOperandsInOrder();
+      for (unsigned I : seq<unsigned>(0, VL0->getNumOperands()))
         buildTree_rec(TE->getOperand(I), Depth + 1, {TE, I});
       if (ShuffleOrOp == Instruction::Trunc) {
         ExtraBitWidthNodes.insert(getOperandEntry(TE, 0)->Idx);
@@ -8552,15 +8566,12 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
       LLVM_DEBUG(dbgs() << "SLP: added a vector of compares.\n");
 
       ValueList Left, Right;
-      VLOperands Ops(VL, *this);
       if (cast<CmpInst>(VL0)->isCommutative()) {
         // Commutative predicate - collect + sort operands of the instructions
         // so that each side is more likely to have the same opcode.
         assert(P0 == CmpInst::getSwappedPredicate(P0) &&
                "Commutative Predicate mismatch");
-        Ops.reorder();
-        Left = Ops.getVL(0);
-        Right = Ops.getVL(1);
+        reorderInputsAccordingToOpcode(VL, Left, Right, *this);
       } else {
         // Collect operands - commute if it uses the swapped predicate.
         for (Value *V : VL) {
@@ -8621,8 +8632,20 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
                                    ReuseShuffleIndices);
       LLVM_DEBUG(dbgs() << "SLP: added a vector of un/bin op.\n");
 
-      TE->setOperand(VL, *this, isa<BinaryOperator>(VL0) && isCommutative(VL0));
-      for (unsigned I : seq<unsigned>(VL0->getNumOperands()))
+      // Sort operands of the instructions so that each side is more likely to
+      // have the same opcode.
+      if (isa<BinaryOperator>(VL0) && isCommutative(VL0)) {
+        ValueList Left, Right;
+        reorderInputsAccordingToOpcode(VL, Left, Right, *this);
+        TE->setOperand(0, Left);
+        TE->setOperand(1, Right);
+        buildTree_rec(Left, Depth + 1, {TE, 0});
+        buildTree_rec(Right, Depth + 1, {TE, 1});
+        return;
+      }
+
+      TE->setOperandsInOrder();
+      for (unsigned I : seq<unsigned>(0, VL0->getNumOperands()))
         buildTree_rec(TE->getOperand(I), Depth + 1, {TE, I});
       return;
     }
@@ -8687,7 +8710,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
         fixupOrderingIndices(CurrentOrder);
       TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
                                    ReuseShuffleIndices, CurrentOrder);
-      TE->setOperand(VL, *this);
+      TE->setOperandsInOrder();
       buildTree_rec(TE->getOperand(0), Depth + 1, {TE, 0});
       if (Consecutive)
         LLVM_DEBUG(dbgs() << "SLP: added a vector of stores.\n");
@@ -8703,13 +8726,46 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
 
       TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx,
                                    ReuseShuffleIndices);
-      TE->setOperand(VL, *this, isCommutative(VL0));
-      for (unsigned I : seq<unsigned>(CI->arg_size())) {
+      // Sort operands of the instructions so that each side is more likely to
+      // have the same opcode.
+      if (isCommutative(VL0)) {
+        ValueList Left, Right;
+        reorderInputsAccordingToOpcode(VL, Left, Right, *this);
+        TE->setOperand(0, Left);
+        TE->setOperand(1, Right);
+        SmallVector<ValueList> Operands;
+        for (unsigned I : seq<unsigned>(2, CI->arg_size())) {
+          Operands.emplace_back();
+          if (isVectorIntrinsicWithScalarOpAtArg(ID, I))
+            continue;
+          for (Value *V : VL) {
+            auto *CI2 = cast<CallInst>(V);
+            Operands.back().push_back(CI2->getArgOperand(I));
+          }
+          TE->setOperand(I, Operands.back());
+        }
+        buildTree_rec(Left, Depth + 1, {TE, 0});
+        buildTree_rec(Right, Depth + 1, {TE, 1});
+        for (unsigned I : seq<unsigned>(2, CI->arg_size())) {
+          if (Operands[I - 2].empty())
+            continue;
+          buildTree_rec(Operands[I - 2], Depth + 1, {TE, I});
+        }
+        return;
+      }
+      TE->setOperandsInOrder();
+      for (unsigned I : seq<unsigned>(0, CI->arg_size())) {
         // For scalar operands no need to create an entry since no need to
         // vectorize it.
         if (isVectorIntrinsicWithScalarOpAtArg(ID, I))
           continue;
-        buildTree_rec(TE->getOperand(I), Depth + 1, {TE, I});
+        ValueList Operands;
+        // Prepare the operand vector.
+        for (Value *V : VL) {
+          auto *CI2 = cast<CallInst>(V);
+          Operands.push_back(CI2->getArgOperand(I));
+        }
+        buildTree_rec(Operands, Depth + 1, {TE, I});
       }
       return;
     }
@@ -8720,37 +8776,43 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
 
       // Reorder operands if reordering would enable vectorization.
       auto *CI = dyn_cast<CmpInst>(VL0);
-      if (CI && any_of(VL, [](Value *V) {
-            return !isa<PoisonValue>(V) && !cast<CmpInst>(V)->isCommutative();
-          })) {
-        auto *MainCI = cast<CmpInst>(S.MainOp);
-        auto *AltCI = cast<CmpInst>(S.AltOp);
-        CmpInst::Predicate MainP = MainCI->getPredicate();
-        CmpInst::Predicate AltP = AltCI->getPredicate();
-        assert(MainP != AltP &&
-               "Expected 
diff erent main/alternate predicates.");
+      if (isa<BinaryOperator>(VL0) || CI) {
         ValueList Left, Right;
-        // Collect operands - commute if it uses the swapped predicate or
-        // alternate operation.
-        for (Value *V : VL) {
-          if (isa<PoisonValue>(V)) {
-            Left.push_back(PoisonValue::get(MainCI->getOperand(0)->getType()));
-            Right.push_back(PoisonValue::get(MainCI->getOperand(1)->getType()));
-            continue;
-          }
-          auto *Cmp = cast<CmpInst>(V);
-          Value *LHS = Cmp->getOperand(0);
-          Value *RHS = Cmp->getOperand(1);
+        if (!CI || all_of(VL, [](Value *V) {
+              return isa<PoisonValue>(V) || cast<CmpInst>(V)->isCommutative();
+            })) {
+          reorderInputsAccordingToOpcode(VL, Left, Right, *this);
+        } else {
+          auto *MainCI = cast<CmpInst>(S.MainOp);
+          auto *AltCI = cast<CmpInst>(S.AltOp);
+          CmpInst::Predicate MainP = MainCI->getPredicate();
+          CmpInst::Predicate AltP = AltCI->getPredicate();
+          assert(MainP != AltP &&
+                 "Expected 
diff erent main/alternate predicates.");
+          // Collect operands - commute if it uses the swapped predicate or
+          // alternate operation.
+          for (Value *V : VL) {
+            if (isa<PoisonValue>(V)) {
+              Left.push_back(
+                  PoisonValue::get(MainCI->getOperand(0)->getType()));
+              Right.push_back(
+                  PoisonValue::get(MainCI->getOperand(1)->getType()));
+              continue;
+            }
+            auto *Cmp = cast<CmpInst>(V);
+            Value *LHS = Cmp->getOperand(0);
+            Value *RHS = Cmp->getOperand(1);
 
-          if (isAlternateInstruction(Cmp, MainCI, AltCI, *TLI)) {
-            if (AltP == CmpInst::getSwappedPredicate(Cmp->getPredicate()))
-              std::swap(LHS, RHS);
-          } else {
-            if (MainP == CmpInst::getSwappedPredicate(Cmp->getPredicate()))
-              std::swap(LHS, RHS);
+            if (isAlternateInstruction(Cmp, MainCI, AltCI, *TLI)) {
+              if (AltP == CmpInst::getSwappedPredicate(Cmp->getPredicate()))
+                std::swap(LHS, RHS);
+            } else {
+              if (MainP == CmpInst::getSwappedPredicate(Cmp->getPredicate()))
+                std::swap(LHS, RHS);
+            }
+            Left.push_back(LHS);
+            Right.push_back(RHS);
           }
-          Left.push_back(LHS);
-          Right.push_back(RHS);
         }
         TE->setOperand(0, Left);
         TE->setOperand(1, Right);
@@ -8759,8 +8821,8 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
         return;
       }
 
-      TE->setOperand(VL, *this, isa<BinaryOperator>(VL0) || CI);
-      for (unsigned I : seq<unsigned>(VL0->getNumOperands()))
+      TE->setOperandsInOrder();
+      for (unsigned I : seq<unsigned>(0, VL0->getNumOperands()))
         buildTree_rec(TE->getOperand(I), Depth + 1, {TE, I});
       return;
     }
@@ -13465,6 +13527,21 @@ InstructionCost BoUpSLP::getGatherCost(ArrayRef<Value *> VL, bool ForPoisonSrc,
   return Cost;
 }
 
+// Perform operand reordering on the instructions in VL and return the reordered
+// operands in Left and Right.
+void BoUpSLP::reorderInputsAccordingToOpcode(ArrayRef<Value *> VL,
+                                             SmallVectorImpl<Value *> &Left,
+                                             SmallVectorImpl<Value *> &Right,
+                                             const BoUpSLP &R) {
+  if (VL.empty())
+    return;
+  VLOperands Ops(VL, R);
+  // Reorder the operands in place.
+  Ops.reorder();
+  Left = Ops.getVL(0);
+  Right = Ops.getVL(1);
+}
+
 Instruction &BoUpSLP::getLastInstructionInBundle(const TreeEntry *E) {
   auto &Res = EntryToLastInstruction.try_emplace(E).first->second;
   if (Res)
