[llvm] [SLP] Reduce number of alternate instructions, where possible (PR #123360)

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Wed Jan 29 05:08:24 PST 2025


================
@@ -8247,6 +8393,142 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
     return;
   }
 
+  // Tries to build split node.
+  auto TrySplitNode = [&, &TTI = *TTI](unsigned SmallNodeSize,
+                                       const InstructionsState &LocalState) {
+    if (VL.size() <= SmallNodeSize)
+      return false;
+
+    // If any value is already used in a split node, just gather.
+    if (any_of(VL, [&](Value *V) {
+          return ScalarsInSplitNodes.contains(V) ||
+                 ScalarToTreeEntry.contains(V);
+        })) {
+      if (TryToFindDuplicates(S))
+        newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx,
+                     ReuseShuffleIndices);
+      return true;
+    }
+    SmallVector<Value *> Op1, Op2;
+    OrdersType ReorderIndices(VL.size(), VL.size());
+    SmallBitVector Op1Indices(VL.size());
+    for (auto [Idx, V] : enumerate(VL)) {
+      auto *I = dyn_cast<Instruction>(V);
+      if (!I) {
+        Op1.push_back(V);
+        Op1Indices.set(Idx);
+        continue;
+      }
+      InstructionsState NewS = getSameOpcode({LocalState.getMainOp(), I}, *TLI);
+      if (NewS && !NewS.isAltShuffle()) {
+        Op1.push_back(V);
+        Op1Indices.set(Idx);
+        continue;
+      }
+      Op2.push_back(V);
+    }
+    Type *ScalarTy = getValueType(VL.front());
+    VectorType *VecTy = getWidenedType(ScalarTy, VL.size());
+    unsigned Opcode0 = LocalState.getOpcode();
+    unsigned Opcode1 = LocalState.getAltOpcode();
+    SmallBitVector OpcodeMask(getAltInstrMask(VL, Opcode0, Opcode1));
+    // Enable the split node only if all nodes are power-of-2/full registers
+    // and do not form a legal alternate instruction (like X86 addsub).
+    SmallPtrSet<Value *, 4> UOp1(Op1.begin(), Op1.end());
+    SmallPtrSet<Value *, 4> UOp2(Op2.begin(), Op2.end());
+    if (UOp1.size() <= 1 || UOp2.size() <= 1 ||
+        TTI.isLegalAltInstr(VecTy, Opcode0, Opcode1, OpcodeMask) ||
+        !hasFullVectorsOrPowerOf2(TTI, Op1.front()->getType(), UOp1.size()) ||
+        !hasFullVectorsOrPowerOf2(TTI, Op2.front()->getType(), UOp2.size()))
+      return false;
+    unsigned Op1Cnt = 0, Op2Cnt = Op1.size();
+    for (unsigned Idx : seq<unsigned>(VL.size())) {
+      if (Op1Indices.test(Idx)) {
+        ReorderIndices[Op1Cnt] = Idx;
+        ++Op1Cnt;
+      } else {
+        ReorderIndices[Op2Cnt] = Idx;
+        ++Op2Cnt;
+      }
+    }
+    if (isIdentityOrder(ReorderIndices))
+      ReorderIndices.clear();
+    SmallVector<int> Mask;
+    if (!ReorderIndices.empty())
+      inversePermutation(ReorderIndices, Mask);
+    unsigned NumParts = TTI.getNumberOfParts(VecTy);
+    VectorType *Op2VecTy = getWidenedType(ScalarTy, Op2.size());
+    // Check for non-profitable single-register ops, which are better
+    // represented as alternate ops.
+    if (NumParts >= VL.size())
+      return false;
+    if (NumParts <= 1 && LocalState.getMainOp()->isBinaryOp() &&
+        LocalState.getAltOp()->isBinaryOp()) {
+      bool AreShifts =
+          LocalState.getMainOp()->isShift() && LocalState.getAltOp()->isShift();
+      bool AreBitwiseLogics = LocalState.getMainOp()->isBitwiseLogicOp() &&
+                              LocalState.getAltOp()->isBitwiseLogicOp();
----------------
RKSimon wrote:

Add InstructionsState::isShift() and InstructionsState::isBitwiseLogicOp() helpers to tidy this up?

https://github.com/llvm/llvm-project/pull/123360


More information about the llvm-commits mailing list