[llvm] [SLP]Reduce number of alternate instruction, where possible (PR #123360)
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Wed Jan 29 05:08:24 PST 2025
================
@@ -8247,6 +8393,142 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth,
return;
}
+ // Tries to build a split node.
+ auto TrySplitNode = [&, &TTI = *TTI](unsigned SmallNodeSize,
+ const InstructionsState &LocalState) {
+ if (VL.size() <= SmallNodeSize)
+ return false;
+
+ // If any value is already used in a split node - just gather.
+ if (any_of(VL, [&](Value *V) {
+ return ScalarsInSplitNodes.contains(V) ||
+ ScalarToTreeEntry.contains(V);
+ })) {
+ if (TryToFindDuplicates(S))
+ newTreeEntry(VL, std::nullopt /*not vectorized*/, S, UserTreeIdx,
+ ReuseShuffleIndices);
+ return true;
+ }
+ SmallVector<Value *> Op1, Op2;
+ OrdersType ReorderIndices(VL.size(), VL.size());
+ SmallBitVector Op1Indices(VL.size());
+ for (auto [Idx, V] : enumerate(VL)) {
+ auto *I = dyn_cast<Instruction>(V);
+ if (!I) {
+ Op1.push_back(V);
+ Op1Indices.set(Idx);
+ continue;
+ }
+ InstructionsState NewS = getSameOpcode({LocalState.getMainOp(), I}, *TLI);
+ if (NewS && !NewS.isAltShuffle()) {
+ Op1.push_back(V);
+ Op1Indices.set(Idx);
+ continue;
+ }
+ Op2.push_back(V);
+ }
+ Type *ScalarTy = getValueType(VL.front());
+ VectorType *VecTy = getWidenedType(ScalarTy, VL.size());
+ unsigned Opcode0 = LocalState.getOpcode();
+ unsigned Opcode1 = LocalState.getAltOpcode();
+ SmallBitVector OpcodeMask(getAltInstrMask(VL, Opcode0, Opcode1));
+ // Enable the split node only if all nodes are power-of-2/full registers
+ // and do not form a legal alternate instruction (like X86 addsub).
+ SmallPtrSet<Value *, 4> UOp1(Op1.begin(), Op1.end());
+ SmallPtrSet<Value *, 4> UOp2(Op2.begin(), Op2.end());
+ if (UOp1.size() <= 1 || UOp2.size() <= 1 ||
+ TTI.isLegalAltInstr(VecTy, Opcode0, Opcode1, OpcodeMask) ||
+ !hasFullVectorsOrPowerOf2(TTI, Op1.front()->getType(), UOp1.size()) ||
+ !hasFullVectorsOrPowerOf2(TTI, Op2.front()->getType(), UOp2.size()))
+ return false;
+ unsigned Op1Cnt = 0, Op2Cnt = Op1.size();
+ for (unsigned Idx : seq<unsigned>(VL.size())) {
+ if (Op1Indices.test(Idx)) {
+ ReorderIndices[Op1Cnt] = Idx;
+ ++Op1Cnt;
+ } else {
+ ReorderIndices[Op2Cnt] = Idx;
+ ++Op2Cnt;
+ }
+ }
+ if (isIdentityOrder(ReorderIndices))
+ ReorderIndices.clear();
+ SmallVector<int> Mask;
+ if (!ReorderIndices.empty())
+ inversePermutation(ReorderIndices, Mask);
+ unsigned NumParts = TTI.getNumberOfParts(VecTy);
+ VectorType *Op2VecTy = getWidenedType(ScalarTy, Op2.size());
+ // Check for non-profitable single-register ops, which are better
+ // represented as alternate ops.
+ if (NumParts >= VL.size())
+ return false;
+ if (NumParts <= 1 && LocalState.getMainOp()->isBinaryOp() &&
+ LocalState.getAltOp()->isBinaryOp()) {
+ bool AreShifts =
+ LocalState.getMainOp()->isShift() && LocalState.getAltOp()->isShift();
+ bool AreBitwiseLogics = LocalState.getMainOp()->isBitwiseLogicOp() &&
+ LocalState.getAltOp()->isBitwiseLogicOp();
----------------
RKSimon wrote:
Add InstructionsState::isShift() and InstructionsState::isBitwiseLogicOp() helpers to tidy this up?
https://github.com/llvm/llvm-project/pull/123360
More information about the llvm-commits
mailing list