[llvm] [X86] combineConcatVectorOps - concat per-lane v2f64/v4f64 shuffles into vXf64 vshufpd (PR #143017)
Alexander Kornienko via llvm-commits
llvm-commits at lists.llvm.org
Fri Jun 20 09:17:49 PDT 2025
alexfh wrote:
The reduced test case is here: https://gcc.godbolt.org/z/e8f6bxPfT. It looks like an infinite loop, given how small the input is.
Here is a profile of a few seconds of the Clang execution:
```
- 98.39% 0.00% clang-checked clang-checked [.] llvm::MachineFunctionPass::runOnFunction(llvm::Function&) ◆
llvm::MachineFunctionPass::runOnFunction(llvm::Function&) ▒
llvm::SelectionDAGISelLegacy::runOnMachineFunction(llvm::MachineFunction&) ▒
llvm::SelectionDAGISel::runOnMachineFunction(llvm::MachineFunction&) ▒
- llvm::SelectionDAGISel::SelectAllBasicBlocks(llvm::Function const&) ▒
- 98.39% llvm::SelectionDAGISel::CodeGenAndEmitDAG() ▒
- 98.13% llvm::SelectionDAG::Combine(llvm::CombineLevel, llvm::BatchAAResults*, llvm::CodeGenOptLevel) ▒
- 71.13% (anonymous namespace)::DAGCombiner::combine(llvm::SDNode*) ▒
- 45.17% llvm::X86TargetLowering::PerformDAGCombine(llvm::SDNode*, llvm::TargetLowering::DAGCombinerInfo&) const ▒
- 42.34% combineINSERT_SUBVECTOR(llvm::SDNode*, llvm::SelectionDAG&, llvm::TargetLowering::DAGCombinerInfo&, llvm::X86Subtarget const&) ▒
- 23.12% combineConcatVectorOps(llvm::SDLoc const&, llvm::MVT, llvm::ArrayRef<llvm::SDValue>, llvm::SelectionDAG&, llvm::X86Subtarget const&, unsigned int) ▒
- 21.60% EltsFromConsecutiveLoads(llvm::EVT, llvm::ArrayRef<llvm::SDValue>, llvm::SDLoc const&, llvm::SelectionDAG&, llvm::X86Subtarget const&, bool) ▒
- 16.38% EltsFromConsecutiveLoads(llvm::EVT, llvm::ArrayRef<llvm::SDValue>, llvm::SDLoc const&, llvm::SelectionDAG&, llvm::X86Subtarget const&, bool)::$_1::operator()(llvm::EVT, llvm::LoadSDNode*) const ▒
- 8.97% llvm::SelectionDAG::makeEquivalentMemoryOrdering(llvm::SDValue, llvm::SDValue) ▒
+ 3.24% llvm::SelectionDAG::getNode(unsigned int, llvm::SDLoc const&, llvm::EVT, llvm::SDValue, llvm::SDValue, llvm::SDNodeFlags) ▒
+ 3.20% llvm::SelectionDAG::ReplaceAllUsesOfValueWith(llvm::SDValue, llvm::SDValue) ▒
+ 1.90% llvm::SelectionDAG::UpdateNodeOperands(llvm::SDNode*, llvm::SDValue, llvm::SDValue) ▒
+ 6.70% llvm::SelectionDAG::getLoad(llvm::EVT, llvm::SDLoc const&, llvm::SDValue, llvm::SDValue, llvm::MachinePointerInfo, llvm::MaybeAlign, llvm::MachineMemOperand::Flags, llvm::AAMDNodes const&, llvm::MDNode cons▒
+ 2.69% llvm::MachinePointerInfo::isDereferenceable(unsigned int, llvm::LLVMContext&, llvm::DataLayout const&) const ▒
- 12.24% concatSubVectors(llvm::SDValue, llvm::SDValue, llvm::SelectionDAG&, llvm::SDLoc const&) ▒
- 10.83% insertSubVector(llvm::SDValue, llvm::SDValue, unsigned int, llvm::SelectionDAG&, llvm::SDLoc const&, unsigned int) ▒
+ 5.45% llvm::SelectionDAG::getNode(unsigned int, llvm::SDLoc const&, llvm::EVT, llvm::SDValue, llvm::SDValue, llvm::SDValue, llvm::SDNodeFlags) ▒
+ 4.06% llvm::SelectionDAG::getVectorIdxConstant(unsigned long, llvm::SDLoc const&, bool) ▒
0.88% llvm::SelectionDAG::getNode(unsigned int, llvm::SDLoc const&, llvm::EVT) ▒
+ 2.42% collectConcatOps(llvm::SDNode*, llvm::SmallVectorImpl<llvm::SDValue>&, llvm::SelectionDAG&) ▒
+ 1.25% llvm::SelectionDAG::areNonVolatileConsecutiveLoads(llvm::LoadSDNode*, llvm::LoadSDNode*, unsigned int, int) const ▒
1.21% llvm::MVT::getVectorVT(llvm::MVT, unsigned int) ▒
1.71% combineLoad(llvm::SDNode*, llvm::SelectionDAG&, llvm::TargetLowering::DAGCombinerInfo&, llvm::X86Subtarget const&) ▒
- 15.12% (anonymous namespace)::DAGCombiner::visitINSERT_SUBVECTOR(llvm::SDNode*) ▒
- 14.54% (anonymous namespace)::DAGCombiner::SimplifyDemandedVectorElts(llvm::SDValue) ▒
- 13.99% (anonymous namespace)::DAGCombiner::SimplifyDemandedVectorElts(llvm::SDValue, llvm::APInt const&, bool) ▒
- 13.30% llvm::TargetLowering::SimplifyDemandedVectorElts(llvm::SDValue, llvm::APInt const&, llvm::APInt&, llvm::APInt&, llvm::TargetLowering::TargetLoweringOpt&, unsigned int, bool) const ▒
- 8.47% llvm::TargetLowering::SimplifyDemandedVectorElts(llvm::SDValue, llvm::APInt const&, llvm::APInt&, llvm::APInt&, llvm::TargetLowering::TargetLoweringOpt&, unsigned int, bool) const ▒
- 4.76% llvm::TargetLowering::SimplifyDemandedBits(llvm::SDValue, llvm::APInt const&, llvm::APInt const&, llvm::KnownBits&, llvm::TargetLowering::TargetLoweringOpt&, unsigned int, bool) const ▒
1.09% llvm::SelectionDAG::computeKnownBits(llvm::SDValue, llvm::APInt const&, unsigned int) const ▒
+ 1.63% llvm::TargetLowering::SimplifyMultipleUseDemandedVectorElts(llvm::SDValue, llvm::APInt const&, llvm::SelectionDAG&, unsigned int) const ▒
- 7.30% (anonymous namespace)::DAGCombiner::visitLOAD(llvm::SDNode*) ▒
+ 1.69% llvm::SelectionDAG::ReplaceAllUsesOfValueWith(llvm::SDValue, llvm::SDValue) ▒
1.09% (anonymous namespace)::DAGCombiner::deleteAndRecombine(llvm::SDNode*) ▒
+ 0.95% llvm::SelectionDAG::InferPtrAlign(llvm::SDValue) const ▒
0.73% (anonymous namespace)::DAGCombiner::AddToWorklist(llvm::SDNode*, bool, bool) ▒
+ 0.66% (anonymous namespace)::DAGCombiner::FindBetterChain(llvm::SDNode*, llvm::SDValue) ▒
1.61% (anonymous namespace)::DAGCombiner::visitTokenFactor(llvm::SDNode*) ▒
- 8.52% llvm::SelectionDAG::LegalizeOp(llvm::SDNode*, llvm::SmallSetVector<llvm::SDNode*, 16u>&) ▒
- 7.80% (anonymous namespace)::SelectionDAGLegalize::LegalizeOp(llvm::SDNode*) ▒
+ 3.00% llvm::TargetLoweringBase::allowsMemoryAccessForAlignment(llvm::LLVMContext&, llvm::DataLayout const&, llvm::EVT, llvm::MachineMemOperand const&, unsigned int*) const ▒
1.20% llvm::TargetLoweringBase::getTypeConversion(llvm::LLVMContext&, llvm::EVT) const ▒
- 5.76% (anonymous namespace)::DAGCombiner::recursivelyDeleteUnusedNodes(llvm::SDNode*) ▒
0.97% llvm::SelectionDAG::DeallocateNode(llvm::SDNode*) ▒
0.96% llvm::SetVector<llvm::SDNode*, llvm::SmallVector<llvm::SDNode*, 16u>, llvm::DenseSet<llvm::SDNode*, llvm::DenseMapInfo<llvm::SDNode*, void> >, 16u>::insert(llvm::SDNode* const&) ▒
0.89% llvm::SelectionDAG::DeleteNode(llvm::SDNode*) ▒
0.61% llvm::SetVector<llvm::SDNode*, llvm::SmallVector<llvm::SDNode*, 32u>, llvm::DenseSet<llvm::SDNode*, llvm::DenseMapInfo<llvm::SDNode*, void> >, 32u>::remove(llvm::SDNode* const&) ▒
- 4.15% llvm::SelectionDAG::ReplaceAllUsesWith(llvm::SDValue, llvm::SDValue) ▒
- 3.07% llvm::SelectionDAG::AddModifiedNodeToCSEMaps(llvm::SDNode*) ▒
+ 2.59% llvm::FoldingSetBase::GetOrInsertNode(llvm::FoldingSetBase::Node*, llvm::FoldingSetBase::FoldingSetInfo const&) ▒
+ 1.84% llvm::SelectionDAG::ReplaceAllUsesWith(llvm::SDNode*, llvm::SDNode*) ▒
+ 1.07% (anonymous namespace)::DAGCombiner::AddToWorklist(llvm::SDNode*, bool, bool) ▒
0.84% llvm::SetVector<llvm::SDNode*, llvm::SmallVector<llvm::SDNode*, 32u>, llvm::DenseSet<llvm::SDNode*, llvm::DenseMapInfo<llvm::SDNode*, void> >, 32u>::insert(llvm::SDNode* const&) ▒
```
https://github.com/llvm/llvm-project/pull/143017
More information about the llvm-commits mailing list