[llvm] [SLP]Fix graph traversal in getSpillCost (PR #124984)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Jan 29 13:05:32 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-transforms
Author: Alexey Bataev (alexey-bataev)
<details>
<summary>Changes</summary>
getSpill cost relies on def-use order when performs the analysis for the
vectorized instructions live-over-calls spills.
Patch fixes it to check the dependencies based on TreeEntries and
performs actual vectorized type analysis.
---
Patch is 101.30 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/124984.diff
2 Files Affected:
- (modified) llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp (+45-42)
- (modified) llvm/test/Transforms/SLPVectorizer/RISCV/math-function.ll (+352-572)
``````````diff
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 4204f35d1a20d6..cef756627c5e2c 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -1395,7 +1395,7 @@ class BoUpSLP {
/// \returns the cost incurred by unwanted spills and fills, caused by
/// holding live values over call sites.
- InstructionCost getSpillCost() const;
+ InstructionCost getSpillCost();
/// \returns the vectorization cost of the subtree that starts at \p VL.
/// A negative number means that this is profitable.
@@ -12160,16 +12160,15 @@ bool BoUpSLP::isTreeNotExtendable() const {
return Res;
}
-InstructionCost BoUpSLP::getSpillCost() const {
+InstructionCost BoUpSLP::getSpillCost() {
// Walk from the bottom of the tree to the top, tracking which values are
// live. When we see a call instruction that is not part of our tree,
// query TTI to see if there is a cost to keeping values live over it
// (for example, if spills and fills are required).
- unsigned BundleWidth = VectorizableTree.front()->Scalars.size();
InstructionCost Cost = 0;
- SmallPtrSet<Instruction *, 4> LiveValues;
- Instruction *PrevInst = nullptr;
+ SmallPtrSet<const TreeEntry *, 4> LiveEntries;
+ const TreeEntry *Prev = nullptr;
// The entries in VectorizableTree are not necessarily ordered by their
// position in basic blocks. Collect them and order them by dominance so later
@@ -12177,61 +12176,64 @@ InstructionCost BoUpSLP::getSpillCost() const {
// different basic blocks, we only scan to the beginning of the block, so
// their order does not matter, as long as all instructions in a basic block
// are grouped together. Using dominance ensures a deterministic order.
- SmallVector<Instruction *, 16> OrderedScalars;
+ SmallVector<TreeEntry *, 16> OrderedEntries;
for (const auto &TEPtr : VectorizableTree) {
- if (TEPtr->State != TreeEntry::Vectorize)
+ if (TEPtr->isGather())
continue;
- Instruction *Inst = dyn_cast<Instruction>(TEPtr->Scalars[0]);
- if (!Inst)
- continue;
- OrderedScalars.push_back(Inst);
- }
- llvm::sort(OrderedScalars, [&](Instruction *A, Instruction *B) {
- auto *NodeA = DT->getNode(A->getParent());
- auto *NodeB = DT->getNode(B->getParent());
+ OrderedEntries.push_back(TEPtr.get());
+ }
+ llvm::stable_sort(OrderedEntries, [&](const TreeEntry *TA,
+ const TreeEntry *TB) {
+ Instruction &A = getLastInstructionInBundle(TA);
+ Instruction &B = getLastInstructionInBundle(TB);
+ auto *NodeA = DT->getNode(A.getParent());
+ auto *NodeB = DT->getNode(B.getParent());
assert(NodeA && "Should only process reachable instructions");
assert(NodeB && "Should only process reachable instructions");
assert((NodeA == NodeB) == (NodeA->getDFSNumIn() == NodeB->getDFSNumIn()) &&
"Different nodes should have different DFS numbers");
if (NodeA != NodeB)
return NodeA->getDFSNumIn() > NodeB->getDFSNumIn();
- return B->comesBefore(A);
+ return B.comesBefore(&A);
});
- for (Instruction *Inst : OrderedScalars) {
- if (!PrevInst) {
- PrevInst = Inst;
+ for (const TreeEntry *TE : OrderedEntries) {
+ if (!Prev) {
+ Prev = TE;
continue;
}
- // Update LiveValues.
- LiveValues.erase(PrevInst);
- for (auto &J : PrevInst->operands()) {
- if (isa<Instruction>(&*J) && isVectorized(&*J))
- LiveValues.insert(cast<Instruction>(&*J));
+ LiveEntries.erase(Prev);
+ for (unsigned I : seq<unsigned>(Prev->getNumOperands())) {
+ const TreeEntry *Op = getVectorizedOperand(Prev, I);
+ if (!Op)
+ continue;
+ assert(!Op->isGather() && "Expected vectorized operand.");
+ LiveEntries.insert(Op);
}
LLVM_DEBUG({
- dbgs() << "SLP: #LV: " << LiveValues.size();
- for (auto *X : LiveValues)
- dbgs() << " " << X->getName();
+ dbgs() << "SLP: #LV: " << LiveEntries.size();
+ for (auto *X : LiveEntries)
+ X->dump();
dbgs() << ", Looking at ";
- Inst->dump();
+ TE->dump();
});
// Now find the sequence of instructions between PrevInst and Inst.
unsigned NumCalls = 0;
- BasicBlock::reverse_iterator InstIt = ++Inst->getIterator().getReverse(),
- PrevInstIt =
- PrevInst->getIterator().getReverse();
+ const Instruction *PrevInst = &getLastInstructionInBundle(Prev);
+ BasicBlock::const_reverse_iterator
+ InstIt = ++getLastInstructionInBundle(TE).getIterator().getReverse(),
+ PrevInstIt = PrevInst->getIterator().getReverse();
while (InstIt != PrevInstIt) {
if (PrevInstIt == PrevInst->getParent()->rend()) {
- PrevInstIt = Inst->getParent()->rbegin();
+ PrevInstIt = getLastInstructionInBundle(TE).getParent()->rbegin();
continue;
}
- auto NoCallIntrinsic = [this](Instruction *I) {
- if (auto *II = dyn_cast<IntrinsicInst>(I)) {
+ auto NoCallIntrinsic = [this](const Instruction *I) {
+ if (const auto *II = dyn_cast<IntrinsicInst>(I)) {
if (II->isAssumeLikeIntrinsic())
return true;
IntrinsicCostAttributes ICA(II->getIntrinsicID(), *II);
@@ -12255,17 +12257,18 @@ InstructionCost BoUpSLP::getSpillCost() const {
}
if (NumCalls) {
- SmallVector<Type *, 4> V;
- for (auto *II : LiveValues) {
- auto *ScalarTy = II->getType();
- if (auto *VectorTy = dyn_cast<FixedVectorType>(ScalarTy))
- ScalarTy = VectorTy->getElementType();
- V.push_back(getWidenedType(ScalarTy, BundleWidth));
+ SmallVector<Type *, 4> EntriesTypes;
+ for (const TreeEntry *TE : LiveEntries) {
+ auto *ScalarTy = TE->getMainOp()->getType();
+ auto It = MinBWs.find(TE);
+ if (It != MinBWs.end())
+ ScalarTy = IntegerType::get(ScalarTy->getContext(), It->second.first);
+ EntriesTypes.push_back(getWidenedType(ScalarTy, TE->getVectorFactor()));
}
- Cost += NumCalls * TTI->getCostOfKeepingLiveOverCall(V);
+ Cost += NumCalls * TTI->getCostOfKeepingLiveOverCall(EntriesTypes);
}
- PrevInst = Inst;
+ Prev = TE;
}
return Cost;
diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/math-function.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/math-function.ll
index 6fbd05aaedfe5b..5bfd776512711f 100644
--- a/llvm/test/Transforms/SLPVectorizer/RISCV/math-function.ll
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/math-function.ll
@@ -149,37 +149,27 @@ define <4 x float> @exp_4x(ptr %a) {
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
-; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
-; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @expf(float [[VECEXT]])
-; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
-; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
-; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @expf(float [[VECEXT_1]])
-; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
-; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
-; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @expf(float [[VECEXT_2]])
-; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
-; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
-; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @expf(float [[VECEXT_3]])
-; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
-; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT: [[TMP2:%.*]] = call fast <2 x float> @llvm.exp.v2f32(<2 x float> [[TMP1]])
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
+; CHECK-NEXT: [[TMP5:%.*]] = call fast <2 x float> @llvm.exp.v2f32(<2 x float> [[TMP4]])
+; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
+; CHECK-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; CHECK-NEXT: ret <4 x float> [[VECINS_31]]
;
; DEFAULT-LABEL: define <4 x float> @exp_4x
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; DEFAULT-NEXT: entry:
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
-; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
-; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @expf(float [[VECEXT]])
-; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
-; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
-; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @expf(float [[VECEXT_1]])
-; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
-; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
-; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @expf(float [[VECEXT_2]])
-; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
-; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
-; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @expf(float [[VECEXT_3]])
-; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
-; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
+; DEFAULT-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 0, i32 1>
+; DEFAULT-NEXT: [[TMP2:%.*]] = call fast <2 x float> @llvm.exp.v2f32(<2 x float> [[TMP1]])
+; DEFAULT-NEXT: [[TMP3:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
+; DEFAULT-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
+; DEFAULT-NEXT: [[TMP5:%.*]] = call fast <2 x float> @llvm.exp.v2f32(<2 x float> [[TMP4]])
+; DEFAULT-NEXT: [[TMP6:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
+; DEFAULT-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; DEFAULT-NEXT: ret <4 x float> [[VECINS_31]]
;
entry:
%0 = load <4 x float>, ptr %a, align 16
@@ -206,37 +196,27 @@ define <4 x float> @int_exp_4x(ptr %a) {
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
-; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
-; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.exp.f32(float [[VECEXT]])
-; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
-; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
-; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.exp.f32(float [[VECEXT_1]])
-; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
-; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
-; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.exp.f32(float [[VECEXT_2]])
-; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
-; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
-; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.exp.f32(float [[VECEXT_3]])
-; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
-; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT: [[TMP2:%.*]] = call fast <2 x float> @llvm.exp.v2f32(<2 x float> [[TMP1]])
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
+; CHECK-NEXT: [[TMP5:%.*]] = call fast <2 x float> @llvm.exp.v2f32(<2 x float> [[TMP4]])
+; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
+; CHECK-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; CHECK-NEXT: ret <4 x float> [[VECINS_31]]
;
; DEFAULT-LABEL: define <4 x float> @int_exp_4x
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; DEFAULT-NEXT: entry:
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
-; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
-; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.exp.f32(float [[VECEXT]])
-; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
-; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
-; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.exp.f32(float [[VECEXT_1]])
-; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
-; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
-; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @llvm.exp.f32(float [[VECEXT_2]])
-; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
-; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
-; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @llvm.exp.f32(float [[VECEXT_3]])
-; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
-; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
+; DEFAULT-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 0, i32 1>
+; DEFAULT-NEXT: [[TMP2:%.*]] = call fast <2 x float> @llvm.exp.v2f32(<2 x float> [[TMP1]])
+; DEFAULT-NEXT: [[TMP3:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
+; DEFAULT-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
+; DEFAULT-NEXT: [[TMP5:%.*]] = call fast <2 x float> @llvm.exp.v2f32(<2 x float> [[TMP4]])
+; DEFAULT-NEXT: [[TMP6:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
+; DEFAULT-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; DEFAULT-NEXT: ret <4 x float> [[VECINS_31]]
;
entry:
%0 = load <4 x float>, ptr %a, align 16
@@ -263,37 +243,27 @@ define <4 x float> @log_4x(ptr %a) {
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
-; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
-; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @logf(float [[VECEXT]])
-; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
-; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
-; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @logf(float [[VECEXT_1]])
-; CHECK-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
-; CHECK-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
-; CHECK-NEXT: [[TMP3:%.*]] = tail call fast float @logf(float [[VECEXT_2]])
-; CHECK-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
-; CHECK-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
-; CHECK-NEXT: [[TMP4:%.*]] = tail call fast float @logf(float [[VECEXT_3]])
-; CHECK-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
-; CHECK-NEXT: ret <4 x float> [[VECINS_3]]
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 0, i32 1>
+; CHECK-NEXT: [[TMP2:%.*]] = call fast <2 x float> @llvm.log.v2f32(<2 x float> [[TMP1]])
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
+; CHECK-NEXT: [[TMP5:%.*]] = call fast <2 x float> @llvm.log.v2f32(<2 x float> [[TMP4]])
+; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
+; CHECK-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; CHECK-NEXT: ret <4 x float> [[VECINS_31]]
;
; DEFAULT-LABEL: define <4 x float> @log_4x
; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; DEFAULT-NEXT: entry:
; DEFAULT-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
-; DEFAULT-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
-; DEFAULT-NEXT: [[TMP1:%.*]] = tail call fast float @logf(float [[VECEXT]])
-; DEFAULT-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
-; DEFAULT-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
-; DEFAULT-NEXT: [[TMP2:%.*]] = tail call fast float @logf(float [[VECEXT_1]])
-; DEFAULT-NEXT: [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
-; DEFAULT-NEXT: [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
-; DEFAULT-NEXT: [[TMP3:%.*]] = tail call fast float @logf(float [[VECEXT_2]])
-; DEFAULT-NEXT: [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
-; DEFAULT-NEXT: [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
-; DEFAULT-NEXT: [[TMP4:%.*]] = tail call fast float @logf(float [[VECEXT_3]])
-; DEFAULT-NEXT: [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
-; DEFAULT-NEXT: ret <4 x float> [[VECINS_3]]
+; DEFAULT-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 0, i32 1>
+; DEFAULT-NEXT: [[TMP2:%.*]] = call fast <2 x float> @llvm.log.v2f32(<2 x float> [[TMP1]])
+; DEFAULT-NEXT: [[TMP3:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
+; DEFAULT-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 3>
+; DEFAULT-NEXT: [[TMP5:%.*]] = call fast <2 x float> @llvm.log.v2f32(<2 x float> [[TMP4]])
+; DEFAULT-NEXT: [[TMP6:%.*]] = shufflevector <2 x float> [[TMP5]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
+; DEFAULT-NEXT: [[VECINS_31:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; DEFAULT-NEXT: ret <4 x float> [[VECINS_31]]
;
entry:
%0 = load <4 x float>, ptr %a, align 16
@@ -320,37 +290,27 @@ define <4 x float> @int_log_4x(ptr %a) {
; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 16
-; CHECK-NEXT: [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
-; CHECK-NEXT: [[TMP1:%.*]] = tail call fast float @llvm.log.f32(float [[VECEXT]])
-; CHECK-NEXT: [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
-; CHECK-NEXT: [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
-; CHECK-NEXT: [[TMP2:%.*]] = tail call fast float @llvm.log.f32(float [[VECEX...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/124984
More information about the llvm-commits
mailing list