[llvm] 0e1312f - [SLP][X86]Fix the cost of reused gathers/buildvectors and floats insert.
Alexey Bataev via llvm-commits
llvm-commits at lists.llvm.org
Thu Apr 20 09:37:07 PDT 2023
Author: Alexey Bataev
Date: 2023-04-20T09:34:46-07:00
New Revision: 0e1312fbe07c0c14b0317ef62163326b086e0e58
URL: https://github.com/llvm/llvm-project/commit/0e1312fbe07c0c14b0317ef62163326b086e0e58
DIFF: https://github.com/llvm/llvm-project/commit/0e1312fbe07c0c14b0317ef62163326b086e0e58.diff
LOG: [SLP][X86]Fix the cost of reused gathers/buildvectors and floats insert.
There are 2 problems in the cost estimation for buildvector/gather.
1. If the buildvector/gather node is the same as another one node, need
to estimate the cost of this node as 0.
2. The cost of inserting float point register to non-poison vector is
not 0, it should not be considered free.
Differential Revision: https://reviews.llvm.org/D148801
Added:
Modified:
llvm/lib/Target/X86/X86TargetTransformInfo.cpp
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
llvm/test/Analysis/CostModel/X86/vector-insert-value.ll
llvm/test/Transforms/SLPVectorizer/X86/buildvector-float-and-extract-lane1.ll
llvm/test/Transforms/VectorCombine/X86/extract-fneg-insert.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index 16de19fdc294f..ceeef0f45bde8 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -4377,7 +4377,9 @@ InstructionCost X86TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
// Floating point scalars are already located in index #0.
// Many insertions to #0 can fold away for scalar fp-ops, so let's assume
// true for all.
- if (ScalarType->isFloatingPointTy())
+ if (ScalarType->isFloatingPointTy() &&
+ (Opcode != Instruction::InsertElement || !Op0 ||
+ isa<UndefValue>(Op0)))
return RegisterFileMoveCost;
if (Opcode == Instruction::InsertElement &&
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index abe25317fca55..83f16f29e9587 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -7133,6 +7133,16 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
if (GatherShuffle) {
assert((Entries.size() == 1 || Entries.size() == 2) &&
"Expected shuffle of 1 or 2 entries.");
+ if (*GatherShuffle == TTI::SK_PermuteSingleSrc &&
+ Entries.front()->isSame(E->Scalars)) {
+ // Perfect match in the graph, will reuse the previously vectorized
+ // node. Cost is 0.
+ LLVM_DEBUG(
+ dbgs()
+ << "SLP: perfect diamond match for gather bundle that starts with "
+ << *VL.front() << ".\n");
+ return 0;
+ }
if (!Resized) {
unsigned VF1 = Entries.front()->getVectorFactor();
unsigned VF2 = Entries.back()->getVectorFactor();
@@ -7145,21 +7155,9 @@ InstructionCost BoUpSLP::getEntryCost(const TreeEntry *E,
if (Mask[I] != UndefMaskElem)
GatheredScalars[I] = PoisonValue::get(ScalarTy);
}
- LLVM_DEBUG(
- int Limit = Mask.size() * 2;
- if (*GatherShuffle == TTI::SK_PermuteSingleSrc &&
- all_of(Mask, [=](int Idx) { return Idx < Limit; }) &&
- ShuffleVectorInst::isIdentityMask(Mask)) {
- // Perfect match in the graph, will reuse the previously
- // vectorized node. Cost is 0.
- dbgs() << "SLP: perfect diamond match for gather bundle "
- "that starts with "
- << *VL.front() << ".\n";
- } else {
- dbgs() << "SLP: shuffled " << Entries.size()
- << " entries for bundle that starts with " << *VL.front()
- << ".\n";
- });
+ LLVM_DEBUG(dbgs() << "SLP: shuffled " << Entries.size()
+ << " entries for bundle that starts with "
+ << *VL.front() << ".\n";);
if (Entries.size() == 1)
Estimator.add(Entries.front(), Mask);
else
@@ -9585,6 +9583,27 @@ Value *BoUpSLP::createBuildVector(const TreeEntry *E) {
}
assert((Entries.size() == 1 || Entries.size() == 2) &&
"Expected shuffle of 1 or 2 entries.");
+ if (*GatherShuffle == TTI::SK_PermuteSingleSrc &&
+ Entries.front()->isSame(E->Scalars)) {
+ // Perfect match in the graph, will reuse the previously vectorized
+ // node. Cost is 0.
+ LLVM_DEBUG(
+ dbgs()
+ << "SLP: perfect diamond match for gather bundle that starts with "
+ << *E->Scalars.front() << ".\n");
+ // Restore the mask for previous partially matched values.
+ for (auto [I, V] : enumerate(E->Scalars)) {
+ if (isa<PoisonValue>(V)) {
+ Mask[I] = UndefMaskElem;
+ continue;
+ }
+ if (Mask[I] == UndefMaskElem)
+ Mask[I] = Entries.front()->findLaneForValue(V);
+ }
+ ShuffleBuilder.add(Entries.front()->VectorizedValue, Mask);
+ Vec = ShuffleBuilder.finalize(E->ReuseShuffleIndices);
+ return Vec;
+ }
if (!Resized) {
unsigned VF1 = Entries.front()->getVectorFactor();
unsigned VF2 = Entries.back()->getVectorFactor();
diff --git a/llvm/test/Analysis/CostModel/X86/vector-insert-value.ll b/llvm/test/Analysis/CostModel/X86/vector-insert-value.ll
index 1053c19641667..80613b65e99d3 100644
--- a/llvm/test/Analysis/CostModel/X86/vector-insert-value.ll
+++ b/llvm/test/Analysis/CostModel/X86/vector-insert-value.ll
@@ -16,43 +16,43 @@
define i32 @insert_double(i32 %arg, double %val, <2 x double> %src128, <4 x double> %src256, <8 x double> %src512) {
; SSE-LABEL: 'insert_double'
; SSE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2f64_a = insertelement <2 x double> %src128, double %val, i32 %arg
-; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f64_0 = insertelement <2 x double> %src128, double %val, i32 0
+; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f64_0 = insertelement <2 x double> %src128, double %val, i32 0
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f64_1 = insertelement <2 x double> %src128, double %val, i32 1
; SSE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v4f64_a = insertelement <4 x double> %src256, double %val, i32 %arg
-; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f64_0 = insertelement <4 x double> %src256, double %val, i32 0
+; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f64_0 = insertelement <4 x double> %src256, double %val, i32 0
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f64_3 = insertelement <4 x double> %src256, double %val, i32 3
; SSE-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v8f64_a = insertelement <8 x double> %src512, double %val, i32 %arg
-; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f64_0 = insertelement <8 x double> %src512, double %val, i32 0
+; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f64_0 = insertelement <8 x double> %src512, double %val, i32 0
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f64_3 = insertelement <8 x double> %src512, double %val, i32 3
-; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f64_4 = insertelement <8 x double> %src512, double %val, i32 4
+; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f64_4 = insertelement <8 x double> %src512, double %val, i32 4
; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f64_7 = insertelement <8 x double> %src512, double %val, i32 7
; SSE-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX-LABEL: 'insert_double'
; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2f64_a = insertelement <2 x double> %src128, double %val, i32 %arg
-; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f64_0 = insertelement <2 x double> %src128, double %val, i32 0
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f64_0 = insertelement <2 x double> %src128, double %val, i32 0
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f64_1 = insertelement <2 x double> %src128, double %val, i32 1
; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4f64_a = insertelement <4 x double> %src256, double %val, i32 %arg
-; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f64_0 = insertelement <4 x double> %src256, double %val, i32 0
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f64_0 = insertelement <4 x double> %src256, double %val, i32 0
; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4f64_3 = insertelement <4 x double> %src256, double %val, i32 3
; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v8f64_a = insertelement <8 x double> %src512, double %val, i32 %arg
-; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f64_0 = insertelement <8 x double> %src512, double %val, i32 0
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f64_0 = insertelement <8 x double> %src512, double %val, i32 0
; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8f64_3 = insertelement <8 x double> %src512, double %val, i32 3
-; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f64_4 = insertelement <8 x double> %src512, double %val, i32 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f64_4 = insertelement <8 x double> %src512, double %val, i32 4
; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8f64_7 = insertelement <8 x double> %src512, double %val, i32 7
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512-LABEL: 'insert_double'
; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2f64_a = insertelement <2 x double> %src128, double %val, i32 %arg
-; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f64_0 = insertelement <2 x double> %src128, double %val, i32 0
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f64_0 = insertelement <2 x double> %src128, double %val, i32 0
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f64_1 = insertelement <2 x double> %src128, double %val, i32 1
; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4f64_a = insertelement <4 x double> %src256, double %val, i32 %arg
-; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f64_0 = insertelement <4 x double> %src256, double %val, i32 0
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f64_0 = insertelement <4 x double> %src256, double %val, i32 0
; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4f64_3 = insertelement <4 x double> %src256, double %val, i32 3
; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8f64_a = insertelement <8 x double> %src512, double %val, i32 %arg
-; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f64_0 = insertelement <8 x double> %src512, double %val, i32 0
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f64_0 = insertelement <8 x double> %src512, double %val, i32 0
; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8f64_3 = insertelement <8 x double> %src512, double %val, i32 3
-; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f64_4 = insertelement <8 x double> %src512, double %val, i32 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8f64_4 = insertelement <8 x double> %src512, double %val, i32 4
; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8f64_7 = insertelement <8 x double> %src512, double %val, i32 7
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
@@ -76,153 +76,153 @@ define i32 @insert_double(i32 %arg, double %val, <2 x double> %src128, <4 x doub
define i32 @insert_float(i32 %arg, float %val, <2 x float> %src64, <4 x float> %src128, <8 x float> %src256, <16 x float> %src512) {
; SSE2-LABEL: 'insert_float'
; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2f32_a = insertelement <2 x float> %src64, float %val, i32 %arg
-; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f32_0 = insertelement <2 x float> %src64, float %val, i32 0
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f32_0 = insertelement <2 x float> %src64, float %val, i32 0
; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f32_1 = insertelement <2 x float> %src64, float %val, i32 1
; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4f32_a = insertelement <4 x float> %src128, float %val, i32 %arg
-; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f32_0 = insertelement <4 x float> %src128, float %val, i32 0
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f32_0 = insertelement <4 x float> %src128, float %val, i32 0
; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f32_3 = insertelement <4 x float> %src128, float %val, i32 3
; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v8f32_a = insertelement <8 x float> %src256, float %val, i32 %arg
-; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f32_0 = insertelement <8 x float> %src256, float %val, i32 0
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f32_0 = insertelement <8 x float> %src256, float %val, i32 0
; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f32_3 = insertelement <8 x float> %src256, float %val, i32 3
-; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f32_4 = insertelement <8 x float> %src256, float %val, i32 4
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f32_4 = insertelement <8 x float> %src256, float %val, i32 4
; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f32_7 = insertelement <8 x float> %src256, float %val, i32 7
; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v16f32_a = insertelement <16 x float> %src512, float %val, i32 %arg
-; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16f32_0 = insertelement <16 x float> %src512, float %val, i32 0
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16f32_0 = insertelement <16 x float> %src512, float %val, i32 0
; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16f32_3 = insertelement <16 x float> %src512, float %val, i32 3
-; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16f32_8 = insertelement <16 x float> %src512, float %val, i32 8
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16f32_8 = insertelement <16 x float> %src512, float %val, i32 8
; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16f32_15 = insertelement <16 x float> %src512, float %val, i32 15
; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE3-LABEL: 'insert_float'
; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2f32_a = insertelement <2 x float> %src64, float %val, i32 %arg
-; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f32_0 = insertelement <2 x float> %src64, float %val, i32 0
+; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f32_0 = insertelement <2 x float> %src64, float %val, i32 0
; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f32_1 = insertelement <2 x float> %src64, float %val, i32 1
; SSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4f32_a = insertelement <4 x float> %src128, float %val, i32 %arg
-; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f32_0 = insertelement <4 x float> %src128, float %val, i32 0
+; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f32_0 = insertelement <4 x float> %src128, float %val, i32 0
; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f32_3 = insertelement <4 x float> %src128, float %val, i32 3
; SSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v8f32_a = insertelement <8 x float> %src256, float %val, i32 %arg
-; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f32_0 = insertelement <8 x float> %src256, float %val, i32 0
+; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f32_0 = insertelement <8 x float> %src256, float %val, i32 0
; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f32_3 = insertelement <8 x float> %src256, float %val, i32 3
-; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f32_4 = insertelement <8 x float> %src256, float %val, i32 4
+; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f32_4 = insertelement <8 x float> %src256, float %val, i32 4
; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f32_7 = insertelement <8 x float> %src256, float %val, i32 7
; SSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v16f32_a = insertelement <16 x float> %src512, float %val, i32 %arg
-; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16f32_0 = insertelement <16 x float> %src512, float %val, i32 0
+; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16f32_0 = insertelement <16 x float> %src512, float %val, i32 0
; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16f32_3 = insertelement <16 x float> %src512, float %val, i32 3
-; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16f32_8 = insertelement <16 x float> %src512, float %val, i32 8
+; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16f32_8 = insertelement <16 x float> %src512, float %val, i32 8
; SSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16f32_15 = insertelement <16 x float> %src512, float %val, i32 15
; SSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSSE3-LABEL: 'insert_float'
; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2f32_a = insertelement <2 x float> %src64, float %val, i32 %arg
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f32_0 = insertelement <2 x float> %src64, float %val, i32 0
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f32_0 = insertelement <2 x float> %src64, float %val, i32 0
; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f32_1 = insertelement <2 x float> %src64, float %val, i32 1
; SSSE3-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4f32_a = insertelement <4 x float> %src128, float %val, i32 %arg
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f32_0 = insertelement <4 x float> %src128, float %val, i32 0
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f32_0 = insertelement <4 x float> %src128, float %val, i32 0
; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f32_3 = insertelement <4 x float> %src128, float %val, i32 3
; SSSE3-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v8f32_a = insertelement <8 x float> %src256, float %val, i32 %arg
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f32_0 = insertelement <8 x float> %src256, float %val, i32 0
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f32_0 = insertelement <8 x float> %src256, float %val, i32 0
; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f32_3 = insertelement <8 x float> %src256, float %val, i32 3
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f32_4 = insertelement <8 x float> %src256, float %val, i32 4
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f32_4 = insertelement <8 x float> %src256, float %val, i32 4
; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f32_7 = insertelement <8 x float> %src256, float %val, i32 7
; SSSE3-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v16f32_a = insertelement <16 x float> %src512, float %val, i32 %arg
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16f32_0 = insertelement <16 x float> %src512, float %val, i32 0
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16f32_0 = insertelement <16 x float> %src512, float %val, i32 0
; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16f32_3 = insertelement <16 x float> %src512, float %val, i32 3
-; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16f32_8 = insertelement <16 x float> %src512, float %val, i32 8
+; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16f32_8 = insertelement <16 x float> %src512, float %val, i32 8
; SSSE3-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16f32_15 = insertelement <16 x float> %src512, float %val, i32 15
; SSSE3-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SSE4-LABEL: 'insert_float'
; SSE4-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2f32_a = insertelement <2 x float> %src64, float %val, i32 %arg
-; SSE4-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f32_0 = insertelement <2 x float> %src64, float %val, i32 0
+; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32_0 = insertelement <2 x float> %src64, float %val, i32 0
; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32_1 = insertelement <2 x float> %src64, float %val, i32 1
; SSE4-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4f32_a = insertelement <4 x float> %src128, float %val, i32 %arg
-; SSE4-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f32_0 = insertelement <4 x float> %src128, float %val, i32 0
+; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f32_0 = insertelement <4 x float> %src128, float %val, i32 0
; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f32_3 = insertelement <4 x float> %src128, float %val, i32 3
; SSE4-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v8f32_a = insertelement <8 x float> %src256, float %val, i32 %arg
-; SSE4-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f32_0 = insertelement <8 x float> %src256, float %val, i32 0
+; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f32_0 = insertelement <8 x float> %src256, float %val, i32 0
; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f32_3 = insertelement <8 x float> %src256, float %val, i32 3
-; SSE4-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f32_4 = insertelement <8 x float> %src256, float %val, i32 4
+; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f32_4 = insertelement <8 x float> %src256, float %val, i32 4
; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f32_7 = insertelement <8 x float> %src256, float %val, i32 7
; SSE4-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v16f32_a = insertelement <16 x float> %src512, float %val, i32 %arg
-; SSE4-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16f32_0 = insertelement <16 x float> %src512, float %val, i32 0
+; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f32_0 = insertelement <16 x float> %src512, float %val, i32 0
; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f32_3 = insertelement <16 x float> %src512, float %val, i32 3
-; SSE4-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16f32_8 = insertelement <16 x float> %src512, float %val, i32 8
+; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f32_8 = insertelement <16 x float> %src512, float %val, i32 8
; SSE4-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f32_15 = insertelement <16 x float> %src512, float %val, i32 15
; SSE4-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX-LABEL: 'insert_float'
; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2f32_a = insertelement <2 x float> %src64, float %val, i32 %arg
-; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f32_0 = insertelement <2 x float> %src64, float %val, i32 0
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32_0 = insertelement <2 x float> %src64, float %val, i32 0
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32_1 = insertelement <2 x float> %src64, float %val, i32 1
; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4f32_a = insertelement <4 x float> %src128, float %val, i32 %arg
-; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f32_0 = insertelement <4 x float> %src128, float %val, i32 0
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f32_0 = insertelement <4 x float> %src128, float %val, i32 0
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f32_3 = insertelement <4 x float> %src128, float %val, i32 3
; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8f32_a = insertelement <8 x float> %src256, float %val, i32 %arg
-; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f32_0 = insertelement <8 x float> %src256, float %val, i32 0
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f32_0 = insertelement <8 x float> %src256, float %val, i32 0
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f32_3 = insertelement <8 x float> %src256, float %val, i32 3
-; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f32_4 = insertelement <8 x float> %src256, float %val, i32 4
+; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8f32_4 = insertelement <8 x float> %src256, float %val, i32 4
; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8f32_7 = insertelement <8 x float> %src256, float %val, i32 7
; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v16f32_a = insertelement <16 x float> %src512, float %val, i32 %arg
-; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16f32_0 = insertelement <16 x float> %src512, float %val, i32 0
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f32_0 = insertelement <16 x float> %src512, float %val, i32 0
; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f32_3 = insertelement <16 x float> %src512, float %val, i32 3
-; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16f32_8 = insertelement <16 x float> %src512, float %val, i32 8
+; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f32_8 = insertelement <16 x float> %src512, float %val, i32 8
; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16f32_15 = insertelement <16 x float> %src512, float %val, i32 15
; AVX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; AVX512-LABEL: 'insert_float'
; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2f32_a = insertelement <2 x float> %src64, float %val, i32 %arg
-; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f32_0 = insertelement <2 x float> %src64, float %val, i32 0
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32_0 = insertelement <2 x float> %src64, float %val, i32 0
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32_1 = insertelement <2 x float> %src64, float %val, i32 1
; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4f32_a = insertelement <4 x float> %src128, float %val, i32 %arg
-; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f32_0 = insertelement <4 x float> %src128, float %val, i32 0
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f32_0 = insertelement <4 x float> %src128, float %val, i32 0
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f32_3 = insertelement <4 x float> %src128, float %val, i32 3
; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8f32_a = insertelement <8 x float> %src256, float %val, i32 %arg
-; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f32_0 = insertelement <8 x float> %src256, float %val, i32 0
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f32_0 = insertelement <8 x float> %src256, float %val, i32 0
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f32_3 = insertelement <8 x float> %src256, float %val, i32 3
-; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f32_4 = insertelement <8 x float> %src256, float %val, i32 4
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8f32_4 = insertelement <8 x float> %src256, float %val, i32 4
; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8f32_7 = insertelement <8 x float> %src256, float %val, i32 7
; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16f32_a = insertelement <16 x float> %src512, float %val, i32 %arg
-; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16f32_0 = insertelement <16 x float> %src512, float %val, i32 0
+; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f32_0 = insertelement <16 x float> %src512, float %val, i32 0
; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f32_3 = insertelement <16 x float> %src512, float %val, i32 3
-; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16f32_8 = insertelement <16 x float> %src512, float %val, i32 8
+; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16f32_8 = insertelement <16 x float> %src512, float %val, i32 8
; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v16f32_15 = insertelement <16 x float> %src512, float %val, i32 15
; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; SLM-LABEL: 'insert_float'
; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2f32_a = insertelement <2 x float> %src64, float %val, i32 %arg
-; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f32_0 = insertelement <2 x float> %src64, float %val, i32 0
+; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32_0 = insertelement <2 x float> %src64, float %val, i32 0
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32_1 = insertelement <2 x float> %src64, float %val, i32 1
; SLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4f32_a = insertelement <4 x float> %src128, float %val, i32 %arg
-; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f32_0 = insertelement <4 x float> %src128, float %val, i32 0
+; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f32_0 = insertelement <4 x float> %src128, float %val, i32 0
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f32_3 = insertelement <4 x float> %src128, float %val, i32 3
; SLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v8f32_a = insertelement <8 x float> %src256, float %val, i32 %arg
-; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f32_0 = insertelement <8 x float> %src256, float %val, i32 0
+; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f32_0 = insertelement <8 x float> %src256, float %val, i32 0
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f32_3 = insertelement <8 x float> %src256, float %val, i32 3
-; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f32_4 = insertelement <8 x float> %src256, float %val, i32 4
+; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f32_4 = insertelement <8 x float> %src256, float %val, i32 4
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f32_7 = insertelement <8 x float> %src256, float %val, i32 7
; SLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v16f32_a = insertelement <16 x float> %src512, float %val, i32 %arg
-; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16f32_0 = insertelement <16 x float> %src512, float %val, i32 0
+; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f32_0 = insertelement <16 x float> %src512, float %val, i32 0
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f32_3 = insertelement <16 x float> %src512, float %val, i32 3
-; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16f32_8 = insertelement <16 x float> %src512, float %val, i32 8
+; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f32_8 = insertelement <16 x float> %src512, float %val, i32 8
; SLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f32_15 = insertelement <16 x float> %src512, float %val, i32 15
; SLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
; GLM-LABEL: 'insert_float'
; GLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2f32_a = insertelement <2 x float> %src64, float %val, i32 %arg
-; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v2f32_0 = insertelement <2 x float> %src64, float %val, i32 0
+; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32_0 = insertelement <2 x float> %src64, float %val, i32 0
; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32_1 = insertelement <2 x float> %src64, float %val, i32 1
; GLM-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4f32_a = insertelement <4 x float> %src128, float %val, i32 %arg
-; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v4f32_0 = insertelement <4 x float> %src128, float %val, i32 0
+; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f32_0 = insertelement <4 x float> %src128, float %val, i32 0
; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f32_3 = insertelement <4 x float> %src128, float %val, i32 3
; GLM-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v8f32_a = insertelement <8 x float> %src256, float %val, i32 %arg
-; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f32_0 = insertelement <8 x float> %src256, float %val, i32 0
+; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f32_0 = insertelement <8 x float> %src256, float %val, i32 0
; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f32_3 = insertelement <8 x float> %src256, float %val, i32 3
-; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v8f32_4 = insertelement <8 x float> %src256, float %val, i32 4
+; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f32_4 = insertelement <8 x float> %src256, float %val, i32 4
; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f32_7 = insertelement <8 x float> %src256, float %val, i32 7
; GLM-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v16f32_a = insertelement <16 x float> %src512, float %val, i32 %arg
-; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16f32_0 = insertelement <16 x float> %src512, float %val, i32 0
+; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f32_0 = insertelement <16 x float> %src512, float %val, i32 0
; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f32_3 = insertelement <16 x float> %src512, float %val, i32 3
-; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %v16f32_8 = insertelement <16 x float> %src512, float %val, i32 8
+; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f32_8 = insertelement <16 x float> %src512, float %val, i32 8
; GLM-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16f32_15 = insertelement <16 x float> %src512, float %val, i32 15
; GLM-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef
;
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-float-and-extract-lane1.ll b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-float-and-extract-lane1.ll
index 7c6e05992c353..df2160e79d79a 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/buildvector-float-and-extract-lane1.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/buildvector-float-and-extract-lane1.ll
@@ -7,28 +7,24 @@
; both into a single vector.
; So this code should not be vectorized.
-; YAML: --- !Passed
+; YAML: --- !Missed
; YAML: Pass: slp-vectorizer
-; YAML: Name: VectorizedList
+; YAML: Name: NotBeneficial
; YAML: Function: test
; YAML: Args:
-; YAML: - String: 'SLP vectorized with cost '
-; YAML: - Cost: '-2'
-; YAML: - String: ' and with tree size '
-; YAML: - TreeSize: '3'
+; YAML: - String: 'List vectorization was possible but not beneficial with cost '
+; YAML: - Cost: '0'
+; YAML: - String: ' >= '
+; YAML: - Treshold: '0'
; YAML: ...
define void @test(<4 x float> %vec, float %a, float %b, ptr %ptr) {
; CHECK-LABEL: define void @test
; CHECK-SAME: (<4 x float> [[VEC:%.*]], float [[A:%.*]], float [[B:%.*]], ptr [[PTR:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[FADD:%.*]] = fadd float [[A]], [[B]]
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[VEC]], <4 x float> poison, <2 x i32> <i32 undef, i32 1>
-; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x float> [[TMP1]], float [[FADD]], i32 0
-; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
-; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[VEC]], <4 x float> [[TMP3]], <2 x i32> <i32 4, i32 1>
-; CHECK-NEXT: [[TMP5:%.*]] = fsub <2 x float> [[TMP2]], [[TMP4]]
-; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x float> [[TMP5]], i32 0
-; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x float> [[TMP5]], i32 1
-; CHECK-NEXT: [[ROOT:%.*]] = fadd float [[TMP6]], [[TMP7]]
+; CHECK-NEXT: [[EXTR1:%.*]] = extractelement <4 x float> [[VEC]], i64 1
+; CHECK-NEXT: [[FSUB0:%.*]] = fsub float [[FADD]], [[FADD]]
+; CHECK-NEXT: [[FSUB1:%.*]] = fsub float [[EXTR1]], [[EXTR1]]
+; CHECK-NEXT: [[ROOT:%.*]] = fadd float [[FSUB0]], [[FSUB1]]
; CHECK-NEXT: store float [[ROOT]], ptr [[PTR]], align 4
; CHECK-NEXT: ret void
;
diff --git a/llvm/test/Transforms/VectorCombine/X86/extract-fneg-insert.ll b/llvm/test/Transforms/VectorCombine/X86/extract-fneg-insert.ll
index 1e95cbd350610..df5fcdb7beb65 100644
--- a/llvm/test/Transforms/VectorCombine/X86/extract-fneg-insert.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/extract-fneg-insert.ll
@@ -8,9 +8,8 @@ declare void @use(float)
define <4 x float> @ext0_v4f32(<4 x float> %x, <4 x float> %y) {
; CHECK-LABEL: @ext0_v4f32(
-; CHECK-NEXT: [[E:%.*]] = extractelement <4 x float> [[X:%.*]], i32 0
-; CHECK-NEXT: [[N:%.*]] = fneg float [[E]]
-; CHECK-NEXT: [[R:%.*]] = insertelement <4 x float> [[Y:%.*]], float [[N]], i32 0
+; CHECK-NEXT: [[TMP1:%.*]] = fneg <4 x float> [[X:%.*]]
+; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[Y:%.*]], <4 x float> [[TMP1]], <4 x i32> <i32 4, i32 1, i32 2, i32 3>
; CHECK-NEXT: ret <4 x float> [[R]]
;
%e = extractelement <4 x float> %x, i32 0
More information about the llvm-commits
mailing list