[llvm] ea83e1c - [LV] Assign cost to all interleave members when not interleaving.
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 11 13:07:02 PDT 2024
Author: Florian Hahn
Date: 2024-09-11T21:04:34+01:00
New Revision: ea83e1c05a5adee5d8f9e680356ee57556ba64a1
URL: https://github.com/llvm/llvm-project/commit/ea83e1c05a5adee5d8f9e680356ee57556ba64a1
DIFF: https://github.com/llvm/llvm-project/commit/ea83e1c05a5adee5d8f9e680356ee57556ba64a1.diff
LOG: [LV] Assign cost to all interleave members when not interleaving.
At the moment, the full cost of all interleave group members is assigned
to the instruction at the group's insert position, even if the decision
was to not form an interleave group.
This can lead to inaccurate cost estimates, e.g. if the instruction at
the insert position is dead. If the decision is to not vectorize but
scalarize or scatter/gather, then the cost will be the total cost for all
members. In those cases, assign the cost individually per member, to more
closely reflect the choice per instruction.
This fixes a divergence between the legacy and VPlan-based cost models.
Fixes https://github.com/llvm/llvm-project/issues/108098.
Added:
Modified:
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-5.ll
llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-7.ll
llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-8.ll
llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-2.ll
llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-3.ll
llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-4.ll
llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-5.ll
llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-6.ll
llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-7.ll
llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-8.ll
llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-5.ll
llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-7.ll
llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-8.ll
llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4-indices-01uu.ll
llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-5.ll
llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-7.ll
llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-8.ll
llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-2.ll
llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-3.ll
llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-4.ll
llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-5.ll
llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-6.ll
llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-7.ll
llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-8.ll
llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-5.ll
llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-7.ll
llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-8.ll
llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-8.ll
llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-8.ll
llvm/test/Analysis/CostModel/X86/masked-interleaved-store-i16.ll
llvm/test/Transforms/LoopVectorize/AArch64/interleaved-vs-scalar.ll
llvm/test/Transforms/LoopVectorize/AArch64/interleaved_cost.ll
llvm/test/Transforms/LoopVectorize/ARM/mve-interleaved-cost.ll
llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll
llvm/test/Transforms/LoopVectorize/SystemZ/mem-interleaving-costs-03.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 640a7bf3d672f0..3b6b154b9660cf 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -1182,13 +1182,23 @@ class LoopVectorizationCostModel {
InstructionCost Cost) {
assert(VF.isVector() && "Expected VF >=2");
/// Broadcast this decicion to all instructions inside the group.
- /// But the cost will be assigned to one instruction only.
+ /// When interleaving, the cost will only be assigned one instruction, the
+ /// insert position. For other cases, add the appropriate fraction of the
+ /// total cost to each instruction. This ensures accurate costs are used,
+ /// even if the insert position instruction is not used.
+ InstructionCost InsertPosCost = Cost;
+ InstructionCost OtherMemberCost = 0;
+ if (W != CM_Interleave)
+ OtherMemberCost = InsertPosCost = Cost / Grp->getNumMembers();
+ ;
for (unsigned Idx = 0; Idx < Grp->getFactor(); ++Idx) {
if (auto *I = Grp->getMember(Idx)) {
if (Grp->getInsertPos() == I)
- WideningDecisions[std::make_pair(I, VF)] = std::make_pair(W, Cost);
+ WideningDecisions[std::make_pair(I, VF)] =
+ std::make_pair(W, InsertPosCost);
else
- WideningDecisions[std::make_pair(I, VF)] = std::make_pair(W, 0);
+ WideningDecisions[std::make_pair(I, VF)] =
+ std::make_pair(W, OtherMemberCost);
}
}
}
diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-5.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-5.ll
index 29dce5f21173af..57ae02abc11996 100644
--- a/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-5.ll
+++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-5.ll
@@ -82,26 +82,26 @@ define void @test() {
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load float, ptr %in2, align 4
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load float, ptr %in3, align 4
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load float, ptr %in4, align 4
-; AVX2: LV: Found an estimated cost of 15 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load float, ptr %in1, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load float, ptr %in2, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load float, ptr %in3, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load float, ptr %in4, align 4
-; AVX2: LV: Found an estimated cost of 35 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load float, ptr %in1, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load float, ptr %in2, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load float, ptr %in3, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load float, ptr %in4, align 4
-; AVX2: LV: Found an estimated cost of 75 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load float, ptr %in1, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load float, ptr %in2, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load float, ptr %in3, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load float, ptr %in4, align 4
-; AVX2: LV: Found an estimated cost of 150 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load float, ptr %in1, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load float, ptr %in2, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v3 = load float, ptr %in3, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v4 = load float, ptr %in4, align 4
+; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4
+; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v1 = load float, ptr %in1, align 4
+; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v2 = load float, ptr %in2, align 4
+; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v3 = load float, ptr %in3, align 4
+; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v4 = load float, ptr %in4, align 4
+; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4
+; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v1 = load float, ptr %in1, align 4
+; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v2 = load float, ptr %in2, align 4
+; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v3 = load float, ptr %in3, align 4
+; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v4 = load float, ptr %in4, align 4
+; AVX2: LV: Found an estimated cost of 15 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4
+; AVX2: LV: Found an estimated cost of 15 for VF 8 For instruction: %v1 = load float, ptr %in1, align 4
+; AVX2: LV: Found an estimated cost of 15 for VF 8 For instruction: %v2 = load float, ptr %in2, align 4
+; AVX2: LV: Found an estimated cost of 15 for VF 8 For instruction: %v3 = load float, ptr %in3, align 4
+; AVX2: LV: Found an estimated cost of 15 for VF 8 For instruction: %v4 = load float, ptr %in4, align 4
+; AVX2: LV: Found an estimated cost of 30 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4
+; AVX2: LV: Found an estimated cost of 30 for VF 16 For instruction: %v1 = load float, ptr %in1, align 4
+; AVX2: LV: Found an estimated cost of 30 for VF 16 For instruction: %v2 = load float, ptr %in2, align 4
+; AVX2: LV: Found an estimated cost of 30 for VF 16 For instruction: %v3 = load float, ptr %in3, align 4
+; AVX2: LV: Found an estimated cost of 30 for VF 16 For instruction: %v4 = load float, ptr %in4, align 4
;
; AVX512-LABEL: 'test'
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
@@ -139,11 +139,11 @@ define void @test() {
; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v2 = load float, ptr %in2, align 4
; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v3 = load float, ptr %in3, align 4
; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v4 = load float, ptr %in4, align 4
-; AVX512: LV: Found an estimated cost of 400 for VF 64 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX512: LV: Found an estimated cost of 0 for VF 64 For instruction: %v1 = load float, ptr %in1, align 4
-; AVX512: LV: Found an estimated cost of 0 for VF 64 For instruction: %v2 = load float, ptr %in2, align 4
-; AVX512: LV: Found an estimated cost of 0 for VF 64 For instruction: %v3 = load float, ptr %in3, align 4
-; AVX512: LV: Found an estimated cost of 0 for VF 64 For instruction: %v4 = load float, ptr %in4, align 4
+; AVX512: LV: Found an estimated cost of 80 for VF 64 For instruction: %v0 = load float, ptr %in0, align 4
+; AVX512: LV: Found an estimated cost of 80 for VF 64 For instruction: %v1 = load float, ptr %in1, align 4
+; AVX512: LV: Found an estimated cost of 80 for VF 64 For instruction: %v2 = load float, ptr %in2, align 4
+; AVX512: LV: Found an estimated cost of 80 for VF 64 For instruction: %v3 = load float, ptr %in3, align 4
+; AVX512: LV: Found an estimated cost of 80 for VF 64 For instruction: %v4 = load float, ptr %in4, align 4
;
entry:
br label %for.body
diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-7.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-7.ll
index 0e7b1c58e587c1..2cc5150f3c887f 100644
--- a/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-7.ll
+++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-7.ll
@@ -108,34 +108,34 @@ define void @test() {
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load float, ptr %in4, align 4
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load float, ptr %in5, align 4
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load float, ptr %in6, align 4
-; AVX2: LV: Found an estimated cost of 21 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load float, ptr %in1, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load float, ptr %in2, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load float, ptr %in3, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load float, ptr %in4, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v5 = load float, ptr %in5, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v6 = load float, ptr %in6, align 4
-; AVX2: LV: Found an estimated cost of 49 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load float, ptr %in1, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load float, ptr %in2, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load float, ptr %in3, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load float, ptr %in4, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v5 = load float, ptr %in5, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v6 = load float, ptr %in6, align 4
-; AVX2: LV: Found an estimated cost of 105 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load float, ptr %in1, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load float, ptr %in2, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load float, ptr %in3, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load float, ptr %in4, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v5 = load float, ptr %in5, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v6 = load float, ptr %in6, align 4
-; AVX2: LV: Found an estimated cost of 210 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load float, ptr %in1, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load float, ptr %in2, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v3 = load float, ptr %in3, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v4 = load float, ptr %in4, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v5 = load float, ptr %in5, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v6 = load float, ptr %in6, align 4
+; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4
+; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v1 = load float, ptr %in1, align 4
+; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v2 = load float, ptr %in2, align 4
+; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v3 = load float, ptr %in3, align 4
+; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v4 = load float, ptr %in4, align 4
+; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v5 = load float, ptr %in5, align 4
+; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v6 = load float, ptr %in6, align 4
+; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4
+; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v1 = load float, ptr %in1, align 4
+; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v2 = load float, ptr %in2, align 4
+; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v3 = load float, ptr %in3, align 4
+; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v4 = load float, ptr %in4, align 4
+; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v5 = load float, ptr %in5, align 4
+; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v6 = load float, ptr %in6, align 4
+; AVX2: LV: Found an estimated cost of 15 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4
+; AVX2: LV: Found an estimated cost of 15 for VF 8 For instruction: %v1 = load float, ptr %in1, align 4
+; AVX2: LV: Found an estimated cost of 15 for VF 8 For instruction: %v2 = load float, ptr %in2, align 4
+; AVX2: LV: Found an estimated cost of 15 for VF 8 For instruction: %v3 = load float, ptr %in3, align 4
+; AVX2: LV: Found an estimated cost of 15 for VF 8 For instruction: %v4 = load float, ptr %in4, align 4
+; AVX2: LV: Found an estimated cost of 15 for VF 8 For instruction: %v5 = load float, ptr %in5, align 4
+; AVX2: LV: Found an estimated cost of 15 for VF 8 For instruction: %v6 = load float, ptr %in6, align 4
+; AVX2: LV: Found an estimated cost of 30 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4
+; AVX2: LV: Found an estimated cost of 30 for VF 16 For instruction: %v1 = load float, ptr %in1, align 4
+; AVX2: LV: Found an estimated cost of 30 for VF 16 For instruction: %v2 = load float, ptr %in2, align 4
+; AVX2: LV: Found an estimated cost of 30 for VF 16 For instruction: %v3 = load float, ptr %in3, align 4
+; AVX2: LV: Found an estimated cost of 30 for VF 16 For instruction: %v4 = load float, ptr %in4, align 4
+; AVX2: LV: Found an estimated cost of 30 for VF 16 For instruction: %v5 = load float, ptr %in5, align 4
+; AVX2: LV: Found an estimated cost of 30 for VF 16 For instruction: %v6 = load float, ptr %in6, align 4
;
; AVX512-LABEL: 'test'
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
@@ -180,13 +180,13 @@ define void @test() {
; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v4 = load float, ptr %in4, align 4
; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v5 = load float, ptr %in5, align 4
; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v6 = load float, ptr %in6, align 4
-; AVX512: LV: Found an estimated cost of 280 for VF 32 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v1 = load float, ptr %in1, align 4
-; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v2 = load float, ptr %in2, align 4
-; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v3 = load float, ptr %in3, align 4
-; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v4 = load float, ptr %in4, align 4
-; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v5 = load float, ptr %in5, align 4
-; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v6 = load float, ptr %in6, align 4
+; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v0 = load float, ptr %in0, align 4
+; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v1 = load float, ptr %in1, align 4
+; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v2 = load float, ptr %in2, align 4
+; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v3 = load float, ptr %in3, align 4
+; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v4 = load float, ptr %in4, align 4
+; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v5 = load float, ptr %in5, align 4
+; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v6 = load float, ptr %in6, align 4
;
entry:
br label %for.body
diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-8.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-8.ll
index 8830aff579c322..1899741b8a3b40 100644
--- a/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-8.ll
+++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-f32-stride-8.ll
@@ -121,22 +121,22 @@ define void @test() {
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load float, ptr %in5, align 4
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load float, ptr %in6, align 4
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load float, ptr %in7, align 4
-; AVX2: LV: Found an estimated cost of 24 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load float, ptr %in1, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load float, ptr %in2, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load float, ptr %in3, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load float, ptr %in4, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v5 = load float, ptr %in5, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v6 = load float, ptr %in6, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v7 = load float, ptr %in7, align 4
-; AVX2: LV: Found an estimated cost of 56 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load float, ptr %in1, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load float, ptr %in2, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load float, ptr %in3, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load float, ptr %in4, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v5 = load float, ptr %in5, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v6 = load float, ptr %in6, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v7 = load float, ptr %in7, align 4
+; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load float, ptr %in0, align 4
+; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v1 = load float, ptr %in1, align 4
+; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v2 = load float, ptr %in2, align 4
+; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v3 = load float, ptr %in3, align 4
+; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v4 = load float, ptr %in4, align 4
+; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v5 = load float, ptr %in5, align 4
+; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v6 = load float, ptr %in6, align 4
+; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v7 = load float, ptr %in7, align 4
+; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v0 = load float, ptr %in0, align 4
+; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v1 = load float, ptr %in1, align 4
+; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v2 = load float, ptr %in2, align 4
+; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v3 = load float, ptr %in3, align 4
+; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v4 = load float, ptr %in4, align 4
+; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v5 = load float, ptr %in5, align 4
+; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v6 = load float, ptr %in6, align 4
+; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v7 = load float, ptr %in7, align 4
; AVX2: LV: Found an estimated cost of 48 for VF 8 For instruction: %v0 = load float, ptr %in0, align 4
; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load float, ptr %in1, align 4
; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load float, ptr %in2, align 4
@@ -145,14 +145,14 @@ define void @test() {
; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v5 = load float, ptr %in5, align 4
; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v6 = load float, ptr %in6, align 4
; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v7 = load float, ptr %in7, align 4
-; AVX2: LV: Found an estimated cost of 240 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load float, ptr %in1, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load float, ptr %in2, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v3 = load float, ptr %in3, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v4 = load float, ptr %in4, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v5 = load float, ptr %in5, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v6 = load float, ptr %in6, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v7 = load float, ptr %in7, align 4
+; AVX2: LV: Found an estimated cost of 30 for VF 16 For instruction: %v0 = load float, ptr %in0, align 4
+; AVX2: LV: Found an estimated cost of 30 for VF 16 For instruction: %v1 = load float, ptr %in1, align 4
+; AVX2: LV: Found an estimated cost of 30 for VF 16 For instruction: %v2 = load float, ptr %in2, align 4
+; AVX2: LV: Found an estimated cost of 30 for VF 16 For instruction: %v3 = load float, ptr %in3, align 4
+; AVX2: LV: Found an estimated cost of 30 for VF 16 For instruction: %v4 = load float, ptr %in4, align 4
+; AVX2: LV: Found an estimated cost of 30 for VF 16 For instruction: %v5 = load float, ptr %in5, align 4
+; AVX2: LV: Found an estimated cost of 30 for VF 16 For instruction: %v6 = load float, ptr %in6, align 4
+; AVX2: LV: Found an estimated cost of 30 for VF 16 For instruction: %v7 = load float, ptr %in7, align 4
;
; AVX512-LABEL: 'test'
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load float, ptr %in0, align 4
@@ -203,14 +203,14 @@ define void @test() {
; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v5 = load float, ptr %in5, align 4
; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v6 = load float, ptr %in6, align 4
; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v7 = load float, ptr %in7, align 4
-; AVX512: LV: Found an estimated cost of 320 for VF 32 For instruction: %v0 = load float, ptr %in0, align 4
-; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v1 = load float, ptr %in1, align 4
-; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v2 = load float, ptr %in2, align 4
-; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v3 = load float, ptr %in3, align 4
-; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v4 = load float, ptr %in4, align 4
-; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v5 = load float, ptr %in5, align 4
-; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v6 = load float, ptr %in6, align 4
-; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v7 = load float, ptr %in7, align 4
+; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v0 = load float, ptr %in0, align 4
+; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v1 = load float, ptr %in1, align 4
+; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v2 = load float, ptr %in2, align 4
+; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v3 = load float, ptr %in3, align 4
+; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v4 = load float, ptr %in4, align 4
+; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v5 = load float, ptr %in5, align 4
+; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v6 = load float, ptr %in6, align 4
+; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v7 = load float, ptr %in7, align 4
;
entry:
br label %for.body
diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-2.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-2.ll
index cfd3d7841caa2d..2d4b300a8100a4 100644
--- a/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-2.ll
+++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-2.ll
@@ -71,10 +71,10 @@ define void @test() {
; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load double, ptr %in1, align 8
; AVX512: LV: Found an estimated cost of 22 for VF 16 For instruction: %v0 = load double, ptr %in0, align 8
; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX512: LV: Found an estimated cost of 80 for VF 32 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX512: LV: Found an estimated cost of 160 for VF 64 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 64 For instruction: %v1 = load double, ptr %in1, align 8
+; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v0 = load double, ptr %in0, align 8
+; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v1 = load double, ptr %in1, align 8
+; AVX512: LV: Found an estimated cost of 80 for VF 64 For instruction: %v0 = load double, ptr %in0, align 8
+; AVX512: LV: Found an estimated cost of 80 for VF 64 For instruction: %v1 = load double, ptr %in1, align 8
;
entry:
br label %for.body
diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-3.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-3.ll
index 5ec5b517313856..5dfb25e25d6e61 100644
--- a/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-3.ll
+++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-3.ll
@@ -88,12 +88,12 @@ define void @test() {
; AVX512: LV: Found an estimated cost of 51 for VF 16 For instruction: %v0 = load double, ptr %in0, align 8
; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load double, ptr %in1, align 8
; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX512: LV: Found an estimated cost of 120 for VF 32 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX512: LV: Found an estimated cost of 240 for VF 64 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 64 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 64 For instruction: %v2 = load double, ptr %in2, align 8
+; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v0 = load double, ptr %in0, align 8
+; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v1 = load double, ptr %in1, align 8
+; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v2 = load double, ptr %in2, align 8
+; AVX512: LV: Found an estimated cost of 80 for VF 64 For instruction: %v0 = load double, ptr %in0, align 8
+; AVX512: LV: Found an estimated cost of 80 for VF 64 For instruction: %v1 = load double, ptr %in1, align 8
+; AVX512: LV: Found an estimated cost of 80 for VF 64 For instruction: %v2 = load double, ptr %in2, align 8
;
entry:
br label %for.body
diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-4.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-4.ll
index 450743df723251..bd88ca810728b1 100644
--- a/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-4.ll
+++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-4.ll
@@ -107,14 +107,14 @@ define void @test() {
; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load double, ptr %in1, align 8
; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load double, ptr %in2, align 8
; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load double, ptr %in3, align 8
-; AVX512: LV: Found an estimated cost of 80 for VF 16 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v3 = load double, ptr %in3, align 8
-; AVX512: LV: Found an estimated cost of 160 for VF 32 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v3 = load double, ptr %in3, align 8
+; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v0 = load double, ptr %in0, align 8
+; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v1 = load double, ptr %in1, align 8
+; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v2 = load double, ptr %in2, align 8
+; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v3 = load double, ptr %in3, align 8
+; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v0 = load double, ptr %in0, align 8
+; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v1 = load double, ptr %in1, align 8
+; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v2 = load double, ptr %in2, align 8
+; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v3 = load double, ptr %in3, align 8
;
entry:
br label %for.body
diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-5.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-5.ll
index 5e5c718dba97d2..9c0798631fdba5 100644
--- a/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-5.ll
+++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-5.ll
@@ -72,21 +72,21 @@ define void @test() {
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load double, ptr %in2, align 8
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load double, ptr %in3, align 8
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load double, ptr %in4, align 8
-; AVX2: LV: Found an estimated cost of 15 for VF 2 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load double, ptr %in3, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load double, ptr %in4, align 8
-; AVX2: LV: Found an estimated cost of 35 for VF 4 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load double, ptr %in3, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load double, ptr %in4, align 8
-; AVX2: LV: Found an estimated cost of 70 for VF 8 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load double, ptr %in3, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load double, ptr %in4, align 8
+; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load double, ptr %in0, align 8
+; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v1 = load double, ptr %in1, align 8
+; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v2 = load double, ptr %in2, align 8
+; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v3 = load double, ptr %in3, align 8
+; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v4 = load double, ptr %in4, align 8
+; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v0 = load double, ptr %in0, align 8
+; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v1 = load double, ptr %in1, align 8
+; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v2 = load double, ptr %in2, align 8
+; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v3 = load double, ptr %in3, align 8
+; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v4 = load double, ptr %in4, align 8
+; AVX2: LV: Found an estimated cost of 14 for VF 8 For instruction: %v0 = load double, ptr %in0, align 8
+; AVX2: LV: Found an estimated cost of 14 for VF 8 For instruction: %v1 = load double, ptr %in1, align 8
+; AVX2: LV: Found an estimated cost of 14 for VF 8 For instruction: %v2 = load double, ptr %in2, align 8
+; AVX2: LV: Found an estimated cost of 14 for VF 8 For instruction: %v3 = load double, ptr %in3, align 8
+; AVX2: LV: Found an estimated cost of 14 for VF 8 For instruction: %v4 = load double, ptr %in4, align 8
;
; AVX512-LABEL: 'test'
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8
@@ -114,16 +114,16 @@ define void @test() {
; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load double, ptr %in2, align 8
; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load double, ptr %in3, align 8
; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load double, ptr %in4, align 8
-; AVX512: LV: Found an estimated cost of 100 for VF 16 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v3 = load double, ptr %in3, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v4 = load double, ptr %in4, align 8
-; AVX512: LV: Found an estimated cost of 200 for VF 32 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v3 = load double, ptr %in3, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v4 = load double, ptr %in4, align 8
+; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v0 = load double, ptr %in0, align 8
+; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v1 = load double, ptr %in1, align 8
+; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v2 = load double, ptr %in2, align 8
+; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v3 = load double, ptr %in3, align 8
+; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v4 = load double, ptr %in4, align 8
+; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v0 = load double, ptr %in0, align 8
+; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v1 = load double, ptr %in1, align 8
+; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v2 = load double, ptr %in2, align 8
+; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v3 = load double, ptr %in3, align 8
+; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v4 = load double, ptr %in4, align 8
;
entry:
br label %for.body
diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-6.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-6.ll
index 62541fa2368c6a..99a735d3f552c4 100644
--- a/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-6.ll
+++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-6.ll
@@ -133,18 +133,18 @@ define void @test() {
; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load double, ptr %in3, align 8
; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load double, ptr %in4, align 8
; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v5 = load double, ptr %in5, align 8
-; AVX512: LV: Found an estimated cost of 120 for VF 16 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v3 = load double, ptr %in3, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v4 = load double, ptr %in4, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v5 = load double, ptr %in5, align 8
-; AVX512: LV: Found an estimated cost of 240 for VF 32 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v3 = load double, ptr %in3, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v4 = load double, ptr %in4, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v5 = load double, ptr %in5, align 8
+; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v0 = load double, ptr %in0, align 8
+; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v1 = load double, ptr %in1, align 8
+; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v2 = load double, ptr %in2, align 8
+; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v3 = load double, ptr %in3, align 8
+; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v4 = load double, ptr %in4, align 8
+; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v5 = load double, ptr %in5, align 8
+; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v0 = load double, ptr %in0, align 8
+; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v1 = load double, ptr %in1, align 8
+; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v2 = load double, ptr %in2, align 8
+; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v3 = load double, ptr %in3, align 8
+; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v4 = load double, ptr %in4, align 8
+; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v5 = load double, ptr %in5, align 8
;
entry:
br label %for.body
diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-7.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-7.ll
index cfed8554b978b8..168e9166ea1dd4 100644
--- a/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-7.ll
+++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-7.ll
@@ -94,27 +94,27 @@ define void @test() {
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load double, ptr %in4, align 8
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load double, ptr %in5, align 8
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load double, ptr %in6, align 8
-; AVX2: LV: Found an estimated cost of 21 for VF 2 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load double, ptr %in3, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load double, ptr %in4, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v5 = load double, ptr %in5, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v6 = load double, ptr %in6, align 8
-; AVX2: LV: Found an estimated cost of 49 for VF 4 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load double, ptr %in3, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load double, ptr %in4, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v5 = load double, ptr %in5, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v6 = load double, ptr %in6, align 8
-; AVX2: LV: Found an estimated cost of 98 for VF 8 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load double, ptr %in3, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load double, ptr %in4, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v5 = load double, ptr %in5, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v6 = load double, ptr %in6, align 8
+; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load double, ptr %in0, align 8
+; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v1 = load double, ptr %in1, align 8
+; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v2 = load double, ptr %in2, align 8
+; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v3 = load double, ptr %in3, align 8
+; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v4 = load double, ptr %in4, align 8
+; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v5 = load double, ptr %in5, align 8
+; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v6 = load double, ptr %in6, align 8
+; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v0 = load double, ptr %in0, align 8
+; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v1 = load double, ptr %in1, align 8
+; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v2 = load double, ptr %in2, align 8
+; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v3 = load double, ptr %in3, align 8
+; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v4 = load double, ptr %in4, align 8
+; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v5 = load double, ptr %in5, align 8
+; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v6 = load double, ptr %in6, align 8
+; AVX2: LV: Found an estimated cost of 14 for VF 8 For instruction: %v0 = load double, ptr %in0, align 8
+; AVX2: LV: Found an estimated cost of 14 for VF 8 For instruction: %v1 = load double, ptr %in1, align 8
+; AVX2: LV: Found an estimated cost of 14 for VF 8 For instruction: %v2 = load double, ptr %in2, align 8
+; AVX2: LV: Found an estimated cost of 14 for VF 8 For instruction: %v3 = load double, ptr %in3, align 8
+; AVX2: LV: Found an estimated cost of 14 for VF 8 For instruction: %v4 = load double, ptr %in4, align 8
+; AVX2: LV: Found an estimated cost of 14 for VF 8 For instruction: %v5 = load double, ptr %in5, align 8
+; AVX2: LV: Found an estimated cost of 14 for VF 8 For instruction: %v6 = load double, ptr %in6, align 8
;
; AVX512-LABEL: 'test'
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8
@@ -152,20 +152,20 @@ define void @test() {
; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load double, ptr %in4, align 8
; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v5 = load double, ptr %in5, align 8
; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v6 = load double, ptr %in6, align 8
-; AVX512: LV: Found an estimated cost of 140 for VF 16 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v3 = load double, ptr %in3, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v4 = load double, ptr %in4, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v5 = load double, ptr %in5, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v6 = load double, ptr %in6, align 8
-; AVX512: LV: Found an estimated cost of 280 for VF 32 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v3 = load double, ptr %in3, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v4 = load double, ptr %in4, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v5 = load double, ptr %in5, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v6 = load double, ptr %in6, align 8
+; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v0 = load double, ptr %in0, align 8
+; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v1 = load double, ptr %in1, align 8
+; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v2 = load double, ptr %in2, align 8
+; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v3 = load double, ptr %in3, align 8
+; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v4 = load double, ptr %in4, align 8
+; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v5 = load double, ptr %in5, align 8
+; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v6 = load double, ptr %in6, align 8
+; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v0 = load double, ptr %in0, align 8
+; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v1 = load double, ptr %in1, align 8
+; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v2 = load double, ptr %in2, align 8
+; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v3 = load double, ptr %in3, align 8
+; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v4 = load double, ptr %in4, align 8
+; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v5 = load double, ptr %in5, align 8
+; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v6 = load double, ptr %in6, align 8
;
entry:
br label %for.body
diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-8.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-8.ll
index 07939b914d0224..919a17e8729e0f 100644
--- a/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-8.ll
+++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-f64-stride-8.ll
@@ -105,30 +105,30 @@ define void @test() {
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load double, ptr %in5, align 8
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load double, ptr %in6, align 8
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load double, ptr %in7, align 8
-; AVX2: LV: Found an estimated cost of 24 for VF 2 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load double, ptr %in3, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load double, ptr %in4, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v5 = load double, ptr %in5, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v6 = load double, ptr %in6, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v7 = load double, ptr %in7, align 8
-; AVX2: LV: Found an estimated cost of 56 for VF 4 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load double, ptr %in3, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load double, ptr %in4, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v5 = load double, ptr %in5, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v6 = load double, ptr %in6, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v7 = load double, ptr %in7, align 8
-; AVX2: LV: Found an estimated cost of 112 for VF 8 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load double, ptr %in3, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load double, ptr %in4, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v5 = load double, ptr %in5, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v6 = load double, ptr %in6, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v7 = load double, ptr %in7, align 8
+; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v0 = load double, ptr %in0, align 8
+; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v1 = load double, ptr %in1, align 8
+; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v2 = load double, ptr %in2, align 8
+; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v3 = load double, ptr %in3, align 8
+; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v4 = load double, ptr %in4, align 8
+; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v5 = load double, ptr %in5, align 8
+; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v6 = load double, ptr %in6, align 8
+; AVX2: LV: Found an estimated cost of 3 for VF 2 For instruction: %v7 = load double, ptr %in7, align 8
+; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v0 = load double, ptr %in0, align 8
+; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v1 = load double, ptr %in1, align 8
+; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v2 = load double, ptr %in2, align 8
+; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v3 = load double, ptr %in3, align 8
+; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v4 = load double, ptr %in4, align 8
+; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v5 = load double, ptr %in5, align 8
+; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v6 = load double, ptr %in6, align 8
+; AVX2: LV: Found an estimated cost of 7 for VF 4 For instruction: %v7 = load double, ptr %in7, align 8
+; AVX2: LV: Found an estimated cost of 14 for VF 8 For instruction: %v0 = load double, ptr %in0, align 8
+; AVX2: LV: Found an estimated cost of 14 for VF 8 For instruction: %v1 = load double, ptr %in1, align 8
+; AVX2: LV: Found an estimated cost of 14 for VF 8 For instruction: %v2 = load double, ptr %in2, align 8
+; AVX2: LV: Found an estimated cost of 14 for VF 8 For instruction: %v3 = load double, ptr %in3, align 8
+; AVX2: LV: Found an estimated cost of 14 for VF 8 For instruction: %v4 = load double, ptr %in4, align 8
+; AVX2: LV: Found an estimated cost of 14 for VF 8 For instruction: %v5 = load double, ptr %in5, align 8
+; AVX2: LV: Found an estimated cost of 14 for VF 8 For instruction: %v6 = load double, ptr %in6, align 8
+; AVX2: LV: Found an estimated cost of 14 for VF 8 For instruction: %v7 = load double, ptr %in7, align 8
;
; AVX512-LABEL: 'test'
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load double, ptr %in0, align 8
@@ -163,22 +163,22 @@ define void @test() {
; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v5 = load double, ptr %in5, align 8
; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v6 = load double, ptr %in6, align 8
; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v7 = load double, ptr %in7, align 8
-; AVX512: LV: Found an estimated cost of 80 for VF 8 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load double, ptr %in3, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load double, ptr %in4, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v5 = load double, ptr %in5, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v6 = load double, ptr %in6, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v7 = load double, ptr %in7, align 8
-; AVX512: LV: Found an estimated cost of 160 for VF 16 For instruction: %v0 = load double, ptr %in0, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load double, ptr %in1, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load double, ptr %in2, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v3 = load double, ptr %in3, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v4 = load double, ptr %in4, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v5 = load double, ptr %in5, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v6 = load double, ptr %in6, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v7 = load double, ptr %in7, align 8
+; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: %v0 = load double, ptr %in0, align 8
+; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: %v1 = load double, ptr %in1, align 8
+; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: %v2 = load double, ptr %in2, align 8
+; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: %v3 = load double, ptr %in3, align 8
+; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: %v4 = load double, ptr %in4, align 8
+; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: %v5 = load double, ptr %in5, align 8
+; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: %v6 = load double, ptr %in6, align 8
+; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: %v7 = load double, ptr %in7, align 8
+; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v0 = load double, ptr %in0, align 8
+; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v1 = load double, ptr %in1, align 8
+; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v2 = load double, ptr %in2, align 8
+; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v3 = load double, ptr %in3, align 8
+; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v4 = load double, ptr %in4, align 8
+; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v5 = load double, ptr %in5, align 8
+; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v6 = load double, ptr %in6, align 8
+; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v7 = load double, ptr %in7, align 8
;
entry:
br label %for.body
diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-5.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-5.ll
index 964a9b660942e1..6737c722b46ff9 100644
--- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-5.ll
+++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-5.ll
@@ -93,31 +93,31 @@ define void @test() {
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i16, ptr %in2, align 2
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i16, ptr %in3, align 2
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i16, ptr %in4, align 2
-; AVX2: LV: Found an estimated cost of 20 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i16, ptr %in1, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load i16, ptr %in2, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load i16, ptr %in3, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load i16, ptr %in4, align 2
-; AVX2: LV: Found an estimated cost of 40 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load i16, ptr %in1, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load i16, ptr %in2, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load i16, ptr %in3, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load i16, ptr %in4, align 2
-; AVX2: LV: Found an estimated cost of 80 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i16, ptr %in1, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load i16, ptr %in2, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load i16, ptr %in3, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load i16, ptr %in4, align 2
-; AVX2: LV: Found an estimated cost of 165 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load i16, ptr %in1, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load i16, ptr %in2, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v3 = load i16, ptr %in3, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v4 = load i16, ptr %in4, align 2
-; AVX2: LV: Found an estimated cost of 330 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 32 For instruction: %v1 = load i16, ptr %in1, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 32 For instruction: %v2 = load i16, ptr %in2, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 32 For instruction: %v3 = load i16, ptr %in3, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 32 For instruction: %v4 = load i16, ptr %in4, align 2
+; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2
+; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v1 = load i16, ptr %in1, align 2
+; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v2 = load i16, ptr %in2, align 2
+; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v3 = load i16, ptr %in3, align 2
+; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v4 = load i16, ptr %in4, align 2
+; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2
+; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v1 = load i16, ptr %in1, align 2
+; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v2 = load i16, ptr %in2, align 2
+; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v3 = load i16, ptr %in3, align 2
+; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v4 = load i16, ptr %in4, align 2
+; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2
+; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v1 = load i16, ptr %in1, align 2
+; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v2 = load i16, ptr %in2, align 2
+; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v3 = load i16, ptr %in3, align 2
+; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v4 = load i16, ptr %in4, align 2
+; AVX2: LV: Found an estimated cost of 33 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2
+; AVX2: LV: Found an estimated cost of 33 for VF 16 For instruction: %v1 = load i16, ptr %in1, align 2
+; AVX2: LV: Found an estimated cost of 33 for VF 16 For instruction: %v2 = load i16, ptr %in2, align 2
+; AVX2: LV: Found an estimated cost of 33 for VF 16 For instruction: %v3 = load i16, ptr %in3, align 2
+; AVX2: LV: Found an estimated cost of 33 for VF 16 For instruction: %v4 = load i16, ptr %in4, align 2
+; AVX2: LV: Found an estimated cost of 66 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2
+; AVX2: LV: Found an estimated cost of 66 for VF 32 For instruction: %v1 = load i16, ptr %in1, align 2
+; AVX2: LV: Found an estimated cost of 66 for VF 32 For instruction: %v2 = load i16, ptr %in2, align 2
+; AVX2: LV: Found an estimated cost of 66 for VF 32 For instruction: %v3 = load i16, ptr %in3, align 2
+; AVX2: LV: Found an estimated cost of 66 for VF 32 For instruction: %v4 = load i16, ptr %in4, align 2
;
; AVX512DQ-LABEL: 'test'
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-7.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-7.ll
index 6653198397dd25..46d56a75f1c4de 100644
--- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-7.ll
+++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-7.ll
@@ -123,41 +123,41 @@ define void @test() {
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i16, ptr %in4, align 2
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i16, ptr %in5, align 2
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i16, ptr %in6, align 2
-; AVX2: LV: Found an estimated cost of 28 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i16, ptr %in1, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load i16, ptr %in2, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load i16, ptr %in3, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load i16, ptr %in4, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v5 = load i16, ptr %in5, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v6 = load i16, ptr %in6, align 2
-; AVX2: LV: Found an estimated cost of 56 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load i16, ptr %in1, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load i16, ptr %in2, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load i16, ptr %in3, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load i16, ptr %in4, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v5 = load i16, ptr %in5, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v6 = load i16, ptr %in6, align 2
-; AVX2: LV: Found an estimated cost of 112 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i16, ptr %in1, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load i16, ptr %in2, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load i16, ptr %in3, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load i16, ptr %in4, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v5 = load i16, ptr %in5, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v6 = load i16, ptr %in6, align 2
-; AVX2: LV: Found an estimated cost of 231 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load i16, ptr %in1, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load i16, ptr %in2, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v3 = load i16, ptr %in3, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v4 = load i16, ptr %in4, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v5 = load i16, ptr %in5, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v6 = load i16, ptr %in6, align 2
-; AVX2: LV: Found an estimated cost of 462 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 32 For instruction: %v1 = load i16, ptr %in1, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 32 For instruction: %v2 = load i16, ptr %in2, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 32 For instruction: %v3 = load i16, ptr %in3, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 32 For instruction: %v4 = load i16, ptr %in4, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 32 For instruction: %v5 = load i16, ptr %in5, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 32 For instruction: %v6 = load i16, ptr %in6, align 2
+; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2
+; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v1 = load i16, ptr %in1, align 2
+; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v2 = load i16, ptr %in2, align 2
+; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v3 = load i16, ptr %in3, align 2
+; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v4 = load i16, ptr %in4, align 2
+; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v5 = load i16, ptr %in5, align 2
+; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v6 = load i16, ptr %in6, align 2
+; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2
+; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v1 = load i16, ptr %in1, align 2
+; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v2 = load i16, ptr %in2, align 2
+; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v3 = load i16, ptr %in3, align 2
+; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v4 = load i16, ptr %in4, align 2
+; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v5 = load i16, ptr %in5, align 2
+; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v6 = load i16, ptr %in6, align 2
+; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2
+; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v1 = load i16, ptr %in1, align 2
+; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v2 = load i16, ptr %in2, align 2
+; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v3 = load i16, ptr %in3, align 2
+; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v4 = load i16, ptr %in4, align 2
+; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v5 = load i16, ptr %in5, align 2
+; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v6 = load i16, ptr %in6, align 2
+; AVX2: LV: Found an estimated cost of 33 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2
+; AVX2: LV: Found an estimated cost of 33 for VF 16 For instruction: %v1 = load i16, ptr %in1, align 2
+; AVX2: LV: Found an estimated cost of 33 for VF 16 For instruction: %v2 = load i16, ptr %in2, align 2
+; AVX2: LV: Found an estimated cost of 33 for VF 16 For instruction: %v3 = load i16, ptr %in3, align 2
+; AVX2: LV: Found an estimated cost of 33 for VF 16 For instruction: %v4 = load i16, ptr %in4, align 2
+; AVX2: LV: Found an estimated cost of 33 for VF 16 For instruction: %v5 = load i16, ptr %in5, align 2
+; AVX2: LV: Found an estimated cost of 33 for VF 16 For instruction: %v6 = load i16, ptr %in6, align 2
+; AVX2: LV: Found an estimated cost of 66 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2
+; AVX2: LV: Found an estimated cost of 66 for VF 32 For instruction: %v1 = load i16, ptr %in1, align 2
+; AVX2: LV: Found an estimated cost of 66 for VF 32 For instruction: %v2 = load i16, ptr %in2, align 2
+; AVX2: LV: Found an estimated cost of 66 for VF 32 For instruction: %v3 = load i16, ptr %in3, align 2
+; AVX2: LV: Found an estimated cost of 66 for VF 32 For instruction: %v4 = load i16, ptr %in4, align 2
+; AVX2: LV: Found an estimated cost of 66 for VF 32 For instruction: %v5 = load i16, ptr %in5, align 2
+; AVX2: LV: Found an estimated cost of 66 for VF 32 For instruction: %v6 = load i16, ptr %in6, align 2
;
; AVX512DQ-LABEL: 'test'
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-8.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-8.ll
index b3a5cbeccc09c4..4d65abdaf688c9 100644
--- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-8.ll
+++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i16-stride-8.ll
@@ -138,46 +138,46 @@ define void @test() {
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i16, ptr %in5, align 2
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i16, ptr %in6, align 2
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load i16, ptr %in7, align 2
-; AVX2: LV: Found an estimated cost of 32 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i16, ptr %in1, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load i16, ptr %in2, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load i16, ptr %in3, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load i16, ptr %in4, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v5 = load i16, ptr %in5, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v6 = load i16, ptr %in6, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v7 = load i16, ptr %in7, align 2
-; AVX2: LV: Found an estimated cost of 64 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load i16, ptr %in1, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load i16, ptr %in2, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load i16, ptr %in3, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load i16, ptr %in4, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v5 = load i16, ptr %in5, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v6 = load i16, ptr %in6, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v7 = load i16, ptr %in7, align 2
-; AVX2: LV: Found an estimated cost of 128 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i16, ptr %in1, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load i16, ptr %in2, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load i16, ptr %in3, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load i16, ptr %in4, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v5 = load i16, ptr %in5, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v6 = load i16, ptr %in6, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v7 = load i16, ptr %in7, align 2
-; AVX2: LV: Found an estimated cost of 264 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load i16, ptr %in1, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load i16, ptr %in2, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v3 = load i16, ptr %in3, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v4 = load i16, ptr %in4, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v5 = load i16, ptr %in5, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v6 = load i16, ptr %in6, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v7 = load i16, ptr %in7, align 2
-; AVX2: LV: Found an estimated cost of 528 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 32 For instruction: %v1 = load i16, ptr %in1, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 32 For instruction: %v2 = load i16, ptr %in2, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 32 For instruction: %v3 = load i16, ptr %in3, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 32 For instruction: %v4 = load i16, ptr %in4, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 32 For instruction: %v5 = load i16, ptr %in5, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 32 For instruction: %v6 = load i16, ptr %in6, align 2
-; AVX2: LV: Found an estimated cost of 0 for VF 32 For instruction: %v7 = load i16, ptr %in7, align 2
+; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i16, ptr %in0, align 2
+; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v1 = load i16, ptr %in1, align 2
+; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v2 = load i16, ptr %in2, align 2
+; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v3 = load i16, ptr %in3, align 2
+; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v4 = load i16, ptr %in4, align 2
+; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v5 = load i16, ptr %in5, align 2
+; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v6 = load i16, ptr %in6, align 2
+; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v7 = load i16, ptr %in7, align 2
+; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i16, ptr %in0, align 2
+; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v1 = load i16, ptr %in1, align 2
+; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v2 = load i16, ptr %in2, align 2
+; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v3 = load i16, ptr %in3, align 2
+; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v4 = load i16, ptr %in4, align 2
+; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v5 = load i16, ptr %in5, align 2
+; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v6 = load i16, ptr %in6, align 2
+; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v7 = load i16, ptr %in7, align 2
+; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v0 = load i16, ptr %in0, align 2
+; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v1 = load i16, ptr %in1, align 2
+; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v2 = load i16, ptr %in2, align 2
+; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v3 = load i16, ptr %in3, align 2
+; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v4 = load i16, ptr %in4, align 2
+; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v5 = load i16, ptr %in5, align 2
+; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v6 = load i16, ptr %in6, align 2
+; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v7 = load i16, ptr %in7, align 2
+; AVX2: LV: Found an estimated cost of 33 for VF 16 For instruction: %v0 = load i16, ptr %in0, align 2
+; AVX2: LV: Found an estimated cost of 33 for VF 16 For instruction: %v1 = load i16, ptr %in1, align 2
+; AVX2: LV: Found an estimated cost of 33 for VF 16 For instruction: %v2 = load i16, ptr %in2, align 2
+; AVX2: LV: Found an estimated cost of 33 for VF 16 For instruction: %v3 = load i16, ptr %in3, align 2
+; AVX2: LV: Found an estimated cost of 33 for VF 16 For instruction: %v4 = load i16, ptr %in4, align 2
+; AVX2: LV: Found an estimated cost of 33 for VF 16 For instruction: %v5 = load i16, ptr %in5, align 2
+; AVX2: LV: Found an estimated cost of 33 for VF 16 For instruction: %v6 = load i16, ptr %in6, align 2
+; AVX2: LV: Found an estimated cost of 33 for VF 16 For instruction: %v7 = load i16, ptr %in7, align 2
+; AVX2: LV: Found an estimated cost of 66 for VF 32 For instruction: %v0 = load i16, ptr %in0, align 2
+; AVX2: LV: Found an estimated cost of 66 for VF 32 For instruction: %v1 = load i16, ptr %in1, align 2
+; AVX2: LV: Found an estimated cost of 66 for VF 32 For instruction: %v2 = load i16, ptr %in2, align 2
+; AVX2: LV: Found an estimated cost of 66 for VF 32 For instruction: %v3 = load i16, ptr %in3, align 2
+; AVX2: LV: Found an estimated cost of 66 for VF 32 For instruction: %v4 = load i16, ptr %in4, align 2
+; AVX2: LV: Found an estimated cost of 66 for VF 32 For instruction: %v5 = load i16, ptr %in5, align 2
+; AVX2: LV: Found an estimated cost of 66 for VF 32 For instruction: %v6 = load i16, ptr %in6, align 2
+; AVX2: LV: Found an estimated cost of 66 for VF 32 For instruction: %v7 = load i16, ptr %in7, align 2
;
; AVX512DQ-LABEL: 'test'
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i16, ptr %in0, align 2
diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4-indices-01uu.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4-indices-01uu.ll
index c0ea210385dfda..28a6443efcfb9d 100644
--- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4-indices-01uu.ll
+++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-4-indices-01uu.ll
@@ -73,8 +73,8 @@ define void @test() {
; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load i32, ptr %in1, align 4
; AVX512: LV: Found an estimated cost of 50 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4
; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v1 = load i32, ptr %in1, align 4
-; AVX512: LV: Found an estimated cost of 160 for VF 64 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX512: LV: Found an estimated cost of 0 for VF 64 For instruction: %v1 = load i32, ptr %in1, align 4
+; AVX512: LV: Found an estimated cost of 80 for VF 64 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX512: LV: Found an estimated cost of 80 for VF 64 For instruction: %v1 = load i32, ptr %in1, align 4
;
entry:
br label %for.body
diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-5.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-5.ll
index 2a261ca4de4fa4..5cad7bf662c5b3 100644
--- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-5.ll
+++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-5.ll
@@ -82,26 +82,26 @@ define void @test() {
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i32, ptr %in2, align 4
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i32, ptr %in3, align 4
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i32, ptr %in4, align 4
-; AVX2: LV: Found an estimated cost of 20 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i32, ptr %in1, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load i32, ptr %in2, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load i32, ptr %in3, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load i32, ptr %in4, align 4
-; AVX2: LV: Found an estimated cost of 40 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load i32, ptr %in1, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load i32, ptr %in2, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load i32, ptr %in3, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load i32, ptr %in4, align 4
-; AVX2: LV: Found an estimated cost of 85 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i32, ptr %in1, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load i32, ptr %in2, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load i32, ptr %in3, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load i32, ptr %in4, align 4
-; AVX2: LV: Found an estimated cost of 170 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load i32, ptr %in1, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load i32, ptr %in2, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v3 = load i32, ptr %in3, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v4 = load i32, ptr %in4, align 4
+; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v1 = load i32, ptr %in1, align 4
+; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v2 = load i32, ptr %in2, align 4
+; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v3 = load i32, ptr %in3, align 4
+; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v4 = load i32, ptr %in4, align 4
+; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v1 = load i32, ptr %in1, align 4
+; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v2 = load i32, ptr %in2, align 4
+; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v3 = load i32, ptr %in3, align 4
+; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v4 = load i32, ptr %in4, align 4
+; AVX2: LV: Found an estimated cost of 17 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX2: LV: Found an estimated cost of 17 for VF 8 For instruction: %v1 = load i32, ptr %in1, align 4
+; AVX2: LV: Found an estimated cost of 17 for VF 8 For instruction: %v2 = load i32, ptr %in2, align 4
+; AVX2: LV: Found an estimated cost of 17 for VF 8 For instruction: %v3 = load i32, ptr %in3, align 4
+; AVX2: LV: Found an estimated cost of 17 for VF 8 For instruction: %v4 = load i32, ptr %in4, align 4
+; AVX2: LV: Found an estimated cost of 34 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX2: LV: Found an estimated cost of 34 for VF 16 For instruction: %v1 = load i32, ptr %in1, align 4
+; AVX2: LV: Found an estimated cost of 34 for VF 16 For instruction: %v2 = load i32, ptr %in2, align 4
+; AVX2: LV: Found an estimated cost of 34 for VF 16 For instruction: %v3 = load i32, ptr %in3, align 4
+; AVX2: LV: Found an estimated cost of 34 for VF 16 For instruction: %v4 = load i32, ptr %in4, align 4
;
; AVX512-LABEL: 'test'
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
@@ -139,11 +139,11 @@ define void @test() {
; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v2 = load i32, ptr %in2, align 4
; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v3 = load i32, ptr %in3, align 4
; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v4 = load i32, ptr %in4, align 4
-; AVX512: LV: Found an estimated cost of 400 for VF 64 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX512: LV: Found an estimated cost of 0 for VF 64 For instruction: %v1 = load i32, ptr %in1, align 4
-; AVX512: LV: Found an estimated cost of 0 for VF 64 For instruction: %v2 = load i32, ptr %in2, align 4
-; AVX512: LV: Found an estimated cost of 0 for VF 64 For instruction: %v3 = load i32, ptr %in3, align 4
-; AVX512: LV: Found an estimated cost of 0 for VF 64 For instruction: %v4 = load i32, ptr %in4, align 4
+; AVX512: LV: Found an estimated cost of 80 for VF 64 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX512: LV: Found an estimated cost of 80 for VF 64 For instruction: %v1 = load i32, ptr %in1, align 4
+; AVX512: LV: Found an estimated cost of 80 for VF 64 For instruction: %v2 = load i32, ptr %in2, align 4
+; AVX512: LV: Found an estimated cost of 80 for VF 64 For instruction: %v3 = load i32, ptr %in3, align 4
+; AVX512: LV: Found an estimated cost of 80 for VF 64 For instruction: %v4 = load i32, ptr %in4, align 4
;
entry:
br label %for.body
diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-7.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-7.ll
index 8bf3071d29fbe1..cfb83d4a023653 100644
--- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-7.ll
+++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-7.ll
@@ -108,34 +108,34 @@ define void @test() {
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i32, ptr %in4, align 4
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i32, ptr %in5, align 4
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i32, ptr %in6, align 4
-; AVX2: LV: Found an estimated cost of 28 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i32, ptr %in1, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load i32, ptr %in2, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load i32, ptr %in3, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load i32, ptr %in4, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v5 = load i32, ptr %in5, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v6 = load i32, ptr %in6, align 4
-; AVX2: LV: Found an estimated cost of 56 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load i32, ptr %in1, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load i32, ptr %in2, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load i32, ptr %in3, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load i32, ptr %in4, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v5 = load i32, ptr %in5, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v6 = load i32, ptr %in6, align 4
-; AVX2: LV: Found an estimated cost of 119 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i32, ptr %in1, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load i32, ptr %in2, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load i32, ptr %in3, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load i32, ptr %in4, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v5 = load i32, ptr %in5, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v6 = load i32, ptr %in6, align 4
-; AVX2: LV: Found an estimated cost of 238 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load i32, ptr %in1, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load i32, ptr %in2, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v3 = load i32, ptr %in3, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v4 = load i32, ptr %in4, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v5 = load i32, ptr %in5, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v6 = load i32, ptr %in6, align 4
+; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v1 = load i32, ptr %in1, align 4
+; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v2 = load i32, ptr %in2, align 4
+; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v3 = load i32, ptr %in3, align 4
+; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v4 = load i32, ptr %in4, align 4
+; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v5 = load i32, ptr %in5, align 4
+; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v6 = load i32, ptr %in6, align 4
+; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v1 = load i32, ptr %in1, align 4
+; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v2 = load i32, ptr %in2, align 4
+; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v3 = load i32, ptr %in3, align 4
+; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v4 = load i32, ptr %in4, align 4
+; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v5 = load i32, ptr %in5, align 4
+; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v6 = load i32, ptr %in6, align 4
+; AVX2: LV: Found an estimated cost of 17 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX2: LV: Found an estimated cost of 17 for VF 8 For instruction: %v1 = load i32, ptr %in1, align 4
+; AVX2: LV: Found an estimated cost of 17 for VF 8 For instruction: %v2 = load i32, ptr %in2, align 4
+; AVX2: LV: Found an estimated cost of 17 for VF 8 For instruction: %v3 = load i32, ptr %in3, align 4
+; AVX2: LV: Found an estimated cost of 17 for VF 8 For instruction: %v4 = load i32, ptr %in4, align 4
+; AVX2: LV: Found an estimated cost of 17 for VF 8 For instruction: %v5 = load i32, ptr %in5, align 4
+; AVX2: LV: Found an estimated cost of 17 for VF 8 For instruction: %v6 = load i32, ptr %in6, align 4
+; AVX2: LV: Found an estimated cost of 34 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX2: LV: Found an estimated cost of 34 for VF 16 For instruction: %v1 = load i32, ptr %in1, align 4
+; AVX2: LV: Found an estimated cost of 34 for VF 16 For instruction: %v2 = load i32, ptr %in2, align 4
+; AVX2: LV: Found an estimated cost of 34 for VF 16 For instruction: %v3 = load i32, ptr %in3, align 4
+; AVX2: LV: Found an estimated cost of 34 for VF 16 For instruction: %v4 = load i32, ptr %in4, align 4
+; AVX2: LV: Found an estimated cost of 34 for VF 16 For instruction: %v5 = load i32, ptr %in5, align 4
+; AVX2: LV: Found an estimated cost of 34 for VF 16 For instruction: %v6 = load i32, ptr %in6, align 4
;
; AVX512-LABEL: 'test'
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
@@ -180,13 +180,13 @@ define void @test() {
; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v4 = load i32, ptr %in4, align 4
; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v5 = load i32, ptr %in5, align 4
; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v6 = load i32, ptr %in6, align 4
-; AVX512: LV: Found an estimated cost of 280 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v1 = load i32, ptr %in1, align 4
-; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v2 = load i32, ptr %in2, align 4
-; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v3 = load i32, ptr %in3, align 4
-; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v4 = load i32, ptr %in4, align 4
-; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v5 = load i32, ptr %in5, align 4
-; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v6 = load i32, ptr %in6, align 4
+; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v1 = load i32, ptr %in1, align 4
+; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v2 = load i32, ptr %in2, align 4
+; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v3 = load i32, ptr %in3, align 4
+; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v4 = load i32, ptr %in4, align 4
+; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v5 = load i32, ptr %in5, align 4
+; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v6 = load i32, ptr %in6, align 4
;
entry:
br label %for.body
diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-8.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-8.ll
index 3182de2df058a8..775785462de474 100644
--- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-8.ll
+++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i32-stride-8.ll
@@ -121,22 +121,22 @@ define void @test() {
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i32, ptr %in5, align 4
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i32, ptr %in6, align 4
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load i32, ptr %in7, align 4
-; AVX2: LV: Found an estimated cost of 32 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i32, ptr %in1, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load i32, ptr %in2, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load i32, ptr %in3, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load i32, ptr %in4, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v5 = load i32, ptr %in5, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v6 = load i32, ptr %in6, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v7 = load i32, ptr %in7, align 4
-; AVX2: LV: Found an estimated cost of 64 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load i32, ptr %in1, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load i32, ptr %in2, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load i32, ptr %in3, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load i32, ptr %in4, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v5 = load i32, ptr %in5, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v6 = load i32, ptr %in6, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v7 = load i32, ptr %in7, align 4
+; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v1 = load i32, ptr %in1, align 4
+; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v2 = load i32, ptr %in2, align 4
+; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v3 = load i32, ptr %in3, align 4
+; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v4 = load i32, ptr %in4, align 4
+; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v5 = load i32, ptr %in5, align 4
+; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v6 = load i32, ptr %in6, align 4
+; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v7 = load i32, ptr %in7, align 4
+; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v1 = load i32, ptr %in1, align 4
+; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v2 = load i32, ptr %in2, align 4
+; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v3 = load i32, ptr %in3, align 4
+; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v4 = load i32, ptr %in4, align 4
+; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v5 = load i32, ptr %in5, align 4
+; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v6 = load i32, ptr %in6, align 4
+; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v7 = load i32, ptr %in7, align 4
; AVX2: LV: Found an estimated cost of 48 for VF 8 For instruction: %v0 = load i32, ptr %in0, align 4
; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i32, ptr %in1, align 4
; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load i32, ptr %in2, align 4
@@ -145,14 +145,14 @@ define void @test() {
; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v5 = load i32, ptr %in5, align 4
; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v6 = load i32, ptr %in6, align 4
; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v7 = load i32, ptr %in7, align 4
-; AVX2: LV: Found an estimated cost of 272 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load i32, ptr %in1, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load i32, ptr %in2, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v3 = load i32, ptr %in3, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v4 = load i32, ptr %in4, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v5 = load i32, ptr %in5, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v6 = load i32, ptr %in6, align 4
-; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v7 = load i32, ptr %in7, align 4
+; AVX2: LV: Found an estimated cost of 34 for VF 16 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX2: LV: Found an estimated cost of 34 for VF 16 For instruction: %v1 = load i32, ptr %in1, align 4
+; AVX2: LV: Found an estimated cost of 34 for VF 16 For instruction: %v2 = load i32, ptr %in2, align 4
+; AVX2: LV: Found an estimated cost of 34 for VF 16 For instruction: %v3 = load i32, ptr %in3, align 4
+; AVX2: LV: Found an estimated cost of 34 for VF 16 For instruction: %v4 = load i32, ptr %in4, align 4
+; AVX2: LV: Found an estimated cost of 34 for VF 16 For instruction: %v5 = load i32, ptr %in5, align 4
+; AVX2: LV: Found an estimated cost of 34 for VF 16 For instruction: %v6 = load i32, ptr %in6, align 4
+; AVX2: LV: Found an estimated cost of 34 for VF 16 For instruction: %v7 = load i32, ptr %in7, align 4
;
; AVX512-LABEL: 'test'
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i32, ptr %in0, align 4
@@ -203,14 +203,14 @@ define void @test() {
; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v5 = load i32, ptr %in5, align 4
; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v6 = load i32, ptr %in6, align 4
; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v7 = load i32, ptr %in7, align 4
-; AVX512: LV: Found an estimated cost of 320 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4
-; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v1 = load i32, ptr %in1, align 4
-; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v2 = load i32, ptr %in2, align 4
-; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v3 = load i32, ptr %in3, align 4
-; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v4 = load i32, ptr %in4, align 4
-; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v5 = load i32, ptr %in5, align 4
-; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v6 = load i32, ptr %in6, align 4
-; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v7 = load i32, ptr %in7, align 4
+; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v0 = load i32, ptr %in0, align 4
+; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v1 = load i32, ptr %in1, align 4
+; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v2 = load i32, ptr %in2, align 4
+; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v3 = load i32, ptr %in3, align 4
+; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v4 = load i32, ptr %in4, align 4
+; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v5 = load i32, ptr %in5, align 4
+; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v6 = load i32, ptr %in6, align 4
+; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v7 = load i32, ptr %in7, align 4
;
entry:
br label %for.body
diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-2.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-2.ll
index 27e2ee0392615b..cf350cc9f8307e 100644
--- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-2.ll
+++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-2.ll
@@ -71,10 +71,10 @@ define void @test() {
; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i64, ptr %in1, align 8
; AVX512: LV: Found an estimated cost of 22 for VF 16 For instruction: %v0 = load i64, ptr %in0, align 8
; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX512: LV: Found an estimated cost of 80 for VF 32 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX512: LV: Found an estimated cost of 160 for VF 64 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 64 For instruction: %v1 = load i64, ptr %in1, align 8
+; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v0 = load i64, ptr %in0, align 8
+; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v1 = load i64, ptr %in1, align 8
+; AVX512: LV: Found an estimated cost of 80 for VF 64 For instruction: %v0 = load i64, ptr %in0, align 8
+; AVX512: LV: Found an estimated cost of 80 for VF 64 For instruction: %v1 = load i64, ptr %in1, align 8
;
entry:
br label %for.body
diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-3.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-3.ll
index c37723257c1f75..9ca0d8c9d7e33b 100644
--- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-3.ll
+++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-3.ll
@@ -88,12 +88,12 @@ define void @test() {
; AVX512: LV: Found an estimated cost of 51 for VF 16 For instruction: %v0 = load i64, ptr %in0, align 8
; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load i64, ptr %in1, align 8
; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX512: LV: Found an estimated cost of 120 for VF 32 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX512: LV: Found an estimated cost of 240 for VF 64 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 64 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 64 For instruction: %v2 = load i64, ptr %in2, align 8
+; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v0 = load i64, ptr %in0, align 8
+; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v1 = load i64, ptr %in1, align 8
+; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v2 = load i64, ptr %in2, align 8
+; AVX512: LV: Found an estimated cost of 80 for VF 64 For instruction: %v0 = load i64, ptr %in0, align 8
+; AVX512: LV: Found an estimated cost of 80 for VF 64 For instruction: %v1 = load i64, ptr %in1, align 8
+; AVX512: LV: Found an estimated cost of 80 for VF 64 For instruction: %v2 = load i64, ptr %in2, align 8
;
entry:
br label %for.body
diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-4.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-4.ll
index 2eb7c5e93078f5..86ee6c8b30bda0 100644
--- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-4.ll
+++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-4.ll
@@ -107,14 +107,14 @@ define void @test() {
; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i64, ptr %in1, align 8
; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load i64, ptr %in2, align 8
; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load i64, ptr %in3, align 8
-; AVX512: LV: Found an estimated cost of 80 for VF 16 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v3 = load i64, ptr %in3, align 8
-; AVX512: LV: Found an estimated cost of 160 for VF 32 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v3 = load i64, ptr %in3, align 8
+; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v0 = load i64, ptr %in0, align 8
+; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v1 = load i64, ptr %in1, align 8
+; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v2 = load i64, ptr %in2, align 8
+; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v3 = load i64, ptr %in3, align 8
+; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v0 = load i64, ptr %in0, align 8
+; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v1 = load i64, ptr %in1, align 8
+; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v2 = load i64, ptr %in2, align 8
+; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v3 = load i64, ptr %in3, align 8
;
entry:
br label %for.body
diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-5.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-5.ll
index c11da4309737d2..f6143d4ae9f3b1 100644
--- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-5.ll
+++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-5.ll
@@ -72,21 +72,21 @@ define void @test() {
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i64, ptr %in2, align 8
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i64, ptr %in3, align 8
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i64, ptr %in4, align 8
-; AVX2: LV: Found an estimated cost of 20 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load i64, ptr %in3, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load i64, ptr %in4, align 8
-; AVX2: LV: Found an estimated cost of 45 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load i64, ptr %in3, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load i64, ptr %in4, align 8
-; AVX2: LV: Found an estimated cost of 90 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load i64, ptr %in3, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load i64, ptr %in4, align 8
+; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8
+; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v1 = load i64, ptr %in1, align 8
+; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v2 = load i64, ptr %in2, align 8
+; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v3 = load i64, ptr %in3, align 8
+; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v4 = load i64, ptr %in4, align 8
+; AVX2: LV: Found an estimated cost of 9 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8
+; AVX2: LV: Found an estimated cost of 9 for VF 4 For instruction: %v1 = load i64, ptr %in1, align 8
+; AVX2: LV: Found an estimated cost of 9 for VF 4 For instruction: %v2 = load i64, ptr %in2, align 8
+; AVX2: LV: Found an estimated cost of 9 for VF 4 For instruction: %v3 = load i64, ptr %in3, align 8
+; AVX2: LV: Found an estimated cost of 9 for VF 4 For instruction: %v4 = load i64, ptr %in4, align 8
+; AVX2: LV: Found an estimated cost of 18 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8
+; AVX2: LV: Found an estimated cost of 18 for VF 8 For instruction: %v1 = load i64, ptr %in1, align 8
+; AVX2: LV: Found an estimated cost of 18 for VF 8 For instruction: %v2 = load i64, ptr %in2, align 8
+; AVX2: LV: Found an estimated cost of 18 for VF 8 For instruction: %v3 = load i64, ptr %in3, align 8
+; AVX2: LV: Found an estimated cost of 18 for VF 8 For instruction: %v4 = load i64, ptr %in4, align 8
;
; AVX512-LABEL: 'test'
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
@@ -114,16 +114,16 @@ define void @test() {
; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load i64, ptr %in2, align 8
; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load i64, ptr %in3, align 8
; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load i64, ptr %in4, align 8
-; AVX512: LV: Found an estimated cost of 100 for VF 16 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v3 = load i64, ptr %in3, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v4 = load i64, ptr %in4, align 8
-; AVX512: LV: Found an estimated cost of 200 for VF 32 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v3 = load i64, ptr %in3, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v4 = load i64, ptr %in4, align 8
+; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v0 = load i64, ptr %in0, align 8
+; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v1 = load i64, ptr %in1, align 8
+; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v2 = load i64, ptr %in2, align 8
+; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v3 = load i64, ptr %in3, align 8
+; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v4 = load i64, ptr %in4, align 8
+; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v0 = load i64, ptr %in0, align 8
+; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v1 = load i64, ptr %in1, align 8
+; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v2 = load i64, ptr %in2, align 8
+; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v3 = load i64, ptr %in3, align 8
+; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v4 = load i64, ptr %in4, align 8
;
entry:
br label %for.body
diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-6.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-6.ll
index de57af6ebe3984..43dc53d683de39 100644
--- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-6.ll
+++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-6.ll
@@ -133,18 +133,18 @@ define void @test() {
; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load i64, ptr %in3, align 8
; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load i64, ptr %in4, align 8
; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v5 = load i64, ptr %in5, align 8
-; AVX512: LV: Found an estimated cost of 120 for VF 16 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v3 = load i64, ptr %in3, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v4 = load i64, ptr %in4, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v5 = load i64, ptr %in5, align 8
-; AVX512: LV: Found an estimated cost of 240 for VF 32 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v3 = load i64, ptr %in3, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v4 = load i64, ptr %in4, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v5 = load i64, ptr %in5, align 8
+; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v0 = load i64, ptr %in0, align 8
+; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v1 = load i64, ptr %in1, align 8
+; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v2 = load i64, ptr %in2, align 8
+; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v3 = load i64, ptr %in3, align 8
+; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v4 = load i64, ptr %in4, align 8
+; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v5 = load i64, ptr %in5, align 8
+; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v0 = load i64, ptr %in0, align 8
+; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v1 = load i64, ptr %in1, align 8
+; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v2 = load i64, ptr %in2, align 8
+; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v3 = load i64, ptr %in3, align 8
+; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v4 = load i64, ptr %in4, align 8
+; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v5 = load i64, ptr %in5, align 8
;
entry:
br label %for.body
diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-7.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-7.ll
index 949c1af1fdad3b..70ed74dcc26da3 100644
--- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-7.ll
+++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-7.ll
@@ -94,27 +94,27 @@ define void @test() {
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i64, ptr %in4, align 8
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i64, ptr %in5, align 8
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i64, ptr %in6, align 8
-; AVX2: LV: Found an estimated cost of 28 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load i64, ptr %in3, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load i64, ptr %in4, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v5 = load i64, ptr %in5, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v6 = load i64, ptr %in6, align 8
-; AVX2: LV: Found an estimated cost of 63 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load i64, ptr %in3, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load i64, ptr %in4, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v5 = load i64, ptr %in5, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v6 = load i64, ptr %in6, align 8
-; AVX2: LV: Found an estimated cost of 126 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load i64, ptr %in3, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load i64, ptr %in4, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v5 = load i64, ptr %in5, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v6 = load i64, ptr %in6, align 8
+; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8
+; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v1 = load i64, ptr %in1, align 8
+; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v2 = load i64, ptr %in2, align 8
+; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v3 = load i64, ptr %in3, align 8
+; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v4 = load i64, ptr %in4, align 8
+; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v5 = load i64, ptr %in5, align 8
+; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v6 = load i64, ptr %in6, align 8
+; AVX2: LV: Found an estimated cost of 9 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8
+; AVX2: LV: Found an estimated cost of 9 for VF 4 For instruction: %v1 = load i64, ptr %in1, align 8
+; AVX2: LV: Found an estimated cost of 9 for VF 4 For instruction: %v2 = load i64, ptr %in2, align 8
+; AVX2: LV: Found an estimated cost of 9 for VF 4 For instruction: %v3 = load i64, ptr %in3, align 8
+; AVX2: LV: Found an estimated cost of 9 for VF 4 For instruction: %v4 = load i64, ptr %in4, align 8
+; AVX2: LV: Found an estimated cost of 9 for VF 4 For instruction: %v5 = load i64, ptr %in5, align 8
+; AVX2: LV: Found an estimated cost of 9 for VF 4 For instruction: %v6 = load i64, ptr %in6, align 8
+; AVX2: LV: Found an estimated cost of 18 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8
+; AVX2: LV: Found an estimated cost of 18 for VF 8 For instruction: %v1 = load i64, ptr %in1, align 8
+; AVX2: LV: Found an estimated cost of 18 for VF 8 For instruction: %v2 = load i64, ptr %in2, align 8
+; AVX2: LV: Found an estimated cost of 18 for VF 8 For instruction: %v3 = load i64, ptr %in3, align 8
+; AVX2: LV: Found an estimated cost of 18 for VF 8 For instruction: %v4 = load i64, ptr %in4, align 8
+; AVX2: LV: Found an estimated cost of 18 for VF 8 For instruction: %v5 = load i64, ptr %in5, align 8
+; AVX2: LV: Found an estimated cost of 18 for VF 8 For instruction: %v6 = load i64, ptr %in6, align 8
;
; AVX512-LABEL: 'test'
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
@@ -152,20 +152,20 @@ define void @test() {
; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load i64, ptr %in4, align 8
; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v5 = load i64, ptr %in5, align 8
; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v6 = load i64, ptr %in6, align 8
-; AVX512: LV: Found an estimated cost of 140 for VF 16 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v3 = load i64, ptr %in3, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v4 = load i64, ptr %in4, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v5 = load i64, ptr %in5, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v6 = load i64, ptr %in6, align 8
-; AVX512: LV: Found an estimated cost of 280 for VF 32 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v3 = load i64, ptr %in3, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v4 = load i64, ptr %in4, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v5 = load i64, ptr %in5, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 32 For instruction: %v6 = load i64, ptr %in6, align 8
+; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v0 = load i64, ptr %in0, align 8
+; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v1 = load i64, ptr %in1, align 8
+; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v2 = load i64, ptr %in2, align 8
+; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v3 = load i64, ptr %in3, align 8
+; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v4 = load i64, ptr %in4, align 8
+; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v5 = load i64, ptr %in5, align 8
+; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v6 = load i64, ptr %in6, align 8
+; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v0 = load i64, ptr %in0, align 8
+; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v1 = load i64, ptr %in1, align 8
+; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v2 = load i64, ptr %in2, align 8
+; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v3 = load i64, ptr %in3, align 8
+; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v4 = load i64, ptr %in4, align 8
+; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v5 = load i64, ptr %in5, align 8
+; AVX512: LV: Found an estimated cost of 40 for VF 32 For instruction: %v6 = load i64, ptr %in6, align 8
;
entry:
br label %for.body
diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-8.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-8.ll
index 4388ccfbdcfc4f..401e4de111d73f 100644
--- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-8.ll
+++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i64-stride-8.ll
@@ -105,30 +105,30 @@ define void @test() {
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i64, ptr %in5, align 8
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i64, ptr %in6, align 8
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load i64, ptr %in7, align 8
-; AVX2: LV: Found an estimated cost of 32 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load i64, ptr %in3, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load i64, ptr %in4, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v5 = load i64, ptr %in5, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v6 = load i64, ptr %in6, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v7 = load i64, ptr %in7, align 8
-; AVX2: LV: Found an estimated cost of 72 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load i64, ptr %in3, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load i64, ptr %in4, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v5 = load i64, ptr %in5, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v6 = load i64, ptr %in6, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v7 = load i64, ptr %in7, align 8
-; AVX2: LV: Found an estimated cost of 144 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load i64, ptr %in3, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load i64, ptr %in4, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v5 = load i64, ptr %in5, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v6 = load i64, ptr %in6, align 8
-; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v7 = load i64, ptr %in7, align 8
+; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i64, ptr %in0, align 8
+; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v1 = load i64, ptr %in1, align 8
+; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v2 = load i64, ptr %in2, align 8
+; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v3 = load i64, ptr %in3, align 8
+; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v4 = load i64, ptr %in4, align 8
+; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v5 = load i64, ptr %in5, align 8
+; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v6 = load i64, ptr %in6, align 8
+; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v7 = load i64, ptr %in7, align 8
+; AVX2: LV: Found an estimated cost of 9 for VF 4 For instruction: %v0 = load i64, ptr %in0, align 8
+; AVX2: LV: Found an estimated cost of 9 for VF 4 For instruction: %v1 = load i64, ptr %in1, align 8
+; AVX2: LV: Found an estimated cost of 9 for VF 4 For instruction: %v2 = load i64, ptr %in2, align 8
+; AVX2: LV: Found an estimated cost of 9 for VF 4 For instruction: %v3 = load i64, ptr %in3, align 8
+; AVX2: LV: Found an estimated cost of 9 for VF 4 For instruction: %v4 = load i64, ptr %in4, align 8
+; AVX2: LV: Found an estimated cost of 9 for VF 4 For instruction: %v5 = load i64, ptr %in5, align 8
+; AVX2: LV: Found an estimated cost of 9 for VF 4 For instruction: %v6 = load i64, ptr %in6, align 8
+; AVX2: LV: Found an estimated cost of 9 for VF 4 For instruction: %v7 = load i64, ptr %in7, align 8
+; AVX2: LV: Found an estimated cost of 18 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8
+; AVX2: LV: Found an estimated cost of 18 for VF 8 For instruction: %v1 = load i64, ptr %in1, align 8
+; AVX2: LV: Found an estimated cost of 18 for VF 8 For instruction: %v2 = load i64, ptr %in2, align 8
+; AVX2: LV: Found an estimated cost of 18 for VF 8 For instruction: %v3 = load i64, ptr %in3, align 8
+; AVX2: LV: Found an estimated cost of 18 for VF 8 For instruction: %v4 = load i64, ptr %in4, align 8
+; AVX2: LV: Found an estimated cost of 18 for VF 8 For instruction: %v5 = load i64, ptr %in5, align 8
+; AVX2: LV: Found an estimated cost of 18 for VF 8 For instruction: %v6 = load i64, ptr %in6, align 8
+; AVX2: LV: Found an estimated cost of 18 for VF 8 For instruction: %v7 = load i64, ptr %in7, align 8
;
; AVX512-LABEL: 'test'
; AVX512: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i64, ptr %in0, align 8
@@ -163,22 +163,22 @@ define void @test() {
; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v5 = load i64, ptr %in5, align 8
; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v6 = load i64, ptr %in6, align 8
; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: %v7 = load i64, ptr %in7, align 8
-; AVX512: LV: Found an estimated cost of 80 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load i64, ptr %in3, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load i64, ptr %in4, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v5 = load i64, ptr %in5, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v6 = load i64, ptr %in6, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: %v7 = load i64, ptr %in7, align 8
-; AVX512: LV: Found an estimated cost of 160 for VF 16 For instruction: %v0 = load i64, ptr %in0, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load i64, ptr %in1, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load i64, ptr %in2, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v3 = load i64, ptr %in3, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v4 = load i64, ptr %in4, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v5 = load i64, ptr %in5, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v6 = load i64, ptr %in6, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: %v7 = load i64, ptr %in7, align 8
+; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: %v0 = load i64, ptr %in0, align 8
+; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: %v1 = load i64, ptr %in1, align 8
+; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: %v2 = load i64, ptr %in2, align 8
+; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: %v3 = load i64, ptr %in3, align 8
+; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: %v4 = load i64, ptr %in4, align 8
+; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: %v5 = load i64, ptr %in5, align 8
+; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: %v6 = load i64, ptr %in6, align 8
+; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: %v7 = load i64, ptr %in7, align 8
+; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v0 = load i64, ptr %in0, align 8
+; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v1 = load i64, ptr %in1, align 8
+; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v2 = load i64, ptr %in2, align 8
+; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v3 = load i64, ptr %in3, align 8
+; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v4 = load i64, ptr %in4, align 8
+; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v5 = load i64, ptr %in5, align 8
+; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v6 = load i64, ptr %in6, align 8
+; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: %v7 = load i64, ptr %in7, align 8
;
entry:
br label %for.body
diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-5.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-5.ll
index 6078fb440f9d13..ef3c80c27550a3 100644
--- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-5.ll
+++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-5.ll
@@ -93,31 +93,31 @@ define void @test() {
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v2 = load i8, ptr %in2, align 1
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v3 = load i8, ptr %in3, align 1
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX2: LV: Found an estimated cost of 20 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX2: LV: Found an estimated cost of 40 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX2: LV: Found an estimated cost of 80 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX2: LV: Found an estimated cost of 160 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX2: LV: Found an estimated cost of 325 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX2: LV: Found an estimated cost of 0 for VF 32 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX2: LV: Found an estimated cost of 0 for VF 32 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX2: LV: Found an estimated cost of 0 for VF 32 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX2: LV: Found an estimated cost of 0 for VF 32 For instruction: %v4 = load i8, ptr %in4, align 1
+; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1
+; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v1 = load i8, ptr %in1, align 1
+; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v2 = load i8, ptr %in2, align 1
+; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v3 = load i8, ptr %in3, align 1
+; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v4 = load i8, ptr %in4, align 1
+; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1
+; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v1 = load i8, ptr %in1, align 1
+; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v2 = load i8, ptr %in2, align 1
+; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v3 = load i8, ptr %in3, align 1
+; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v4 = load i8, ptr %in4, align 1
+; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1
+; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v1 = load i8, ptr %in1, align 1
+; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v2 = load i8, ptr %in2, align 1
+; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v3 = load i8, ptr %in3, align 1
+; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v4 = load i8, ptr %in4, align 1
+; AVX2: LV: Found an estimated cost of 32 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1
+; AVX2: LV: Found an estimated cost of 32 for VF 16 For instruction: %v1 = load i8, ptr %in1, align 1
+; AVX2: LV: Found an estimated cost of 32 for VF 16 For instruction: %v2 = load i8, ptr %in2, align 1
+; AVX2: LV: Found an estimated cost of 32 for VF 16 For instruction: %v3 = load i8, ptr %in3, align 1
+; AVX2: LV: Found an estimated cost of 32 for VF 16 For instruction: %v4 = load i8, ptr %in4, align 1
+; AVX2: LV: Found an estimated cost of 65 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1
+; AVX2: LV: Found an estimated cost of 65 for VF 32 For instruction: %v1 = load i8, ptr %in1, align 1
+; AVX2: LV: Found an estimated cost of 65 for VF 32 For instruction: %v2 = load i8, ptr %in2, align 1
+; AVX2: LV: Found an estimated cost of 65 for VF 32 For instruction: %v3 = load i8, ptr %in3, align 1
+; AVX2: LV: Found an estimated cost of 65 for VF 32 For instruction: %v4 = load i8, ptr %in4, align 1
;
; AVX512DQ-LABEL: 'test'
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-7.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-7.ll
index 778a4e7dfd7d9d..8e7c316b69b3ac 100644
--- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-7.ll
+++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-7.ll
@@ -123,41 +123,41 @@ define void @test() {
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v4 = load i8, ptr %in4, align 1
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i8, ptr %in5, align 1
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i8, ptr %in6, align 1
-; AVX2: LV: Found an estimated cost of 28 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v5 = load i8, ptr %in5, align 1
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v6 = load i8, ptr %in6, align 1
-; AVX2: LV: Found an estimated cost of 56 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v5 = load i8, ptr %in5, align 1
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v6 = load i8, ptr %in6, align 1
-; AVX2: LV: Found an estimated cost of 112 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v5 = load i8, ptr %in5, align 1
-; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v6 = load i8, ptr %in6, align 1
-; AVX2: LV: Found an estimated cost of 224 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v5 = load i8, ptr %in5, align 1
-; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v6 = load i8, ptr %in6, align 1
-; AVX2: LV: Found an estimated cost of 455 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX2: LV: Found an estimated cost of 0 for VF 32 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX2: LV: Found an estimated cost of 0 for VF 32 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX2: LV: Found an estimated cost of 0 for VF 32 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX2: LV: Found an estimated cost of 0 for VF 32 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX2: LV: Found an estimated cost of 0 for VF 32 For instruction: %v5 = load i8, ptr %in5, align 1
-; AVX2: LV: Found an estimated cost of 0 for VF 32 For instruction: %v6 = load i8, ptr %in6, align 1
+; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1
+; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v1 = load i8, ptr %in1, align 1
+; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v2 = load i8, ptr %in2, align 1
+; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v3 = load i8, ptr %in3, align 1
+; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v4 = load i8, ptr %in4, align 1
+; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v5 = load i8, ptr %in5, align 1
+; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v6 = load i8, ptr %in6, align 1
+; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1
+; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v1 = load i8, ptr %in1, align 1
+; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v2 = load i8, ptr %in2, align 1
+; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v3 = load i8, ptr %in3, align 1
+; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v4 = load i8, ptr %in4, align 1
+; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v5 = load i8, ptr %in5, align 1
+; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v6 = load i8, ptr %in6, align 1
+; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1
+; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v1 = load i8, ptr %in1, align 1
+; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v2 = load i8, ptr %in2, align 1
+; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v3 = load i8, ptr %in3, align 1
+; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v4 = load i8, ptr %in4, align 1
+; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v5 = load i8, ptr %in5, align 1
+; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v6 = load i8, ptr %in6, align 1
+; AVX2: LV: Found an estimated cost of 32 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1
+; AVX2: LV: Found an estimated cost of 32 for VF 16 For instruction: %v1 = load i8, ptr %in1, align 1
+; AVX2: LV: Found an estimated cost of 32 for VF 16 For instruction: %v2 = load i8, ptr %in2, align 1
+; AVX2: LV: Found an estimated cost of 32 for VF 16 For instruction: %v3 = load i8, ptr %in3, align 1
+; AVX2: LV: Found an estimated cost of 32 for VF 16 For instruction: %v4 = load i8, ptr %in4, align 1
+; AVX2: LV: Found an estimated cost of 32 for VF 16 For instruction: %v5 = load i8, ptr %in5, align 1
+; AVX2: LV: Found an estimated cost of 32 for VF 16 For instruction: %v6 = load i8, ptr %in6, align 1
+; AVX2: LV: Found an estimated cost of 65 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1
+; AVX2: LV: Found an estimated cost of 65 for VF 32 For instruction: %v1 = load i8, ptr %in1, align 1
+; AVX2: LV: Found an estimated cost of 65 for VF 32 For instruction: %v2 = load i8, ptr %in2, align 1
+; AVX2: LV: Found an estimated cost of 65 for VF 32 For instruction: %v3 = load i8, ptr %in3, align 1
+; AVX2: LV: Found an estimated cost of 65 for VF 32 For instruction: %v4 = load i8, ptr %in4, align 1
+; AVX2: LV: Found an estimated cost of 65 for VF 32 For instruction: %v5 = load i8, ptr %in5, align 1
+; AVX2: LV: Found an estimated cost of 65 for VF 32 For instruction: %v6 = load i8, ptr %in6, align 1
;
; AVX512DQ-LABEL: 'test'
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-8.ll b/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-8.ll
index a230b5a0b1f2b7..752cc229922bea 100644
--- a/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-8.ll
+++ b/llvm/test/Analysis/CostModel/X86/interleaved-load-i8-stride-8.ll
@@ -138,46 +138,46 @@ define void @test() {
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v5 = load i8, ptr %in5, align 1
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v6 = load i8, ptr %in6, align 1
; AVX2: LV: Found an estimated cost of 1 for VF 1 For instruction: %v7 = load i8, ptr %in7, align 1
-; AVX2: LV: Found an estimated cost of 32 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v5 = load i8, ptr %in5, align 1
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v6 = load i8, ptr %in6, align 1
-; AVX2: LV: Found an estimated cost of 0 for VF 2 For instruction: %v7 = load i8, ptr %in7, align 1
-; AVX2: LV: Found an estimated cost of 64 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v5 = load i8, ptr %in5, align 1
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v6 = load i8, ptr %in6, align 1
-; AVX2: LV: Found an estimated cost of 0 for VF 4 For instruction: %v7 = load i8, ptr %in7, align 1
-; AVX2: LV: Found an estimated cost of 128 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v5 = load i8, ptr %in5, align 1
-; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v6 = load i8, ptr %in6, align 1
-; AVX2: LV: Found an estimated cost of 0 for VF 8 For instruction: %v7 = load i8, ptr %in7, align 1
-; AVX2: LV: Found an estimated cost of 256 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v5 = load i8, ptr %in5, align 1
-; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v6 = load i8, ptr %in6, align 1
-; AVX2: LV: Found an estimated cost of 0 for VF 16 For instruction: %v7 = load i8, ptr %in7, align 1
-; AVX2: LV: Found an estimated cost of 520 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1
-; AVX2: LV: Found an estimated cost of 0 for VF 32 For instruction: %v1 = load i8, ptr %in1, align 1
-; AVX2: LV: Found an estimated cost of 0 for VF 32 For instruction: %v2 = load i8, ptr %in2, align 1
-; AVX2: LV: Found an estimated cost of 0 for VF 32 For instruction: %v3 = load i8, ptr %in3, align 1
-; AVX2: LV: Found an estimated cost of 0 for VF 32 For instruction: %v4 = load i8, ptr %in4, align 1
-; AVX2: LV: Found an estimated cost of 0 for VF 32 For instruction: %v5 = load i8, ptr %in5, align 1
-; AVX2: LV: Found an estimated cost of 0 for VF 32 For instruction: %v6 = load i8, ptr %in6, align 1
-; AVX2: LV: Found an estimated cost of 0 for VF 32 For instruction: %v7 = load i8, ptr %in7, align 1
+; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v0 = load i8, ptr %in0, align 1
+; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v1 = load i8, ptr %in1, align 1
+; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v2 = load i8, ptr %in2, align 1
+; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v3 = load i8, ptr %in3, align 1
+; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v4 = load i8, ptr %in4, align 1
+; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v5 = load i8, ptr %in5, align 1
+; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v6 = load i8, ptr %in6, align 1
+; AVX2: LV: Found an estimated cost of 4 for VF 2 For instruction: %v7 = load i8, ptr %in7, align 1
+; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v0 = load i8, ptr %in0, align 1
+; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v1 = load i8, ptr %in1, align 1
+; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v2 = load i8, ptr %in2, align 1
+; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v3 = load i8, ptr %in3, align 1
+; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v4 = load i8, ptr %in4, align 1
+; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v5 = load i8, ptr %in5, align 1
+; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v6 = load i8, ptr %in6, align 1
+; AVX2: LV: Found an estimated cost of 8 for VF 4 For instruction: %v7 = load i8, ptr %in7, align 1
+; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v0 = load i8, ptr %in0, align 1
+; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v1 = load i8, ptr %in1, align 1
+; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v2 = load i8, ptr %in2, align 1
+; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v3 = load i8, ptr %in3, align 1
+; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v4 = load i8, ptr %in4, align 1
+; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v5 = load i8, ptr %in5, align 1
+; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v6 = load i8, ptr %in6, align 1
+; AVX2: LV: Found an estimated cost of 16 for VF 8 For instruction: %v7 = load i8, ptr %in7, align 1
+; AVX2: LV: Found an estimated cost of 32 for VF 16 For instruction: %v0 = load i8, ptr %in0, align 1
+; AVX2: LV: Found an estimated cost of 32 for VF 16 For instruction: %v1 = load i8, ptr %in1, align 1
+; AVX2: LV: Found an estimated cost of 32 for VF 16 For instruction: %v2 = load i8, ptr %in2, align 1
+; AVX2: LV: Found an estimated cost of 32 for VF 16 For instruction: %v3 = load i8, ptr %in3, align 1
+; AVX2: LV: Found an estimated cost of 32 for VF 16 For instruction: %v4 = load i8, ptr %in4, align 1
+; AVX2: LV: Found an estimated cost of 32 for VF 16 For instruction: %v5 = load i8, ptr %in5, align 1
+; AVX2: LV: Found an estimated cost of 32 for VF 16 For instruction: %v6 = load i8, ptr %in6, align 1
+; AVX2: LV: Found an estimated cost of 32 for VF 16 For instruction: %v7 = load i8, ptr %in7, align 1
+; AVX2: LV: Found an estimated cost of 65 for VF 32 For instruction: %v0 = load i8, ptr %in0, align 1
+; AVX2: LV: Found an estimated cost of 65 for VF 32 For instruction: %v1 = load i8, ptr %in1, align 1
+; AVX2: LV: Found an estimated cost of 65 for VF 32 For instruction: %v2 = load i8, ptr %in2, align 1
+; AVX2: LV: Found an estimated cost of 65 for VF 32 For instruction: %v3 = load i8, ptr %in3, align 1
+; AVX2: LV: Found an estimated cost of 65 for VF 32 For instruction: %v4 = load i8, ptr %in4, align 1
+; AVX2: LV: Found an estimated cost of 65 for VF 32 For instruction: %v5 = load i8, ptr %in5, align 1
+; AVX2: LV: Found an estimated cost of 65 for VF 32 For instruction: %v6 = load i8, ptr %in6, align 1
+; AVX2: LV: Found an estimated cost of 65 for VF 32 For instruction: %v7 = load i8, ptr %in7, align 1
;
; AVX512DQ-LABEL: 'test'
; AVX512DQ: LV: Found an estimated cost of 1 for VF 1 For instruction: %v0 = load i8, ptr %in0, align 1
diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-8.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-8.ll
index c1a66c1a41d74f..ed2bb3f750b01d 100644
--- a/llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-8.ll
+++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-f64-stride-8.ll
@@ -163,22 +163,22 @@ define void @test() {
; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: store double %v5, ptr %out5, align 8
; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: store double %v6, ptr %out6, align 8
; AVX512: LV: Found an estimated cost of 46 for VF 4 For instruction: store double %v7, ptr %out7, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: store double %v0, ptr %out0, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: store double %v1, ptr %out1, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: store double %v2, ptr %out2, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: store double %v3, ptr %out3, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: store double %v4, ptr %out4, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: store double %v5, ptr %out5, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: store double %v6, ptr %out6, align 8
-; AVX512: LV: Found an estimated cost of 80 for VF 8 For instruction: store double %v7, ptr %out7, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: store double %v0, ptr %out0, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: store double %v1, ptr %out1, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: store double %v2, ptr %out2, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: store double %v3, ptr %out3, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: store double %v4, ptr %out4, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: store double %v5, ptr %out5, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: store double %v6, ptr %out6, align 8
-; AVX512: LV: Found an estimated cost of 160 for VF 16 For instruction: store double %v7, ptr %out7, align 8
+; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: store double %v0, ptr %out0, align 8
+; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: store double %v1, ptr %out1, align 8
+; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: store double %v2, ptr %out2, align 8
+; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: store double %v3, ptr %out3, align 8
+; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: store double %v4, ptr %out4, align 8
+; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: store double %v5, ptr %out5, align 8
+; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: store double %v6, ptr %out6, align 8
+; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: store double %v7, ptr %out7, align 8
+; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: store double %v0, ptr %out0, align 8
+; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: store double %v1, ptr %out1, align 8
+; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: store double %v2, ptr %out2, align 8
+; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: store double %v3, ptr %out3, align 8
+; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: store double %v4, ptr %out4, align 8
+; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: store double %v5, ptr %out5, align 8
+; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: store double %v6, ptr %out6, align 8
+; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: store double %v7, ptr %out7, align 8
;
entry:
br label %for.body
diff --git a/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-8.ll b/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-8.ll
index 7be9577960efe3..a5398771041291 100644
--- a/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-8.ll
+++ b/llvm/test/Analysis/CostModel/X86/interleaved-store-i64-stride-8.ll
@@ -163,22 +163,22 @@ define void @test() {
; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: store i64 %v5, ptr %out5, align 8
; AVX512: LV: Found an estimated cost of 0 for VF 4 For instruction: store i64 %v6, ptr %out6, align 8
; AVX512: LV: Found an estimated cost of 46 for VF 4 For instruction: store i64 %v7, ptr %out7, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: store i64 %v0, ptr %out0, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: store i64 %v1, ptr %out1, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: store i64 %v2, ptr %out2, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: store i64 %v3, ptr %out3, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: store i64 %v4, ptr %out4, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: store i64 %v5, ptr %out5, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 8 For instruction: store i64 %v6, ptr %out6, align 8
-; AVX512: LV: Found an estimated cost of 80 for VF 8 For instruction: store i64 %v7, ptr %out7, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: store i64 %v0, ptr %out0, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: store i64 %v1, ptr %out1, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: store i64 %v2, ptr %out2, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: store i64 %v3, ptr %out3, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: store i64 %v4, ptr %out4, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: store i64 %v5, ptr %out5, align 8
-; AVX512: LV: Found an estimated cost of 0 for VF 16 For instruction: store i64 %v6, ptr %out6, align 8
-; AVX512: LV: Found an estimated cost of 160 for VF 16 For instruction: store i64 %v7, ptr %out7, align 8
+; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: store i64 %v0, ptr %out0, align 8
+; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: store i64 %v1, ptr %out1, align 8
+; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: store i64 %v2, ptr %out2, align 8
+; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: store i64 %v3, ptr %out3, align 8
+; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: store i64 %v4, ptr %out4, align 8
+; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: store i64 %v5, ptr %out5, align 8
+; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: store i64 %v6, ptr %out6, align 8
+; AVX512: LV: Found an estimated cost of 10 for VF 8 For instruction: store i64 %v7, ptr %out7, align 8
+; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: store i64 %v0, ptr %out0, align 8
+; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: store i64 %v1, ptr %out1, align 8
+; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: store i64 %v2, ptr %out2, align 8
+; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: store i64 %v3, ptr %out3, align 8
+; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: store i64 %v4, ptr %out4, align 8
+; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: store i64 %v5, ptr %out5, align 8
+; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: store i64 %v6, ptr %out6, align 8
+; AVX512: LV: Found an estimated cost of 20 for VF 16 For instruction: store i64 %v7, ptr %out7, align 8
;
entry:
br label %for.body
diff --git a/llvm/test/Analysis/CostModel/X86/masked-interleaved-store-i16.ll b/llvm/test/Analysis/CostModel/X86/masked-interleaved-store-i16.ll
index 13a844230f89d9..41dd266d0a87ac 100644
--- a/llvm/test/Analysis/CostModel/X86/masked-interleaved-store-i16.ll
+++ b/llvm/test/Analysis/CostModel/X86/masked-interleaved-store-i16.ll
@@ -38,8 +38,8 @@ define void @test1(ptr noalias nocapture %points, ptr noalias nocapture readonly
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %2, ptr %arrayidx7, align 2
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %0, ptr %arrayidx2, align 2
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %2, ptr %arrayidx7, align 2
-; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 2 For instruction: store i16 %0, ptr %arrayidx2, align 2
-; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 12 for VF 2 For instruction: store i16 %2, ptr %arrayidx7, align 2
+; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 6 for VF 2 For instruction: store i16 %0, ptr %arrayidx2, align 2
+; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 6 for VF 2 For instruction: store i16 %2, ptr %arrayidx7, align 2
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 4 For instruction: store i16 %0, ptr %arrayidx2, align 2
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 14 for VF 4 For instruction: store i16 %2, ptr %arrayidx7, align 2
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 8 For instruction: store i16 %0, ptr %arrayidx2, align 2
@@ -99,8 +99,8 @@ define void @test2(ptr noalias nocapture %points, i32 %numPoints, ptr noalias no
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %2, ptr %arrayidx7, align 2
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %0, ptr %arrayidx2, align 2
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 1 for VF 1 For instruction: store i16 %2, ptr %arrayidx7, align 2
-; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 2 For instruction: store i16 %0, ptr %arrayidx2, align 2
-; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 10 for VF 2 For instruction: store i16 %2, ptr %arrayidx7, align 2
+; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 5 for VF 2 For instruction: store i16 %0, ptr %arrayidx2, align 2
+; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 5 for VF 2 For instruction: store i16 %2, ptr %arrayidx7, align 2
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 4 For instruction: store i16 %0, ptr %arrayidx2, align 2
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 14 for VF 4 For instruction: store i16 %2, ptr %arrayidx7, align 2
; ENABLED_MASKED_STRIDED: LV: Found an estimated cost of 0 for VF 8 For instruction: store i16 %0, ptr %arrayidx2, align 2
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/interleaved-vs-scalar.ll b/llvm/test/Transforms/LoopVectorize/AArch64/interleaved-vs-scalar.ll
index 430b0db87b8846..d1d1b0ab100fb8 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/interleaved-vs-scalar.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/interleaved-vs-scalar.ll
@@ -7,8 +7,8 @@ target triple = "aarch64--linux-gnu"
%pair = type { i8, i8 }
; CHECK-LABEL: test
-; CHECK: Found an estimated cost of 16 for VF 2 For instruction: {{.*}} load i8
-; CHECK: Found an estimated cost of 0 for VF 2 For instruction: {{.*}} load i8
+; CHECK: Found an estimated cost of 8 for VF 2 For instruction: {{.*}} load i8
+; CHECK: Found an estimated cost of 8 for VF 2 For instruction: {{.*}} load i8
; CHECK-LABEL: entry:
; CHECK-LABEL: vector.body:
; CHECK: [[LOAD1:%.*]] = load i8
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/interleaved_cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/interleaved_cost.ll
index 21af9ae801e16c..dec124b55cd4e0 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/interleaved_cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/interleaved_cost.ll
@@ -168,8 +168,8 @@ entry:
; gaps.
;
; VF_2-LABEL: Checking a loop in 'i64_factor_8'
-; VF_2: Found an estimated cost of 16 for VF 2 For instruction: %tmp2 = load i64, ptr %tmp0, align 8
-; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp3 = load i64, ptr %tmp1, align 8
+; VF_2: Found an estimated cost of 8 for VF 2 For instruction: %tmp2 = load i64, ptr %tmp0, align 8
+; VF_2-NEXT: Found an estimated cost of 8 for VF 2 For instruction: %tmp3 = load i64, ptr %tmp1, align 8
; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: store i64 %tmp2, ptr %tmp0, align 8
; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: store i64 %tmp3, ptr %tmp1, align 8
for.body:
diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-interleaved-cost.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-interleaved-cost.ll
index c7a04e3669ed62..976c6a9a570af9 100644
--- a/llvm/test/Transforms/LoopVectorize/ARM/mve-interleaved-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-interleaved-cost.ll
@@ -15,10 +15,10 @@ entry:
br label %for.body
; VF_2-LABEL: Checking a loop in 'i8_factor_2'
-; VF_2: Found an estimated cost of 24 for VF 2 For instruction: %tmp2 = load i8, ptr %tmp0, align 1
-; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp3 = load i8, ptr %tmp1, align 1
-; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i8 %tmp2, ptr %tmp0, align 1
-; VF_2-NEXT: Found an estimated cost of 24 for VF 2 For instruction: store i8 %tmp3, ptr %tmp1, align 1
+; VF_2: Found an estimated cost of 12 for VF 2 For instruction: %tmp2 = load i8, ptr %tmp0, align 1
+; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: %tmp3 = load i8, ptr %tmp1, align 1
+; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: store i8 %tmp2, ptr %tmp0, align 1
+; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: store i8 %tmp3, ptr %tmp1, align 1
; VF_4-LABEL: Checking a loop in 'i8_factor_2'
; VF_4: Found an estimated cost of 4 for VF 4 For instruction: %tmp2 = load i8, ptr %tmp0, align 1
; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load i8, ptr %tmp1, align 1
@@ -56,10 +56,10 @@ entry:
br label %for.body
; VF_2-LABEL: Checking a loop in 'i16_factor_2'
-; VF_2: Found an estimated cost of 24 for VF 2 For instruction: %tmp2 = load i16, ptr %tmp0, align 2
-; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp3 = load i16, ptr %tmp1, align 2
-; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i16 %tmp2, ptr %tmp0, align 2
-; VF_2-NEXT: Found an estimated cost of 24 for VF 2 For instruction: store i16 %tmp3, ptr %tmp1, align 2
+; VF_2: Found an estimated cost of 12 for VF 2 For instruction: %tmp2 = load i16, ptr %tmp0, align 2
+; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: %tmp3 = load i16, ptr %tmp1, align 2
+; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: store i16 %tmp2, ptr %tmp0, align 2
+; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: store i16 %tmp3, ptr %tmp1, align 2
; VF_4-LABEL: Checking a loop in 'i16_factor_2'
; VF_4: Found an estimated cost of 4 for VF 4 For instruction: %tmp2 = load i16, ptr %tmp0, align 2
; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load i16, ptr %tmp1, align 2
@@ -97,10 +97,10 @@ entry:
br label %for.body
; VF_2-LABEL: Checking a loop in 'i32_factor_2'
-; VF_2: Found an estimated cost of 24 for VF 2 For instruction: %tmp2 = load i32, ptr %tmp0, align 4
-; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp3 = load i32, ptr %tmp1, align 4
-; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i32 %tmp2, ptr %tmp0, align 4
-; VF_2-NEXT: Found an estimated cost of 24 for VF 2 For instruction: store i32 %tmp3, ptr %tmp1, align 4
+; VF_2: Found an estimated cost of 12 for VF 2 For instruction: %tmp2 = load i32, ptr %tmp0, align 4
+; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: %tmp3 = load i32, ptr %tmp1, align 4
+; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: store i32 %tmp2, ptr %tmp0, align 4
+; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: store i32 %tmp3, ptr %tmp1, align 4
; VF_4-LABEL: Checking a loop in 'i32_factor_2'
; VF_4: Found an estimated cost of 4 for VF 4 For instruction: %tmp2 = load i32, ptr %tmp0, align 4
; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load i32, ptr %tmp1, align 4
@@ -138,25 +138,25 @@ entry:
br label %for.body
; VF_2-LABEL: Checking a loop in 'i64_factor_2'
-; VF_2: Found an estimated cost of 44 for VF 2 For instruction: %tmp2 = load i64, ptr %tmp0, align 8
-; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp3 = load i64, ptr %tmp1, align 8
-; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i64 %tmp2, ptr %tmp0, align 8
-; VF_2-NEXT: Found an estimated cost of 44 for VF 2 For instruction: store i64 %tmp3, ptr %tmp1, align 8
+; VF_2: Found an estimated cost of 22 for VF 2 For instruction: %tmp2 = load i64, ptr %tmp0, align 8
+; VF_2-NEXT: Found an estimated cost of 22 for VF 2 For instruction: %tmp3 = load i64, ptr %tmp1, align 8
+; VF_2-NEXT: Found an estimated cost of 22 for VF 2 For instruction: store i64 %tmp2, ptr %tmp0, align 8
+; VF_2-NEXT: Found an estimated cost of 22 for VF 2 For instruction: store i64 %tmp3, ptr %tmp1, align 8
; VF_4-LABEL: Checking a loop in 'i64_factor_2'
-; VF_4: Found an estimated cost of 88 for VF 4 For instruction: %tmp2 = load i64, ptr %tmp0, align 8
-; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load i64, ptr %tmp1, align 8
-; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i64 %tmp2, ptr %tmp0, align 8
-; VF_4-NEXT: Found an estimated cost of 88 for VF 4 For instruction: store i64 %tmp3, ptr %tmp1, align 8
+; VF_4: Found an estimated cost of 44 for VF 4 For instruction: %tmp2 = load i64, ptr %tmp0, align 8
+; VF_4-NEXT: Found an estimated cost of 44 for VF 4 For instruction: %tmp3 = load i64, ptr %tmp1, align 8
+; VF_4-NEXT: Found an estimated cost of 44 for VF 4 For instruction: store i64 %tmp2, ptr %tmp0, align 8
+; VF_4-NEXT: Found an estimated cost of 44 for VF 4 For instruction: store i64 %tmp3, ptr %tmp1, align 8
; VF_8-LABEL: Checking a loop in 'i64_factor_2'
-; VF_8: Found an estimated cost of 176 for VF 8 For instruction: %tmp2 = load i64, ptr %tmp0, align 8
-; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load i64, ptr %tmp1, align 8
-; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i64 %tmp2, ptr %tmp0, align 8
-; VF_8-NEXT: Found an estimated cost of 176 for VF 8 For instruction: store i64 %tmp3, ptr %tmp1, align 8
+; VF_8: Found an estimated cost of 88 for VF 8 For instruction: %tmp2 = load i64, ptr %tmp0, align 8
+; VF_8-NEXT: Found an estimated cost of 88 for VF 8 For instruction: %tmp3 = load i64, ptr %tmp1, align 8
+; VF_8-NEXT: Found an estimated cost of 88 for VF 8 For instruction: store i64 %tmp2, ptr %tmp0, align 8
+; VF_8-NEXT: Found an estimated cost of 88 for VF 8 For instruction: store i64 %tmp3, ptr %tmp1, align 8
; VF_16-LABEL: Checking a loop in 'i64_factor_2'
-; VF_16: Found an estimated cost of 352 for VF 16 For instruction: %tmp2 = load i64, ptr %tmp0, align 8
-; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load i64, ptr %tmp1, align 8
-; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i64 %tmp2, ptr %tmp0, align 8
-; VF_16-NEXT: Found an estimated cost of 352 for VF 16 For instruction: store i64 %tmp3, ptr %tmp1, align 8
+; VF_16: Found an estimated cost of 176 for VF 16 For instruction: %tmp2 = load i64, ptr %tmp0, align 8
+; VF_16-NEXT: Found an estimated cost of 176 for VF 16 For instruction: %tmp3 = load i64, ptr %tmp1, align 8
+; VF_16-NEXT: Found an estimated cost of 176 for VF 16 For instruction: store i64 %tmp2, ptr %tmp0, align 8
+; VF_16-NEXT: Found an estimated cost of 176 for VF 16 For instruction: store i64 %tmp3, ptr %tmp1, align 8
for.body:
%i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
%tmp0 = getelementptr inbounds %i64.2, ptr %data, i64 %i, i32 0
@@ -179,10 +179,10 @@ entry:
br label %for.body
; VF_2-LABEL: Checking a loop in 'f16_factor_2'
-; VF_2: Found an estimated cost of 12 for VF 2 For instruction: %tmp2 = load half, ptr %tmp0, align 2
-; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp3 = load half, ptr %tmp1, align 2
-; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store half %tmp2, ptr %tmp0, align 2
-; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: store half %tmp3, ptr %tmp1, align 2
+; VF_2: Found an estimated cost of 6 for VF 2 For instruction: %tmp2 = load half, ptr %tmp0, align 2
+; VF_2-NEXT: Found an estimated cost of 6 for VF 2 For instruction: %tmp3 = load half, ptr %tmp1, align 2
+; VF_2-NEXT: Found an estimated cost of 6 for VF 2 For instruction: store half %tmp2, ptr %tmp0, align 2
+; VF_2-NEXT: Found an estimated cost of 6 for VF 2 For instruction: store half %tmp3, ptr %tmp1, align 2
; VF_4-LABEL: Checking a loop in 'f16_factor_2'
; VF_4: Found an estimated cost of 18 for VF 4 For instruction: %tmp2 = load half, ptr %tmp0, align 2
; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load half, ptr %tmp1, align 2
@@ -261,25 +261,25 @@ entry:
br label %for.body
; VF_2-LABEL: Checking a loop in 'f64_factor_2'
-; VF_2: Found an estimated cost of 12 for VF 2 For instruction: %tmp2 = load double, ptr %tmp0, align 8
-; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp3 = load double, ptr %tmp1, align 8
-; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store double %tmp2, ptr %tmp0, align 8
-; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: store double %tmp3, ptr %tmp1, align 8
+; VF_2: Found an estimated cost of 6 for VF 2 For instruction: %tmp2 = load double, ptr %tmp0, align 8
+; VF_2-NEXT: Found an estimated cost of 6 for VF 2 For instruction: %tmp3 = load double, ptr %tmp1, align 8
+; VF_2-NEXT: Found an estimated cost of 6 for VF 2 For instruction: store double %tmp2, ptr %tmp0, align 8
+; VF_2-NEXT: Found an estimated cost of 6 for VF 2 For instruction: store double %tmp3, ptr %tmp1, align 8
; VF_4-LABEL: Checking a loop in 'f64_factor_2'
-; VF_4: Found an estimated cost of 24 for VF 4 For instruction: %tmp2 = load double, ptr %tmp0, align 8
-; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load double, ptr %tmp1, align 8
-; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store double %tmp2, ptr %tmp0, align 8
-; VF_4-NEXT: Found an estimated cost of 24 for VF 4 For instruction: store double %tmp3, ptr %tmp1, align 8
+; VF_4: Found an estimated cost of 12 for VF 4 For instruction: %tmp2 = load double, ptr %tmp0, align 8
+; VF_4-NEXT: Found an estimated cost of 12 for VF 4 For instruction: %tmp3 = load double, ptr %tmp1, align 8
+; VF_4-NEXT: Found an estimated cost of 12 for VF 4 For instruction: store double %tmp2, ptr %tmp0, align 8
+; VF_4-NEXT: Found an estimated cost of 12 for VF 4 For instruction: store double %tmp3, ptr %tmp1, align 8
; VF_8-LABEL: Checking a loop in 'f64_factor_2'
-; VF_8: Found an estimated cost of 48 for VF 8 For instruction: %tmp2 = load double, ptr %tmp0, align 8
-; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load double, ptr %tmp1, align 8
-; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store double %tmp2, ptr %tmp0, align 8
-; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: store double %tmp3, ptr %tmp1, align 8
+; VF_8: Found an estimated cost of 24 for VF 8 For instruction: %tmp2 = load double, ptr %tmp0, align 8
+; VF_8-NEXT: Found an estimated cost of 24 for VF 8 For instruction: %tmp3 = load double, ptr %tmp1, align 8
+; VF_8-NEXT: Found an estimated cost of 24 for VF 8 For instruction: store double %tmp2, ptr %tmp0, align 8
+; VF_8-NEXT: Found an estimated cost of 24 for VF 8 For instruction: store double %tmp3, ptr %tmp1, align 8
; VF_16-LABEL: Checking a loop in 'f64_factor_2'
-; VF_16: Found an estimated cost of 96 for VF 16 For instruction: %tmp2 = load double, ptr %tmp0, align 8
-; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load double, ptr %tmp1, align 8
-; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store double %tmp2, ptr %tmp0, align 8
-; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: store double %tmp3, ptr %tmp1, align 8
+; VF_16: Found an estimated cost of 48 for VF 16 For instruction: %tmp2 = load double, ptr %tmp0, align 8
+; VF_16-NEXT: Found an estimated cost of 48 for VF 16 For instruction: %tmp3 = load double, ptr %tmp1, align 8
+; VF_16-NEXT: Found an estimated cost of 48 for VF 16 For instruction: store double %tmp2, ptr %tmp0, align 8
+; VF_16-NEXT: Found an estimated cost of 48 for VF 16 For instruction: store double %tmp3, ptr %tmp1, align 8
for.body:
%i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
%tmp0 = getelementptr inbounds %f64.2, ptr %data, i64 %i, i32 0
@@ -306,33 +306,33 @@ entry:
br label %for.body
; VF_2-LABEL: Checking a loop in 'i8_factor_3'
-; VF_2: Found an estimated cost of 36 for VF 2 For instruction: %tmp3 = load i8, ptr %tmp0, align 1
-; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp4 = load i8, ptr %tmp1, align 1
-; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load i8, ptr %tmp2, align 1
-; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i8 %tmp3, ptr %tmp0, align 1
-; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i8 %tmp4, ptr %tmp1, align 1
-; VF_2-NEXT: Found an estimated cost of 36 for VF 2 For instruction: store i8 %tmp5, ptr %tmp2, align 1
+; VF_2: Found an estimated cost of 12 for VF 2 For instruction: %tmp3 = load i8, ptr %tmp0, align 1
+; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: %tmp4 = load i8, ptr %tmp1, align 1
+; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: %tmp5 = load i8, ptr %tmp2, align 1
+; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: store i8 %tmp3, ptr %tmp0, align 1
+; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: store i8 %tmp4, ptr %tmp1, align 1
+; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: store i8 %tmp5, ptr %tmp2, align 1
; VF_4-LABEL: Checking a loop in 'i8_factor_3'
-; VF_4: Found an estimated cost of 72 for VF 4 For instruction: %tmp3 = load i8, ptr %tmp0, align 1
-; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp4 = load i8, ptr %tmp1, align 1
-; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load i8, ptr %tmp2, align 1
-; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i8 %tmp3, ptr %tmp0, align 1
-; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i8 %tmp4, ptr %tmp1, align 1
-; VF_4-NEXT: Found an estimated cost of 72 for VF 4 For instruction: store i8 %tmp5, ptr %tmp2, align 1
+; VF_4: Found an estimated cost of 24 for VF 4 For instruction: %tmp3 = load i8, ptr %tmp0, align 1
+; VF_4-NEXT: Found an estimated cost of 24 for VF 4 For instruction: %tmp4 = load i8, ptr %tmp1, align 1
+; VF_4-NEXT: Found an estimated cost of 24 for VF 4 For instruction: %tmp5 = load i8, ptr %tmp2, align 1
+; VF_4-NEXT: Found an estimated cost of 24 for VF 4 For instruction: store i8 %tmp3, ptr %tmp0, align 1
+; VF_4-NEXT: Found an estimated cost of 24 for VF 4 For instruction: store i8 %tmp4, ptr %tmp1, align 1
+; VF_4-NEXT: Found an estimated cost of 24 for VF 4 For instruction: store i8 %tmp5, ptr %tmp2, align 1
; VF_8-LABEL: Checking a loop in 'i8_factor_3'
-; VF_8: Found an estimated cost of 144 for VF 8 For instruction: %tmp3 = load i8, ptr %tmp0, align 1
-; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp4 = load i8, ptr %tmp1, align 1
-; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load i8, ptr %tmp2, align 1
-; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i8 %tmp3, ptr %tmp0, align 1
-; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i8 %tmp4, ptr %tmp1, align 1
-; VF_8-NEXT: Found an estimated cost of 144 for VF 8 For instruction: store i8 %tmp5, ptr %tmp2, align 1
+; VF_8: Found an estimated cost of 48 for VF 8 For instruction: %tmp3 = load i8, ptr %tmp0, align 1
+; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: %tmp4 = load i8, ptr %tmp1, align 1
+; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: %tmp5 = load i8, ptr %tmp2, align 1
+; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: store i8 %tmp3, ptr %tmp0, align 1
+; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: store i8 %tmp4, ptr %tmp1, align 1
+; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: store i8 %tmp5, ptr %tmp2, align 1
; VF_16-LABEL: Checking a loop in 'i8_factor_3'
-; VF_16: Found an estimated cost of 288 for VF 16 For instruction: %tmp3 = load i8, ptr %tmp0, align 1
-; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp4 = load i8, ptr %tmp1, align 1
-; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load i8, ptr %tmp2, align 1
-; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i8 %tmp3, ptr %tmp0, align 1
-; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i8 %tmp4, ptr %tmp1, align 1
-; VF_16-NEXT: Found an estimated cost of 288 for VF 16 For instruction: store i8 %tmp5, ptr %tmp2, align 1
+; VF_16: Found an estimated cost of 96 for VF 16 For instruction: %tmp3 = load i8, ptr %tmp0, align 1
+; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: %tmp4 = load i8, ptr %tmp1, align 1
+; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: %tmp5 = load i8, ptr %tmp2, align 1
+; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: store i8 %tmp3, ptr %tmp0, align 1
+; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: store i8 %tmp4, ptr %tmp1, align 1
+; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: store i8 %tmp5, ptr %tmp2, align 1
for.body:
%i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
%tmp0 = getelementptr inbounds %i8.3, ptr %data, i64 %i, i32 0
@@ -358,33 +358,33 @@ entry:
br label %for.body
; VF_2-LABEL: Checking a loop in 'i16_factor_3'
-; VF_2: Found an estimated cost of 36 for VF 2 For instruction: %tmp3 = load i16, ptr %tmp0, align 2
-; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp4 = load i16, ptr %tmp1, align 2
-; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load i16, ptr %tmp2, align 2
-; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i16 %tmp3, ptr %tmp0, align 2
-; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i16 %tmp4, ptr %tmp1, align 2
-; VF_2-NEXT: Found an estimated cost of 36 for VF 2 For instruction: store i16 %tmp5, ptr %tmp2, align 2
+; VF_2: Found an estimated cost of 12 for VF 2 For instruction: %tmp3 = load i16, ptr %tmp0, align 2
+; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: %tmp4 = load i16, ptr %tmp1, align 2
+; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: %tmp5 = load i16, ptr %tmp2, align 2
+; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: store i16 %tmp3, ptr %tmp0, align 2
+; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: store i16 %tmp4, ptr %tmp1, align 2
+; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: store i16 %tmp5, ptr %tmp2, align 2
; VF_4-LABEL: Checking a loop in 'i16_factor_3'
-; VF_4: Found an estimated cost of 72 for VF 4 For instruction: %tmp3 = load i16, ptr %tmp0, align 2
-; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp4 = load i16, ptr %tmp1, align 2
-; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load i16, ptr %tmp2, align 2
-; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i16 %tmp3, ptr %tmp0, align 2
-; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i16 %tmp4, ptr %tmp1, align 2
-; VF_4-NEXT: Found an estimated cost of 72 for VF 4 For instruction: store i16 %tmp5, ptr %tmp2, align 2
+; VF_4: Found an estimated cost of 24 for VF 4 For instruction: %tmp3 = load i16, ptr %tmp0, align 2
+; VF_4-NEXT: Found an estimated cost of 24 for VF 4 For instruction: %tmp4 = load i16, ptr %tmp1, align 2
+; VF_4-NEXT: Found an estimated cost of 24 for VF 4 For instruction: %tmp5 = load i16, ptr %tmp2, align 2
+; VF_4-NEXT: Found an estimated cost of 24 for VF 4 For instruction: store i16 %tmp3, ptr %tmp0, align 2
+; VF_4-NEXT: Found an estimated cost of 24 for VF 4 For instruction: store i16 %tmp4, ptr %tmp1, align 2
+; VF_4-NEXT: Found an estimated cost of 24 for VF 4 For instruction: store i16 %tmp5, ptr %tmp2, align 2
; VF_8-LABEL: Checking a loop in 'i16_factor_3'
-; VF_8: Found an estimated cost of 144 for VF 8 For instruction: %tmp3 = load i16, ptr %tmp0, align 2
-; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp4 = load i16, ptr %tmp1, align 2
-; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load i16, ptr %tmp2, align 2
-; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i16 %tmp3, ptr %tmp0, align 2
-; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i16 %tmp4, ptr %tmp1, align 2
-; VF_8-NEXT: Found an estimated cost of 144 for VF 8 For instruction: store i16 %tmp5, ptr %tmp2, align 2
+; VF_8: Found an estimated cost of 48 for VF 8 For instruction: %tmp3 = load i16, ptr %tmp0, align 2
+; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: %tmp4 = load i16, ptr %tmp1, align 2
+; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: %tmp5 = load i16, ptr %tmp2, align 2
+; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: store i16 %tmp3, ptr %tmp0, align 2
+; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: store i16 %tmp4, ptr %tmp1, align 2
+; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: store i16 %tmp5, ptr %tmp2, align 2
; VF_16-LABEL: Checking a loop in 'i16_factor_3'
-; VF_16: Found an estimated cost of 288 for VF 16 For instruction: %tmp3 = load i16, ptr %tmp0, align 2
-; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp4 = load i16, ptr %tmp1, align 2
-; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load i16, ptr %tmp2, align 2
-; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i16 %tmp3, ptr %tmp0, align 2
-; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i16 %tmp4, ptr %tmp1, align 2
-; VF_16-NEXT: Found an estimated cost of 288 for VF 16 For instruction: store i16 %tmp5, ptr %tmp2, align 2
+; VF_16: Found an estimated cost of 96 for VF 16 For instruction: %tmp3 = load i16, ptr %tmp0, align 2
+; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: %tmp4 = load i16, ptr %tmp1, align 2
+; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: %tmp5 = load i16, ptr %tmp2, align 2
+; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: store i16 %tmp3, ptr %tmp0, align 2
+; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: store i16 %tmp4, ptr %tmp1, align 2
+; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: store i16 %tmp5, ptr %tmp2, align 2
for.body:
%i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
%tmp0 = getelementptr inbounds %i16.3, ptr %data, i64 %i, i32 0
@@ -410,33 +410,33 @@ entry:
br label %for.body
; VF_2-LABEL: Checking a loop in 'i32_factor_3'
-; VF_2: Found an estimated cost of 36 for VF 2 For instruction: %tmp3 = load i32, ptr %tmp0, align 4
-; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp4 = load i32, ptr %tmp1, align 4
-; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load i32, ptr %tmp2, align 4
-; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i32 %tmp3, ptr %tmp0, align 4
-; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i32 %tmp4, ptr %tmp1, align 4
-; VF_2-NEXT: Found an estimated cost of 36 for VF 2 For instruction: store i32 %tmp5, ptr %tmp2, align 4
+; VF_2: Found an estimated cost of 12 for VF 2 For instruction: %tmp3 = load i32, ptr %tmp0, align 4
+; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: %tmp4 = load i32, ptr %tmp1, align 4
+; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: %tmp5 = load i32, ptr %tmp2, align 4
+; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: store i32 %tmp3, ptr %tmp0, align 4
+; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: store i32 %tmp4, ptr %tmp1, align 4
+; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: store i32 %tmp5, ptr %tmp2, align 4
; VF_4-LABEL: Checking a loop in 'i32_factor_3'
-; VF_4: Found an estimated cost of 24 for VF 4 For instruction: %tmp3 = load i32, ptr %tmp0, align 4
-; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp4 = load i32, ptr %tmp1, align 4
-; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load i32, ptr %tmp2, align 4
-; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i32 %tmp3, ptr %tmp0, align 4
-; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i32 %tmp4, ptr %tmp1, align 4
-; VF_4-NEXT: Found an estimated cost of 24 for VF 4 For instruction: store i32 %tmp5, ptr %tmp2, align 4
+; VF_4: Found an estimated cost of 8 for VF 4 For instruction: %tmp3 = load i32, ptr %tmp0, align 4
+; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: %tmp4 = load i32, ptr %tmp1, align 4
+; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: %tmp5 = load i32, ptr %tmp2, align 4
+; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store i32 %tmp3, ptr %tmp0, align 4
+; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store i32 %tmp4, ptr %tmp1, align 4
+; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store i32 %tmp5, ptr %tmp2, align 4
; VF_8-LABEL: Checking a loop in 'i32_factor_3'
-; VF_8: Found an estimated cost of 144 for VF 8 For instruction: %tmp3 = load i32, ptr %tmp0, align 4
-; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp4 = load i32, ptr %tmp1, align 4
-; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load i32, ptr %tmp2, align 4
-; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i32 %tmp3, ptr %tmp0, align 4
-; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i32 %tmp4, ptr %tmp1, align 4
-; VF_8-NEXT: Found an estimated cost of 144 for VF 8 For instruction: store i32 %tmp5, ptr %tmp2, align 4
+; VF_8: Found an estimated cost of 48 for VF 8 For instruction: %tmp3 = load i32, ptr %tmp0, align 4
+; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: %tmp4 = load i32, ptr %tmp1, align 4
+; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: %tmp5 = load i32, ptr %tmp2, align 4
+; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: store i32 %tmp3, ptr %tmp0, align 4
+; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: store i32 %tmp4, ptr %tmp1, align 4
+; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: store i32 %tmp5, ptr %tmp2, align 4
; VF_16-LABEL: Checking a loop in 'i32_factor_3'
-; VF_16: Found an estimated cost of 288 for VF 16 For instruction: %tmp3 = load i32, ptr %tmp0, align 4
-; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp4 = load i32, ptr %tmp1, align 4
-; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load i32, ptr %tmp2, align 4
-; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i32 %tmp3, ptr %tmp0, align 4
-; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i32 %tmp4, ptr %tmp1, align 4
-; VF_16-NEXT: Found an estimated cost of 288 for VF 16 For instruction: store i32 %tmp5, ptr %tmp2, align 4
+; VF_16: Found an estimated cost of 96 for VF 16 For instruction: %tmp3 = load i32, ptr %tmp0, align 4
+; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: %tmp4 = load i32, ptr %tmp1, align 4
+; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: %tmp5 = load i32, ptr %tmp2, align 4
+; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: store i32 %tmp3, ptr %tmp0, align 4
+; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: store i32 %tmp4, ptr %tmp1, align 4
+; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: store i32 %tmp5, ptr %tmp2, align 4
for.body:
%i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
%tmp0 = getelementptr inbounds %i32.3, ptr %data, i64 %i, i32 0
@@ -462,33 +462,33 @@ entry:
br label %for.body
; VF_2-LABEL: Checking a loop in 'i64_factor_3'
-; VF_2: Found an estimated cost of 66 for VF 2 For instruction: %tmp3 = load i64, ptr %tmp0, align 8
-; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp4 = load i64, ptr %tmp1, align 8
-; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load i64, ptr %tmp2, align 8
-; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i64 %tmp3, ptr %tmp0, align 8
-; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i64 %tmp4, ptr %tmp1, align 8
-; VF_2-NEXT: Found an estimated cost of 66 for VF 2 For instruction: store i64 %tmp5, ptr %tmp2, align 8
+; VF_2: Found an estimated cost of 22 for VF 2 For instruction: %tmp3 = load i64, ptr %tmp0, align 8
+; VF_2-NEXT: Found an estimated cost of 22 for VF 2 For instruction: %tmp4 = load i64, ptr %tmp1, align 8
+; VF_2-NEXT: Found an estimated cost of 22 for VF 2 For instruction: %tmp5 = load i64, ptr %tmp2, align 8
+; VF_2-NEXT: Found an estimated cost of 22 for VF 2 For instruction: store i64 %tmp3, ptr %tmp0, align 8
+; VF_2-NEXT: Found an estimated cost of 22 for VF 2 For instruction: store i64 %tmp4, ptr %tmp1, align 8
+; VF_2-NEXT: Found an estimated cost of 22 for VF 2 For instruction: store i64 %tmp5, ptr %tmp2, align 8
; VF_4-LABEL: Checking a loop in 'i64_factor_3'
-; VF_4: Found an estimated cost of 132 for VF 4 For instruction: %tmp3 = load i64, ptr %tmp0, align 8
-; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp4 = load i64, ptr %tmp1, align 8
-; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load i64, ptr %tmp2, align 8
-; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i64 %tmp3, ptr %tmp0, align 8
-; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i64 %tmp4, ptr %tmp1, align 8
-; VF_4-NEXT: Found an estimated cost of 132 for VF 4 For instruction: store i64 %tmp5, ptr %tmp2, align 8
+; VF_4: Found an estimated cost of 44 for VF 4 For instruction: %tmp3 = load i64, ptr %tmp0, align 8
+; VF_4-NEXT: Found an estimated cost of 44 for VF 4 For instruction: %tmp4 = load i64, ptr %tmp1, align 8
+; VF_4-NEXT: Found an estimated cost of 44 for VF 4 For instruction: %tmp5 = load i64, ptr %tmp2, align 8
+; VF_4-NEXT: Found an estimated cost of 44 for VF 4 For instruction: store i64 %tmp3, ptr %tmp0, align 8
+; VF_4-NEXT: Found an estimated cost of 44 for VF 4 For instruction: store i64 %tmp4, ptr %tmp1, align 8
+; VF_4-NEXT: Found an estimated cost of 44 for VF 4 For instruction: store i64 %tmp5, ptr %tmp2, align 8
; VF_8-LABEL: Checking a loop in 'i64_factor_3'
-; VF_8: Found an estimated cost of 264 for VF 8 For instruction: %tmp3 = load i64, ptr %tmp0, align 8
-; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp4 = load i64, ptr %tmp1, align 8
-; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load i64, ptr %tmp2, align 8
-; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i64 %tmp3, ptr %tmp0, align 8
-; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i64 %tmp4, ptr %tmp1, align 8
-; VF_8-NEXT: Found an estimated cost of 264 for VF 8 For instruction: store i64 %tmp5, ptr %tmp2, align 8
+; VF_8: Found an estimated cost of 88 for VF 8 For instruction: %tmp3 = load i64, ptr %tmp0, align 8
+; VF_8-NEXT: Found an estimated cost of 88 for VF 8 For instruction: %tmp4 = load i64, ptr %tmp1, align 8
+; VF_8-NEXT: Found an estimated cost of 88 for VF 8 For instruction: %tmp5 = load i64, ptr %tmp2, align 8
+; VF_8-NEXT: Found an estimated cost of 88 for VF 8 For instruction: store i64 %tmp3, ptr %tmp0, align 8
+; VF_8-NEXT: Found an estimated cost of 88 for VF 8 For instruction: store i64 %tmp4, ptr %tmp1, align 8
+; VF_8-NEXT: Found an estimated cost of 88 for VF 8 For instruction: store i64 %tmp5, ptr %tmp2, align 8
; VF_16-LABEL: Checking a loop in 'i64_factor_3'
-; VF_16: Found an estimated cost of 528 for VF 16 For instruction: %tmp3 = load i64, ptr %tmp0, align 8
-; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp4 = load i64, ptr %tmp1, align 8
-; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load i64, ptr %tmp2, align 8
-; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i64 %tmp3, ptr %tmp0, align 8
-; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i64 %tmp4, ptr %tmp1, align 8
-; VF_16-NEXT: Found an estimated cost of 528 for VF 16 For instruction: store i64 %tmp5, ptr %tmp2, align 8
+; VF_16: Found an estimated cost of 176 for VF 16 For instruction: %tmp3 = load i64, ptr %tmp0, align 8
+; VF_16-NEXT: Found an estimated cost of 176 for VF 16 For instruction: %tmp4 = load i64, ptr %tmp1, align 8
+; VF_16-NEXT: Found an estimated cost of 176 for VF 16 For instruction: %tmp5 = load i64, ptr %tmp2, align 8
+; VF_16-NEXT: Found an estimated cost of 176 for VF 16 For instruction: store i64 %tmp3, ptr %tmp0, align 8
+; VF_16-NEXT: Found an estimated cost of 176 for VF 16 For instruction: store i64 %tmp4, ptr %tmp1, align 8
+; VF_16-NEXT: Found an estimated cost of 176 for VF 16 For instruction: store i64 %tmp5, ptr %tmp2, align 8
for.body:
%i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
%tmp0 = getelementptr inbounds %i64.3, ptr %data, i64 %i, i32 0
@@ -514,12 +514,12 @@ entry:
br label %for.body
; VF_2-LABEL: Checking a loop in 'f16_factor_3'
-; VF_2: Found an estimated cost of 18 for VF 2 For instruction: %tmp3 = load half, ptr %tmp0, align 2
-; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp4 = load half, ptr %tmp1, align 2
-; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load half, ptr %tmp2, align 2
-; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store half %tmp3, ptr %tmp0, align 2
-; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store half %tmp4, ptr %tmp1, align 2
-; VF_2-NEXT: Found an estimated cost of 18 for VF 2 For instruction: store half %tmp5, ptr %tmp2, align 2
+; VF_2: Found an estimated cost of 6 for VF 2 For instruction: %tmp3 = load half, ptr %tmp0, align 2
+; VF_2-NEXT: Found an estimated cost of 6 for VF 2 For instruction: %tmp4 = load half, ptr %tmp1, align 2
+; VF_2-NEXT: Found an estimated cost of 6 for VF 2 For instruction: %tmp5 = load half, ptr %tmp2, align 2
+; VF_2-NEXT: Found an estimated cost of 6 for VF 2 For instruction: store half %tmp3, ptr %tmp0, align 2
+; VF_2-NEXT: Found an estimated cost of 6 for VF 2 For instruction: store half %tmp4, ptr %tmp1, align 2
+; VF_2-NEXT: Found an estimated cost of 6 for VF 2 For instruction: store half %tmp5, ptr %tmp2, align 2
; VF_4-LABEL: Checking a loop in 'f16_factor_3'
; VF_4: Found an estimated cost of 28 for VF 4 For instruction: %tmp3 = load half, ptr %tmp0, align 2
; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp4 = load half, ptr %tmp1, align 2
@@ -573,12 +573,12 @@ entry:
; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store float %tmp4, ptr %tmp1, align 4
; VF_2-NEXT: Found an estimated cost of 16 for VF 2 For instruction: store float %tmp5, ptr %tmp2, align 4
; VF_4-LABEL: Checking a loop in 'f32_factor_3'
-; VF_4: Found an estimated cost of 24 for VF 4 For instruction: %tmp3 = load float, ptr %tmp0, align 4
-; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp4 = load float, ptr %tmp1, align 4
-; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load float, ptr %tmp2, align 4
-; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store float %tmp3, ptr %tmp0, align 4
-; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store float %tmp4, ptr %tmp1, align 4
-; VF_4-NEXT: Found an estimated cost of 24 for VF 4 For instruction: store float %tmp5, ptr %tmp2, align 4
+; VF_4: Found an estimated cost of 8 for VF 4 For instruction: %tmp3 = load float, ptr %tmp0, align 4
+; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: %tmp4 = load float, ptr %tmp1, align 4
+; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: %tmp5 = load float, ptr %tmp2, align 4
+; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store float %tmp3, ptr %tmp0, align 4
+; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store float %tmp4, ptr %tmp1, align 4
+; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store float %tmp5, ptr %tmp2, align 4
; VF_8-LABEL: Checking a loop in 'f32_factor_3'
; VF_8: Found an estimated cost of 64 for VF 8 For instruction: %tmp3 = load float, ptr %tmp0, align 4
; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp4 = load float, ptr %tmp1, align 4
@@ -618,33 +618,33 @@ entry:
br label %for.body
; VF_2-LABEL: Checking a loop in 'f64_factor_3'
-; VF_2: Found an estimated cost of 18 for VF 2 For instruction: %tmp3 = load double, ptr %tmp0, align 8
-; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp4 = load double, ptr %tmp1, align 8
-; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load double, ptr %tmp2, align 8
-; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store double %tmp3, ptr %tmp0, align 8
-; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store double %tmp4, ptr %tmp1, align 8
-; VF_2-NEXT: Found an estimated cost of 18 for VF 2 For instruction: store double %tmp5, ptr %tmp2, align 8
+; VF_2: Found an estimated cost of 6 for VF 2 For instruction: %tmp3 = load double, ptr %tmp0, align 8
+; VF_2-NEXT: Found an estimated cost of 6 for VF 2 For instruction: %tmp4 = load double, ptr %tmp1, align 8
+; VF_2-NEXT: Found an estimated cost of 6 for VF 2 For instruction: %tmp5 = load double, ptr %tmp2, align 8
+; VF_2-NEXT: Found an estimated cost of 6 for VF 2 For instruction: store double %tmp3, ptr %tmp0, align 8
+; VF_2-NEXT: Found an estimated cost of 6 for VF 2 For instruction: store double %tmp4, ptr %tmp1, align 8
+; VF_2-NEXT: Found an estimated cost of 6 for VF 2 For instruction: store double %tmp5, ptr %tmp2, align 8
; VF_4-LABEL: Checking a loop in 'f64_factor_3'
-; VF_4: Found an estimated cost of 36 for VF 4 For instruction: %tmp3 = load double, ptr %tmp0, align 8
-; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp4 = load double, ptr %tmp1, align 8
-; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load double, ptr %tmp2, align 8
-; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store double %tmp3, ptr %tmp0, align 8
-; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store double %tmp4, ptr %tmp1, align 8
-; VF_4-NEXT: Found an estimated cost of 36 for VF 4 For instruction: store double %tmp5, ptr %tmp2, align 8
+; VF_4: Found an estimated cost of 12 for VF 4 For instruction: %tmp3 = load double, ptr %tmp0, align 8
+; VF_4-NEXT: Found an estimated cost of 12 for VF 4 For instruction: %tmp4 = load double, ptr %tmp1, align 8
+; VF_4-NEXT: Found an estimated cost of 12 for VF 4 For instruction: %tmp5 = load double, ptr %tmp2, align 8
+; VF_4-NEXT: Found an estimated cost of 12 for VF 4 For instruction: store double %tmp3, ptr %tmp0, align 8
+; VF_4-NEXT: Found an estimated cost of 12 for VF 4 For instruction: store double %tmp4, ptr %tmp1, align 8
+; VF_4-NEXT: Found an estimated cost of 12 for VF 4 For instruction: store double %tmp5, ptr %tmp2, align 8
; VF_8-LABEL: Checking a loop in 'f64_factor_3'
-; VF_8: Found an estimated cost of 72 for VF 8 For instruction: %tmp3 = load double, ptr %tmp0, align 8
-; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp4 = load double, ptr %tmp1, align 8
-; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load double, ptr %tmp2, align 8
-; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store double %tmp3, ptr %tmp0, align 8
-; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store double %tmp4, ptr %tmp1, align 8
-; VF_8-NEXT: Found an estimated cost of 72 for VF 8 For instruction: store double %tmp5, ptr %tmp2, align 8
+; VF_8: Found an estimated cost of 24 for VF 8 For instruction: %tmp3 = load double, ptr %tmp0, align 8
+; VF_8-NEXT: Found an estimated cost of 24 for VF 8 For instruction: %tmp4 = load double, ptr %tmp1, align 8
+; VF_8-NEXT: Found an estimated cost of 24 for VF 8 For instruction: %tmp5 = load double, ptr %tmp2, align 8
+; VF_8-NEXT: Found an estimated cost of 24 for VF 8 For instruction: store double %tmp3, ptr %tmp0, align 8
+; VF_8-NEXT: Found an estimated cost of 24 for VF 8 For instruction: store double %tmp4, ptr %tmp1, align 8
+; VF_8-NEXT: Found an estimated cost of 24 for VF 8 For instruction: store double %tmp5, ptr %tmp2, align 8
; VF_16-LABEL: Checking a loop in 'f64_factor_3'
-; VF_16: Found an estimated cost of 144 for VF 16 For instruction: %tmp3 = load double, ptr %tmp0, align 8
-; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp4 = load double, ptr %tmp1, align 8
-; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load double, ptr %tmp2, align 8
-; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store double %tmp3, ptr %tmp0, align 8
-; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store double %tmp4, ptr %tmp1, align 8
-; VF_16-NEXT: Found an estimated cost of 144 for VF 16 For instruction: store double %tmp5, ptr %tmp2, align 8
+; VF_16: Found an estimated cost of 48 for VF 16 For instruction: %tmp3 = load double, ptr %tmp0, align 8
+; VF_16-NEXT: Found an estimated cost of 48 for VF 16 For instruction: %tmp4 = load double, ptr %tmp1, align 8
+; VF_16-NEXT: Found an estimated cost of 48 for VF 16 For instruction: %tmp5 = load double, ptr %tmp2, align 8
+; VF_16-NEXT: Found an estimated cost of 48 for VF 16 For instruction: store double %tmp3, ptr %tmp0, align 8
+; VF_16-NEXT: Found an estimated cost of 48 for VF 16 For instruction: store double %tmp4, ptr %tmp1, align 8
+; VF_16-NEXT: Found an estimated cost of 48 for VF 16 For instruction: store double %tmp5, ptr %tmp2, align 8
for.body:
%i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
%tmp0 = getelementptr inbounds %f64.3, ptr %data, i64 %i, i32 0
@@ -673,41 +673,41 @@ entry:
br label %for.body
; VF_2-LABEL: Checking a loop in 'i8_factor_4'
-; VF_2: Found an estimated cost of 48 for VF 2 For instruction: %tmp4 = load i8, ptr %tmp0, align 1
-; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load i8, ptr %tmp1, align 1
-; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp6 = load i8, ptr %tmp2, align 1
-; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp7 = load i8, ptr %tmp3, align 1
-; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i8 %tmp4, ptr %tmp0, align 1
-; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i8 %tmp5, ptr %tmp1, align 1
-; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i8 %tmp6, ptr %tmp2, align 1
-; VF_2-NEXT: Found an estimated cost of 48 for VF 2 For instruction: store i8 %tmp7, ptr %tmp3, align 1
+; VF_2: Found an estimated cost of 12 for VF 2 For instruction: %tmp4 = load i8, ptr %tmp0, align 1
+; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: %tmp5 = load i8, ptr %tmp1, align 1
+; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: %tmp6 = load i8, ptr %tmp2, align 1
+; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: %tmp7 = load i8, ptr %tmp3, align 1
+; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: store i8 %tmp4, ptr %tmp0, align 1
+; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: store i8 %tmp5, ptr %tmp1, align 1
+; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: store i8 %tmp6, ptr %tmp2, align 1
+; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: store i8 %tmp7, ptr %tmp3, align 1
; VF_4-LABEL: Checking a loop in 'i8_factor_4'
-; VF_4: Found an estimated cost of 96 for VF 4 For instruction: %tmp4 = load i8, ptr %tmp0, align 1
-; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load i8, ptr %tmp1, align 1
-; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp6 = load i8, ptr %tmp2, align 1
-; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp7 = load i8, ptr %tmp3, align 1
-; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i8 %tmp4, ptr %tmp0, align 1
-; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i8 %tmp5, ptr %tmp1, align 1
-; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i8 %tmp6, ptr %tmp2, align 1
-; VF_4-NEXT: Found an estimated cost of 96 for VF 4 For instruction: store i8 %tmp7, ptr %tmp3, align 1
+; VF_4: Found an estimated cost of 24 for VF 4 For instruction: %tmp4 = load i8, ptr %tmp0, align 1
+; VF_4-NEXT: Found an estimated cost of 24 for VF 4 For instruction: %tmp5 = load i8, ptr %tmp1, align 1
+; VF_4-NEXT: Found an estimated cost of 24 for VF 4 For instruction: %tmp6 = load i8, ptr %tmp2, align 1
+; VF_4-NEXT: Found an estimated cost of 24 for VF 4 For instruction: %tmp7 = load i8, ptr %tmp3, align 1
+; VF_4-NEXT: Found an estimated cost of 24 for VF 4 For instruction: store i8 %tmp4, ptr %tmp0, align 1
+; VF_4-NEXT: Found an estimated cost of 24 for VF 4 For instruction: store i8 %tmp5, ptr %tmp1, align 1
+; VF_4-NEXT: Found an estimated cost of 24 for VF 4 For instruction: store i8 %tmp6, ptr %tmp2, align 1
+; VF_4-NEXT: Found an estimated cost of 24 for VF 4 For instruction: store i8 %tmp7, ptr %tmp3, align 1
; VF_8-LABEL: Checking a loop in 'i8_factor_4'
-; VF_8: Found an estimated cost of 192 for VF 8 For instruction: %tmp4 = load i8, ptr %tmp0, align 1
-; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load i8, ptr %tmp1, align 1
-; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp6 = load i8, ptr %tmp2, align 1
-; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp7 = load i8, ptr %tmp3, align 1
-; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i8 %tmp4, ptr %tmp0, align 1
-; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i8 %tmp5, ptr %tmp1, align 1
-; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i8 %tmp6, ptr %tmp2, align 1
-; VF_8-NEXT: Found an estimated cost of 192 for VF 8 For instruction: store i8 %tmp7, ptr %tmp3, align 1
+; VF_8: Found an estimated cost of 48 for VF 8 For instruction: %tmp4 = load i8, ptr %tmp0, align 1
+; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: %tmp5 = load i8, ptr %tmp1, align 1
+; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: %tmp6 = load i8, ptr %tmp2, align 1
+; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: %tmp7 = load i8, ptr %tmp3, align 1
+; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: store i8 %tmp4, ptr %tmp0, align 1
+; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: store i8 %tmp5, ptr %tmp1, align 1
+; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: store i8 %tmp6, ptr %tmp2, align 1
+; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: store i8 %tmp7, ptr %tmp3, align 1
; VF_16-LABEL: Checking a loop in 'i8_factor_4'
-; VF_16: Found an estimated cost of 384 for VF 16 For instruction: %tmp4 = load i8, ptr %tmp0, align 1
-; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load i8, ptr %tmp1, align 1
-; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp6 = load i8, ptr %tmp2, align 1
-; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp7 = load i8, ptr %tmp3, align 1
-; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i8 %tmp4, ptr %tmp0, align 1
-; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i8 %tmp5, ptr %tmp1, align 1
-; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i8 %tmp6, ptr %tmp2, align 1
-; VF_16-NEXT: Found an estimated cost of 384 for VF 16 For instruction: store i8 %tmp7, ptr %tmp3, align 1
+; VF_16: Found an estimated cost of 96 for VF 16 For instruction: %tmp4 = load i8, ptr %tmp0, align 1
+; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: %tmp5 = load i8, ptr %tmp1, align 1
+; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: %tmp6 = load i8, ptr %tmp2, align 1
+; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: %tmp7 = load i8, ptr %tmp3, align 1
+; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: store i8 %tmp4, ptr %tmp0, align 1
+; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: store i8 %tmp5, ptr %tmp1, align 1
+; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: store i8 %tmp6, ptr %tmp2, align 1
+; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: store i8 %tmp7, ptr %tmp3, align 1
for.body:
%i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
%tmp0 = getelementptr inbounds %i8.4, ptr %data, i64 %i, i32 0
@@ -736,41 +736,41 @@ entry:
br label %for.body
; VF_2-LABEL: Checking a loop in 'i16_factor_4'
-; VF_2: Found an estimated cost of 48 for VF 2 For instruction: %tmp4 = load i16, ptr %tmp0, align 2
-; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load i16, ptr %tmp1, align 2
-; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp6 = load i16, ptr %tmp2, align 2
-; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp7 = load i16, ptr %tmp3, align 2
-; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i16 %tmp4, ptr %tmp0, align 2
-; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i16 %tmp5, ptr %tmp1, align 2
-; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i16 %tmp6, ptr %tmp2, align 2
-; VF_2-NEXT: Found an estimated cost of 48 for VF 2 For instruction: store i16 %tmp7, ptr %tmp3, align 2
+; VF_2: Found an estimated cost of 12 for VF 2 For instruction: %tmp4 = load i16, ptr %tmp0, align 2
+; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: %tmp5 = load i16, ptr %tmp1, align 2
+; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: %tmp6 = load i16, ptr %tmp2, align 2
+; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: %tmp7 = load i16, ptr %tmp3, align 2
+; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: store i16 %tmp4, ptr %tmp0, align 2
+; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: store i16 %tmp5, ptr %tmp1, align 2
+; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: store i16 %tmp6, ptr %tmp2, align 2
+; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: store i16 %tmp7, ptr %tmp3, align 2
; VF_4-LABEL: Checking a loop in 'i16_factor_4'
-; VF_4: Found an estimated cost of 96 for VF 4 For instruction: %tmp4 = load i16, ptr %tmp0, align 2
-; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load i16, ptr %tmp1, align 2
-; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp6 = load i16, ptr %tmp2, align 2
-; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp7 = load i16, ptr %tmp3, align 2
-; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i16 %tmp4, ptr %tmp0, align 2
-; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i16 %tmp5, ptr %tmp1, align 2
-; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i16 %tmp6, ptr %tmp2, align 2
-; VF_4-NEXT: Found an estimated cost of 96 for VF 4 For instruction: store i16 %tmp7, ptr %tmp3, align 2
+; VF_4: Found an estimated cost of 24 for VF 4 For instruction: %tmp4 = load i16, ptr %tmp0, align 2
+; VF_4-NEXT: Found an estimated cost of 24 for VF 4 For instruction: %tmp5 = load i16, ptr %tmp1, align 2
+; VF_4-NEXT: Found an estimated cost of 24 for VF 4 For instruction: %tmp6 = load i16, ptr %tmp2, align 2
+; VF_4-NEXT: Found an estimated cost of 24 for VF 4 For instruction: %tmp7 = load i16, ptr %tmp3, align 2
+; VF_4-NEXT: Found an estimated cost of 24 for VF 4 For instruction: store i16 %tmp4, ptr %tmp0, align 2
+; VF_4-NEXT: Found an estimated cost of 24 for VF 4 For instruction: store i16 %tmp5, ptr %tmp1, align 2
+; VF_4-NEXT: Found an estimated cost of 24 for VF 4 For instruction: store i16 %tmp6, ptr %tmp2, align 2
+; VF_4-NEXT: Found an estimated cost of 24 for VF 4 For instruction: store i16 %tmp7, ptr %tmp3, align 2
; VF_8-LABEL: Checking a loop in 'i16_factor_4'
-; VF_8: Found an estimated cost of 192 for VF 8 For instruction: %tmp4 = load i16, ptr %tmp0, align 2
-; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load i16, ptr %tmp1, align 2
-; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp6 = load i16, ptr %tmp2, align 2
-; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp7 = load i16, ptr %tmp3, align 2
-; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i16 %tmp4, ptr %tmp0, align 2
-; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i16 %tmp5, ptr %tmp1, align 2
-; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i16 %tmp6, ptr %tmp2, align 2
-; VF_8-NEXT: Found an estimated cost of 192 for VF 8 For instruction: store i16 %tmp7, ptr %tmp3, align 2
+; VF_8: Found an estimated cost of 48 for VF 8 For instruction: %tmp4 = load i16, ptr %tmp0, align 2
+; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: %tmp5 = load i16, ptr %tmp1, align 2
+; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: %tmp6 = load i16, ptr %tmp2, align 2
+; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: %tmp7 = load i16, ptr %tmp3, align 2
+; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: store i16 %tmp4, ptr %tmp0, align 2
+; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: store i16 %tmp5, ptr %tmp1, align 2
+; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: store i16 %tmp6, ptr %tmp2, align 2
+; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: store i16 %tmp7, ptr %tmp3, align 2
; VF_16-LABEL: Checking a loop in 'i16_factor_4'
-; VF_16: Found an estimated cost of 384 for VF 16 For instruction: %tmp4 = load i16, ptr %tmp0, align 2
-; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load i16, ptr %tmp1, align 2
-; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp6 = load i16, ptr %tmp2, align 2
-; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp7 = load i16, ptr %tmp3, align 2
-; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i16 %tmp4, ptr %tmp0, align 2
-; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i16 %tmp5, ptr %tmp1, align 2
-; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i16 %tmp6, ptr %tmp2, align 2
-; VF_16-NEXT: Found an estimated cost of 384 for VF 16 For instruction: store i16 %tmp7, ptr %tmp3, align 2
+; VF_16: Found an estimated cost of 96 for VF 16 For instruction: %tmp4 = load i16, ptr %tmp0, align 2
+; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: %tmp5 = load i16, ptr %tmp1, align 2
+; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: %tmp6 = load i16, ptr %tmp2, align 2
+; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: %tmp7 = load i16, ptr %tmp3, align 2
+; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: store i16 %tmp4, ptr %tmp0, align 2
+; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: store i16 %tmp5, ptr %tmp1, align 2
+; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: store i16 %tmp6, ptr %tmp2, align 2
+; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: store i16 %tmp7, ptr %tmp3, align 2
for.body:
%i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
%tmp0 = getelementptr inbounds %i16.4, ptr %data, i64 %i, i32 0
@@ -799,41 +799,41 @@ entry:
br label %for.body
; VF_2-LABEL: Checking a loop in 'i32_factor_4'
-; VF_2: Found an estimated cost of 48 for VF 2 For instruction: %tmp4 = load i32, ptr %tmp0, align 4
-; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load i32, ptr %tmp1, align 4
-; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp6 = load i32, ptr %tmp2, align 4
-; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp7 = load i32, ptr %tmp3, align 4
-; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i32 %tmp4, ptr %tmp0, align 4
-; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i32 %tmp5, ptr %tmp1, align 4
-; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i32 %tmp6, ptr %tmp2, align 4
-; VF_2-NEXT: Found an estimated cost of 48 for VF 2 For instruction: store i32 %tmp7, ptr %tmp3, align 4
+; VF_2: Found an estimated cost of 12 for VF 2 For instruction: %tmp4 = load i32, ptr %tmp0, align 4
+; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: %tmp5 = load i32, ptr %tmp1, align 4
+; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: %tmp6 = load i32, ptr %tmp2, align 4
+; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: %tmp7 = load i32, ptr %tmp3, align 4
+; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: store i32 %tmp4, ptr %tmp0, align 4
+; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: store i32 %tmp5, ptr %tmp1, align 4
+; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: store i32 %tmp6, ptr %tmp2, align 4
+; VF_2-NEXT: Found an estimated cost of 12 for VF 2 For instruction: store i32 %tmp7, ptr %tmp3, align 4
; VF_4-LABEL: Checking a loop in 'i32_factor_4'
-; VF_4: Found an estimated cost of 32 for VF 4 For instruction: %tmp4 = load i32, ptr %tmp0, align 4
-; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load i32, ptr %tmp1, align 4
-; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp6 = load i32, ptr %tmp2, align 4
-; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp7 = load i32, ptr %tmp3, align 4
-; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i32 %tmp4, ptr %tmp0, align 4
-; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i32 %tmp5, ptr %tmp1, align 4
-; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i32 %tmp6, ptr %tmp2, align 4
-; VF_4-NEXT: Found an estimated cost of 32 for VF 4 For instruction: store i32 %tmp7, ptr %tmp3, align 4
+; VF_4: Found an estimated cost of 8 for VF 4 For instruction: %tmp4 = load i32, ptr %tmp0, align 4
+; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: %tmp5 = load i32, ptr %tmp1, align 4
+; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: %tmp6 = load i32, ptr %tmp2, align 4
+; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: %tmp7 = load i32, ptr %tmp3, align 4
+; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store i32 %tmp4, ptr %tmp0, align 4
+; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store i32 %tmp5, ptr %tmp1, align 4
+; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store i32 %tmp6, ptr %tmp2, align 4
+; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store i32 %tmp7, ptr %tmp3, align 4
; VF_8-LABEL: Checking a loop in 'i32_factor_4'
-; VF_8: Found an estimated cost of 192 for VF 8 For instruction: %tmp4 = load i32, ptr %tmp0, align 4
-; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load i32, ptr %tmp1, align 4
-; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp6 = load i32, ptr %tmp2, align 4
-; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp7 = load i32, ptr %tmp3, align 4
-; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i32 %tmp4, ptr %tmp0, align 4
-; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i32 %tmp5, ptr %tmp1, align 4
-; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i32 %tmp6, ptr %tmp2, align 4
-; VF_8-NEXT: Found an estimated cost of 192 for VF 8 For instruction: store i32 %tmp7, ptr %tmp3, align 4
+; VF_8: Found an estimated cost of 48 for VF 8 For instruction: %tmp4 = load i32, ptr %tmp0, align 4
+; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: %tmp5 = load i32, ptr %tmp1, align 4
+; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: %tmp6 = load i32, ptr %tmp2, align 4
+; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: %tmp7 = load i32, ptr %tmp3, align 4
+; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: store i32 %tmp4, ptr %tmp0, align 4
+; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: store i32 %tmp5, ptr %tmp1, align 4
+; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: store i32 %tmp6, ptr %tmp2, align 4
+; VF_8-NEXT: Found an estimated cost of 48 for VF 8 For instruction: store i32 %tmp7, ptr %tmp3, align 4
; VF_16-LABEL: Checking a loop in 'i32_factor_4'
-; VF_16: Found an estimated cost of 384 for VF 16 For instruction: %tmp4 = load i32, ptr %tmp0, align 4
-; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load i32, ptr %tmp1, align 4
-; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp6 = load i32, ptr %tmp2, align 4
-; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp7 = load i32, ptr %tmp3, align 4
-; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i32 %tmp4, ptr %tmp0, align 4
-; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i32 %tmp5, ptr %tmp1, align 4
-; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i32 %tmp6, ptr %tmp2, align 4
-; VF_16-NEXT: Found an estimated cost of 384 for VF 16 For instruction: store i32 %tmp7, ptr %tmp3, align 4
+; VF_16: Found an estimated cost of 96 for VF 16 For instruction: %tmp4 = load i32, ptr %tmp0, align 4
+; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: %tmp5 = load i32, ptr %tmp1, align 4
+; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: %tmp6 = load i32, ptr %tmp2, align 4
+; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: %tmp7 = load i32, ptr %tmp3, align 4
+; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: store i32 %tmp4, ptr %tmp0, align 4
+; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: store i32 %tmp5, ptr %tmp1, align 4
+; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: store i32 %tmp6, ptr %tmp2, align 4
+; VF_16-NEXT: Found an estimated cost of 96 for VF 16 For instruction: store i32 %tmp7, ptr %tmp3, align 4
for.body:
%i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
%tmp0 = getelementptr inbounds %i32.4, ptr %data, i64 %i, i32 0
@@ -862,41 +862,41 @@ entry:
br label %for.body
; VF_2-LABEL: Checking a loop in 'i64_factor_4'
-; VF_2: Found an estimated cost of 88 for VF 2 For instruction: %tmp4 = load i64, ptr %tmp0, align 8
-; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load i64, ptr %tmp1, align 8
-; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp6 = load i64, ptr %tmp2, align 8
-; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp7 = load i64, ptr %tmp3, align 8
-; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i64 %tmp4, ptr %tmp0, align 8
-; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i64 %tmp5, ptr %tmp1, align 8
-; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store i64 %tmp6, ptr %tmp2, align 8
-; VF_2-NEXT: Found an estimated cost of 88 for VF 2 For instruction: store i64 %tmp7, ptr %tmp3, align 8
+; VF_2: Found an estimated cost of 22 for VF 2 For instruction: %tmp4 = load i64, ptr %tmp0, align 8
+; VF_2-NEXT: Found an estimated cost of 22 for VF 2 For instruction: %tmp5 = load i64, ptr %tmp1, align 8
+; VF_2-NEXT: Found an estimated cost of 22 for VF 2 For instruction: %tmp6 = load i64, ptr %tmp2, align 8
+; VF_2-NEXT: Found an estimated cost of 22 for VF 2 For instruction: %tmp7 = load i64, ptr %tmp3, align 8
+; VF_2-NEXT: Found an estimated cost of 22 for VF 2 For instruction: store i64 %tmp4, ptr %tmp0, align 8
+; VF_2-NEXT: Found an estimated cost of 22 for VF 2 For instruction: store i64 %tmp5, ptr %tmp1, align 8
+; VF_2-NEXT: Found an estimated cost of 22 for VF 2 For instruction: store i64 %tmp6, ptr %tmp2, align 8
+; VF_2-NEXT: Found an estimated cost of 22 for VF 2 For instruction: store i64 %tmp7, ptr %tmp3, align 8
; VF_4-LABEL: Checking a loop in 'i64_factor_4'
-; VF_4: Found an estimated cost of 176 for VF 4 For instruction: %tmp4 = load i64, ptr %tmp0, align 8
-; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load i64, ptr %tmp1, align 8
-; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp6 = load i64, ptr %tmp2, align 8
-; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp7 = load i64, ptr %tmp3, align 8
-; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i64 %tmp4, ptr %tmp0, align 8
-; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i64 %tmp5, ptr %tmp1, align 8
-; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store i64 %tmp6, ptr %tmp2, align 8
-; VF_4-NEXT: Found an estimated cost of 176 for VF 4 For instruction: store i64 %tmp7, ptr %tmp3, align 8
+; VF_4: Found an estimated cost of 44 for VF 4 For instruction: %tmp4 = load i64, ptr %tmp0, align 8
+; VF_4-NEXT: Found an estimated cost of 44 for VF 4 For instruction: %tmp5 = load i64, ptr %tmp1, align 8
+; VF_4-NEXT: Found an estimated cost of 44 for VF 4 For instruction: %tmp6 = load i64, ptr %tmp2, align 8
+; VF_4-NEXT: Found an estimated cost of 44 for VF 4 For instruction: %tmp7 = load i64, ptr %tmp3, align 8
+; VF_4-NEXT: Found an estimated cost of 44 for VF 4 For instruction: store i64 %tmp4, ptr %tmp0, align 8
+; VF_4-NEXT: Found an estimated cost of 44 for VF 4 For instruction: store i64 %tmp5, ptr %tmp1, align 8
+; VF_4-NEXT: Found an estimated cost of 44 for VF 4 For instruction: store i64 %tmp6, ptr %tmp2, align 8
+; VF_4-NEXT: Found an estimated cost of 44 for VF 4 For instruction: store i64 %tmp7, ptr %tmp3, align 8
; VF_8-LABEL: Checking a loop in 'i64_factor_4'
-; VF_8: Found an estimated cost of 352 for VF 8 For instruction: %tmp4 = load i64, ptr %tmp0, align 8
-; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load i64, ptr %tmp1, align 8
-; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp6 = load i64, ptr %tmp2, align 8
-; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp7 = load i64, ptr %tmp3, align 8
-; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i64 %tmp4, ptr %tmp0, align 8
-; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i64 %tmp5, ptr %tmp1, align 8
-; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store i64 %tmp6, ptr %tmp2, align 8
-; VF_8-NEXT: Found an estimated cost of 352 for VF 8 For instruction: store i64 %tmp7, ptr %tmp3, align 8
+; VF_8: Found an estimated cost of 88 for VF 8 For instruction: %tmp4 = load i64, ptr %tmp0, align 8
+; VF_8-NEXT: Found an estimated cost of 88 for VF 8 For instruction: %tmp5 = load i64, ptr %tmp1, align 8
+; VF_8-NEXT: Found an estimated cost of 88 for VF 8 For instruction: %tmp6 = load i64, ptr %tmp2, align 8
+; VF_8-NEXT: Found an estimated cost of 88 for VF 8 For instruction: %tmp7 = load i64, ptr %tmp3, align 8
+; VF_8-NEXT: Found an estimated cost of 88 for VF 8 For instruction: store i64 %tmp4, ptr %tmp0, align 8
+; VF_8-NEXT: Found an estimated cost of 88 for VF 8 For instruction: store i64 %tmp5, ptr %tmp1, align 8
+; VF_8-NEXT: Found an estimated cost of 88 for VF 8 For instruction: store i64 %tmp6, ptr %tmp2, align 8
+; VF_8-NEXT: Found an estimated cost of 88 for VF 8 For instruction: store i64 %tmp7, ptr %tmp3, align 8
; VF_16-LABEL: Checking a loop in 'i64_factor_4'
-; VF_16: Found an estimated cost of 704 for VF 16 For instruction: %tmp4 = load i64, ptr %tmp0, align 8
-; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load i64, ptr %tmp1, align 8
-; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp6 = load i64, ptr %tmp2, align 8
-; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp7 = load i64, ptr %tmp3, align 8
-; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i64 %tmp4, ptr %tmp0, align 8
-; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i64 %tmp5, ptr %tmp1, align 8
-; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store i64 %tmp6, ptr %tmp2, align 8
-; VF_16-NEXT: Found an estimated cost of 704 for VF 16 For instruction: store i64 %tmp7, ptr %tmp3, align 8
+; VF_16: Found an estimated cost of 176 for VF 16 For instruction: %tmp4 = load i64, ptr %tmp0, align 8
+; VF_16-NEXT: Found an estimated cost of 176 for VF 16 For instruction: %tmp5 = load i64, ptr %tmp1, align 8
+; VF_16-NEXT: Found an estimated cost of 176 for VF 16 For instruction: %tmp6 = load i64, ptr %tmp2, align 8
+; VF_16-NEXT: Found an estimated cost of 176 for VF 16 For instruction: %tmp7 = load i64, ptr %tmp3, align 8
+; VF_16-NEXT: Found an estimated cost of 176 for VF 16 For instruction: store i64 %tmp4, ptr %tmp0, align 8
+; VF_16-NEXT: Found an estimated cost of 176 for VF 16 For instruction: store i64 %tmp5, ptr %tmp1, align 8
+; VF_16-NEXT: Found an estimated cost of 176 for VF 16 For instruction: store i64 %tmp6, ptr %tmp2, align 8
+; VF_16-NEXT: Found an estimated cost of 176 for VF 16 For instruction: store i64 %tmp7, ptr %tmp3, align 8
for.body:
%i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
%tmp0 = getelementptr inbounds %i64.4, ptr %data, i64 %i, i32 0
@@ -997,14 +997,14 @@ entry:
; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store float %tmp6, ptr %tmp2, align 4
; VF_2-NEXT: Found an estimated cost of 20 for VF 2 For instruction: store float %tmp7, ptr %tmp3, align 4
; VF_4-LABEL: Checking a loop in 'f32_factor_4'
-; VF_4: Found an estimated cost of 32 for VF 4 For instruction: %tmp4 = load float, ptr %tmp0, align 4
-; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load float, ptr %tmp1, align 4
-; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp6 = load float, ptr %tmp2, align 4
-; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp7 = load float, ptr %tmp3, align 4
-; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store float %tmp4, ptr %tmp0, align 4
-; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store float %tmp5, ptr %tmp1, align 4
-; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store float %tmp6, ptr %tmp2, align 4
-; VF_4-NEXT: Found an estimated cost of 32 for VF 4 For instruction: store float %tmp7, ptr %tmp3, align 4
+; VF_4: Found an estimated cost of 8 for VF 4 For instruction: %tmp4 = load float, ptr %tmp0, align 4
+; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: %tmp5 = load float, ptr %tmp1, align 4
+; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: %tmp6 = load float, ptr %tmp2, align 4
+; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: %tmp7 = load float, ptr %tmp3, align 4
+; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store float %tmp4, ptr %tmp0, align 4
+; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store float %tmp5, ptr %tmp1, align 4
+; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store float %tmp6, ptr %tmp2, align 4
+; VF_4-NEXT: Found an estimated cost of 8 for VF 4 For instruction: store float %tmp7, ptr %tmp3, align 4
; VF_8-LABEL: Checking a loop in 'f32_factor_4'
; VF_8: Found an estimated cost of 80 for VF 8 For instruction: %tmp4 = load float, ptr %tmp0, align 4
; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load float, ptr %tmp1, align 4
@@ -1051,41 +1051,41 @@ entry:
br label %for.body
; VF_2-LABEL: Checking a loop in 'f64_factor_4'
-; VF_2: Found an estimated cost of 24 for VF 2 For instruction: %tmp4 = load double, ptr %tmp0, align 8
-; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load double, ptr %tmp1, align 8
-; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp6 = load double, ptr %tmp2, align 8
-; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: %tmp7 = load double, ptr %tmp3, align 8
-; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store double %tmp4, ptr %tmp0, align 8
-; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store double %tmp5, ptr %tmp1, align 8
-; VF_2-NEXT: Found an estimated cost of 0 for VF 2 For instruction: store double %tmp6, ptr %tmp2, align 8
-; VF_2-NEXT: Found an estimated cost of 24 for VF 2 For instruction: store double %tmp7, ptr %tmp3, align 8
+; VF_2: Found an estimated cost of 6 for VF 2 For instruction: %tmp4 = load double, ptr %tmp0, align 8
+; VF_2-NEXT: Found an estimated cost of 6 for VF 2 For instruction: %tmp5 = load double, ptr %tmp1, align 8
+; VF_2-NEXT: Found an estimated cost of 6 for VF 2 For instruction: %tmp6 = load double, ptr %tmp2, align 8
+; VF_2-NEXT: Found an estimated cost of 6 for VF 2 For instruction: %tmp7 = load double, ptr %tmp3, align 8
+; VF_2-NEXT: Found an estimated cost of 6 for VF 2 For instruction: store double %tmp4, ptr %tmp0, align 8
+; VF_2-NEXT: Found an estimated cost of 6 for VF 2 For instruction: store double %tmp5, ptr %tmp1, align 8
+; VF_2-NEXT: Found an estimated cost of 6 for VF 2 For instruction: store double %tmp6, ptr %tmp2, align 8
+; VF_2-NEXT: Found an estimated cost of 6 for VF 2 For instruction: store double %tmp7, ptr %tmp3, align 8
; VF_4-LABEL: Checking a loop in 'f64_factor_4'
-; VF_4: Found an estimated cost of 48 for VF 4 For instruction: %tmp4 = load double, ptr %tmp0, align 8
-; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load double, ptr %tmp1, align 8
-; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp6 = load double, ptr %tmp2, align 8
-; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: %tmp7 = load double, ptr %tmp3, align 8
-; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store double %tmp4, ptr %tmp0, align 8
-; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store double %tmp5, ptr %tmp1, align 8
-; VF_4-NEXT: Found an estimated cost of 0 for VF 4 For instruction: store double %tmp6, ptr %tmp2, align 8
-; VF_4-NEXT: Found an estimated cost of 48 for VF 4 For instruction: store double %tmp7, ptr %tmp3, align 8
+; VF_4: Found an estimated cost of 12 for VF 4 For instruction: %tmp4 = load double, ptr %tmp0, align 8
+; VF_4-NEXT: Found an estimated cost of 12 for VF 4 For instruction: %tmp5 = load double, ptr %tmp1, align 8
+; VF_4-NEXT: Found an estimated cost of 12 for VF 4 For instruction: %tmp6 = load double, ptr %tmp2, align 8
+; VF_4-NEXT: Found an estimated cost of 12 for VF 4 For instruction: %tmp7 = load double, ptr %tmp3, align 8
+; VF_4-NEXT: Found an estimated cost of 12 for VF 4 For instruction: store double %tmp4, ptr %tmp0, align 8
+; VF_4-NEXT: Found an estimated cost of 12 for VF 4 For instruction: store double %tmp5, ptr %tmp1, align 8
+; VF_4-NEXT: Found an estimated cost of 12 for VF 4 For instruction: store double %tmp6, ptr %tmp2, align 8
+; VF_4-NEXT: Found an estimated cost of 12 for VF 4 For instruction: store double %tmp7, ptr %tmp3, align 8
; VF_8-LABEL: Checking a loop in 'f64_factor_4'
-; VF_8: Found an estimated cost of 96 for VF 8 For instruction: %tmp4 = load double, ptr %tmp0, align 8
-; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load double, ptr %tmp1, align 8
-; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp6 = load double, ptr %tmp2, align 8
-; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: %tmp7 = load double, ptr %tmp3, align 8
-; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store double %tmp4, ptr %tmp0, align 8
-; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store double %tmp5, ptr %tmp1, align 8
-; VF_8-NEXT: Found an estimated cost of 0 for VF 8 For instruction: store double %tmp6, ptr %tmp2, align 8
-; VF_8-NEXT: Found an estimated cost of 96 for VF 8 For instruction: store double %tmp7, ptr %tmp3, align 8
+; VF_8: Found an estimated cost of 24 for VF 8 For instruction: %tmp4 = load double, ptr %tmp0, align 8
+; VF_8-NEXT: Found an estimated cost of 24 for VF 8 For instruction: %tmp5 = load double, ptr %tmp1, align 8
+; VF_8-NEXT: Found an estimated cost of 24 for VF 8 For instruction: %tmp6 = load double, ptr %tmp2, align 8
+; VF_8-NEXT: Found an estimated cost of 24 for VF 8 For instruction: %tmp7 = load double, ptr %tmp3, align 8
+; VF_8-NEXT: Found an estimated cost of 24 for VF 8 For instruction: store double %tmp4, ptr %tmp0, align 8
+; VF_8-NEXT: Found an estimated cost of 24 for VF 8 For instruction: store double %tmp5, ptr %tmp1, align 8
+; VF_8-NEXT: Found an estimated cost of 24 for VF 8 For instruction: store double %tmp6, ptr %tmp2, align 8
+; VF_8-NEXT: Found an estimated cost of 24 for VF 8 For instruction: store double %tmp7, ptr %tmp3, align 8
; VF_16-LABEL: Checking a loop in 'f64_factor_4'
-; VF_16: Found an estimated cost of 192 for VF 16 For instruction: %tmp4 = load double, ptr %tmp0, align 8
-; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load double, ptr %tmp1, align 8
-; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp6 = load double, ptr %tmp2, align 8
-; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: %tmp7 = load double, ptr %tmp3, align 8
-; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store double %tmp4, ptr %tmp0, align 8
-; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store double %tmp5, ptr %tmp1, align 8
-; VF_16-NEXT: Found an estimated cost of 0 for VF 16 For instruction: store double %tmp6, ptr %tmp2, align 8
-; VF_16-NEXT: Found an estimated cost of 192 for VF 16 For instruction: store double %tmp7, ptr %tmp3, align 8
+; VF_16: Found an estimated cost of 48 for VF 16 For instruction: %tmp4 = load double, ptr %tmp0, align 8
+; VF_16-NEXT: Found an estimated cost of 48 for VF 16 For instruction: %tmp5 = load double, ptr %tmp1, align 8
+; VF_16-NEXT: Found an estimated cost of 48 for VF 16 For instruction: %tmp6 = load double, ptr %tmp2, align 8
+; VF_16-NEXT: Found an estimated cost of 48 for VF 16 For instruction: %tmp7 = load double, ptr %tmp3, align 8
+; VF_16-NEXT: Found an estimated cost of 48 for VF 16 For instruction: store double %tmp4, ptr %tmp0, align 8
+; VF_16-NEXT: Found an estimated cost of 48 for VF 16 For instruction: store double %tmp5, ptr %tmp1, align 8
+; VF_16-NEXT: Found an estimated cost of 48 for VF 16 For instruction: store double %tmp6, ptr %tmp2, align 8
+; VF_16-NEXT: Found an estimated cost of 48 for VF 16 For instruction: store double %tmp7, ptr %tmp3, align 8
for.body:
%i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
%tmp0 = getelementptr inbounds %f64.4, ptr %data, i64 %i, i32 0
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll b/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll
index 6d309c4453c7e1..df02cb741700e5 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll
@@ -176,7 +176,7 @@ declare i16 @llvm.umax.i16(i16, i16)
; Test case for https://github.com/llvm/llvm-project/issues/106780.
define i32 @cost_of_exit_branch_and_cond_insts(ptr %a, ptr %b, i1 %c, i16 %x) #0 {
; CHECK-LABEL: define i32 @cost_of_exit_branch_and_cond_insts(
-; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i1 [[C:%.*]], i16 [[X:%.*]]) #[[ATTR0]] {
+; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]], i1 [[C:%.*]], i16 [[X:%.*]]) #[[ATTR2:[0-9]+]] {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: [[TMP0:%.*]] = zext i16 [[X]] to i32
; CHECK-NEXT: [[UMAX3:%.*]] = call i32 @llvm.umax.i32(i32 [[TMP0]], i32 111)
@@ -404,6 +404,95 @@ exit:
ret void
}
+; Test for https://github.com/llvm/llvm-project/issues/108098.
+define void @gather_interleave_group_with_dead_insert_pos(i64 %N, ptr noalias %src, ptr noalias %dst) #0 {
+; CHECK-LABEL: define void @gather_interleave_group_with_dead_insert_pos(
+; CHECK-SAME: i64 [[N:%.*]], ptr noalias [[SRC:%.*]], ptr noalias [[DST:%.*]]) #[[ATTR2]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N]], i64 0)
+; CHECK-NEXT: [[TMP0:%.*]] = add nuw i64 [[SMAX]], 3
+; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 2
+; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i64 [[TMP2]], 16
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 16
+; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i64 16, i64 [[N_MOD_VF]]
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[TMP4]]
+; CHECK-NEXT: [[IND_END:%.*]] = mul i64 [[N_VEC]], 4
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_IND:%.*]] = phi <8 x i64> [ <i64 0, i64 4, i64 8, i64 12, i64 16, i64 20, i64 24, i64 28>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[STEP_ADD:%.*]] = add <8 x i64> [[VEC_IND]], <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32>
+; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 4
+; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 0
+; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 32
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP5]]
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP6]]
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr i8, ptr [[TMP7]], i32 0
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[TMP8]], i32 0
+; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <32 x i8>, ptr [[TMP9]], align 1
+; CHECK-NEXT: [[WIDE_VEC2:%.*]] = load <32 x i8>, ptr [[TMP10]], align 1
+; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <32 x i8> [[WIDE_VEC]], <32 x i8> poison, <8 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28>
+; CHECK-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <32 x i8> [[WIDE_VEC2]], <32 x i8> poison, <8 x i32> <i32 0, i32 4, i32 8, i32 12, i32 16, i32 20, i32 24, i32 28>
+; CHECK-NEXT: [[STRIDED_VEC4:%.*]] = shufflevector <32 x i8> [[WIDE_VEC]], <32 x i8> poison, <8 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29>
+; CHECK-NEXT: [[STRIDED_VEC5:%.*]] = shufflevector <32 x i8> [[WIDE_VEC2]], <32 x i8> poison, <8 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29>
+; CHECK-NEXT: [[TMP11:%.*]] = zext <8 x i8> [[STRIDED_VEC4]] to <8 x i32>
+; CHECK-NEXT: [[TMP12:%.*]] = zext <8 x i8> [[STRIDED_VEC5]] to <8 x i32>
+; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i32, ptr [[DST]], <8 x i64> [[VEC_IND]]
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[DST]], <8 x i64> [[STEP_ADD]]
+; CHECK-NEXT: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> [[TMP11]], <8 x ptr> [[TMP13]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
+; CHECK-NEXT: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> [[TMP12]], <8 x ptr> [[TMP14]], i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[STEP_ADD]], <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32>
+; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: br label %[[SCALAR_PH]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
+; CHECK-NEXT: [[GEP_SRC_0:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[IV]]
+; CHECK-NEXT: [[L_DEAD:%.*]] = load i8, ptr [[GEP_SRC_0]], align 1
+; CHECK-NEXT: [[IV_1:%.*]] = add i64 [[IV]], 1
+; CHECK-NEXT: [[GEP_SRC_1:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[IV_1]]
+; CHECK-NEXT: [[L_1:%.*]] = load i8, ptr [[GEP_SRC_1]], align 1
+; CHECK-NEXT: [[EXT:%.*]] = zext i8 [[L_1]] to i32
+; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr i32, ptr [[DST]], i64 [[IV]]
+; CHECK-NEXT: store i32 [[EXT]], ptr [[GEP_DST]], align 4
+; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 4
+; CHECK-NEXT: [[EC:%.*]] = icmp slt i64 [[IV]], [[N]]
+; CHECK-NEXT: br i1 [[EC]], label %[[LOOP]], label %[[EXIT:.*]], !llvm.loop [[LOOP21:![0-9]+]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i64 [ %iv.next, %loop ], [ 0, %entry ]
+ %gep.src.0 = getelementptr i8, ptr %src, i64 %iv
+ %l.dead = load i8, ptr %gep.src.0, align 1
+ %iv.1 = add i64 %iv, 1
+ %gep.src.1 = getelementptr i8, ptr %src, i64 %iv.1
+ %l.1 = load i8, ptr %gep.src.1, align 1
+ %ext = zext i8 %l.1 to i32
+ %gep.dst = getelementptr i32, ptr %dst, i64 %iv
+ store i32 %ext, ptr %gep.dst, align 4
+ %iv.next = add nsw i64 %iv, 4
+ %ec = icmp slt i64 %iv, %N
+ br i1 %ec, label %loop, label %exit
+
+exit:
+ ret void
+}
+
+attributes #0 = { "target-features"="+64bit,+v" }
+
;.
; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
@@ -425,4 +514,6 @@ exit:
; CHECK: [[LOOP17]] = distinct !{[[LOOP17]], [[META1]]}
; CHECK: [[LOOP18]] = distinct !{[[LOOP18]], [[META1]], [[META2]]}
; CHECK: [[LOOP19]] = distinct !{[[LOOP19]], [[META2]], [[META1]]}
+; CHECK: [[LOOP20]] = distinct !{[[LOOP20]], [[META1]], [[META2]]}
+; CHECK: [[LOOP21]] = distinct !{[[LOOP21]], [[META2]], [[META1]]}
;.
diff --git a/llvm/test/Transforms/LoopVectorize/SystemZ/mem-interleaving-costs-03.ll b/llvm/test/Transforms/LoopVectorize/SystemZ/mem-interleaving-costs-03.ll
index 88eb9c4d27e33a..13c443c4d579f4 100644
--- a/llvm/test/Transforms/LoopVectorize/SystemZ/mem-interleaving-costs-03.ll
+++ b/llvm/test/Transforms/LoopVectorize/SystemZ/mem-interleaving-costs-03.ll
@@ -6,7 +6,8 @@
; Check cost function for <8 x i128> store interleave group.
; CHECK: LV: Checking a loop in 'fun'
-; CHECK: LV: Found an estimated cost of 8 for VF 4 For instruction: store i128 8721036757475490113
+; CHECK: LV: Found an estimated cost of 4 for VF 4 For instruction: store i128 8721036757475490113
+; CHECK: LV: Found an estimated cost of 4 for VF 4 For instruction: store i128 8721036757475490113
define noundef i32 @fun(i32 %argc, ptr nocapture readnone %argv) {
entry:
More information about the llvm-commits
mailing list