[llvm] c8c0b18 - [LV] Update tests to not have dead interleave groups.

Florian Hahn via llvm-commits llvm-commits at lists.llvm.org
Sun Jul 21 06:04:22 PDT 2024


Author: Florian Hahn
Date: 2024-07-21T14:03:40+01:00
New Revision: c8c0b18b5d58415b79ea13b62eee70c130c26a0a

URL: https://github.com/llvm/llvm-project/commit/c8c0b18b5d58415b79ea13b62eee70c130c26a0a
DIFF: https://github.com/llvm/llvm-project/commit/c8c0b18b5d58415b79ea13b62eee70c130c26a0a.diff

LOG: [LV] Update tests to not have dead interleave groups.

Update existing tests with dead interleave groups by adding users. This
ensures the tests keep testing what they were intended to test with a
planned change to skip unused instructions in cost computations.

Added: 
    

Modified: 
    llvm/test/Transforms/LoopVectorize/AArch64/interleaved_cost.ll
    llvm/test/Transforms/LoopVectorize/ARM/interleaved_cost.ll
    llvm/test/Transforms/LoopVectorize/ARM/mve-interleaved-cost.ll
    llvm/test/Transforms/LoopVectorize/SystemZ/load-scalarization-cost-0.ll
    llvm/test/Transforms/LoopVectorize/first-order-recurrence-scalable-vf1.ll

Removed: 
    


################################################################################
diff  --git a/llvm/test/Transforms/LoopVectorize/AArch64/interleaved_cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/interleaved_cost.ll
index 78798725b5b9c..21af9ae801e16 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/interleaved_cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/interleaved_cost.ll
@@ -15,21 +15,21 @@ entry:
 ; VF_8-LABEL:  Checking a loop in 'i8_factor_2'
 ; VF_8:          Found an estimated cost of 2 for VF 8 For instruction: %tmp2 = load i8, ptr %tmp0, align 1
 ; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load i8, ptr %tmp1, align 1
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i8 0, ptr %tmp0, align 1
-; VF_8-NEXT:     Found an estimated cost of 2 for VF 8 For instruction: store i8 0, ptr %tmp1, align 1
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i8 %tmp2, ptr %tmp0, align 1
+; VF_8-NEXT:     Found an estimated cost of 2 for VF 8 For instruction: store i8 %tmp3, ptr %tmp1, align 1
 ; VF_16-LABEL: Checking a loop in 'i8_factor_2'
 ; VF_16:         Found an estimated cost of 2 for VF 16 For instruction: %tmp2 = load i8, ptr %tmp0, align 1
 ; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load i8, ptr %tmp1, align 1
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i8 0, ptr %tmp0, align 1
-; VF_16-NEXT:    Found an estimated cost of 2 for VF 16 For instruction: store i8 0, ptr %tmp1, align 1
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i8 %tmp2, ptr %tmp0, align 1
+; VF_16-NEXT:    Found an estimated cost of 2 for VF 16 For instruction: store i8 %tmp3, ptr %tmp1, align 1
 for.body:
   %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
   %tmp0 = getelementptr inbounds %i8.2, ptr %data, i64 %i, i32 0
   %tmp1 = getelementptr inbounds %i8.2, ptr %data, i64 %i, i32 1
   %tmp2 = load i8, ptr %tmp0, align 1
   %tmp3 = load i8, ptr %tmp1, align 1
-  store i8 0, ptr %tmp0, align 1
-  store i8 0, ptr %tmp1, align 1
+  store i8 %tmp2, ptr %tmp0, align 1
+  store i8 %tmp3, ptr %tmp1, align 1
   %i.next = add nuw nsw i64 %i, 1
   %cond = icmp slt i64 %i.next, %n
   br i1 %cond, label %for.body, label %for.end
@@ -46,26 +46,26 @@ entry:
 ; VF_4-LABEL: Checking a loop in 'i16_factor_2'
 ; VF_4:          Found an estimated cost of 2 for VF 4 For instruction: %tmp2 = load i16, ptr %tmp0, align 2
 ; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load i16, ptr %tmp1, align 2
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i16 0, ptr %tmp0, align 2
-; VF_4-NEXT:     Found an estimated cost of 2 for VF 4 For instruction: store i16 0, ptr %tmp1, align 2
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i16 %tmp2, ptr %tmp0, align 2
+; VF_4-NEXT:     Found an estimated cost of 2 for VF 4 For instruction: store i16 %tmp3, ptr %tmp1, align 2
 ; VF_8-LABEL:  Checking a loop in 'i16_factor_2'
 ; VF_8:          Found an estimated cost of 2 for VF 8 For instruction: %tmp2 = load i16, ptr %tmp0, align 2
 ; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load i16, ptr %tmp1, align 2
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i16 0, ptr %tmp0, align 2
-; VF_8-NEXT:     Found an estimated cost of 2 for VF 8 For instruction: store i16 0, ptr %tmp1, align 2
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i16 %tmp2, ptr %tmp0, align 2
+; VF_8-NEXT:     Found an estimated cost of 2 for VF 8 For instruction: store i16 %tmp3, ptr %tmp1, align 2
 ; VF_16-LABEL: Checking a loop in 'i16_factor_2'
 ; VF_16:         Found an estimated cost of 4 for VF 16 For instruction: %tmp2 = load i16, ptr %tmp0, align 2
 ; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load i16, ptr %tmp1, align 2
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i16 0, ptr %tmp0, align 2
-; VF_16-NEXT:    Found an estimated cost of 4 for VF 16 For instruction: store i16 0, ptr %tmp1, align 2
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i16 %tmp2, ptr %tmp0, align 2
+; VF_16-NEXT:    Found an estimated cost of 4 for VF 16 For instruction: store i16 %tmp3, ptr %tmp1, align 2
 for.body:
   %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
   %tmp0 = getelementptr inbounds %i16.2, ptr %data, i64 %i, i32 0
   %tmp1 = getelementptr inbounds %i16.2, ptr %data, i64 %i, i32 1
   %tmp2 = load i16, ptr %tmp0, align 2
   %tmp3 = load i16, ptr %tmp1, align 2
-  store i16 0, ptr %tmp0, align 2
-  store i16 0, ptr %tmp1, align 2
+  store i16 %tmp2, ptr %tmp0, align 2
+  store i16 %tmp3, ptr %tmp1, align 2
   %i.next = add nuw nsw i64 %i, 1
   %cond = icmp slt i64 %i.next, %n
   br i1 %cond, label %for.body, label %for.end
@@ -82,31 +82,31 @@ entry:
 ; VF_2-LABEL:  Checking a loop in 'i32_factor_2'
 ; VF_2:          Found an estimated cost of 2 for VF 2 For instruction: %tmp2 = load i32, ptr %tmp0, align 4
 ; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp3 = load i32, ptr %tmp1, align 4
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i32 0, ptr %tmp0, align 4
-; VF_2-NEXT:     Found an estimated cost of 2 for VF 2 For instruction: store i32 0, ptr %tmp1, align 4
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i32 %tmp2, ptr %tmp0, align 4
+; VF_2-NEXT:     Found an estimated cost of 2 for VF 2 For instruction: store i32 %tmp3, ptr %tmp1, align 4
 ; VF_4-LABEL:  Checking a loop in 'i32_factor_2'
 ; VF_4:          Found an estimated cost of 2 for VF 4 For instruction: %tmp2 = load i32, ptr %tmp0, align 4
 ; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load i32, ptr %tmp1, align 4
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i32 0, ptr %tmp0, align 4
-; VF_4-NEXT:     Found an estimated cost of 2 for VF 4 For instruction: store i32 0, ptr %tmp1, align 4
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i32 %tmp2, ptr %tmp0, align 4
+; VF_4-NEXT:     Found an estimated cost of 2 for VF 4 For instruction: store i32 %tmp3, ptr %tmp1, align 4
 ; VF_8-LABEL:  Checking a loop in 'i32_factor_2'
 ; VF_8:          Found an estimated cost of 4 for VF 8 For instruction: %tmp2 = load i32, ptr %tmp0, align 4
 ; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load i32, ptr %tmp1, align 4
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i32 0, ptr %tmp0, align 4
-; VF_8-NEXT:     Found an estimated cost of 4 for VF 8 For instruction: store i32 0, ptr %tmp1, align 4
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i32 %tmp2, ptr %tmp0, align 4
+; VF_8-NEXT:     Found an estimated cost of 4 for VF 8 For instruction: store i32 %tmp3, ptr %tmp1, align 4
 ; VF_16-LABEL: Checking a loop in 'i32_factor_2'
 ; VF_16:         Found an estimated cost of 8 for VF 16 For instruction: %tmp2 = load i32, ptr %tmp0, align 4
 ; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load i32, ptr %tmp1, align 4
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i32 0, ptr %tmp0, align 4
-; VF_16-NEXT:    Found an estimated cost of 8 for VF 16 For instruction: store i32 0, ptr %tmp1, align 4
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i32 %tmp2, ptr %tmp0, align 4
+; VF_16-NEXT:    Found an estimated cost of 8 for VF 16 For instruction: store i32 %tmp3, ptr %tmp1, align 4
 for.body:
   %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
   %tmp0 = getelementptr inbounds %i32.2, ptr %data, i64 %i, i32 0
   %tmp1 = getelementptr inbounds %i32.2, ptr %data, i64 %i, i32 1
   %tmp2 = load i32, ptr %tmp0, align 4
   %tmp3 = load i32, ptr %tmp1, align 4
-  store i32 0, ptr %tmp0, align 4
-  store i32 0, ptr %tmp1, align 4
+  store i32 %tmp2, ptr %tmp0, align 4
+  store i32 %tmp3, ptr %tmp1, align 4
   %i.next = add nuw nsw i64 %i, 1
   %cond = icmp slt i64 %i.next, %n
   br i1 %cond, label %for.body, label %for.end
@@ -123,31 +123,31 @@ entry:
 ; VF_2-LABEL:  Checking a loop in 'i64_factor_2'
 ; VF_2:          Found an estimated cost of 2 for VF 2 For instruction: %tmp2 = load i64, ptr %tmp0, align 8
 ; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp3 = load i64, ptr %tmp1, align 8
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i64 0, ptr %tmp0, align 8
-; VF_2-NEXT:     Found an estimated cost of 2 for VF 2 For instruction: store i64 0, ptr %tmp1, align 8
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i64 %tmp2, ptr %tmp0, align 8
+; VF_2-NEXT:     Found an estimated cost of 2 for VF 2 For instruction: store i64 %tmp3, ptr %tmp1, align 8
 ; VF_4-LABEL:  Checking a loop in 'i64_factor_2'
 ; VF_4:          Found an estimated cost of 4 for VF 4 For instruction: %tmp2 = load i64, ptr %tmp0, align 8
 ; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load i64, ptr %tmp1, align 8
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i64 0, ptr %tmp0, align 8
-; VF_4-NEXT:     Found an estimated cost of 4 for VF 4 For instruction: store i64 0, ptr %tmp1, align 8
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i64 %tmp2, ptr %tmp0, align 8
+; VF_4-NEXT:     Found an estimated cost of 4 for VF 4 For instruction: store i64 %tmp3, ptr %tmp1, align 8
 ; VF_8-LABEL:  Checking a loop in 'i64_factor_2'
 ; VF_8:          Found an estimated cost of 8 for VF 8 For instruction: %tmp2 = load i64, ptr %tmp0, align 8
 ; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load i64, ptr %tmp1, align 8
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i64 0, ptr %tmp0, align 8
-; VF_8-NEXT:     Found an estimated cost of 8 for VF 8 For instruction: store i64 0, ptr %tmp1, align 8
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i64 %tmp2, ptr %tmp0, align 8
+; VF_8-NEXT:     Found an estimated cost of 8 for VF 8 For instruction: store i64 %tmp3, ptr %tmp1, align 8
 ; VF_16-LABEL: Checking a loop in 'i64_factor_2'
 ; VF_16:         Found an estimated cost of 16 for VF 16 For instruction: %tmp2 = load i64, ptr %tmp0, align 8
 ; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load i64, ptr %tmp1, align 8
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i64 0, ptr %tmp0, align 8
-; VF_16-NEXT:    Found an estimated cost of 16 for VF 16 For instruction: store i64 0, ptr %tmp1, align 8
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i64 %tmp2, ptr %tmp0, align 8
+; VF_16-NEXT:    Found an estimated cost of 16 for VF 16 For instruction: store i64 %tmp3, ptr %tmp1, align 8
 for.body:
   %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
   %tmp0 = getelementptr inbounds %i64.2, ptr %data, i64 %i, i32 0
   %tmp1 = getelementptr inbounds %i64.2, ptr %data, i64 %i, i32 1
   %tmp2 = load i64, ptr %tmp0, align 8
   %tmp3 = load i64, ptr %tmp1, align 8
-  store i64 0, ptr %tmp0, align 8
-  store i64 0, ptr %tmp1, align 8
+  store i64 %tmp2, ptr %tmp0, align 8
+  store i64 %tmp3, ptr %tmp1, align 8
   %i.next = add nuw nsw i64 %i, 1
   %cond = icmp slt i64 %i.next, %n
   br i1 %cond, label %for.body, label %for.end
@@ -170,16 +170,16 @@ entry:
 ; VF_2-LABEL: Checking a loop in 'i64_factor_8'
 ; VF_2:         Found an estimated cost of 16 for VF 2 For instruction: %tmp2 = load i64, ptr %tmp0, align 8
 ; VF_2-NEXT:    Found an estimated cost of 0 for VF 2 For instruction: %tmp3 = load i64, ptr %tmp1, align 8
-; VF_2-NEXT:    Found an estimated cost of 8 for VF 2 For instruction: store i64 0, ptr %tmp0, align 8
-; VF_2-NEXT:    Found an estimated cost of 8 for VF 2 For instruction: store i64 0, ptr %tmp1, align 8
+; VF_2-NEXT:    Found an estimated cost of 12 for VF 2 For instruction: store i64 %tmp2, ptr %tmp0, align 8
+; VF_2-NEXT:    Found an estimated cost of 12 for VF 2 For instruction: store i64 %tmp3, ptr %tmp1, align 8
 for.body:
   %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
   %tmp0 = getelementptr inbounds %i64.8, ptr %data, i64 %i, i32 2
   %tmp1 = getelementptr inbounds %i64.8, ptr %data, i64 %i, i32 6
   %tmp2 = load i64, ptr %tmp0, align 8
   %tmp3 = load i64, ptr %tmp1, align 8
-  store i64 0, ptr %tmp0, align 8
-  store i64 0, ptr %tmp1, align 8
+  store i64 %tmp2, ptr %tmp0, align 8
+  store i64 %tmp3, ptr %tmp1, align 8
   %i.next = add nuw nsw i64 %i, 1
   %cond = icmp slt i64 %i.next, %n
   br i1 %cond, label %for.body, label %for.end

diff  --git a/llvm/test/Transforms/LoopVectorize/ARM/interleaved_cost.ll b/llvm/test/Transforms/LoopVectorize/ARM/interleaved_cost.ll
index d08c16dcb3ed1..214d9abd712cd 100644
--- a/llvm/test/Transforms/LoopVectorize/ARM/interleaved_cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/ARM/interleaved_cost.ll
@@ -15,21 +15,21 @@ entry:
 ; VF_8-LABEL:  Checking a loop in 'i8_factor_2'
 ; VF_8:          Found an estimated cost of 2 for VF 8 For instruction: %tmp2 = load i8, ptr %tmp0, align 1
 ; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load i8, ptr %tmp1, align 1
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i8 0, ptr %tmp0, align 1
-; VF_8-NEXT:     Found an estimated cost of 2 for VF 8 For instruction: store i8 0, ptr %tmp1, align 1
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i8 %tmp2, ptr %tmp0, align 1
+; VF_8-NEXT:     Found an estimated cost of 2 for VF 8 For instruction: store i8 %tmp3, ptr %tmp1, align 1
 ; VF_16-LABEL: Checking a loop in 'i8_factor_2'
 ; VF_16:         Found an estimated cost of 2 for VF 16 For instruction: %tmp2 = load i8, ptr %tmp0, align 1
 ; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load i8, ptr %tmp1, align 1
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i8 0, ptr %tmp0, align 1
-; VF_16-NEXT:    Found an estimated cost of 2 for VF 16 For instruction: store i8 0, ptr %tmp1, align 1
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i8 %tmp2, ptr %tmp0, align 1
+; VF_16-NEXT:    Found an estimated cost of 2 for VF 16 For instruction: store i8 %tmp3, ptr %tmp1, align 1
 for.body:
   %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
   %tmp0 = getelementptr inbounds %i8.2, ptr %data, i64 %i, i32 0
   %tmp1 = getelementptr inbounds %i8.2, ptr %data, i64 %i, i32 1
   %tmp2 = load i8, ptr %tmp0, align 1
   %tmp3 = load i8, ptr %tmp1, align 1
-  store i8 0, ptr %tmp0, align 1
-  store i8 0, ptr %tmp1, align 1
+  store i8 %tmp2, ptr %tmp0, align 1
+  store i8 %tmp3, ptr %tmp1, align 1
   %i.next = add nuw nsw i64 %i, 1
   %cond = icmp slt i64 %i.next, %n
   br i1 %cond, label %for.body, label %for.end
@@ -46,26 +46,26 @@ entry:
 ; VF_4-LABEL:  Checking a loop in 'i16_factor_2'
 ; VF_4:          Found an estimated cost of 2 for VF 4 For instruction: %tmp2 = load i16, ptr %tmp0, align 2
 ; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load i16, ptr %tmp1, align 2
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i16 0, ptr %tmp0, align 2
-; VF_4-NEXT:     Found an estimated cost of 2 for VF 4 For instruction: store i16 0, ptr %tmp1, align 2
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i16 %tmp2, ptr %tmp0, align 2
+; VF_4-NEXT:     Found an estimated cost of 2 for VF 4 For instruction: store i16 %tmp3, ptr %tmp1, align 2
 ; VF_8-LABEL:  Checking a loop in 'i16_factor_2'
 ; VF_8:          Found an estimated cost of 2 for VF 8 For instruction: %tmp2 = load i16, ptr %tmp0, align 2
 ; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load i16, ptr %tmp1, align 2
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i16 0, ptr %tmp0, align 2
-; VF_8-NEXT:     Found an estimated cost of 2 for VF 8 For instruction: store i16 0, ptr %tmp1, align 2
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i16 %tmp2, ptr %tmp0, align 2
+; VF_8-NEXT:     Found an estimated cost of 2 for VF 8 For instruction: store i16 %tmp3, ptr %tmp1, align 2
 ; VF_16-LABEL: Checking a loop in 'i16_factor_2'
 ; VF_16:         Found an estimated cost of 4 for VF 16 For instruction: %tmp2 = load i16, ptr %tmp0, align 2
 ; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load i16, ptr %tmp1, align 2
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i16 0, ptr %tmp0, align 2
-; VF_16-NEXT:    Found an estimated cost of 4 for VF 16 For instruction: store i16 0, ptr %tmp1, align 2
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i16 %tmp2, ptr %tmp0, align 2
+; VF_16-NEXT:    Found an estimated cost of 4 for VF 16 For instruction: store i16 %tmp3, ptr %tmp1, align 2
 for.body:
   %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
   %tmp0 = getelementptr inbounds %i16.2, ptr %data, i64 %i, i32 0
   %tmp1 = getelementptr inbounds %i16.2, ptr %data, i64 %i, i32 1
   %tmp2 = load i16, ptr %tmp0, align 2
   %tmp3 = load i16, ptr %tmp1, align 2
-  store i16 0, ptr %tmp0, align 2
-  store i16 0, ptr %tmp1, align 2
+  store i16 %tmp2, ptr %tmp0, align 2
+  store i16 %tmp3, ptr %tmp1, align 2
   %i.next = add nuw nsw i64 %i, 1
   %cond = icmp slt i64 %i.next, %n
   br i1 %cond, label %for.body, label %for.end
@@ -82,31 +82,31 @@ entry:
 ; VF_2-LABEL:  Checking a loop in 'i32_factor_2'
 ; VF_2:          Found an estimated cost of 2 for VF 2 For instruction: %tmp2 = load i32, ptr %tmp0, align 4
 ; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp3 = load i32, ptr %tmp1, align 4
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i32 0, ptr %tmp0, align 4
-; VF_2-NEXT:     Found an estimated cost of 2 for VF 2 For instruction: store i32 0, ptr %tmp1, align 4
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i32 %tmp2, ptr %tmp0, align 4
+; VF_2-NEXT:     Found an estimated cost of 2 for VF 2 For instruction: store i32 %tmp3, ptr %tmp1, align 4
 ; VF_4-LABEL:  Checking a loop in 'i32_factor_2'
 ; VF_4:          Found an estimated cost of 2 for VF 4 For instruction: %tmp2 = load i32, ptr %tmp0, align 4
 ; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load i32, ptr %tmp1, align 4
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i32 0, ptr %tmp0, align 4
-; VF_4-NEXT:     Found an estimated cost of 2 for VF 4 For instruction: store i32 0, ptr %tmp1, align 4
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i32 %tmp2, ptr %tmp0, align 4
+; VF_4-NEXT:     Found an estimated cost of 2 for VF 4 For instruction: store i32 %tmp3, ptr %tmp1, align 4
 ; VF_8-LABEL:  Checking a loop in 'i32_factor_2'
 ; VF_8:          Found an estimated cost of 4 for VF 8 For instruction: %tmp2 = load i32, ptr %tmp0, align 4
 ; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load i32, ptr %tmp1, align 4
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i32 0, ptr %tmp0, align 4
-; VF_8-NEXT:     Found an estimated cost of 4 for VF 8 For instruction: store i32 0, ptr %tmp1, align 4
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i32 %tmp2, ptr %tmp0, align 4
+; VF_8-NEXT:     Found an estimated cost of 4 for VF 8 For instruction: store i32 %tmp3, ptr %tmp1, align 4
 ; VF_16-LABEL: Checking a loop in 'i32_factor_2'
 ; VF_16:         Found an estimated cost of 8 for VF 16 For instruction: %tmp2 = load i32, ptr %tmp0, align 4
 ; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load i32, ptr %tmp1, align 4
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i32 0, ptr %tmp0, align 4
-; VF_16-NEXT:    Found an estimated cost of 8 for VF 16 For instruction: store i32 0, ptr %tmp1, align 4
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i32 %tmp2, ptr %tmp0, align 4
+; VF_16-NEXT:    Found an estimated cost of 8 for VF 16 For instruction: store i32 %tmp3, ptr %tmp1, align 4
 for.body:
   %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
   %tmp0 = getelementptr inbounds %i32.2, ptr %data, i64 %i, i32 0
   %tmp1 = getelementptr inbounds %i32.2, ptr %data, i64 %i, i32 1
   %tmp2 = load i32, ptr %tmp0, align 4
   %tmp3 = load i32, ptr %tmp1, align 4
-  store i32 0, ptr %tmp0, align 4
-  store i32 0, ptr %tmp1, align 4
+  store i32 %tmp2, ptr %tmp0, align 4
+  store i32 %tmp3, ptr %tmp1, align 4
   %i.next = add nuw nsw i64 %i, 1
   %cond = icmp slt i64 %i.next, %n
   br i1 %cond, label %for.body, label %for.end
@@ -123,21 +123,21 @@ entry:
 ; VF_4-LABEL: Checking a loop in 'half_factor_2'
 ; VF_4:         Found an estimated cost of 40 for VF 4 For instruction: %tmp2 = load half, ptr %tmp0, align 2
 ; VF_4-NEXT:    Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load half, ptr %tmp1, align 2
-; VF_4-NEXT:    Found an estimated cost of 0 for VF 4 For instruction: store half 0xH0000, ptr %tmp0, align 2
-; VF_4-NEXT:    Found an estimated cost of 32 for VF 4 For instruction: store half 0xH0000, ptr %tmp1, align 2
+; VF_4-NEXT:    Found an estimated cost of 0 for VF 4 For instruction: store half %tmp2, ptr %tmp0, align 2
+; VF_4-NEXT:    Found an estimated cost of 40 for VF 4 For instruction: store half %tmp3, ptr %tmp1, align 2
 ; VF_8-LABEL: Checking a loop in 'half_factor_2'
 ; VF_8:         Found an estimated cost of 80 for VF 8 For instruction: %tmp2 = load half, ptr %tmp0, align 2
 ; VF_8-NEXT:    Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load half, ptr %tmp1, align 2
-; VF_8-NEXT:    Found an estimated cost of 0 for VF 8 For instruction: store half 0xH0000, ptr %tmp0, align 2
-; VF_8-NEXT:    Found an estimated cost of 64 for VF 8 For instruction: store half 0xH0000, ptr %tmp1, align 2
+; VF_8-NEXT:    Found an estimated cost of 0 for VF 8 For instruction: store half %tmp2, ptr %tmp0, align 2
+; VF_8-NEXT:    Found an estimated cost of 80 for VF 8 For instruction: store half %tmp3, ptr %tmp1, align 2
 for.body:
   %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
   %tmp0 = getelementptr inbounds %half.2, ptr %data, i64 %i, i32 0
   %tmp1 = getelementptr inbounds %half.2, ptr %data, i64 %i, i32 1
   %tmp2 = load half, ptr %tmp0, align 2
   %tmp3 = load half, ptr %tmp1, align 2
-  store half 0., ptr %tmp0, align 2
-  store half 0., ptr %tmp1, align 2
+  store half %tmp2, ptr %tmp0, align 2
+  store half %tmp3, ptr %tmp1, align 2
   %i.next = add nuw nsw i64 %i, 1
   %cond = icmp slt i64 %i.next, %n
   br i1 %cond, label %for.body, label %for.end

diff  --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-interleaved-cost.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-interleaved-cost.ll
index 97aa2035ded55..c7a04e3669ed6 100644
--- a/llvm/test/Transforms/LoopVectorize/ARM/mve-interleaved-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-interleaved-cost.ll
@@ -17,31 +17,31 @@ entry:
 ; VF_2-LABEL:  Checking a loop in 'i8_factor_2'
 ; VF_2:          Found an estimated cost of 24 for VF 2 For instruction: %tmp2 = load i8, ptr %tmp0, align 1
 ; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp3 = load i8, ptr %tmp1, align 1
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i8 0, ptr %tmp0, align 1
-; VF_2-NEXT:     Found an estimated cost of 8 for VF 2 For instruction: store i8 0, ptr %tmp1, align 1
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i8 %tmp2, ptr %tmp0, align 1
+; VF_2-NEXT:     Found an estimated cost of 24 for VF 2 For instruction: store i8 %tmp3, ptr %tmp1, align 1
 ; VF_4-LABEL:  Checking a loop in 'i8_factor_2'
 ; VF_4:          Found an estimated cost of 4 for VF 4 For instruction: %tmp2 = load i8, ptr %tmp0, align 1
 ; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load i8, ptr %tmp1, align 1
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i8 0, ptr %tmp0, align 1
-; VF_4-NEXT:     Found an estimated cost of 4 for VF 4 For instruction: store i8 0, ptr %tmp1, align 1
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i8 %tmp2, ptr %tmp0, align 1
+; VF_4-NEXT:     Found an estimated cost of 4 for VF 4 For instruction: store i8 %tmp3, ptr %tmp1, align 1
 ; VF_8-LABEL:  Checking a loop in 'i8_factor_2'
 ; VF_8:          Found an estimated cost of 4 for VF 8 For instruction: %tmp2 = load i8, ptr %tmp0, align 1
 ; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load i8, ptr %tmp1, align 1
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i8 0, ptr %tmp0, align 1
-; VF_8-NEXT:     Found an estimated cost of 4 for VF 8 For instruction: store i8 0, ptr %tmp1, align 1
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i8 %tmp2, ptr %tmp0, align 1
+; VF_8-NEXT:     Found an estimated cost of 4 for VF 8 For instruction: store i8 %tmp3, ptr %tmp1, align 1
 ; VF_16-LABEL: Checking a loop in 'i8_factor_2'
 ; VF_16:         Found an estimated cost of 4 for VF 16 For instruction: %tmp2 = load i8, ptr %tmp0, align 1
 ; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load i8, ptr %tmp1, align 1
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i8 0, ptr %tmp0, align 1
-; VF_16-NEXT:    Found an estimated cost of 4 for VF 16 For instruction: store i8 0, ptr %tmp1, align 1
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i8 %tmp2, ptr %tmp0, align 1
+; VF_16-NEXT:    Found an estimated cost of 4 for VF 16 For instruction: store i8 %tmp3, ptr %tmp1, align 1
 for.body:
   %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
   %tmp0 = getelementptr inbounds %i8.2, ptr %data, i64 %i, i32 0
   %tmp1 = getelementptr inbounds %i8.2, ptr %data, i64 %i, i32 1
   %tmp2 = load i8, ptr %tmp0, align 1
   %tmp3 = load i8, ptr %tmp1, align 1
-  store i8 0, ptr %tmp0, align 1
-  store i8 0, ptr %tmp1, align 1
+  store i8 %tmp2, ptr %tmp0, align 1
+  store i8 %tmp3, ptr %tmp1, align 1
   %i.next = add nuw nsw i64 %i, 1
   %cond = icmp slt i64 %i.next, %n
   br i1 %cond, label %for.body, label %for.end
@@ -58,31 +58,31 @@ entry:
 ; VF_2-LABEL:  Checking a loop in 'i16_factor_2'
 ; VF_2:          Found an estimated cost of 24 for VF 2 For instruction: %tmp2 = load i16, ptr %tmp0, align 2
 ; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp3 = load i16, ptr %tmp1, align 2
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i16 0, ptr %tmp0, align 2
-; VF_2-NEXT:     Found an estimated cost of 8 for VF 2 For instruction: store i16 0, ptr %tmp1, align 2
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i16 %tmp2, ptr %tmp0, align 2
+; VF_2-NEXT:     Found an estimated cost of 24 for VF 2 For instruction: store i16 %tmp3, ptr %tmp1, align 2
 ; VF_4-LABEL:  Checking a loop in 'i16_factor_2'
 ; VF_4:          Found an estimated cost of 4 for VF 4 For instruction: %tmp2 = load i16, ptr %tmp0, align 2
 ; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load i16, ptr %tmp1, align 2
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i16 0, ptr %tmp0, align 2
-; VF_4-NEXT:     Found an estimated cost of 4 for VF 4 For instruction: store i16 0, ptr %tmp1, align 2
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i16 %tmp2, ptr %tmp0, align 2
+; VF_4-NEXT:     Found an estimated cost of 4 for VF 4 For instruction: store i16 %tmp3, ptr %tmp1, align 2
 ; VF_8-LABEL:  Checking a loop in 'i16_factor_2'
 ; VF_8:          Found an estimated cost of 4 for VF 8 For instruction: %tmp2 = load i16, ptr %tmp0, align 2
 ; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load i16, ptr %tmp1, align 2
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i16 0, ptr %tmp0, align 2
-; VF_8-NEXT:     Found an estimated cost of 4 for VF 8 For instruction: store i16 0, ptr %tmp1, align 2
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i16 %tmp2, ptr %tmp0, align 2
+; VF_8-NEXT:     Found an estimated cost of 4 for VF 8 For instruction: store i16 %tmp3, ptr %tmp1, align 2
 ; VF_16-LABEL: Checking a loop in 'i16_factor_2'
 ; VF_16:         Found an estimated cost of 8 for VF 16 For instruction: %tmp2 = load i16, ptr %tmp0, align 2
 ; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load i16, ptr %tmp1, align 2
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i16 0, ptr %tmp0, align 2
-; VF_16-NEXT:    Found an estimated cost of 8 for VF 16 For instruction: store i16 0, ptr %tmp1, align 2
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i16 %tmp2, ptr %tmp0, align 2
+; VF_16-NEXT:    Found an estimated cost of 8 for VF 16 For instruction: store i16 %tmp3, ptr %tmp1, align 2
 for.body:
   %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
   %tmp0 = getelementptr inbounds %i16.2, ptr %data, i64 %i, i32 0
   %tmp1 = getelementptr inbounds %i16.2, ptr %data, i64 %i, i32 1
   %tmp2 = load i16, ptr %tmp0, align 2
   %tmp3 = load i16, ptr %tmp1, align 2
-  store i16 0, ptr %tmp0, align 2
-  store i16 0, ptr %tmp1, align 2
+  store i16 %tmp2, ptr %tmp0, align 2
+  store i16 %tmp3, ptr %tmp1, align 2
   %i.next = add nuw nsw i64 %i, 1
   %cond = icmp slt i64 %i.next, %n
   br i1 %cond, label %for.body, label %for.end
@@ -99,31 +99,31 @@ entry:
 ; VF_2-LABEL:  Checking a loop in 'i32_factor_2'
 ; VF_2:          Found an estimated cost of 24 for VF 2 For instruction: %tmp2 = load i32, ptr %tmp0, align 4
 ; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp3 = load i32, ptr %tmp1, align 4
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i32 0, ptr %tmp0, align 4
-; VF_2-NEXT:     Found an estimated cost of 8 for VF 2 For instruction: store i32 0, ptr %tmp1, align 4
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i32 %tmp2, ptr %tmp0, align 4
+; VF_2-NEXT:     Found an estimated cost of 24 for VF 2 For instruction: store i32 %tmp3, ptr %tmp1, align 4
 ; VF_4-LABEL:  Checking a loop in 'i32_factor_2'
 ; VF_4:          Found an estimated cost of 4 for VF 4 For instruction: %tmp2 = load i32, ptr %tmp0, align 4
 ; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load i32, ptr %tmp1, align 4
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i32 0, ptr %tmp0, align 4
-; VF_4-NEXT:     Found an estimated cost of 4 for VF 4 For instruction: store i32 0, ptr %tmp1, align 4
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i32 %tmp2, ptr %tmp0, align 4
+; VF_4-NEXT:     Found an estimated cost of 4 for VF 4 For instruction: store i32 %tmp3, ptr %tmp1, align 4
 ; VF_8-LABEL:  Checking a loop in 'i32_factor_2'
 ; VF_8:          Found an estimated cost of 8 for VF 8 For instruction: %tmp2 = load i32, ptr %tmp0, align 4
 ; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load i32, ptr %tmp1, align 4
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i32 0, ptr %tmp0, align 4
-; VF_8-NEXT:     Found an estimated cost of 8 for VF 8 For instruction: store i32 0, ptr %tmp1, align 4
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i32 %tmp2, ptr %tmp0, align 4
+; VF_8-NEXT:     Found an estimated cost of 8 for VF 8 For instruction: store i32 %tmp3, ptr %tmp1, align 4
 ; VF_16-LABEL: Checking a loop in 'i32_factor_2'
 ; VF_16:         Found an estimated cost of 16 for VF 16 For instruction: %tmp2 = load i32, ptr %tmp0, align 4
 ; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load i32, ptr %tmp1, align 4
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i32 0, ptr %tmp0, align 4
-; VF_16-NEXT:    Found an estimated cost of 16 for VF 16 For instruction: store i32 0, ptr %tmp1, align 4
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i32 %tmp2, ptr %tmp0, align 4
+; VF_16-NEXT:    Found an estimated cost of 16 for VF 16 For instruction: store i32 %tmp3, ptr %tmp1, align 4
 for.body:
   %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
   %tmp0 = getelementptr inbounds %i32.2, ptr %data, i64 %i, i32 0
   %tmp1 = getelementptr inbounds %i32.2, ptr %data, i64 %i, i32 1
   %tmp2 = load i32, ptr %tmp0, align 4
   %tmp3 = load i32, ptr %tmp1, align 4
-  store i32 0, ptr %tmp0, align 4
-  store i32 0, ptr %tmp1, align 4
+  store i32 %tmp2, ptr %tmp0, align 4
+  store i32 %tmp3, ptr %tmp1, align 4
   %i.next = add nuw nsw i64 %i, 1
   %cond = icmp slt i64 %i.next, %n
   br i1 %cond, label %for.body, label %for.end
@@ -140,31 +140,31 @@ entry:
 ; VF_2-LABEL:  Checking a loop in 'i64_factor_2'
 ; VF_2:          Found an estimated cost of 44 for VF 2 For instruction: %tmp2 = load i64, ptr %tmp0, align 8
 ; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp3 = load i64, ptr %tmp1, align 8
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i64 0, ptr %tmp0, align 8
-; VF_2-NEXT:     Found an estimated cost of 12 for VF 2 For instruction: store i64 0, ptr %tmp1, align 8
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i64 %tmp2, ptr %tmp0, align 8
+; VF_2-NEXT:     Found an estimated cost of 44 for VF 2 For instruction: store i64 %tmp3, ptr %tmp1, align 8
 ; VF_4-LABEL:  Checking a loop in 'i64_factor_2'
 ; VF_4:          Found an estimated cost of 88 for VF 4 For instruction: %tmp2 = load i64, ptr %tmp0, align 8
 ; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load i64, ptr %tmp1, align 8
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i64 0, ptr %tmp0, align 8
-; VF_4-NEXT:     Found an estimated cost of 24 for VF 4 For instruction: store i64 0, ptr %tmp1, align 8
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i64 %tmp2, ptr %tmp0, align 8
+; VF_4-NEXT:     Found an estimated cost of 88 for VF 4 For instruction: store i64 %tmp3, ptr %tmp1, align 8
 ; VF_8-LABEL:  Checking a loop in 'i64_factor_2'
 ; VF_8:          Found an estimated cost of 176 for VF 8 For instruction: %tmp2 = load i64, ptr %tmp0, align 8
 ; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load i64, ptr %tmp1, align 8
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i64 0, ptr %tmp0, align 8
-; VF_8-NEXT:     Found an estimated cost of 48 for VF 8 For instruction: store i64 0, ptr %tmp1, align 8
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i64 %tmp2, ptr %tmp0, align 8
+; VF_8-NEXT:     Found an estimated cost of 176 for VF 8 For instruction: store i64 %tmp3, ptr %tmp1, align 8
 ; VF_16-LABEL: Checking a loop in 'i64_factor_2'
 ; VF_16:         Found an estimated cost of 352 for VF 16 For instruction: %tmp2 = load i64, ptr %tmp0, align 8
 ; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load i64, ptr %tmp1, align 8
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i64 0, ptr %tmp0, align 8
-; VF_16-NEXT:    Found an estimated cost of 96 for VF 16 For instruction: store i64 0, ptr %tmp1, align 8
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i64 %tmp2, ptr %tmp0, align 8
+; VF_16-NEXT:    Found an estimated cost of 352 for VF 16 For instruction: store i64 %tmp3, ptr %tmp1, align 8
 for.body:
   %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
   %tmp0 = getelementptr inbounds %i64.2, ptr %data, i64 %i, i32 0
   %tmp1 = getelementptr inbounds %i64.2, ptr %data, i64 %i, i32 1
   %tmp2 = load i64, ptr %tmp0, align 8
   %tmp3 = load i64, ptr %tmp1, align 8
-  store i64 0, ptr %tmp0, align 8
-  store i64 0, ptr %tmp1, align 8
+  store i64 %tmp2, ptr %tmp0, align 8
+  store i64 %tmp3, ptr %tmp1, align 8
   %i.next = add nuw nsw i64 %i, 1
   %cond = icmp slt i64 %i.next, %n
   br i1 %cond, label %for.body, label %for.end
@@ -181,31 +181,31 @@ entry:
 ; VF_2-LABEL:  Checking a loop in 'f16_factor_2'
 ; VF_2:          Found an estimated cost of 12 for VF 2 For instruction: %tmp2 = load half, ptr %tmp0, align 2
 ; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp3 = load half, ptr %tmp1, align 2
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store half 0xH0000, ptr %tmp0, align 2
-; VF_2-NEXT:     Found an estimated cost of 8 for VF 2 For instruction: store half 0xH0000, ptr %tmp1, align 2
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store half %tmp2, ptr %tmp0, align 2
+; VF_2-NEXT:     Found an estimated cost of 12 for VF 2 For instruction: store half %tmp3, ptr %tmp1, align 2
 ; VF_4-LABEL:  Checking a loop in 'f16_factor_2'
 ; VF_4:          Found an estimated cost of 18 for VF 4 For instruction: %tmp2 = load half, ptr %tmp0, align 2
 ; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load half, ptr %tmp1, align 2
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store half 0xH0000, ptr %tmp0, align 2
-; VF_4-NEXT:     Found an estimated cost of 16 for VF 4 For instruction: store half 0xH0000, ptr %tmp1, align 2
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store half %tmp2, ptr %tmp0, align 2
+; VF_4-NEXT:     Found an estimated cost of 18 for VF 4 For instruction: store half %tmp3, ptr %tmp1, align 2
 ; VF_8-LABEL:  Checking a loop in 'f16_factor_2'
 ; VF_8:          Found an estimated cost of 4 for VF 8 For instruction: %tmp2 = load half, ptr %tmp0, align 2
 ; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load half, ptr %tmp1, align 2
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store half 0xH0000, ptr %tmp0, align 2
-; VF_8-NEXT:     Found an estimated cost of 4 for VF 8 For instruction: store half 0xH0000, ptr %tmp1, align 2
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store half %tmp2, ptr %tmp0, align 2
+; VF_8-NEXT:     Found an estimated cost of 4 for VF 8 For instruction: store half %tmp3, ptr %tmp1, align 2
 ; VF_16-LABEL: Checking a loop in 'f16_factor_2'
 ; VF_16:         Found an estimated cost of 8 for VF 16 For instruction: %tmp2 = load half, ptr %tmp0, align 2
 ; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load half, ptr %tmp1, align 2
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store half 0xH0000, ptr %tmp0, align 2
-; VF_16-NEXT:    Found an estimated cost of 8 for VF 16 For instruction: store half 0xH0000, ptr %tmp1, align 2
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store half %tmp2, ptr %tmp0, align 2
+; VF_16-NEXT:    Found an estimated cost of 8 for VF 16 For instruction: store half %tmp3, ptr %tmp1, align 2
 for.body:
   %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
   %tmp0 = getelementptr inbounds %f16.2, ptr %data, i64 %i, i32 0
   %tmp1 = getelementptr inbounds %f16.2, ptr %data, i64 %i, i32 1
   %tmp2 = load half, ptr %tmp0, align 2
   %tmp3 = load half, ptr %tmp1, align 2
-  store half 0.0, ptr %tmp0, align 2
-  store half 0.0, ptr %tmp1, align 2
+  store half %tmp2, ptr %tmp0, align 2
+  store half %tmp3, ptr %tmp1, align 2
   %i.next = add nuw nsw i64 %i, 1
   %cond = icmp slt i64 %i.next, %n
   br i1 %cond, label %for.body, label %for.end
@@ -222,31 +222,31 @@ entry:
 ; VF_2-LABEL:  Checking a loop in 'f32_factor_2'
 ; VF_2:          Found an estimated cost of 10 for VF 2 For instruction: %tmp2 = load float, ptr %tmp0, align 4
 ; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp3 = load float, ptr %tmp1, align 4
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store float 0.000000e+00, ptr %tmp0, align 4
-; VF_2-NEXT:     Found an estimated cost of 8 for VF 2 For instruction: store float 0.000000e+00, ptr %tmp1, align 4
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store float %tmp2, ptr %tmp0, align 4
+; VF_2-NEXT:     Found an estimated cost of 10 for VF 2 For instruction: store float %tmp3, ptr %tmp1, align 4
 ; VF_4-LABEL:  Checking a loop in 'f32_factor_2'
 ; VF_4:          Found an estimated cost of 4 for VF 4 For instruction: %tmp2 = load float, ptr %tmp0, align 4
 ; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load float, ptr %tmp1, align 4
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store float 0.000000e+00, ptr %tmp0, align 4
-; VF_4-NEXT:     Found an estimated cost of 4 for VF 4 For instruction: store float 0.000000e+00, ptr %tmp1, align 4
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store float %tmp2, ptr %tmp0, align 4
+; VF_4-NEXT:     Found an estimated cost of 4 for VF 4 For instruction: store float %tmp3, ptr %tmp1, align 4
 ; VF_8-LABEL:  Checking a loop in 'f32_factor_2'
 ; VF_8:          Found an estimated cost of 8 for VF 8 For instruction: %tmp2 = load float, ptr %tmp0, align 4
 ; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load float, ptr %tmp1, align 4
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store float 0.000000e+00, ptr %tmp0, align 4
-; VF_8-NEXT:     Found an estimated cost of 8 for VF 8 For instruction: store float 0.000000e+00, ptr %tmp1, align 4
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store float %tmp2, ptr %tmp0, align 4
+; VF_8-NEXT:     Found an estimated cost of 8 for VF 8 For instruction: store float %tmp3, ptr %tmp1, align 4
 ; VF_16-LABEL: Checking a loop in 'f32_factor_2'
 ; VF_16:         Found an estimated cost of 16 for VF 16 For instruction: %tmp2 = load float, ptr %tmp0, align 4
 ; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load float, ptr %tmp1, align 4
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store float 0.000000e+00, ptr %tmp0, align 4
-; VF_16-NEXT:    Found an estimated cost of 16 for VF 16 For instruction: store float 0.000000e+00, ptr %tmp1, align 4
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store float %tmp2, ptr %tmp0, align 4
+; VF_16-NEXT:    Found an estimated cost of 16 for VF 16 For instruction: store float %tmp3, ptr %tmp1, align 4
 for.body:
   %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
   %tmp0 = getelementptr inbounds %f32.2, ptr %data, i64 %i, i32 0
   %tmp1 = getelementptr inbounds %f32.2, ptr %data, i64 %i, i32 1
   %tmp2 = load float, ptr %tmp0, align 4
   %tmp3 = load float, ptr %tmp1, align 4
-  store float 0.0, ptr %tmp0, align 4
-  store float 0.0, ptr %tmp1, align 4
+  store float %tmp2, ptr %tmp0, align 4
+  store float %tmp3, ptr %tmp1, align 4
   %i.next = add nuw nsw i64 %i, 1
   %cond = icmp slt i64 %i.next, %n
   br i1 %cond, label %for.body, label %for.end
@@ -263,31 +263,31 @@ entry:
 ; VF_2-LABEL:  Checking a loop in 'f64_factor_2'
 ; VF_2:          Found an estimated cost of 12 for VF 2 For instruction: %tmp2 = load double, ptr %tmp0, align 8
 ; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp3 = load double, ptr %tmp1, align 8
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store double 0.000000e+00, ptr %tmp0, align 8
-; VF_2-NEXT:     Found an estimated cost of 8 for VF 2 For instruction: store double 0.000000e+00, ptr %tmp1, align 8
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store double %tmp2, ptr %tmp0, align 8
+; VF_2-NEXT:     Found an estimated cost of 12 for VF 2 For instruction: store double %tmp3, ptr %tmp1, align 8
 ; VF_4-LABEL:  Checking a loop in 'f64_factor_2'
 ; VF_4:          Found an estimated cost of 24 for VF 4 For instruction: %tmp2 = load double, ptr %tmp0, align 8
 ; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp3 = load double, ptr %tmp1, align 8
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store double 0.000000e+00, ptr %tmp0, align 8
-; VF_4-NEXT:     Found an estimated cost of 16 for VF 4 For instruction: store double 0.000000e+00, ptr %tmp1, align 8
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store double %tmp2, ptr %tmp0, align 8
+; VF_4-NEXT:     Found an estimated cost of 24 for VF 4 For instruction: store double %tmp3, ptr %tmp1, align 8
 ; VF_8-LABEL:  Checking a loop in 'f64_factor_2'
 ; VF_8:          Found an estimated cost of 48 for VF 8 For instruction: %tmp2 = load double, ptr %tmp0, align 8
 ; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp3 = load double, ptr %tmp1, align 8
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store double 0.000000e+00, ptr %tmp0, align 8
-; VF_8-NEXT:     Found an estimated cost of 32 for VF 8 For instruction: store double 0.000000e+00, ptr %tmp1, align 8
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store double %tmp2, ptr %tmp0, align 8
+; VF_8-NEXT:     Found an estimated cost of 48 for VF 8 For instruction: store double %tmp3, ptr %tmp1, align 8
 ; VF_16-LABEL: Checking a loop in 'f64_factor_2'
 ; VF_16:         Found an estimated cost of 96 for VF 16 For instruction: %tmp2 = load double, ptr %tmp0, align 8
 ; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp3 = load double, ptr %tmp1, align 8
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store double 0.000000e+00, ptr %tmp0, align 8
-; VF_16-NEXT:    Found an estimated cost of 64 for VF 16 For instruction: store double 0.000000e+00, ptr %tmp1, align 8
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store double %tmp2, ptr %tmp0, align 8
+; VF_16-NEXT:    Found an estimated cost of 96 for VF 16 For instruction: store double %tmp3, ptr %tmp1, align 8
 for.body:
   %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
   %tmp0 = getelementptr inbounds %f64.2, ptr %data, i64 %i, i32 0
   %tmp1 = getelementptr inbounds %f64.2, ptr %data, i64 %i, i32 1
   %tmp2 = load double, ptr %tmp0, align 8
   %tmp3 = load double, ptr %tmp1, align 8
-  store double 0.0, ptr %tmp0, align 8
-  store double 0.0, ptr %tmp1, align 8
+  store double %tmp2, ptr %tmp0, align 8
+  store double %tmp3, ptr %tmp1, align 8
   %i.next = add nuw nsw i64 %i, 1
   %cond = icmp slt i64 %i.next, %n
   br i1 %cond, label %for.body, label %for.end
@@ -309,30 +309,30 @@ entry:
 ; VF_2:          Found an estimated cost of 36 for VF 2 For instruction: %tmp3 = load i8, ptr %tmp0, align 1
 ; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp4 = load i8, ptr %tmp1, align 1
 ; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load i8, ptr %tmp2, align 1
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i8 0, ptr %tmp0, align 1
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i8 0, ptr %tmp1, align 1
-; VF_2-NEXT:     Found an estimated cost of 12 for VF 2 For instruction: store i8 0, ptr %tmp2, align 1
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i8 %tmp3, ptr %tmp0, align 1
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i8 %tmp4, ptr %tmp1, align 1
+; VF_2-NEXT:     Found an estimated cost of 36 for VF 2 For instruction: store i8 %tmp5, ptr %tmp2, align 1
 ; VF_4-LABEL:  Checking a loop in 'i8_factor_3'
 ; VF_4:          Found an estimated cost of 72 for VF 4 For instruction: %tmp3 = load i8, ptr %tmp0, align 1
 ; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp4 = load i8, ptr %tmp1, align 1
 ; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load i8, ptr %tmp2, align 1
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i8 0, ptr %tmp0, align 1
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i8 0, ptr %tmp1, align 1
-; VF_4-NEXT:     Found an estimated cost of 24 for VF 4 For instruction: store i8 0, ptr %tmp2, align 1
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i8 %tmp3,  ptr %tmp0, align 1
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i8 %tmp4, ptr %tmp1, align 1
+; VF_4-NEXT:     Found an estimated cost of 72 for VF 4 For instruction: store i8 %tmp5, ptr %tmp2, align 1
 ; VF_8-LABEL:  Checking a loop in 'i8_factor_3'
 ; VF_8:          Found an estimated cost of 144 for VF 8 For instruction: %tmp3 = load i8, ptr %tmp0, align 1
 ; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp4 = load i8, ptr %tmp1, align 1
 ; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load i8, ptr %tmp2, align 1
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i8 0, ptr %tmp0, align 1
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i8 0, ptr %tmp1, align 1
-; VF_8-NEXT:     Found an estimated cost of 48 for VF 8 For instruction: store i8 0, ptr %tmp2, align 1
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i8 %tmp3, ptr %tmp0, align 1
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i8 %tmp4, ptr %tmp1, align 1
+; VF_8-NEXT:     Found an estimated cost of 144 for VF 8 For instruction: store i8 %tmp5, ptr %tmp2, align 1
 ; VF_16-LABEL: Checking a loop in 'i8_factor_3'
 ; VF_16:         Found an estimated cost of 288 for VF 16 For instruction: %tmp3 = load i8, ptr %tmp0, align 1
 ; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp4 = load i8, ptr %tmp1, align 1
 ; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load i8, ptr %tmp2, align 1
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i8 0, ptr %tmp0, align 1
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i8 0, ptr %tmp1, align 1
-; VF_16-NEXT:    Found an estimated cost of 96 for VF 16 For instruction: store i8 0, ptr %tmp2, align 1
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i8 %tmp3, ptr %tmp0, align 1
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i8 %tmp4, ptr %tmp1, align 1
+; VF_16-NEXT:    Found an estimated cost of 288 for VF 16 For instruction: store i8 %tmp5, ptr %tmp2, align 1
 for.body:
   %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
   %tmp0 = getelementptr inbounds %i8.3, ptr %data, i64 %i, i32 0
@@ -341,9 +341,9 @@ for.body:
   %tmp3 = load i8, ptr %tmp0, align 1
   %tmp4 = load i8, ptr %tmp1, align 1
   %tmp5 = load i8, ptr %tmp2, align 1
-  store i8 0, ptr %tmp0, align 1
-  store i8 0, ptr %tmp1, align 1
-  store i8 0, ptr %tmp2, align 1
+  store i8 %tmp3, ptr %tmp0, align 1
+  store i8 %tmp4, ptr %tmp1, align 1
+  store i8 %tmp5, ptr %tmp2, align 1
   %i.next = add nuw nsw i64 %i, 1
   %cond = icmp slt i64 %i.next, %n
   br i1 %cond, label %for.body, label %for.end
@@ -361,30 +361,30 @@ entry:
 ; VF_2:          Found an estimated cost of 36 for VF 2 For instruction: %tmp3 = load i16, ptr %tmp0, align 2
 ; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp4 = load i16, ptr %tmp1, align 2
 ; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load i16, ptr %tmp2, align 2
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i16 0, ptr %tmp0, align 2
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i16 0, ptr %tmp1, align 2
-; VF_2-NEXT:     Found an estimated cost of 12 for VF 2 For instruction: store i16 0, ptr %tmp2, align 2
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i16 %tmp3, ptr %tmp0, align 2
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i16 %tmp4, ptr %tmp1, align 2
+; VF_2-NEXT:     Found an estimated cost of 36 for VF 2 For instruction: store i16 %tmp5, ptr %tmp2, align 2
 ; VF_4-LABEL:  Checking a loop in 'i16_factor_3'
 ; VF_4:          Found an estimated cost of 72 for VF 4 For instruction: %tmp3 = load i16, ptr %tmp0, align 2
 ; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp4 = load i16, ptr %tmp1, align 2
 ; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load i16, ptr %tmp2, align 2
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i16 0, ptr %tmp0, align 2
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i16 0, ptr %tmp1, align 2
-; VF_4-NEXT:     Found an estimated cost of 24 for VF 4 For instruction: store i16 0, ptr %tmp2, align 2
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i16 %tmp3, ptr %tmp0, align 2
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i16 %tmp4, ptr %tmp1, align 2
+; VF_4-NEXT:     Found an estimated cost of 72 for VF 4 For instruction: store i16 %tmp5, ptr %tmp2, align 2
 ; VF_8-LABEL:  Checking a loop in 'i16_factor_3'
 ; VF_8:          Found an estimated cost of 144 for VF 8 For instruction: %tmp3 = load i16, ptr %tmp0, align 2
 ; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp4 = load i16, ptr %tmp1, align 2
 ; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load i16, ptr %tmp2, align 2
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i16 0, ptr %tmp0, align 2
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i16 0, ptr %tmp1, align 2
-; VF_8-NEXT:     Found an estimated cost of 48 for VF 8 For instruction: store i16 0, ptr %tmp2, align 2
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i16 %tmp3, ptr %tmp0, align 2
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i16 %tmp4, ptr %tmp1, align 2
+; VF_8-NEXT:     Found an estimated cost of 144 for VF 8 For instruction: store i16 %tmp5, ptr %tmp2, align 2
 ; VF_16-LABEL: Checking a loop in 'i16_factor_3'
 ; VF_16:         Found an estimated cost of 288 for VF 16 For instruction: %tmp3 = load i16, ptr %tmp0, align 2
 ; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp4 = load i16, ptr %tmp1, align 2
 ; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load i16, ptr %tmp2, align 2
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i16 0, ptr %tmp0, align 2
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i16 0, ptr %tmp1, align 2
-; VF_16-NEXT:    Found an estimated cost of 96 for VF 16 For instruction: store i16 0, ptr %tmp2, align 2
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i16 %tmp3, ptr %tmp0, align 2
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i16 %tmp4, ptr %tmp1, align 2
+; VF_16-NEXT:    Found an estimated cost of 288 for VF 16 For instruction: store i16 %tmp5, ptr %tmp2, align 2
 for.body:
   %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
   %tmp0 = getelementptr inbounds %i16.3, ptr %data, i64 %i, i32 0
@@ -393,9 +393,9 @@ for.body:
   %tmp3 = load i16, ptr %tmp0, align 2
   %tmp4 = load i16, ptr %tmp1, align 2
   %tmp5 = load i16, ptr %tmp2, align 2
-  store i16 0, ptr %tmp0, align 2
-  store i16 0, ptr %tmp1, align 2
-  store i16 0, ptr %tmp2, align 2
+  store i16 %tmp3, ptr %tmp0, align 2
+  store i16 %tmp4, ptr %tmp1, align 2
+  store i16 %tmp5, ptr %tmp2, align 2
   %i.next = add nuw nsw i64 %i, 1
   %cond = icmp slt i64 %i.next, %n
   br i1 %cond, label %for.body, label %for.end
@@ -413,30 +413,30 @@ entry:
 ; VF_2:          Found an estimated cost of 36 for VF 2 For instruction: %tmp3 = load i32, ptr %tmp0, align 4
 ; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp4 = load i32, ptr %tmp1, align 4
 ; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load i32, ptr %tmp2, align 4
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i32 0, ptr %tmp0, align 4
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i32 0, ptr %tmp1, align 4
-; VF_2-NEXT:     Found an estimated cost of 12 for VF 2 For instruction: store i32 0, ptr %tmp2, align 4
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i32 %tmp3, ptr %tmp0, align 4
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i32 %tmp4, ptr %tmp1, align 4
+; VF_2-NEXT:     Found an estimated cost of 36 for VF 2 For instruction: store i32 %tmp5, ptr %tmp2, align 4
 ; VF_4-LABEL:  Checking a loop in 'i32_factor_3'
 ; VF_4:          Found an estimated cost of 24 for VF 4 For instruction: %tmp3 = load i32, ptr %tmp0, align 4
 ; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp4 = load i32, ptr %tmp1, align 4
 ; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load i32, ptr %tmp2, align 4
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i32 0, ptr %tmp0, align 4
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i32 0, ptr %tmp1, align 4
-; VF_4-NEXT:     Found an estimated cost of 24 for VF 4 For instruction: store i32 0, ptr %tmp2, align 4
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i32 %tmp3, ptr %tmp0, align 4
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i32 %tmp4, ptr %tmp1, align 4
+; VF_4-NEXT:     Found an estimated cost of 24 for VF 4 For instruction: store i32 %tmp5, ptr %tmp2, align 4
 ; VF_8-LABEL:  Checking a loop in 'i32_factor_3'
 ; VF_8:          Found an estimated cost of 144 for VF 8 For instruction: %tmp3 = load i32, ptr %tmp0, align 4
 ; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp4 = load i32, ptr %tmp1, align 4
 ; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load i32, ptr %tmp2, align 4
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i32 0, ptr %tmp0, align 4
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i32 0, ptr %tmp1, align 4
-; VF_8-NEXT:     Found an estimated cost of 48 for VF 8 For instruction: store i32 0, ptr %tmp2, align 4
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i32 %tmp3, ptr %tmp0, align 4
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i32 %tmp4, ptr %tmp1, align 4
+; VF_8-NEXT:     Found an estimated cost of 144 for VF 8 For instruction: store i32 %tmp5, ptr %tmp2, align 4
 ; VF_16-LABEL: Checking a loop in 'i32_factor_3'
 ; VF_16:         Found an estimated cost of 288 for VF 16 For instruction: %tmp3 = load i32, ptr %tmp0, align 4
 ; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp4 = load i32, ptr %tmp1, align 4
 ; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load i32, ptr %tmp2, align 4
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i32 0, ptr %tmp0, align 4
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i32 0, ptr %tmp1, align 4
-; VF_16-NEXT:    Found an estimated cost of 96 for VF 16 For instruction: store i32 0, ptr %tmp2, align 4
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i32 %tmp3, ptr %tmp0, align 4
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i32 %tmp4, ptr %tmp1, align 4
+; VF_16-NEXT:    Found an estimated cost of 288 for VF 16 For instruction: store i32 %tmp5, ptr %tmp2, align 4
 for.body:
   %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
   %tmp0 = getelementptr inbounds %i32.3, ptr %data, i64 %i, i32 0
@@ -445,9 +445,9 @@ for.body:
   %tmp3 = load i32, ptr %tmp0, align 4
   %tmp4 = load i32, ptr %tmp1, align 4
   %tmp5 = load i32, ptr %tmp2, align 4
-  store i32 0, ptr %tmp0, align 4
-  store i32 0, ptr %tmp1, align 4
-  store i32 0, ptr %tmp2, align 4
+  store i32 %tmp3, ptr %tmp0, align 4
+  store i32 %tmp4, ptr %tmp1, align 4
+  store i32 %tmp5, ptr %tmp2, align 4
   %i.next = add nuw nsw i64 %i, 1
   %cond = icmp slt i64 %i.next, %n
   br i1 %cond, label %for.body, label %for.end
@@ -465,30 +465,30 @@ entry:
 ; VF_2:          Found an estimated cost of 66 for VF 2 For instruction: %tmp3 = load i64, ptr %tmp0, align 8
 ; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp4 = load i64, ptr %tmp1, align 8
 ; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load i64, ptr %tmp2, align 8
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i64 0, ptr %tmp0, align 8
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i64 0, ptr %tmp1, align 8
-; VF_2-NEXT:     Found an estimated cost of 18 for VF 2 For instruction: store i64 0, ptr %tmp2, align 8
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i64 %tmp3, ptr %tmp0, align 8
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i64 %tmp4, ptr %tmp1, align 8
+; VF_2-NEXT:     Found an estimated cost of 66 for VF 2 For instruction: store i64 %tmp5, ptr %tmp2, align 8
 ; VF_4-LABEL:  Checking a loop in 'i64_factor_3'
 ; VF_4:          Found an estimated cost of 132 for VF 4 For instruction: %tmp3 = load i64, ptr %tmp0, align 8
 ; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp4 = load i64, ptr %tmp1, align 8
 ; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load i64, ptr %tmp2, align 8
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i64 0, ptr %tmp0, align 8
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i64 0, ptr %tmp1, align 8
-; VF_4-NEXT:     Found an estimated cost of 36 for VF 4 For instruction: store i64 0, ptr %tmp2, align 8
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i64 %tmp3, ptr %tmp0, align 8
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i64 %tmp4, ptr %tmp1, align 8
+; VF_4-NEXT:     Found an estimated cost of 132 for VF 4 For instruction: store i64 %tmp5, ptr %tmp2, align 8
 ; VF_8-LABEL:  Checking a loop in 'i64_factor_3'
 ; VF_8:          Found an estimated cost of 264 for VF 8 For instruction: %tmp3 = load i64, ptr %tmp0, align 8
 ; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp4 = load i64, ptr %tmp1, align 8
 ; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load i64, ptr %tmp2, align 8
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i64 0, ptr %tmp0, align 8
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i64 0, ptr %tmp1, align 8
-; VF_8-NEXT:     Found an estimated cost of 72 for VF 8 For instruction: store i64 0, ptr %tmp2, align 8
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i64 %tmp3, ptr %tmp0, align 8
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i64 %tmp4, ptr %tmp1, align 8
+; VF_8-NEXT:     Found an estimated cost of 264 for VF 8 For instruction: store i64 %tmp5, ptr %tmp2, align 8
 ; VF_16-LABEL: Checking a loop in 'i64_factor_3'
 ; VF_16:         Found an estimated cost of 528 for VF 16 For instruction: %tmp3 = load i64, ptr %tmp0, align 8
 ; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp4 = load i64, ptr %tmp1, align 8
 ; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load i64, ptr %tmp2, align 8
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i64 0, ptr %tmp0, align 8
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i64 0, ptr %tmp1, align 8
-; VF_16-NEXT:    Found an estimated cost of 144 for VF 16 For instruction: store i64 0, ptr %tmp2, align 8
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i64 %tmp3, ptr %tmp0, align 8
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i64 %tmp4, ptr %tmp1, align 8
+; VF_16-NEXT:    Found an estimated cost of 528 for VF 16 For instruction: store i64 %tmp5, ptr %tmp2, align 8
 for.body:
   %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
   %tmp0 = getelementptr inbounds %i64.3, ptr %data, i64 %i, i32 0
@@ -497,9 +497,9 @@ for.body:
   %tmp3 = load i64, ptr %tmp0, align 8
   %tmp4 = load i64, ptr %tmp1, align 8
   %tmp5 = load i64, ptr %tmp2, align 8
-  store i64 0, ptr %tmp0, align 8
-  store i64 0, ptr %tmp1, align 8
-  store i64 0, ptr %tmp2, align 8
+  store i64 %tmp3, ptr %tmp0, align 8
+  store i64 %tmp4, ptr %tmp1, align 8
+  store i64 %tmp5, ptr %tmp2, align 8
   %i.next = add nuw nsw i64 %i, 1
   %cond = icmp slt i64 %i.next, %n
   br i1 %cond, label %for.body, label %for.end
@@ -517,30 +517,30 @@ entry:
 ; VF_2:          Found an estimated cost of 18 for VF 2 For instruction: %tmp3 = load half, ptr %tmp0, align 2
 ; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp4 = load half, ptr %tmp1, align 2
 ; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load half, ptr %tmp2, align 2
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store half 0xH0000, ptr %tmp0, align 2
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store half 0xH0000, ptr %tmp1, align 2
-; VF_2-NEXT:     Found an estimated cost of 12 for VF 2 For instruction: store half 0xH0000, ptr %tmp2, align 2
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store half %tmp3, ptr %tmp0, align 2
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store half %tmp4, ptr %tmp1, align 2
+; VF_2-NEXT:     Found an estimated cost of 18 for VF 2 For instruction: store half %tmp5, ptr %tmp2, align 2
 ; VF_4-LABEL:  Checking a loop in 'f16_factor_3'
 ; VF_4:          Found an estimated cost of 28 for VF 4 For instruction: %tmp3 = load half, ptr %tmp0, align 2
 ; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp4 = load half, ptr %tmp1, align 2
 ; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load half, ptr %tmp2, align 2
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store half 0xH0000, ptr %tmp0, align 2
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store half 0xH0000, ptr %tmp1, align 2
-; VF_4-NEXT:     Found an estimated cost of 24 for VF 4 For instruction: store half 0xH0000, ptr %tmp2, align 2
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store half %tmp3, ptr %tmp0, align 2
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store half %tmp4, ptr %tmp1, align 2
+; VF_4-NEXT:     Found an estimated cost of 28 for VF 4 For instruction: store half %tmp5, ptr %tmp2, align 2
 ; VF_8-LABEL:  Checking a loop in 'f16_factor_3'
 ; VF_8:          Found an estimated cost of 56 for VF 8 For instruction: %tmp3 = load half, ptr %tmp0, align 2
 ; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp4 = load half, ptr %tmp1, align 2
 ; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load half, ptr %tmp2, align 2
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store half 0xH0000, ptr %tmp0, align 2
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store half 0xH0000, ptr %tmp1, align 2
-; VF_8-NEXT:     Found an estimated cost of 48 for VF 8 For instruction: store half 0xH0000, ptr %tmp2, align 2
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store half %tmp3, ptr %tmp0, align 2
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store half %tmp4, ptr %tmp1, align 2
+; VF_8-NEXT:     Found an estimated cost of 56 for VF 8 For instruction: store half %tmp5, ptr %tmp2, align 2
 ; VF_16-LABEL: Checking a loop in 'f16_factor_3'
 ; VF_16:         Found an estimated cost of 112 for VF 16 For instruction: %tmp3 = load half, ptr %tmp0, align 2
 ; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp4 = load half, ptr %tmp1, align 2
 ; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load half, ptr %tmp2, align 2
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store half 0xH0000, ptr %tmp0, align 2
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store half 0xH0000, ptr %tmp1, align 2
-; VF_16-NEXT:    Found an estimated cost of 96 for VF 16 For instruction: store half 0xH0000, ptr %tmp2, align 2
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store half %tmp3, ptr %tmp0, align 2
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store half %tmp4, ptr %tmp1, align 2
+; VF_16-NEXT:    Found an estimated cost of 112 for VF 16 For instruction: store half %tmp5, ptr %tmp2, align 2
 for.body:
   %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
   %tmp0 = getelementptr inbounds %f16.3, ptr %data, i64 %i, i32 0
@@ -549,9 +549,9 @@ for.body:
   %tmp3 = load half, ptr %tmp0, align 2
   %tmp4 = load half, ptr %tmp1, align 2
   %tmp5 = load half, ptr %tmp2, align 2
-  store half 0.0, ptr %tmp0, align 2
-  store half 0.0, ptr %tmp1, align 2
-  store half 0.0, ptr %tmp2, align 2
+  store half %tmp3, ptr %tmp0, align 2
+  store half %tmp4, ptr %tmp1, align 2
+  store half %tmp5, ptr %tmp2, align 2
   %i.next = add nuw nsw i64 %i, 1
   %cond = icmp slt i64 %i.next, %n
   br i1 %cond, label %for.body, label %for.end
@@ -569,30 +569,30 @@ entry:
 ; VF_2:          Found an estimated cost of 16 for VF 2 For instruction: %tmp3 = load float, ptr %tmp0, align 4
 ; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp4 = load float, ptr %tmp1, align 4
 ; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load float, ptr %tmp2, align 4
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store float 0.000000e+00, ptr %tmp0, align 4
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store float 0.000000e+00, ptr %tmp1, align 4
-; VF_2-NEXT:     Found an estimated cost of 12 for VF 2 For instruction: store float 0.000000e+00, ptr %tmp2, align 4
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store float %tmp3, ptr %tmp0, align 4
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store float %tmp4, ptr %tmp1, align 4
+; VF_2-NEXT:     Found an estimated cost of 16 for VF 2 For instruction: store float %tmp5, ptr %tmp2, align 4
 ; VF_4-LABEL:  Checking a loop in 'f32_factor_3'
 ; VF_4:          Found an estimated cost of 24 for VF 4 For instruction: %tmp3 = load float, ptr %tmp0, align 4
 ; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp4 = load float, ptr %tmp1, align 4
 ; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load float, ptr %tmp2, align 4
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store float 0.000000e+00, ptr %tmp0, align 4
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store float 0.000000e+00, ptr %tmp1, align 4
-; VF_4-NEXT:     Found an estimated cost of 24 for VF 4 For instruction: store float 0.000000e+00, ptr %tmp2, align 4
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store float %tmp3, ptr %tmp0, align 4
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store float %tmp4, ptr %tmp1, align 4
+; VF_4-NEXT:     Found an estimated cost of 24 for VF 4 For instruction: store float %tmp5, ptr %tmp2, align 4
 ; VF_8-LABEL:  Checking a loop in 'f32_factor_3'
 ; VF_8:          Found an estimated cost of 64 for VF 8 For instruction: %tmp3 = load float, ptr %tmp0, align 4
 ; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp4 = load float, ptr %tmp1, align 4
 ; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load float, ptr %tmp2, align 4
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store float 0.000000e+00, ptr %tmp0, align 4
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store float 0.000000e+00, ptr %tmp1, align 4
-; VF_8-NEXT:     Found an estimated cost of 48 for VF 8 For instruction: store float 0.000000e+00, ptr %tmp2, align 4
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store float %tmp3, ptr %tmp0, align 4
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store float %tmp4, ptr %tmp1, align 4
+; VF_8-NEXT:     Found an estimated cost of 64 for VF 8 For instruction: store float %tmp5, ptr %tmp2, align 4
 ; VF_16-LABEL: Checking a loop in 'f32_factor_3'
 ; VF_16:         Found an estimated cost of 128 for VF 16 For instruction: %tmp3 = load float, ptr %tmp0, align 4
 ; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp4 = load float, ptr %tmp1, align 4
 ; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load float, ptr %tmp2, align 4
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store float 0.000000e+00, ptr %tmp0, align 4
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store float 0.000000e+00, ptr %tmp1, align 4
-; VF_16-NEXT:    Found an estimated cost of 96 for VF 16 For instruction: store float 0.000000e+00, ptr %tmp2, align 4
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store float %tmp3, ptr %tmp0, align 4
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store float %tmp4, ptr %tmp1, align 4
+; VF_16-NEXT:    Found an estimated cost of 128 for VF 16 For instruction: store float %tmp5, ptr %tmp2, align 4
 for.body:
   %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
   %tmp0 = getelementptr inbounds %f32.3, ptr %data, i64 %i, i32 0
@@ -601,9 +601,9 @@ for.body:
   %tmp3 = load float, ptr %tmp0, align 4
   %tmp4 = load float, ptr %tmp1, align 4
   %tmp5 = load float, ptr %tmp2, align 4
-  store float 0.0, ptr %tmp0, align 4
-  store float 0.0, ptr %tmp1, align 4
-  store float 0.0, ptr %tmp2, align 4
+  store float %tmp3, ptr %tmp0, align 4
+  store float %tmp4, ptr %tmp1, align 4
+  store float %tmp5, ptr %tmp2, align 4
   %i.next = add nuw nsw i64 %i, 1
   %cond = icmp slt i64 %i.next, %n
   br i1 %cond, label %for.body, label %for.end
@@ -621,30 +621,30 @@ entry:
 ; VF_2:          Found an estimated cost of 18 for VF 2 For instruction: %tmp3 = load double, ptr %tmp0, align 8
 ; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp4 = load double, ptr %tmp1, align 8
 ; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load double, ptr %tmp2, align 8
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store double 0.000000e+00, ptr %tmp0, align 8
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store double 0.000000e+00, ptr %tmp1, align 8
-; VF_2-NEXT:     Found an estimated cost of 12 for VF 2 For instruction: store double 0.000000e+00, ptr %tmp2, align 8
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store double %tmp3, ptr %tmp0, align 8
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store double %tmp4, ptr %tmp1, align 8
+; VF_2-NEXT:     Found an estimated cost of 18 for VF 2 For instruction: store double %tmp5, ptr %tmp2, align 8
 ; VF_4-LABEL:  Checking a loop in 'f64_factor_3'
 ; VF_4:          Found an estimated cost of 36 for VF 4 For instruction: %tmp3 = load double, ptr %tmp0, align 8
 ; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp4 = load double, ptr %tmp1, align 8
 ; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load double, ptr %tmp2, align 8
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store double 0.000000e+00, ptr %tmp0, align 8
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store double 0.000000e+00, ptr %tmp1, align 8
-; VF_4-NEXT:     Found an estimated cost of 24 for VF 4 For instruction: store double 0.000000e+00, ptr %tmp2, align 8
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store double %tmp3, ptr %tmp0, align 8
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store double %tmp4, ptr %tmp1, align 8
+; VF_4-NEXT:     Found an estimated cost of 36 for VF 4 For instruction: store double %tmp5, ptr %tmp2, align 8
 ; VF_8-LABEL:  Checking a loop in 'f64_factor_3'
 ; VF_8:          Found an estimated cost of 72 for VF 8 For instruction: %tmp3 = load double, ptr %tmp0, align 8
 ; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp4 = load double, ptr %tmp1, align 8
 ; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load double, ptr %tmp2, align 8
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store double 0.000000e+00, ptr %tmp0, align 8
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store double 0.000000e+00, ptr %tmp1, align 8
-; VF_8-NEXT:     Found an estimated cost of 48 for VF 8 For instruction: store double 0.000000e+00, ptr %tmp2, align 8
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store double %tmp3, ptr %tmp0, align 8
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store double %tmp4, ptr %tmp1, align 8
+; VF_8-NEXT:     Found an estimated cost of 72 for VF 8 For instruction: store double %tmp5, ptr %tmp2, align 8
 ; VF_16-LABEL: Checking a loop in 'f64_factor_3'
 ; VF_16:         Found an estimated cost of 144 for VF 16 For instruction: %tmp3 = load double, ptr %tmp0, align 8
 ; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp4 = load double, ptr %tmp1, align 8
 ; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load double, ptr %tmp2, align 8
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store double 0.000000e+00, ptr %tmp0, align 8
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store double 0.000000e+00, ptr %tmp1, align 8
-; VF_16-NEXT:    Found an estimated cost of 96 for VF 16 For instruction: store double 0.000000e+00, ptr %tmp2, align 8
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store double %tmp3, ptr %tmp0, align 8
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store double %tmp4, ptr %tmp1, align 8
+; VF_16-NEXT:    Found an estimated cost of 144 for VF 16 For instruction: store double %tmp5, ptr %tmp2, align 8
 for.body:
   %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
   %tmp0 = getelementptr inbounds %f64.3, ptr %data, i64 %i, i32 0
@@ -653,9 +653,9 @@ for.body:
   %tmp3 = load double, ptr %tmp0, align 8
   %tmp4 = load double, ptr %tmp1, align 8
   %tmp5 = load double, ptr %tmp2, align 8
-  store double 0.0, ptr %tmp0, align 8
-  store double 0.0, ptr %tmp1, align 8
-  store double 0.0, ptr %tmp2, align 8
+  store double %tmp3, ptr %tmp0, align 8
+  store double %tmp4, ptr %tmp1, align 8
+  store double %tmp5, ptr %tmp2, align 8
   %i.next = add nuw nsw i64 %i, 1
   %cond = icmp slt i64 %i.next, %n
   br i1 %cond, label %for.body, label %for.end
@@ -677,37 +677,37 @@ entry:
 ; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load i8, ptr %tmp1, align 1
 ; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp6 = load i8, ptr %tmp2, align 1
 ; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp7 = load i8, ptr %tmp3, align 1
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i8 0, ptr %tmp0, align 1
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i8 0, ptr %tmp1, align 1
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i8 0, ptr %tmp2, align 1
-; VF_2-NEXT:     Found an estimated cost of 16 for VF 2 For instruction: store i8 0, ptr %tmp3, align 1
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i8 %tmp4, ptr %tmp0, align 1
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i8 %tmp5, ptr %tmp1, align 1
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i8 %tmp6, ptr %tmp2, align 1
+; VF_2-NEXT:     Found an estimated cost of 48 for VF 2 For instruction: store i8 %tmp7, ptr %tmp3, align 1
 ; VF_4-LABEL: Checking a loop in 'i8_factor_4'
 ; VF_4:         Found an estimated cost of 96 for VF 4 For instruction: %tmp4 = load i8, ptr %tmp0, align 1
 ; VF_4-NEXT:    Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load i8, ptr %tmp1, align 1
 ; VF_4-NEXT:    Found an estimated cost of 0 for VF 4 For instruction: %tmp6 = load i8, ptr %tmp2, align 1
 ; VF_4-NEXT:    Found an estimated cost of 0 for VF 4 For instruction: %tmp7 = load i8, ptr %tmp3, align 1
-; VF_4-NEXT:    Found an estimated cost of 0 for VF 4 For instruction: store i8 0, ptr %tmp0, align 1
-; VF_4-NEXT:    Found an estimated cost of 0 for VF 4 For instruction: store i8 0, ptr %tmp1, align 1
-; VF_4-NEXT:    Found an estimated cost of 0 for VF 4 For instruction: store i8 0, ptr %tmp2, align 1
-; VF_4-NEXT:    Found an estimated cost of 32 for VF 4 For instruction: store i8 0, ptr %tmp3, align 1
+; VF_4-NEXT:    Found an estimated cost of 0 for VF 4 For instruction: store i8 %tmp4, ptr %tmp0, align 1
+; VF_4-NEXT:    Found an estimated cost of 0 for VF 4 For instruction: store i8 %tmp5, ptr %tmp1, align 1
+; VF_4-NEXT:    Found an estimated cost of 0 for VF 4 For instruction: store i8 %tmp6, ptr %tmp2, align 1
+; VF_4-NEXT:    Found an estimated cost of 96 for VF 4 For instruction: store i8 %tmp7, ptr %tmp3, align 1
 ; VF_8-LABEL:  Checking a loop in 'i8_factor_4'
 ; VF_8:          Found an estimated cost of 192 for VF 8 For instruction: %tmp4 = load i8, ptr %tmp0, align 1
 ; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load i8, ptr %tmp1, align 1
 ; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp6 = load i8, ptr %tmp2, align 1
 ; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp7 = load i8, ptr %tmp3, align 1
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i8 0, ptr %tmp0, align 1
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i8 0, ptr %tmp1, align 1
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i8 0, ptr %tmp2, align 1
-; VF_8-NEXT:     Found an estimated cost of 64 for VF 8 For instruction: store i8 0, ptr %tmp3, align 1
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i8 %tmp4, ptr %tmp0, align 1
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i8 %tmp5, ptr %tmp1, align 1
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i8 %tmp6, ptr %tmp2, align 1
+; VF_8-NEXT:     Found an estimated cost of 192 for VF 8 For instruction: store i8 %tmp7, ptr %tmp3, align 1
 ; VF_16-LABEL: Checking a loop in 'i8_factor_4'
 ; VF_16:         Found an estimated cost of 384 for VF 16 For instruction: %tmp4 = load i8, ptr %tmp0, align 1
 ; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load i8, ptr %tmp1, align 1
 ; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp6 = load i8, ptr %tmp2, align 1
 ; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp7 = load i8, ptr %tmp3, align 1
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i8 0, ptr %tmp0, align 1
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i8 0, ptr %tmp1, align 1
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i8 0, ptr %tmp2, align 1
-; VF_16-NEXT:    Found an estimated cost of 128 for VF 16 For instruction: store i8 0, ptr %tmp3, align 1
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i8 %tmp4, ptr %tmp0, align 1
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i8 %tmp5, ptr %tmp1, align 1
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i8 %tmp6, ptr %tmp2, align 1
+; VF_16-NEXT:    Found an estimated cost of 384 for VF 16 For instruction: store i8 %tmp7, ptr %tmp3, align 1
 for.body:
   %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
   %tmp0 = getelementptr inbounds %i8.4, ptr %data, i64 %i, i32 0
@@ -718,10 +718,10 @@ for.body:
   %tmp5 = load i8, ptr %tmp1, align 1
   %tmp6 = load i8, ptr %tmp2, align 1
   %tmp7 = load i8, ptr %tmp3, align 1
-  store i8 0, ptr %tmp0, align 1
-  store i8 0, ptr %tmp1, align 1
-  store i8 0, ptr %tmp2, align 1
-  store i8 0, ptr %tmp3, align 1
+  store i8 %tmp4, ptr %tmp0, align 1
+  store i8 %tmp5, ptr %tmp1, align 1
+  store i8 %tmp6, ptr %tmp2, align 1
+  store i8 %tmp7, ptr %tmp3, align 1
   %i.next = add nuw nsw i64 %i, 1
   %cond = icmp slt i64 %i.next, %n
   br i1 %cond, label %for.body, label %for.end
@@ -740,37 +740,37 @@ entry:
 ; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load i16, ptr %tmp1, align 2
 ; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp6 = load i16, ptr %tmp2, align 2
 ; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp7 = load i16, ptr %tmp3, align 2
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i16 0, ptr %tmp0, align 2
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i16 0, ptr %tmp1, align 2
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i16 0, ptr %tmp2, align 2
-; VF_2-NEXT:     Found an estimated cost of 16 for VF 2 For instruction: store i16 0, ptr %tmp3, align 2
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i16 %tmp4, ptr %tmp0, align 2
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i16 %tmp5, ptr %tmp1, align 2
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i16 %tmp6, ptr %tmp2, align 2
+; VF_2-NEXT:     Found an estimated cost of 48 for VF 2 For instruction: store i16 %tmp7, ptr %tmp3, align 2
 ; VF_4-LABEL:  Checking a loop in 'i16_factor_4'
 ; VF_4:          Found an estimated cost of 96 for VF 4 For instruction: %tmp4 = load i16, ptr %tmp0, align 2
 ; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load i16, ptr %tmp1, align 2
 ; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp6 = load i16, ptr %tmp2, align 2
 ; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp7 = load i16, ptr %tmp3, align 2
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i16 0, ptr %tmp0, align 2
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i16 0, ptr %tmp1, align 2
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i16 0, ptr %tmp2, align 2
-; VF_4-NEXT:     Found an estimated cost of 32 for VF 4 For instruction: store i16 0, ptr %tmp3, align 2
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i16 %tmp4, ptr %tmp0, align 2
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i16 %tmp5, ptr %tmp1, align 2
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i16 %tmp6, ptr %tmp2, align 2
+; VF_4-NEXT:     Found an estimated cost of 96 for VF 4 For instruction: store i16 %tmp7, ptr %tmp3, align 2
 ; VF_8-LABEL:  Checking a loop in 'i16_factor_4'
 ; VF_8:          Found an estimated cost of 192 for VF 8 For instruction: %tmp4 = load i16, ptr %tmp0, align 2
 ; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load i16, ptr %tmp1, align 2
 ; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp6 = load i16, ptr %tmp2, align 2
 ; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp7 = load i16, ptr %tmp3, align 2
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i16 0, ptr %tmp0, align 2
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i16 0, ptr %tmp1, align 2
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i16 0, ptr %tmp2, align 2
-; VF_8-NEXT:     Found an estimated cost of 64 for VF 8 For instruction: store i16 0, ptr %tmp3, align 2
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i16 %tmp4, ptr %tmp0, align 2
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i16 %tmp5, ptr %tmp1, align 2
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i16 %tmp6, ptr %tmp2, align 2
+; VF_8-NEXT:     Found an estimated cost of 192 for VF 8 For instruction: store i16 %tmp7, ptr %tmp3, align 2
 ; VF_16-LABEL: Checking a loop in 'i16_factor_4'
 ; VF_16:         Found an estimated cost of 384 for VF 16 For instruction: %tmp4 = load i16, ptr %tmp0, align 2
 ; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load i16, ptr %tmp1, align 2
 ; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp6 = load i16, ptr %tmp2, align 2
 ; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp7 = load i16, ptr %tmp3, align 2
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i16 0, ptr %tmp0, align 2
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i16 0, ptr %tmp1, align 2
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i16 0, ptr %tmp2, align 2
-; VF_16-NEXT:    Found an estimated cost of 128 for VF 16 For instruction: store i16 0, ptr %tmp3, align 2
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i16 %tmp4, ptr %tmp0, align 2
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i16 %tmp5, ptr %tmp1, align 2
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i16 %tmp6, ptr %tmp2, align 2
+; VF_16-NEXT:    Found an estimated cost of 384 for VF 16 For instruction: store i16 %tmp7, ptr %tmp3, align 2
 for.body:
   %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
   %tmp0 = getelementptr inbounds %i16.4, ptr %data, i64 %i, i32 0
@@ -781,10 +781,10 @@ for.body:
   %tmp5 = load i16, ptr %tmp1, align 2
   %tmp6 = load i16, ptr %tmp2, align 2
   %tmp7 = load i16, ptr %tmp3, align 2
-  store i16 0, ptr %tmp0, align 2
-  store i16 0, ptr %tmp1, align 2
-  store i16 0, ptr %tmp2, align 2
-  store i16 0, ptr %tmp3, align 2
+  store i16 %tmp4, ptr %tmp0, align 2
+  store i16 %tmp5, ptr %tmp1, align 2
+  store i16 %tmp6, ptr %tmp2, align 2
+  store i16 %tmp7, ptr %tmp3, align 2
   %i.next = add nuw nsw i64 %i, 1
   %cond = icmp slt i64 %i.next, %n
   br i1 %cond, label %for.body, label %for.end
@@ -803,37 +803,37 @@ entry:
 ; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load i32, ptr %tmp1, align 4
 ; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp6 = load i32, ptr %tmp2, align 4
 ; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp7 = load i32, ptr %tmp3, align 4
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i32 0, ptr %tmp0, align 4
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i32 0, ptr %tmp1, align 4
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i32 0, ptr %tmp2, align 4
-; VF_2-NEXT:     Found an estimated cost of 16 for VF 2 For instruction: store i32 0, ptr %tmp3, align 4
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i32 %tmp4, ptr %tmp0, align 4
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i32 %tmp5, ptr %tmp1, align 4
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i32 %tmp6, ptr %tmp2, align 4
+; VF_2-NEXT:     Found an estimated cost of 48 for VF 2 For instruction: store i32 %tmp7, ptr %tmp3, align 4
 ; VF_4-LABEL:  Checking a loop in 'i32_factor_4'
 ; VF_4:          Found an estimated cost of 32 for VF 4 For instruction: %tmp4 = load i32, ptr %tmp0, align 4
 ; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load i32, ptr %tmp1, align 4
 ; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp6 = load i32, ptr %tmp2, align 4
 ; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp7 = load i32, ptr %tmp3, align 4
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i32 0, ptr %tmp0, align 4
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i32 0, ptr %tmp1, align 4
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i32 0, ptr %tmp2, align 4
-; VF_4-NEXT:     Found an estimated cost of 32 for VF 4 For instruction: store i32 0, ptr %tmp3, align 4
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i32 %tmp4, ptr %tmp0, align 4
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i32 %tmp5, ptr %tmp1, align 4
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i32 %tmp6, ptr %tmp2, align 4
+; VF_4-NEXT:     Found an estimated cost of 32 for VF 4 For instruction: store i32 %tmp7, ptr %tmp3, align 4
 ; VF_8-LABEL:  Checking a loop in 'i32_factor_4'
 ; VF_8:          Found an estimated cost of 192 for VF 8 For instruction: %tmp4 = load i32, ptr %tmp0, align 4
 ; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load i32, ptr %tmp1, align 4
 ; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp6 = load i32, ptr %tmp2, align 4
 ; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp7 = load i32, ptr %tmp3, align 4
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i32 0, ptr %tmp0, align 4
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i32 0, ptr %tmp1, align 4
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i32 0, ptr %tmp2, align 4
-; VF_8-NEXT:     Found an estimated cost of 64 for VF 8 For instruction: store i32 0, ptr %tmp3, align 4
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i32 %tmp4, ptr %tmp0, align 4
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i32 %tmp5, ptr %tmp1, align 4
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i32 %tmp6, ptr %tmp2, align 4
+; VF_8-NEXT:     Found an estimated cost of 192 for VF 8 For instruction: store i32 %tmp7, ptr %tmp3, align 4
 ; VF_16-LABEL: Checking a loop in 'i32_factor_4'
 ; VF_16:         Found an estimated cost of 384 for VF 16 For instruction: %tmp4 = load i32, ptr %tmp0, align 4
 ; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load i32, ptr %tmp1, align 4
 ; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp6 = load i32, ptr %tmp2, align 4
 ; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp7 = load i32, ptr %tmp3, align 4
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i32 0, ptr %tmp0, align 4
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i32 0, ptr %tmp1, align 4
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i32 0, ptr %tmp2, align 4
-; VF_16-NEXT:    Found an estimated cost of 128 for VF 16 For instruction: store i32 0, ptr %tmp3, align 4
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i32 %tmp4, ptr %tmp0, align 4
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i32 %tmp5, ptr %tmp1, align 4
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i32 %tmp6, ptr %tmp2, align 4
+; VF_16-NEXT:    Found an estimated cost of 384 for VF 16 For instruction: store i32 %tmp7, ptr %tmp3, align 4
 for.body:
   %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
   %tmp0 = getelementptr inbounds %i32.4, ptr %data, i64 %i, i32 0
@@ -844,10 +844,10 @@ for.body:
   %tmp5 = load i32, ptr %tmp1, align 4
   %tmp6 = load i32, ptr %tmp2, align 4
   %tmp7 = load i32, ptr %tmp3, align 4
-  store i32 0, ptr %tmp0, align 4
-  store i32 0, ptr %tmp1, align 4
-  store i32 0, ptr %tmp2, align 4
-  store i32 0, ptr %tmp3, align 4
+  store i32 %tmp4, ptr %tmp0, align 4
+  store i32 %tmp5, ptr %tmp1, align 4
+  store i32 %tmp6, ptr %tmp2, align 4
+  store i32 %tmp7, ptr %tmp3, align 4
   %i.next = add nuw nsw i64 %i, 1
   %cond = icmp slt i64 %i.next, %n
   br i1 %cond, label %for.body, label %for.end
@@ -866,37 +866,37 @@ entry:
 ; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load i64, ptr %tmp1, align 8
 ; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp6 = load i64, ptr %tmp2, align 8
 ; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp7 = load i64, ptr %tmp3, align 8
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i64 0, ptr %tmp0, align 8
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i64 0, ptr %tmp1, align 8
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i64 0, ptr %tmp2, align 8
-; VF_2-NEXT:     Found an estimated cost of 24 for VF 2 For instruction: store i64 0, ptr %tmp3, align 8
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i64 %tmp4, ptr %tmp0, align 8
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i64 %tmp5, ptr %tmp1, align 8
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store i64 %tmp6, ptr %tmp2, align 8
+; VF_2-NEXT:     Found an estimated cost of 88 for VF 2 For instruction: store i64 %tmp7, ptr %tmp3, align 8
 ; VF_4-LABEL:  Checking a loop in 'i64_factor_4'
 ; VF_4:          Found an estimated cost of 176 for VF 4 For instruction: %tmp4 = load i64, ptr %tmp0, align 8
 ; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load i64, ptr %tmp1, align 8
 ; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp6 = load i64, ptr %tmp2, align 8
 ; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp7 = load i64, ptr %tmp3, align 8
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i64 0, ptr %tmp0, align 8
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i64 0, ptr %tmp1, align 8
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i64 0, ptr %tmp2, align 8
-; VF_4-NEXT:     Found an estimated cost of 48 for VF 4 For instruction: store i64 0, ptr %tmp3, align 8
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i64 %tmp4, ptr %tmp0, align 8
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i64 %tmp5, ptr %tmp1, align 8
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store i64 %tmp6, ptr %tmp2, align 8
+; VF_4-NEXT:     Found an estimated cost of 176 for VF 4 For instruction: store i64 %tmp7, ptr %tmp3, align 8
 ; VF_8-LABEL:  Checking a loop in 'i64_factor_4'
 ; VF_8:          Found an estimated cost of 352 for VF 8 For instruction: %tmp4 = load i64, ptr %tmp0, align 8
 ; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load i64, ptr %tmp1, align 8
 ; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp6 = load i64, ptr %tmp2, align 8
 ; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp7 = load i64, ptr %tmp3, align 8
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i64 0, ptr %tmp0, align 8
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i64 0, ptr %tmp1, align 8
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i64 0, ptr %tmp2, align 8
-; VF_8-NEXT:     Found an estimated cost of 96 for VF 8 For instruction: store i64 0, ptr %tmp3, align 8
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i64 %tmp4, ptr %tmp0, align 8
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i64 %tmp5, ptr %tmp1, align 8
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store i64 %tmp6, ptr %tmp2, align 8
+; VF_8-NEXT:     Found an estimated cost of 352 for VF 8 For instruction: store i64 %tmp7, ptr %tmp3, align 8
 ; VF_16-LABEL: Checking a loop in 'i64_factor_4'
 ; VF_16:         Found an estimated cost of 704 for VF 16 For instruction: %tmp4 = load i64, ptr %tmp0, align 8
 ; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load i64, ptr %tmp1, align 8
 ; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp6 = load i64, ptr %tmp2, align 8
 ; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp7 = load i64, ptr %tmp3, align 8
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i64 0, ptr %tmp0, align 8
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i64 0, ptr %tmp1, align 8
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i64 0, ptr %tmp2, align 8
-; VF_16-NEXT:    Found an estimated cost of 192 for VF 16 For instruction: store i64 0, ptr %tmp3, align 8
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i64 %tmp4, ptr %tmp0, align 8
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i64 %tmp5, ptr %tmp1, align 8
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store i64 %tmp6, ptr %tmp2, align 8
+; VF_16-NEXT:    Found an estimated cost of 704 for VF 16 For instruction: store i64 %tmp7, ptr %tmp3, align 8
 for.body:
   %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
   %tmp0 = getelementptr inbounds %i64.4, ptr %data, i64 %i, i32 0
@@ -907,10 +907,10 @@ for.body:
   %tmp5 = load i64, ptr %tmp1, align 8
   %tmp6 = load i64, ptr %tmp2, align 8
   %tmp7 = load i64, ptr %tmp3, align 8
-  store i64 0, ptr %tmp0, align 8
-  store i64 0, ptr %tmp1, align 8
-  store i64 0, ptr %tmp2, align 8
-  store i64 0, ptr %tmp3, align 8
+  store i64 %tmp4, ptr %tmp0, align 8
+  store i64 %tmp5, ptr %tmp1, align 8
+  store i64 %tmp6, ptr %tmp2, align 8
+  store i64 %tmp7, ptr %tmp3, align 8
   %i.next = add nuw nsw i64 %i, 1
   %cond = icmp slt i64 %i.next, %n
   br i1 %cond, label %for.body, label %for.end
@@ -929,37 +929,37 @@ entry:
 ; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load half, ptr %tmp1, align 2
 ; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp6 = load half, ptr %tmp2, align 2
 ; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp7 = load half, ptr %tmp3, align 2
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store half 0xH0000, ptr %tmp0, align 2
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store half 0xH0000, ptr %tmp1, align 2
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store half 0xH0000, ptr %tmp2, align 2
-; VF_2-NEXT:     Found an estimated cost of 16 for VF 2 For instruction: store half 0xH0000, ptr %tmp3, align 2
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store half %tmp4, ptr %tmp0, align 2
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store half %tmp5, ptr %tmp1, align 2
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store half %tmp6, ptr %tmp2, align 2
+; VF_2-NEXT:     Found an estimated cost of 18 for VF 2 For instruction: store half %tmp7, ptr %tmp3, align 2
 ; VF_4-LABEL:  Checking a loop in 'f16_factor_4'
 ; VF_4:          Found an estimated cost of 36 for VF 4 For instruction: %tmp4 = load half, ptr %tmp0, align 2
 ; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load half, ptr %tmp1, align 2
 ; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp6 = load half, ptr %tmp2, align 2
 ; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp7 = load half, ptr %tmp3, align 2
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store half 0xH0000, ptr %tmp0, align 2
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store half 0xH0000, ptr %tmp1, align 2
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store half 0xH0000, ptr %tmp2, align 2
-; VF_4-NEXT:     Found an estimated cost of 32 for VF 4 For instruction: store half 0xH0000, ptr %tmp3, align 2
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store half %tmp4, ptr %tmp0, align 2
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store half %tmp5, ptr %tmp1, align 2
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store half %tmp6, ptr %tmp2, align 2
+; VF_4-NEXT:     Found an estimated cost of 36 for VF 4 For instruction: store half %tmp7, ptr %tmp3, align 2
 ; VF_8-LABEL:  Checking a loop in 'f16_factor_4'
 ; VF_8:          Found an estimated cost of 72 for VF 8 For instruction: %tmp4 = load half, ptr %tmp0, align 2
 ; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load half, ptr %tmp1, align 2
 ; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp6 = load half, ptr %tmp2, align 2
 ; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp7 = load half, ptr %tmp3, align 2
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store half 0xH0000, ptr %tmp0, align 2
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store half 0xH0000, ptr %tmp1, align 2
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store half 0xH0000, ptr %tmp2, align 2
-; VF_8-NEXT:     Found an estimated cost of 64 for VF 8 For instruction: store half 0xH0000, ptr %tmp3, align 2
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store half %tmp4, ptr %tmp0, align 2
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store half %tmp5, ptr %tmp1, align 2
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store half %tmp6, ptr %tmp2, align 2
+; VF_8-NEXT:     Found an estimated cost of 72 for VF 8 For instruction: store half %tmp7, ptr %tmp3, align 2
 ; VF_16-LABEL: Checking a loop in 'f16_factor_4'
 ; VF_16:         Found an estimated cost of 144 for VF 16 For instruction: %tmp4 = load half, ptr %tmp0, align 2
 ; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load half, ptr %tmp1, align 2
 ; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp6 = load half, ptr %tmp2, align 2
 ; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp7 = load half, ptr %tmp3, align 2
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store half 0xH0000, ptr %tmp0, align 2
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store half 0xH0000, ptr %tmp1, align 2
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store half 0xH0000, ptr %tmp2, align 2
-; VF_16-NEXT:    Found an estimated cost of 128 for VF 16 For instruction: store half 0xH0000, ptr %tmp3, align 2
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store half %tmp4, ptr %tmp0, align 2
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store half %tmp5, ptr %tmp1, align 2
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store half %tmp6, ptr %tmp2, align 2
+; VF_16-NEXT:    Found an estimated cost of 144 for VF 16 For instruction: store half %tmp7, ptr %tmp3, align 2
 for.body:
   %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
   %tmp0 = getelementptr inbounds %f16.4, ptr %data, i64 %i, i32 0
@@ -970,10 +970,10 @@ for.body:
   %tmp5 = load half, ptr %tmp1, align 2
   %tmp6 = load half, ptr %tmp2, align 2
   %tmp7 = load half, ptr %tmp3, align 2
-  store half 0.0, ptr %tmp0, align 2
-  store half 0.0, ptr %tmp1, align 2
-  store half 0.0, ptr %tmp2, align 2
-  store half 0.0, ptr %tmp3, align 2
+  store half %tmp4, ptr %tmp0, align 2
+  store half %tmp5, ptr %tmp1, align 2
+  store half %tmp6, ptr %tmp2, align 2
+  store half %tmp7, ptr %tmp3, align 2
   %i.next = add nuw nsw i64 %i, 1
   %cond = icmp slt i64 %i.next, %n
   br i1 %cond, label %for.body, label %for.end
@@ -992,37 +992,37 @@ entry:
 ; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load float, ptr %tmp1, align 4
 ; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp6 = load float, ptr %tmp2, align 4
 ; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp7 = load float, ptr %tmp3, align 4
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store float 0.000000e+00, ptr %tmp0, align 4
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store float 0.000000e+00, ptr %tmp1, align 4
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store float 0.000000e+00, ptr %tmp2, align 4
-; VF_2-NEXT:     Found an estimated cost of 16 for VF 2 For instruction: store float 0.000000e+00, ptr %tmp3, align 4
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store float %tmp4, ptr %tmp0, align 4
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store float %tmp5, ptr %tmp1, align 4
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store float %tmp6, ptr %tmp2, align 4
+; VF_2-NEXT:     Found an estimated cost of 20 for VF 2 For instruction: store float %tmp7, ptr %tmp3, align 4
 ; VF_4-LABEL:  Checking a loop in 'f32_factor_4'
 ; VF_4:          Found an estimated cost of 32 for VF 4 For instruction: %tmp4 = load float, ptr %tmp0, align 4
 ; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load float, ptr %tmp1, align 4
 ; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp6 = load float, ptr %tmp2, align 4
 ; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp7 = load float, ptr %tmp3, align 4
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store float 0.000000e+00, ptr %tmp0, align 4
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store float 0.000000e+00, ptr %tmp1, align 4
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store float 0.000000e+00, ptr %tmp2, align 4
-; VF_4-NEXT:     Found an estimated cost of 32 for VF 4 For instruction: store float 0.000000e+00, ptr %tmp3, align 4
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store float %tmp4, ptr %tmp0, align 4
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store float %tmp5, ptr %tmp1, align 4
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store float %tmp6, ptr %tmp2, align 4
+; VF_4-NEXT:     Found an estimated cost of 32 for VF 4 For instruction: store float %tmp7, ptr %tmp3, align 4
 ; VF_8-LABEL:  Checking a loop in 'f32_factor_4'
 ; VF_8:          Found an estimated cost of 80 for VF 8 For instruction: %tmp4 = load float, ptr %tmp0, align 4
 ; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load float, ptr %tmp1, align 4
 ; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp6 = load float, ptr %tmp2, align 4
 ; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp7 = load float, ptr %tmp3, align 4
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store float 0.000000e+00, ptr %tmp0, align 4
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store float 0.000000e+00, ptr %tmp1, align 4
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store float 0.000000e+00, ptr %tmp2, align 4
-; VF_8-NEXT:     Found an estimated cost of 64 for VF 8 For instruction: store float 0.000000e+00, ptr %tmp3, align 4
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store float %tmp4, ptr %tmp0, align 4
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store float %tmp5, ptr %tmp1, align 4
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store float %tmp6, ptr %tmp2, align 4
+; VF_8-NEXT:     Found an estimated cost of 80 for VF 8 For instruction: store float %tmp7, ptr %tmp3, align 4
 ; VF_16-LABEL: Checking a loop in 'f32_factor_4'
 ; VF_16:         Found an estimated cost of 160 for VF 16 For instruction: %tmp4 = load float, ptr %tmp0, align 4
 ; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load float, ptr %tmp1, align 4
 ; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp6 = load float, ptr %tmp2, align 4
 ; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp7 = load float, ptr %tmp3, align 4
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store float 0.000000e+00, ptr %tmp0, align 4
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store float 0.000000e+00, ptr %tmp1, align 4
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store float 0.000000e+00, ptr %tmp2, align 4
-; VF_16-NEXT:    Found an estimated cost of 128 for VF 16 For instruction: store float 0.000000e+00, ptr %tmp3, align 4
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store float %tmp4, ptr %tmp0, align 4
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store float %tmp5, ptr %tmp1, align 4
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store float %tmp6, ptr %tmp2, align 4
+; VF_16-NEXT:    Found an estimated cost of 160 for VF 16 For instruction: store float %tmp7, ptr %tmp3, align 4
 for.body:
   %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
   %tmp0 = getelementptr inbounds %f32.4, ptr %data, i64 %i, i32 0
@@ -1033,10 +1033,10 @@ for.body:
   %tmp5 = load float, ptr %tmp1, align 4
   %tmp6 = load float, ptr %tmp2, align 4
   %tmp7 = load float, ptr %tmp3, align 4
-  store float 0.0, ptr %tmp0, align 4
-  store float 0.0, ptr %tmp1, align 4
-  store float 0.0, ptr %tmp2, align 4
-  store float 0.0, ptr %tmp3, align 4
+  store float %tmp4, ptr %tmp0, align 4
+  store float %tmp5, ptr %tmp1, align 4
+  store float %tmp6, ptr %tmp2, align 4
+  store float %tmp7, ptr %tmp3, align 4
   %i.next = add nuw nsw i64 %i, 1
   %cond = icmp slt i64 %i.next, %n
   br i1 %cond, label %for.body, label %for.end
@@ -1055,37 +1055,37 @@ entry:
 ; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp5 = load double, ptr %tmp1, align 8
 ; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp6 = load double, ptr %tmp2, align 8
 ; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: %tmp7 = load double, ptr %tmp3, align 8
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store double 0.000000e+00, ptr %tmp0, align 8
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store double 0.000000e+00, ptr %tmp1, align 8
-; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store double 0.000000e+00, ptr %tmp2, align 8
-; VF_2-NEXT:     Found an estimated cost of 16 for VF 2 For instruction: store double 0.000000e+00, ptr %tmp3, align 8
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store double %tmp4, ptr %tmp0, align 8
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store double %tmp5, ptr %tmp1, align 8
+; VF_2-NEXT:     Found an estimated cost of 0 for VF 2 For instruction: store double %tmp6, ptr %tmp2, align 8
+; VF_2-NEXT:     Found an estimated cost of 24 for VF 2 For instruction: store double %tmp7, ptr %tmp3, align 8
 ; VF_4-LABEL:  Checking a loop in 'f64_factor_4'
 ; VF_4:          Found an estimated cost of 48 for VF 4 For instruction: %tmp4 = load double, ptr %tmp0, align 8
 ; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp5 = load double, ptr %tmp1, align 8
 ; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp6 = load double, ptr %tmp2, align 8
 ; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: %tmp7 = load double, ptr %tmp3, align 8
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store double 0.000000e+00, ptr %tmp0, align 8
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store double 0.000000e+00, ptr %tmp1, align 8
-; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store double 0.000000e+00, ptr %tmp2, align 8
-; VF_4-NEXT:     Found an estimated cost of 32 for VF 4 For instruction: store double 0.000000e+00, ptr %tmp3, align 8
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store double %tmp4, ptr %tmp0, align 8
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store double %tmp5, ptr %tmp1, align 8
+; VF_4-NEXT:     Found an estimated cost of 0 for VF 4 For instruction: store double %tmp6, ptr %tmp2, align 8
+; VF_4-NEXT:     Found an estimated cost of 48 for VF 4 For instruction: store double %tmp7, ptr %tmp3, align 8
 ; VF_8-LABEL:  Checking a loop in 'f64_factor_4'
 ; VF_8:          Found an estimated cost of 96 for VF 8 For instruction: %tmp4 = load double, ptr %tmp0, align 8
 ; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp5 = load double, ptr %tmp1, align 8
 ; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp6 = load double, ptr %tmp2, align 8
 ; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: %tmp7 = load double, ptr %tmp3, align 8
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store double 0.000000e+00, ptr %tmp0, align 8
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store double 0.000000e+00, ptr %tmp1, align 8
-; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store double 0.000000e+00, ptr %tmp2, align 8
-; VF_8-NEXT:     Found an estimated cost of 64 for VF 8 For instruction: store double 0.000000e+00, ptr %tmp3, align 8
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store double %tmp4, ptr %tmp0, align 8
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store double %tmp5, ptr %tmp1, align 8
+; VF_8-NEXT:     Found an estimated cost of 0 for VF 8 For instruction: store double %tmp6, ptr %tmp2, align 8
+; VF_8-NEXT:     Found an estimated cost of 96 for VF 8 For instruction: store double %tmp7, ptr %tmp3, align 8
 ; VF_16-LABEL: Checking a loop in 'f64_factor_4'
 ; VF_16:         Found an estimated cost of 192 for VF 16 For instruction: %tmp4 = load double, ptr %tmp0, align 8
 ; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp5 = load double, ptr %tmp1, align 8
 ; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp6 = load double, ptr %tmp2, align 8
 ; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: %tmp7 = load double, ptr %tmp3, align 8
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store double 0.000000e+00, ptr %tmp0, align 8
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store double 0.000000e+00, ptr %tmp1, align 8
-; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store double 0.000000e+00, ptr %tmp2, align 8
-; VF_16-NEXT:    Found an estimated cost of 128 for VF 16 For instruction: store double 0.000000e+00, ptr %tmp3, align 8
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store double %tmp4, ptr %tmp0, align 8
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store double %tmp5, ptr %tmp1, align 8
+; VF_16-NEXT:    Found an estimated cost of 0 for VF 16 For instruction: store double %tmp6, ptr %tmp2, align 8
+; VF_16-NEXT:    Found an estimated cost of 192 for VF 16 For instruction: store double %tmp7, ptr %tmp3, align 8
 for.body:
   %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
   %tmp0 = getelementptr inbounds %f64.4, ptr %data, i64 %i, i32 0
@@ -1096,10 +1096,10 @@ for.body:
   %tmp5 = load double, ptr %tmp1, align 8
   %tmp6 = load double, ptr %tmp2, align 8
   %tmp7 = load double, ptr %tmp3, align 8
-  store double 0.0, ptr %tmp0, align 8
-  store double 0.0, ptr %tmp1, align 8
-  store double 0.0, ptr %tmp2, align 8
-  store double 0.0, ptr %tmp3, align 8
+  store double %tmp4, ptr %tmp0, align 8
+  store double %tmp5, ptr %tmp1, align 8
+  store double %tmp6, ptr %tmp2, align 8
+  store double %tmp7, ptr %tmp3, align 8
   %i.next = add nuw nsw i64 %i, 1
   %cond = icmp slt i64 %i.next, %n
   br i1 %cond, label %for.body, label %for.end

diff  --git a/llvm/test/Transforms/LoopVectorize/SystemZ/load-scalarization-cost-0.ll b/llvm/test/Transforms/LoopVectorize/SystemZ/load-scalarization-cost-0.ll
index 83dc618fc83ff..02f8bc61cbb62 100644
--- a/llvm/test/Transforms/LoopVectorize/SystemZ/load-scalarization-cost-0.ll
+++ b/llvm/test/Transforms/LoopVectorize/SystemZ/load-scalarization-cost-0.ll
@@ -12,9 +12,11 @@ entry:
 for.body:
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
   %mul = mul nsw i64 %iv, %s
-  %gep = getelementptr inbounds double, ptr %Src, i64 %mul
-  %bct = bitcast ptr %gep to ptr
+  %gep.src = getelementptr inbounds double, ptr %Src, i64 %mul
+  %bct = bitcast ptr %gep.src to ptr
   %ld = load i64, ptr %bct
+  %gep.data = getelementptr inbounds i64, ptr %data, i64 %iv
+  store i64 %ld, ptr %gep.data
   %iv.next = add nuw nsw i64 %iv, 1
   %cmp110.us = icmp slt i64 %iv.next, %n
   br i1 %cmp110.us, label %for.body, label %for.end

diff  --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-scalable-vf1.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-scalable-vf1.ll
index 314a133debbd7..98a942a501077 100644
--- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-scalable-vf1.ll
+++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-scalable-vf1.ll
@@ -51,7 +51,7 @@ define void @pr97452_scalable_vf1_for_no_live_out(ptr %src, ptr noalias %dst) {
 ; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[IV]]
 ; CHECK-NEXT:    [[L]] = load i64, ptr [[GEP]], align 8
 ; CHECK-NEXT:    [[GEP_DST:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[IV]]
-; CHECK-NEXT:    store i64 [[L]], ptr [[GEP_DST]], align 8
+; CHECK-NEXT:    store i64 [[FOR]], ptr [[GEP_DST]], align 8
 ; CHECK-NEXT:    [[EC:%.*]] = icmp eq i64 [[IV]], 22
 ; CHECK-NEXT:    br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]]
 ; CHECK:       [[EXIT]]:
@@ -67,7 +67,7 @@ loop:
   %gep = getelementptr inbounds i64, ptr %src, i64 %iv
   %l = load i64, ptr %gep, align 8
   %gep.dst = getelementptr inbounds i64, ptr %dst, i64 %iv
-  store i64 %l, ptr %gep.dst
+  store i64 %for, ptr %gep.dst
   %ec = icmp eq i64 %iv, 22
   br i1 %ec, label %exit, label %loop
 


        


More information about the llvm-commits mailing list