[llvm] 6071de3 - [RISCV] Autogen a test for ease of update

Mon Jun 6 12:44:42 PDT 2022

Author: Philip Reames
Date: 2022-06-06T12:44:34-07:00
New Revision: 6071de3db60743976d7ce8d28bd101956e7b63dd

URL: https://github.com/llvm/llvm-project/commit/6071de3db60743976d7ce8d28bd101956e7b63dd
DIFF: https://github.com/llvm/llvm-project/commit/6071de3db60743976d7ce8d28bd101956e7b63dd.diff

LOG: [RISCV] Autogen a test for ease of update

Added: 
    

Modified: 
    llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll

Removed: 
    


################################################################################
diff  --git a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll
index e01ecd5632e3d..13cb6cc252ee5 100644

--- a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
 ; This is the loop in c++ being vectorize in this file with
 ;experimental.vector.reverse
 ;  #pragma clang loop vectorize_width(4, scalable)
@@ -7,21 +8,287 @@
 ; REQUIRES: asserts
 ; RUN: opt -loop-vectorize -dce -instcombine -mtriple riscv64-linux-gnu \
 ; RUN:   -mattr=+v -debug-only=loop-vectorize -scalable-vectorization=on \
-; RUN:   -riscv-v-vector-bits-min=128 -S < %s 2>&1 | FileCheck %s
+; RUN:   -riscv-v-vector-bits-min=128 -disable-output < %s 2>&1 | FileCheck %s
 
-; CHECK-LABEL: vector_reverse_i64
-; CHECK: LV: Found an estimated cost of Invalid for VF vscale x 1 For instruction: %{{.*}} = load i32, ptr %{{.*}}, align 4
-; CHECK: LV: Found an estimated cost of Invalid for VF vscale x 1 For instruction: store i32 %{{.*}}, ptr %{{.*}}, align 4
-; CHECK: LV: Found an estimated cost of Invalid for VF vscale x 2 For instruction: %{{.*}} = load i32, ptr %{{.*}}, align 4
-; CHECK: LV: Found an estimated cost of Invalid for VF vscale x 2 For instruction: store i32 %{{.*}}, ptr %{{.*}}, align 4
-; CHECK: LV: Found an estimated cost of Invalid for VF vscale x 4 For instruction: %{{.*}} = load i32, ptr %{{.*}}, align 4
-; CHECK: LV: Found an estimated cost of Invalid for VF vscale x 4 For instruction: store i32 %{{.*}}, ptr %{{.*}}, align 4
-; CHECK: LV: Instruction with invalid costs prevented vectorization at VF=(vscale x 1, vscale x 2, vscale x 4): load   %1 = load i32, ptr %arrayidx, align 4
-; CHECK: remark: <unknown>:0:0: Instruction with invalid costs prevented vectorization at VF=(vscale x 1, vscale x 2, vscale x 4): load
-; CHECK: LV: Instruction with invalid costs prevented vectorization at VF=(vscale x 1, vscale x 2, vscale x 4): store   store i32 %add9, ptr %arrayidx3, align 4
-; CHECK: remark: <unknown>:0:0: Instruction with invalid costs prevented vectorization at VF=(vscale x 1, vscale x 2, vscale x 4): store
-; CHECK: LV: Selecting VF: 4.
 define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocapture noundef readonly %B, i32 noundef signext %n) {
+; CHECK-LABEL: 'vector_reverse_i64'
+; CHECK-NEXT:  LV: Loop hints: force=enabled width=vscale x 4 interleave=0
+; CHECK-NEXT:  LV: Found a loop: for.body
+; CHECK-NEXT:  LV: Found an induction variable.
+; CHECK-NEXT:  LV: Found an induction variable.
+; CHECK-NEXT:  LV: Did not find one integer induction var.
+; CHECK-NEXT:  LV: We can vectorize this loop (with a runtime bound check)!
+; CHECK-NEXT:  LV: Found trip count: 0
+; CHECK-NEXT:  LV: Scalable vectorization is available
+; CHECK-NEXT:  LV: The max safe fixed VF is: 67108864.
+; CHECK-NEXT:  LV: The max safe scalable VF is: vscale x 4294967295.
+; CHECK-NEXT:  LV: Found uniform instruction: %cmp = icmp ugt i64 %indvars.iv, 1
+; CHECK-NEXT:  LV: Found uniform instruction: %arrayidx = getelementptr inbounds i32, ptr %B, i64 %idxprom
+; CHECK-NEXT:  LV: Found uniform instruction: %arrayidx3 = getelementptr inbounds i32, ptr %A, i64 %idxprom
+; CHECK-NEXT:  LV: Found uniform instruction: %idxprom = zext i32 %i.0 to i64
+; CHECK-NEXT:  LV: Found uniform instruction: %idxprom = zext i32 %i.0 to i64
+; CHECK-NEXT:  LV: Found uniform instruction: %indvars.iv = phi i64 [ %0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+; CHECK-NEXT:  LV: Found uniform instruction: %indvars.iv.next = add nsw i64 %indvars.iv, -1
+; CHECK-NEXT:  LV: Found uniform instruction: %i.0.in8 = phi i32 [ %n, %for.body.preheader ], [ %i.0, %for.body ]
+; CHECK-NEXT:  LV: Found uniform instruction: %i.0 = add nsw i32 %i.0.in8, -1
+; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: %indvars.iv = phi i64 [ %0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: %i.0.in8 = phi i32 [ %n, %for.body.preheader ], [ %i.0, %for.body ]
+; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: %i.0 = add nsw i32 %i.0.in8, -1
+; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: %idxprom = zext i32 %i.0 to i64
+; CHECK-NEXT:  LV: Found an estimated cost of 0 for VF vscale x 4 For instruction: %arrayidx = getelementptr inbounds i32, ptr %B, i64 %idxprom
+; CHECK-NEXT:  LV: Found an estimated cost of Invalid for VF vscale x 4 For instruction: %1 = load i32, ptr %arrayidx, align 4
+; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: %add9 = add i32 %1, 1
+; CHECK-NEXT:  LV: Found an estimated cost of 0 for VF vscale x 4 For instruction: %arrayidx3 = getelementptr inbounds i32, ptr %A, i64 %idxprom
+; CHECK-NEXT:  LV: Found an estimated cost of Invalid for VF vscale x 4 For instruction: store i32 %add9, ptr %arrayidx3, align 4
+; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: %cmp = icmp ugt i64 %indvars.iv, 1
+; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: %indvars.iv.next = add nsw i64 %indvars.iv, -1
+; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit, !llvm.loop !0
+; CHECK-NEXT:  LV: UserVF ignored because of invalid costs..
+; CHECK-NEXT:  LV: Interleaving disabled by the pass manager
+; CHECK-NEXT:  remark: <unknown>:0:0: UserVF ignored because of invalid costs.
+; CHECK-NEXT:  LV: Found uniform instruction: %cmp = icmp ugt i64 %indvars.iv, 1
+; CHECK-NEXT:  LV: Found uniform instruction: %arrayidx = getelementptr inbounds i32, ptr %B, i64 %idxprom
+; CHECK-NEXT:  LV: Found uniform instruction: %arrayidx3 = getelementptr inbounds i32, ptr %A, i64 %idxprom
+; CHECK-NEXT:  LV: Found uniform instruction: %idxprom = zext i32 %i.0 to i64
+; CHECK-NEXT:  LV: Found uniform instruction: %idxprom = zext i32 %i.0 to i64
+; CHECK-NEXT:  LV: Found uniform instruction: %indvars.iv = phi i64 [ %0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+; CHECK-NEXT:  LV: Found uniform instruction: %indvars.iv.next = add nsw i64 %indvars.iv, -1
+; CHECK-NEXT:  LV: Found uniform instruction: %i.0.in8 = phi i32 [ %n, %for.body.preheader ], [ %i.0, %for.body ]
+; CHECK-NEXT:  LV: Found uniform instruction: %i.0 = add nsw i32 %i.0.in8, -1
+; CHECK-NEXT:  LV: Found scalar instruction: %indvars.iv = phi i64 [ %0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+; CHECK-NEXT:  LV: Found scalar instruction: %indvars.iv.next = add nsw i64 %indvars.iv, -1
+; CHECK-NEXT:  LV: Found scalar instruction: %i.0.in8 = phi i32 [ %n, %for.body.preheader ], [ %i.0, %for.body ]
+; CHECK-NEXT:  LV: Found scalar instruction: %i.0 = add nsw i32 %i.0.in8, -1
+; CHECK-NEXT:  LV: Found uniform instruction: %cmp = icmp ugt i64 %indvars.iv, 1
+; CHECK-NEXT:  LV: Found uniform instruction: %arrayidx = getelementptr inbounds i32, ptr %B, i64 %idxprom
+; CHECK-NEXT:  LV: Found uniform instruction: %arrayidx3 = getelementptr inbounds i32, ptr %A, i64 %idxprom
+; CHECK-NEXT:  LV: Found uniform instruction: %idxprom = zext i32 %i.0 to i64
+; CHECK-NEXT:  LV: Found uniform instruction: %idxprom = zext i32 %i.0 to i64
+; CHECK-NEXT:  LV: Found uniform instruction: %indvars.iv = phi i64 [ %0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+; CHECK-NEXT:  LV: Found uniform instruction: %indvars.iv.next = add nsw i64 %indvars.iv, -1
+; CHECK-NEXT:  LV: Found uniform instruction: %i.0.in8 = phi i32 [ %n, %for.body.preheader ], [ %i.0, %for.body ]
+; CHECK-NEXT:  LV: Found uniform instruction: %i.0 = add nsw i32 %i.0.in8, -1
+; CHECK-NEXT:  LV: Found scalar instruction: %indvars.iv = phi i64 [ %0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+; CHECK-NEXT:  LV: Found scalar instruction: %indvars.iv.next = add nsw i64 %indvars.iv, -1
+; CHECK-NEXT:  LV: Found scalar instruction: %i.0.in8 = phi i32 [ %n, %for.body.preheader ], [ %i.0, %for.body ]
+; CHECK-NEXT:  LV: Found scalar instruction: %i.0 = add nsw i32 %i.0.in8, -1
+; CHECK-NEXT:  LV: Found uniform instruction: %cmp = icmp ugt i64 %indvars.iv, 1
+; CHECK-NEXT:  LV: Found uniform instruction: %arrayidx = getelementptr inbounds i32, ptr %B, i64 %idxprom
+; CHECK-NEXT:  LV: Found uniform instruction: %arrayidx3 = getelementptr inbounds i32, ptr %A, i64 %idxprom
+; CHECK-NEXT:  LV: Found uniform instruction: %idxprom = zext i32 %i.0 to i64
+; CHECK-NEXT:  LV: Found uniform instruction: %idxprom = zext i32 %i.0 to i64
+; CHECK-NEXT:  LV: Found uniform instruction: %indvars.iv = phi i64 [ %0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+; CHECK-NEXT:  LV: Found uniform instruction: %indvars.iv.next = add nsw i64 %indvars.iv, -1
+; CHECK-NEXT:  LV: Found uniform instruction: %i.0.in8 = phi i32 [ %n, %for.body.preheader ], [ %i.0, %for.body ]
+; CHECK-NEXT:  LV: Found uniform instruction: %i.0 = add nsw i32 %i.0.in8, -1
+; CHECK-NEXT:  LV: Found uniform instruction: %cmp = icmp ugt i64 %indvars.iv, 1
+; CHECK-NEXT:  LV: Found uniform instruction: %arrayidx = getelementptr inbounds i32, ptr %B, i64 %idxprom
+; CHECK-NEXT:  LV: Found uniform instruction: %arrayidx3 = getelementptr inbounds i32, ptr %A, i64 %idxprom
+; CHECK-NEXT:  LV: Found uniform instruction: %idxprom = zext i32 %i.0 to i64
+; CHECK-NEXT:  LV: Found uniform instruction: %idxprom = zext i32 %i.0 to i64
+; CHECK-NEXT:  LV: Found uniform instruction: %indvars.iv = phi i64 [ %0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+; CHECK-NEXT:  LV: Found uniform instruction: %indvars.iv.next = add nsw i64 %indvars.iv, -1
+; CHECK-NEXT:  LV: Found uniform instruction: %i.0.in8 = phi i32 [ %n, %for.body.preheader ], [ %i.0, %for.body ]
+; CHECK-NEXT:  LV: Found uniform instruction: %i.0 = add nsw i32 %i.0.in8, -1
+; CHECK-NEXT:  LV: Scalarizing: %i.0 = add nsw i32 %i.0.in8, -1
+; CHECK-NEXT:  LV: Scalarizing: %idxprom = zext i32 %i.0 to i64
+; CHECK-NEXT:  LV: Scalarizing: %arrayidx = getelementptr inbounds i32, ptr %B, i64 %idxprom
+; CHECK-NEXT:  LV: Scalarizing: %1 = load i32, ptr %arrayidx, align 4
+; CHECK-NEXT:  LV: Scalarizing: %add9 = add i32 %1, 1
+; CHECK-NEXT:  LV: Scalarizing: %arrayidx3 = getelementptr inbounds i32, ptr %A, i64 %idxprom
+; CHECK-NEXT:  LV: Scalarizing: store i32 %add9, ptr %arrayidx3, align 4
+; CHECK-NEXT:  LV: Scalarizing: %i.0 = add nsw i32 %i.0.in8, -1
+; CHECK-NEXT:  LV: Scalarizing: %idxprom = zext i32 %i.0 to i64
+; CHECK-NEXT:  LV: Scalarizing: %arrayidx = getelementptr inbounds i32, ptr %B, i64 %idxprom
+; CHECK-NEXT:  LV: Scalarizing: %arrayidx3 = getelementptr inbounds i32, ptr %A, i64 %idxprom
+; CHECK-NEXT:  LV: Scalarizing: %i.0 = add nsw i32 %i.0.in8, -1
+; CHECK-NEXT:  LV: Scalarizing: %idxprom = zext i32 %i.0 to i64
+; CHECK-NEXT:  LV: Scalarizing: %arrayidx = getelementptr inbounds i32, ptr %B, i64 %idxprom
+; CHECK-NEXT:  LV: Scalarizing: %arrayidx3 = getelementptr inbounds i32, ptr %A, i64 %idxprom
+; CHECK-NEXT:  VPlan 'Initial VPlan for VF={1},UF>=1' {
+; CHECK-NEXT:  Live-in vp<%2> = vector-trip-count
+; CHECK:       vector.ph:
+; CHECK-NEXT:  Successor(s): vector loop
+; CHECK:       <x1> vector loop: {
+; CHECK-NEXT:    vector.body:
+; CHECK-NEXT:    EMIT vp<%3> = CANONICAL-INDUCTION
+; CHECK-NEXT:    vp<%4> = SCALAR-STEPS vp<%3>, ir<%n>, ir<-1>
+; CHECK-NEXT:    CLONE ir<%i.0> = add vp<%4>, ir<-1>
+; CHECK-NEXT:    CLONE ir<%idxprom> = zext ir<%i.0>
+; CHECK-NEXT:    CLONE ir<%arrayidx> = getelementptr ir<%B>, ir<%idxprom>
+; CHECK-NEXT:    CLONE ir<%1> = load ir<%arrayidx>
+; CHECK-NEXT:    CLONE ir<%add9> = add ir<%1>, ir<1>
+; CHECK-NEXT:    CLONE ir<%arrayidx3> = getelementptr ir<%A>, ir<%idxprom>
+; CHECK-NEXT:    CLONE store ir<%add9>, ir<%arrayidx3>
+; CHECK-NEXT:    EMIT vp<%12> = VF * UF +(nuw) vp<%3>
+; CHECK-NEXT:    EMIT branch-on-count vp<%12> vp<%2>
+; CHECK-NEXT:    No successors
+; CHECK-NEXT:  }
+; CHECK-NEXT:  Successor(s): middle.block
+; CHECK:       middle.block:
+; CHECK-NEXT:  No successors
+; CHECK-NEXT:  }
+; CHECK-NEXT:  VPlan 'Initial VPlan for VF={2,4},UF>=1' {
+; CHECK-NEXT:  Live-in vp<%2> = vector-trip-count
+; CHECK:       vector.ph:
+; CHECK-NEXT:  Successor(s): vector loop
+; CHECK:       <x1> vector loop: {
+; CHECK-NEXT:    vector.body:
+; CHECK-NEXT:    EMIT vp<%3> = CANONICAL-INDUCTION
+; CHECK-NEXT:    vp<%4> = SCALAR-STEPS vp<%3>, ir<%n>, ir<-1>
+; CHECK-NEXT:    CLONE ir<%i.0> = add vp<%4>, ir<-1>
+; CHECK-NEXT:    CLONE ir<%idxprom> = zext ir<%i.0>
+; CHECK-NEXT:    CLONE ir<%arrayidx> = getelementptr ir<%B>, ir<%idxprom>
+; CHECK-NEXT:    WIDEN ir<%1> = load ir<%arrayidx>
+; CHECK-NEXT:    WIDEN ir<%add9> = add ir<%1>, ir<1>
+; CHECK-NEXT:    CLONE ir<%arrayidx3> = getelementptr ir<%A>, ir<%idxprom>
+; CHECK-NEXT:    WIDEN store ir<%arrayidx3>, ir<%add9>
+; CHECK-NEXT:    EMIT vp<%11> = VF * UF +(nuw) vp<%3>
+; CHECK-NEXT:    EMIT branch-on-count vp<%11> vp<%2>
+; CHECK-NEXT:    No successors
+; CHECK-NEXT:  }
+; CHECK-NEXT:  Successor(s): middle.block
+; CHECK:       middle.block:
+; CHECK-NEXT:  No successors
+; CHECK-NEXT:  }
+; CHECK-NEXT:  VPlan 'Initial VPlan for VF={vscale x 1,vscale x 2,vscale x 4},UF>=1' {
+; CHECK-NEXT:  Live-in vp<%2> = vector-trip-count
+; CHECK:       vector.ph:
+; CHECK-NEXT:  Successor(s): vector loop
+; CHECK:       <x1> vector loop: {
+; CHECK-NEXT:    vector.body:
+; CHECK-NEXT:    EMIT vp<%3> = CANONICAL-INDUCTION
+; CHECK-NEXT:    vp<%4> = SCALAR-STEPS vp<%3>, ir<%n>, ir<-1>
+; CHECK-NEXT:    CLONE ir<%i.0> = add vp<%4>, ir<-1>
+; CHECK-NEXT:    CLONE ir<%idxprom> = zext ir<%i.0>
+; CHECK-NEXT:    CLONE ir<%arrayidx> = getelementptr ir<%B>, ir<%idxprom>
+; CHECK-NEXT:    WIDEN ir<%1> = load ir<%arrayidx>
+; CHECK-NEXT:    WIDEN ir<%add9> = add ir<%1>, ir<1>
+; CHECK-NEXT:    CLONE ir<%arrayidx3> = getelementptr ir<%A>, ir<%idxprom>
+; CHECK-NEXT:    WIDEN store ir<%arrayidx3>, ir<%add9>
+; CHECK-NEXT:    EMIT vp<%11> = VF * UF +(nuw) vp<%3>
+; CHECK-NEXT:    EMIT branch-on-count vp<%11> vp<%2>
+; CHECK-NEXT:    No successors
+; CHECK-NEXT:  }
+; CHECK-NEXT:  Successor(s): middle.block
+; CHECK:       middle.block:
+; CHECK-NEXT:  No successors
+; CHECK-NEXT:  }
+; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF 1 For instruction: %indvars.iv = phi i64 [ %0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF 1 For instruction: %i.0.in8 = phi i32 [ %n, %for.body.preheader ], [ %i.0, %for.body ]
+; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF 1 For instruction: %i.0 = add nsw i32 %i.0.in8, -1
+; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF 1 For instruction: %idxprom = zext i32 %i.0 to i64
+; CHECK-NEXT:  LV: Found an estimated cost of 0 for VF 1 For instruction: %arrayidx = getelementptr inbounds i32, ptr %B, i64 %idxprom
+; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF 1 For instruction: %1 = load i32, ptr %arrayidx, align 4
+; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF 1 For instruction: %add9 = add i32 %1, 1
+; CHECK-NEXT:  LV: Found an estimated cost of 0 for VF 1 For instruction: %arrayidx3 = getelementptr inbounds i32, ptr %A, i64 %idxprom
+; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF 1 For instruction: store i32 %add9, ptr %arrayidx3, align 4
+; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF 1 For instruction: %cmp = icmp ugt i64 %indvars.iv, 1
+; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF 1 For instruction: %indvars.iv.next = add nsw i64 %indvars.iv, -1
+; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF 1 For instruction: br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit, !llvm.loop !0
+; CHECK-NEXT:  LV: Scalar loop costs: 10.
+; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF 2 For instruction: %indvars.iv = phi i64 [ %0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF 2 For instruction: %i.0.in8 = phi i32 [ %n, %for.body.preheader ], [ %i.0, %for.body ]
+; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF 2 For instruction: %i.0 = add nsw i32 %i.0.in8, -1
+; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF 2 For instruction: %idxprom = zext i32 %i.0 to i64
+; CHECK-NEXT:  LV: Found an estimated cost of 0 for VF 2 For instruction: %arrayidx = getelementptr inbounds i32, ptr %B, i64 %idxprom
+; CHECK-NEXT:  LV: Found an estimated cost of 5 for VF 2 For instruction: %1 = load i32, ptr %arrayidx, align 4
+; CHECK-NEXT:  LV: Found an estimated cost of 2 for VF 2 For instruction: %add9 = add i32 %1, 1
+; CHECK-NEXT:  LV: Found an estimated cost of 0 for VF 2 For instruction: %arrayidx3 = getelementptr inbounds i32, ptr %A, i64 %idxprom
+; CHECK-NEXT:  LV: Found an estimated cost of 5 for VF 2 For instruction: store i32 %add9, ptr %arrayidx3, align 4
+; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF 2 For instruction: %cmp = icmp ugt i64 %indvars.iv, 1
+; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF 2 For instruction: %indvars.iv.next = add nsw i64 %indvars.iv, -1
+; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF 2 For instruction: br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit, !llvm.loop !0
+; CHECK-NEXT:  LV: Vector loop of width 2 costs: 9.
+; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF 4 For instruction: %indvars.iv = phi i64 [ %0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF 4 For instruction: %i.0.in8 = phi i32 [ %n, %for.body.preheader ], [ %i.0, %for.body ]
+; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF 4 For instruction: %i.0 = add nsw i32 %i.0.in8, -1
+; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF 4 For instruction: %idxprom = zext i32 %i.0 to i64
+; CHECK-NEXT:  LV: Found an estimated cost of 0 for VF 4 For instruction: %arrayidx = getelementptr inbounds i32, ptr %B, i64 %idxprom
+; CHECK-NEXT:  LV: Found an estimated cost of 9 for VF 4 For instruction: %1 = load i32, ptr %arrayidx, align 4
+; CHECK-NEXT:  LV: Found an estimated cost of 2 for VF 4 For instruction: %add9 = add i32 %1, 1
+; CHECK-NEXT:  LV: Found an estimated cost of 0 for VF 4 For instruction: %arrayidx3 = getelementptr inbounds i32, ptr %A, i64 %idxprom
+; CHECK-NEXT:  LV: Found an estimated cost of 9 for VF 4 For instruction: store i32 %add9, ptr %arrayidx3, align 4
+; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF 4 For instruction: %cmp = icmp ugt i64 %indvars.iv, 1
+; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF 4 For instruction: %indvars.iv.next = add nsw i64 %indvars.iv, -1
+; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF 4 For instruction: br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit, !llvm.loop !0
+; CHECK-NEXT:  LV: Vector loop of width 4 costs: 6.
+; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF vscale x 1 For instruction: %indvars.iv = phi i64 [ %0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF vscale x 1 For instruction: %i.0.in8 = phi i32 [ %n, %for.body.preheader ], [ %i.0, %for.body ]
+; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF vscale x 1 For instruction: %i.0 = add nsw i32 %i.0.in8, -1
+; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF vscale x 1 For instruction: %idxprom = zext i32 %i.0 to i64
+; CHECK-NEXT:  LV: Found an estimated cost of 0 for VF vscale x 1 For instruction: %arrayidx = getelementptr inbounds i32, ptr %B, i64 %idxprom
+; CHECK-NEXT:  LV: Found an estimated cost of Invalid for VF vscale x 1 For instruction: %1 = load i32, ptr %arrayidx, align 4
+; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF vscale x 1 For instruction: %add9 = add i32 %1, 1
+; CHECK-NEXT:  LV: Found an estimated cost of 0 for VF vscale x 1 For instruction: %arrayidx3 = getelementptr inbounds i32, ptr %A, i64 %idxprom
+; CHECK-NEXT:  LV: Found an estimated cost of Invalid for VF vscale x 1 For instruction: store i32 %add9, ptr %arrayidx3, align 4
+; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF vscale x 1 For instruction: %cmp = icmp ugt i64 %indvars.iv, 1
+; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF vscale x 1 For instruction: %indvars.iv.next = add nsw i64 %indvars.iv, -1
+; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF vscale x 1 For instruction: br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit, !llvm.loop !0
+; CHECK-NEXT:  LV: Vector loop of width vscale x 1 costs: Invalid (assuming a minimum vscale of 1).
+; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF vscale x 2 For instruction: %indvars.iv = phi i64 [ %0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF vscale x 2 For instruction: %i.0.in8 = phi i32 [ %n, %for.body.preheader ], [ %i.0, %for.body ]
+; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF vscale x 2 For instruction: %i.0 = add nsw i32 %i.0.in8, -1
+; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF vscale x 2 For instruction: %idxprom = zext i32 %i.0 to i64
+; CHECK-NEXT:  LV: Found an estimated cost of 0 for VF vscale x 2 For instruction: %arrayidx = getelementptr inbounds i32, ptr %B, i64 %idxprom
+; CHECK-NEXT:  LV: Found an estimated cost of Invalid for VF vscale x 2 For instruction: %1 = load i32, ptr %arrayidx, align 4
+; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF vscale x 2 For instruction: %add9 = add i32 %1, 1
+; CHECK-NEXT:  LV: Found an estimated cost of 0 for VF vscale x 2 For instruction: %arrayidx3 = getelementptr inbounds i32, ptr %A, i64 %idxprom
+; CHECK-NEXT:  LV: Found an estimated cost of Invalid for VF vscale x 2 For instruction: store i32 %add9, ptr %arrayidx3, align 4
+; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF vscale x 2 For instruction: %cmp = icmp ugt i64 %indvars.iv, 1
+; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF vscale x 2 For instruction: %indvars.iv.next = add nsw i64 %indvars.iv, -1
+; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF vscale x 2 For instruction: br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit, !llvm.loop !0
+; CHECK-NEXT:  LV: Vector loop of width vscale x 2 costs: Invalid (assuming a minimum vscale of 1).
+; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: %indvars.iv = phi i64 [ %0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: %i.0.in8 = phi i32 [ %n, %for.body.preheader ], [ %i.0, %for.body ]
+; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: %i.0 = add nsw i32 %i.0.in8, -1
+; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: %idxprom = zext i32 %i.0 to i64
+; CHECK-NEXT:  LV: Found an estimated cost of 0 for VF vscale x 4 For instruction: %arrayidx = getelementptr inbounds i32, ptr %B, i64 %idxprom
+; CHECK-NEXT:  LV: Found an estimated cost of Invalid for VF vscale x 4 For instruction: %1 = load i32, ptr %arrayidx, align 4
+; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: %add9 = add i32 %1, 1
+; CHECK-NEXT:  LV: Found an estimated cost of 0 for VF vscale x 4 For instruction: %arrayidx3 = getelementptr inbounds i32, ptr %A, i64 %idxprom
+; CHECK-NEXT:  LV: Found an estimated cost of Invalid for VF vscale x 4 For instruction: store i32 %add9, ptr %arrayidx3, align 4
+; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: %cmp = icmp ugt i64 %indvars.iv, 1
+; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: %indvars.iv.next = add nsw i64 %indvars.iv, -1
+; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit, !llvm.loop !0
+; CHECK-NEXT:  LV: Vector loop of width vscale x 4 costs: Invalid (assuming a minimum vscale of 1).
+; CHECK-NEXT:  LV: Instruction with invalid costs prevented vectorization at VF=(vscale x 1, vscale x 2, vscale x 4): load %1 = load i32, ptr %arrayidx, align 4
+; CHECK-NEXT:  LV: Interleaving disabled by the pass manager
+; CHECK-NEXT:  remark: <unknown>:0:0: Instruction with invalid costs prevented vectorization at VF=(vscale x 1, vscale x 2, vscale x 4): load
+; CHECK-NEXT:  LV: Instruction with invalid costs prevented vectorization at VF=(vscale x 1, vscale x 2, vscale x 4): store store i32 %add9, ptr %arrayidx3, align 4
+; CHECK-NEXT:  LV: Interleaving disabled by the pass manager
+; CHECK-NEXT:  remark: <unknown>:0:0: Instruction with invalid costs prevented vectorization at VF=(vscale x 1, vscale x 2, vscale x 4): store
+; CHECK-NEXT:  LV: Vectorization seems to be not beneficial, but was forced by a user.
+; CHECK-NEXT:  LV: Selecting VF: 4.
+; CHECK-NEXT:  LV(REG): Calculating max register usage:
+; CHECK-NEXT:  LV(REG): At #0 Interval # 0
+; CHECK-NEXT:  LV(REG): At #1 Interval # 1
+; CHECK-NEXT:  LV(REG): At #2 Interval # 2
+; CHECK-NEXT:  LV(REG): At #3 Interval # 2
+; CHECK-NEXT:  LV(REG): At #4 Interval # 2
+; CHECK-NEXT:  LV(REG): At #5 Interval # 3
+; CHECK-NEXT:  LV(REG): At #6 Interval # 3
+; CHECK-NEXT:  LV(REG): At #7 Interval # 3
+; CHECK-NEXT:  LV(REG): At #9 Interval # 1
+; CHECK-NEXT:  LV(REG): At #10 Interval # 2
+; CHECK-NEXT:  LV(REG): VF = 4
+; CHECK-NEXT:  LV(REG): Found max usage: 2 item
+; CHECK-NEXT:  LV(REG): RegisterClass: RISCV::GPRRC, 3 registers
+; CHECK-NEXT:  LV(REG): RegisterClass: RISCV::VRRC, 1 registers
+; CHECK-NEXT:  LV(REG): Found invariant usage: 1 item
+; CHECK-NEXT:  LV(REG): RegisterClass: RISCV::VRRC, 2 registers
+; CHECK-NEXT:  LV: The target has 31 registers of RISCV::GPRRC register class
+; CHECK-NEXT:  LV: The target has 32 registers of RISCV::VRRC register class
+; CHECK-NEXT:  LV: Loop cost is 27
+; CHECK-NEXT:  LV: IC is 2
+; CHECK-NEXT:  LV: VF is 4
+; CHECK-NEXT:  LV: Not Interleaving.
+; CHECK-NEXT:  LV: Interleaving is not beneficial.
+; CHECK-NEXT:  LV: Found a vectorizable loop (4) in <stdin>
+; CHECK-NEXT:  LEV: Epilogue vectorization is not profitable for this loop
+; CHECK-NEXT:  Executing best plan with VF=4, UF=1
+; CHECK-NEXT:  LV: Interleaving disabled by the pass manager
+;
 entry:
   %cmp7 = icmp sgt i32 %n, 0
   br i1 %cmp7, label %for.body.preheader, label %for.cond.cleanup
@@ -48,19 +315,286 @@ for.body:                                         ; preds = %for.body.preheader,
   br i1 %cmp, label %for.body, label %for.cond.cleanup, !llvm.loop !0
 }
 
-; CHECK-LABEL: vector_reverse_f32
-; CHECK: LV: Found an estimated cost of Invalid for VF vscale x 1 For instruction: %{{.*}} = load float, ptr %{{.*}}, align 4
-; CHECK: LV: Found an estimated cost of Invalid for VF vscale x 1 For instruction: store float %{{.*}}, ptr %{{.*}}, align 4
-; CHECK: LV: Found an estimated cost of Invalid for VF vscale x 2 For instruction: %{{.*}} = load float, ptr %{{.*}}, align 4
-; CHECK: LV: Found an estimated cost of Invalid for VF vscale x 2 For instruction: store float %{{.*}}, ptr %{{.*}}, align 4
-; CHECK: LV: Found an estimated cost of Invalid for VF vscale x 4 For instruction: %{{.*}} = load float, ptr %{{.*}}, align 4
-; CHECK: LV: Found an estimated cost of Invalid for VF vscale x 4 For instruction: store float %{{.*}}, ptr %{{.*}}, align 4
-; CHECK: LV: Instruction with invalid costs prevented vectorization at VF=(vscale x 1, vscale x 2, vscale x 4): load   %1 = load float, ptr %arrayidx, align 4
-; CHECK: remark: <unknown>:0:0: Instruction with invalid costs prevented vectorization at VF=(vscale x 1, vscale x 2, vscale x 4): load
-; CHECK: LV: Instruction with invalid costs prevented vectorization at VF=(vscale x 1, vscale x 2, vscale x 4): store   store float %conv1, ptr %arrayidx3, align 4
-; CHECK: remark: <unknown>:0:0: Instruction with invalid costs prevented vectorization at VF=(vscale x 1, vscale x 2, vscale x 4): store
-; CHECK: LV: Selecting VF: 4.
 define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocapture noundef readonly %B, i32 noundef signext %n) {
+; CHECK-LABEL: 'vector_reverse_f32'
+; CHECK-NEXT:  LV: Loop hints: force=enabled width=vscale x 4 interleave=0
+; CHECK-NEXT:  LV: Found a loop: for.body
+; CHECK-NEXT:  LV: Found an induction variable.
+; CHECK-NEXT:  LV: Found an induction variable.
+; CHECK-NEXT:  LV: Found FP op with unsafe algebra.
+; CHECK-NEXT:  LV: Did not find one integer induction var.
+; CHECK-NEXT:  LV: We can vectorize this loop (with a runtime bound check)!
+; CHECK-NEXT:  LV: Found trip count: 0
+; CHECK-NEXT:  LV: Scalable vectorization is available
+; CHECK-NEXT:  LV: The max safe fixed VF is: 67108864.
+; CHECK-NEXT:  LV: The max safe scalable VF is: vscale x 4294967295.
+; CHECK-NEXT:  LV: Found uniform instruction: %cmp = icmp ugt i64 %indvars.iv, 1
+; CHECK-NEXT:  LV: Found uniform instruction: %arrayidx = getelementptr inbounds float, ptr %B, i64 %idxprom
+; CHECK-NEXT:  LV: Found uniform instruction: %arrayidx3 = getelementptr inbounds float, ptr %A, i64 %idxprom
+; CHECK-NEXT:  LV: Found uniform instruction: %idxprom = zext i32 %i.0 to i64
+; CHECK-NEXT:  LV: Found uniform instruction: %idxprom = zext i32 %i.0 to i64
+; CHECK-NEXT:  LV: Found uniform instruction: %indvars.iv = phi i64 [ %0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+; CHECK-NEXT:  LV: Found uniform instruction: %indvars.iv.next = add nsw i64 %indvars.iv, -1
+; CHECK-NEXT:  LV: Found uniform instruction: %i.0.in8 = phi i32 [ %n, %for.body.preheader ], [ %i.0, %for.body ]
+; CHECK-NEXT:  LV: Found uniform instruction: %i.0 = add nsw i32 %i.0.in8, -1
+; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: %indvars.iv = phi i64 [ %0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: %i.0.in8 = phi i32 [ %n, %for.body.preheader ], [ %i.0, %for.body ]
+; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: %i.0 = add nsw i32 %i.0.in8, -1
+; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: %idxprom = zext i32 %i.0 to i64
+; CHECK-NEXT:  LV: Found an estimated cost of 0 for VF vscale x 4 For instruction: %arrayidx = getelementptr inbounds float, ptr %B, i64 %idxprom
+; CHECK-NEXT:  LV: Found an estimated cost of Invalid for VF vscale x 4 For instruction: %1 = load float, ptr %arrayidx, align 4
+; CHECK-NEXT:  LV: Found an estimated cost of 2 for VF vscale x 4 For instruction: %conv1 = fadd float %1, 1.000000e+00
+; CHECK-NEXT:  LV: Found an estimated cost of 0 for VF vscale x 4 For instruction: %arrayidx3 = getelementptr inbounds float, ptr %A, i64 %idxprom
+; CHECK-NEXT:  LV: Found an estimated cost of Invalid for VF vscale x 4 For instruction: store float %conv1, ptr %arrayidx3, align 4
+; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: %cmp = icmp ugt i64 %indvars.iv, 1
+; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: %indvars.iv.next = add nsw i64 %indvars.iv, -1
+; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit, !llvm.loop !0
+; CHECK-NEXT:  LV: UserVF ignored because of invalid costs..
+; CHECK-NEXT:  LV: Interleaving disabled by the pass manager
+; CHECK-NEXT:  remark: <unknown>:0:0: UserVF ignored because of invalid costs.
+; CHECK-NEXT:  LV: Found uniform instruction: %cmp = icmp ugt i64 %indvars.iv, 1
+; CHECK-NEXT:  LV: Found uniform instruction: %arrayidx = getelementptr inbounds float, ptr %B, i64 %idxprom
+; CHECK-NEXT:  LV: Found uniform instruction: %arrayidx3 = getelementptr inbounds float, ptr %A, i64 %idxprom
+; CHECK-NEXT:  LV: Found uniform instruction: %idxprom = zext i32 %i.0 to i64
+; CHECK-NEXT:  LV: Found uniform instruction: %idxprom = zext i32 %i.0 to i64
+; CHECK-NEXT:  LV: Found uniform instruction: %indvars.iv = phi i64 [ %0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+; CHECK-NEXT:  LV: Found uniform instruction: %indvars.iv.next = add nsw i64 %indvars.iv, -1
+; CHECK-NEXT:  LV: Found uniform instruction: %i.0.in8 = phi i32 [ %n, %for.body.preheader ], [ %i.0, %for.body ]
+; CHECK-NEXT:  LV: Found uniform instruction: %i.0 = add nsw i32 %i.0.in8, -1
+; CHECK-NEXT:  LV: Found scalar instruction: %indvars.iv = phi i64 [ %0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+; CHECK-NEXT:  LV: Found scalar instruction: %indvars.iv.next = add nsw i64 %indvars.iv, -1
+; CHECK-NEXT:  LV: Found scalar instruction: %i.0.in8 = phi i32 [ %n, %for.body.preheader ], [ %i.0, %for.body ]
+; CHECK-NEXT:  LV: Found scalar instruction: %i.0 = add nsw i32 %i.0.in8, -1
+; CHECK-NEXT:  LV: Found uniform instruction: %cmp = icmp ugt i64 %indvars.iv, 1
+; CHECK-NEXT:  LV: Found uniform instruction: %arrayidx = getelementptr inbounds float, ptr %B, i64 %idxprom
+; CHECK-NEXT:  LV: Found uniform instruction: %arrayidx3 = getelementptr inbounds float, ptr %A, i64 %idxprom
+; CHECK-NEXT:  LV: Found uniform instruction: %idxprom = zext i32 %i.0 to i64
+; CHECK-NEXT:  LV: Found uniform instruction: %idxprom = zext i32 %i.0 to i64
+; CHECK-NEXT:  LV: Found uniform instruction: %indvars.iv = phi i64 [ %0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+; CHECK-NEXT:  LV: Found uniform instruction: %indvars.iv.next = add nsw i64 %indvars.iv, -1
+; CHECK-NEXT:  LV: Found uniform instruction: %i.0.in8 = phi i32 [ %n, %for.body.preheader ], [ %i.0, %for.body ]
+; CHECK-NEXT:  LV: Found uniform instruction: %i.0 = add nsw i32 %i.0.in8, -1
+; CHECK-NEXT:  LV: Found scalar instruction: %indvars.iv = phi i64 [ %0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+; CHECK-NEXT:  LV: Found scalar instruction: %indvars.iv.next = add nsw i64 %indvars.iv, -1
+; CHECK-NEXT:  LV: Found scalar instruction: %i.0.in8 = phi i32 [ %n, %for.body.preheader ], [ %i.0, %for.body ]
+; CHECK-NEXT:  LV: Found scalar instruction: %i.0 = add nsw i32 %i.0.in8, -1
+; CHECK-NEXT:  LV: Found uniform instruction: %cmp = icmp ugt i64 %indvars.iv, 1
+; CHECK-NEXT:  LV: Found uniform instruction: %arrayidx = getelementptr inbounds float, ptr %B, i64 %idxprom
+; CHECK-NEXT:  LV: Found uniform instruction: %arrayidx3 = getelementptr inbounds float, ptr %A, i64 %idxprom
+; CHECK-NEXT:  LV: Found uniform instruction: %idxprom = zext i32 %i.0 to i64
+; CHECK-NEXT:  LV: Found uniform instruction: %idxprom = zext i32 %i.0 to i64
+; CHECK-NEXT:  LV: Found uniform instruction: %indvars.iv = phi i64 [ %0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+; CHECK-NEXT:  LV: Found uniform instruction: %indvars.iv.next = add nsw i64 %indvars.iv, -1
+; CHECK-NEXT:  LV: Found uniform instruction: %i.0.in8 = phi i32 [ %n, %for.body.preheader ], [ %i.0, %for.body ]
+; CHECK-NEXT:  LV: Found uniform instruction: %i.0 = add nsw i32 %i.0.in8, -1
+; CHECK-NEXT:  LV: Found uniform instruction: %cmp = icmp ugt i64 %indvars.iv, 1
+; CHECK-NEXT:  LV: Found uniform instruction: %arrayidx = getelementptr inbounds float, ptr %B, i64 %idxprom
+; CHECK-NEXT:  LV: Found uniform instruction: %arrayidx3 = getelementptr inbounds float, ptr %A, i64 %idxprom
+; CHECK-NEXT:  LV: Found uniform instruction: %idxprom = zext i32 %i.0 to i64
+; CHECK-NEXT:  LV: Found uniform instruction: %idxprom = zext i32 %i.0 to i64
+; CHECK-NEXT:  LV: Found uniform instruction: %indvars.iv = phi i64 [ %0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+; CHECK-NEXT:  LV: Found uniform instruction: %indvars.iv.next = add nsw i64 %indvars.iv, -1
+; CHECK-NEXT:  LV: Found uniform instruction: %i.0.in8 = phi i32 [ %n, %for.body.preheader ], [ %i.0, %for.body ]
+; CHECK-NEXT:  LV: Found uniform instruction: %i.0 = add nsw i32 %i.0.in8, -1
+; CHECK-NEXT:  LV: Scalarizing: %i.0 = add nsw i32 %i.0.in8, -1
+; CHECK-NEXT:  LV: Scalarizing: %idxprom = zext i32 %i.0 to i64
+; CHECK-NEXT:  LV: Scalarizing: %arrayidx = getelementptr inbounds float, ptr %B, i64 %idxprom
+; CHECK-NEXT:  LV: Scalarizing: %1 = load float, ptr %arrayidx, align 4
+; CHECK-NEXT:  LV: Scalarizing: %conv1 = fadd float %1, 1.000000e+00
+; CHECK-NEXT:  LV: Scalarizing: %arrayidx3 = getelementptr inbounds float, ptr %A, i64 %idxprom
+; CHECK-NEXT:  LV: Scalarizing: store float %conv1, ptr %arrayidx3, align 4
+; CHECK-NEXT:  LV: Scalarizing: %i.0 = add nsw i32 %i.0.in8, -1
+; CHECK-NEXT:  LV: Scalarizing: %idxprom = zext i32 %i.0 to i64
+; CHECK-NEXT:  LV: Scalarizing: %arrayidx = getelementptr inbounds float, ptr %B, i64 %idxprom
+; CHECK-NEXT:  LV: Scalarizing: %arrayidx3 = getelementptr inbounds float, ptr %A, i64 %idxprom
+; CHECK-NEXT:  LV: Scalarizing: %i.0 = add nsw i32 %i.0.in8, -1
+; CHECK-NEXT:  LV: Scalarizing: %idxprom = zext i32 %i.0 to i64
+; CHECK-NEXT:  LV: Scalarizing: %arrayidx = getelementptr inbounds float, ptr %B, i64 %idxprom
+; CHECK-NEXT:  LV: Scalarizing: %arrayidx3 = getelementptr inbounds float, ptr %A, i64 %idxprom
+; CHECK-NEXT:  VPlan 'Initial VPlan for VF={1},UF>=1' {
+; CHECK-NEXT:  Live-in vp<%2> = vector-trip-count
+; CHECK:       vector.ph:
+; CHECK-NEXT:  Successor(s): vector loop
+; CHECK:       <x1> vector loop: {
+; CHECK-NEXT:    vector.body:
+; CHECK-NEXT:    EMIT vp<%3> = CANONICAL-INDUCTION
+; CHECK-NEXT:    vp<%4> = SCALAR-STEPS vp<%3>, ir<%n>, ir<-1>
+; CHECK-NEXT:    CLONE ir<%i.0> = add vp<%4>, ir<-1>
+; CHECK-NEXT:    CLONE ir<%idxprom> = zext ir<%i.0>
+; CHECK-NEXT:    CLONE ir<%arrayidx> = getelementptr ir<%B>, ir<%idxprom>
+; CHECK-NEXT:    CLONE ir<%1> = load ir<%arrayidx>
+; CHECK-NEXT:    CLONE ir<%conv1> = fadd ir<%1>, ir<1.000000e+00>
+; CHECK-NEXT:    CLONE ir<%arrayidx3> = getelementptr ir<%A>, ir<%idxprom>
+; CHECK-NEXT:    CLONE store ir<%conv1>, ir<%arrayidx3>
+; CHECK-NEXT:    EMIT vp<%12> = VF * UF +(nuw) vp<%3>
+; CHECK-NEXT:    EMIT branch-on-count vp<%12> vp<%2>
+; CHECK-NEXT:    No successors
+; CHECK-NEXT:  }
+; CHECK-NEXT:  Successor(s): middle.block
+; CHECK:       middle.block:
+; CHECK-NEXT:  No successors
+; CHECK-NEXT:  }
+; CHECK-NEXT:  VPlan 'Initial VPlan for VF={2,4},UF>=1' {
+; CHECK-NEXT:  Live-in vp<%2> = vector-trip-count
+; CHECK:       vector.ph:
+; CHECK-NEXT:  Successor(s): vector loop
+; CHECK:       <x1> vector loop: {
+; CHECK-NEXT:    vector.body:
+; CHECK-NEXT:    EMIT vp<%3> = CANONICAL-INDUCTION
+; CHECK-NEXT:    vp<%4> = SCALAR-STEPS vp<%3>, ir<%n>, ir<-1>
+; CHECK-NEXT:    CLONE ir<%i.0> = add vp<%4>, ir<-1>
+; CHECK-NEXT:    CLONE ir<%idxprom> = zext ir<%i.0>
+; CHECK-NEXT:    CLONE ir<%arrayidx> = getelementptr ir<%B>, ir<%idxprom>
+; CHECK-NEXT:    WIDEN ir<%1> = load ir<%arrayidx>
+; CHECK-NEXT:    WIDEN ir<%conv1> = fadd ir<%1>, ir<1.000000e+00>
+; CHECK-NEXT:    CLONE ir<%arrayidx3> = getelementptr ir<%A>, ir<%idxprom>
+; CHECK-NEXT:    WIDEN store ir<%arrayidx3>, ir<%conv1>
+; CHECK-NEXT:    EMIT vp<%11> = VF * UF +(nuw) vp<%3>
+; CHECK-NEXT:    EMIT branch-on-count vp<%11> vp<%2>
+; CHECK-NEXT:    No successors
+; CHECK-NEXT:  }
+; CHECK-NEXT:  Successor(s): middle.block
+; CHECK:       middle.block:
+; CHECK-NEXT:  No successors
+; CHECK-NEXT:  }
+; CHECK-NEXT:  VPlan 'Initial VPlan for VF={vscale x 1,vscale x 2,vscale x 4},UF>=1' {
+; CHECK-NEXT:  Live-in vp<%2> = vector-trip-count
+; CHECK:       vector.ph:
+; CHECK-NEXT:  Successor(s): vector loop
+; CHECK:       <x1> vector loop: {
+; CHECK-NEXT:    vector.body:
+; CHECK-NEXT:    EMIT vp<%3> = CANONICAL-INDUCTION
+; CHECK-NEXT:    vp<%4> = SCALAR-STEPS vp<%3>, ir<%n>, ir<-1>
+; CHECK-NEXT:    CLONE ir<%i.0> = add vp<%4>, ir<-1>
+; CHECK-NEXT:    CLONE ir<%idxprom> = zext ir<%i.0>
+; CHECK-NEXT:    CLONE ir<%arrayidx> = getelementptr ir<%B>, ir<%idxprom>
+; CHECK-NEXT:    WIDEN ir<%1> = load ir<%arrayidx>
+; CHECK-NEXT:    WIDEN ir<%conv1> = fadd ir<%1>, ir<1.000000e+00>
+; CHECK-NEXT:    CLONE ir<%arrayidx3> = getelementptr ir<%A>, ir<%idxprom>
+; CHECK-NEXT:    WIDEN store ir<%arrayidx3>, ir<%conv1>
+; CHECK-NEXT:    EMIT vp<%11> = VF * UF +(nuw) vp<%3>
+; CHECK-NEXT:    EMIT branch-on-count vp<%11> vp<%2>
+; CHECK-NEXT:    No successors
+; CHECK-NEXT:  }
+; CHECK-NEXT:  Successor(s): middle.block
+; CHECK:       middle.block:
+; CHECK-NEXT:  No successors
+; CHECK-NEXT:  }
+; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF 1 For instruction: %indvars.iv = phi i64 [ %0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF 1 For instruction: %i.0.in8 = phi i32 [ %n, %for.body.preheader ], [ %i.0, %for.body ]
+; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF 1 For instruction: %i.0 = add nsw i32 %i.0.in8, -1
+; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF 1 For instruction: %idxprom = zext i32 %i.0 to i64
+; CHECK-NEXT:  LV: Found an estimated cost of 0 for VF 1 For instruction: %arrayidx = getelementptr inbounds float, ptr %B, i64 %idxprom
+; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF 1 For instruction: %1 = load float, ptr %arrayidx, align 4
+; CHECK-NEXT:  LV: Found an estimated cost of 2 for VF 1 For instruction: %conv1 = fadd float %1, 1.000000e+00
+; CHECK-NEXT:  LV: Found an estimated cost of 0 for VF 1 For instruction: %arrayidx3 = getelementptr inbounds float, ptr %A, i64 %idxprom
+; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF 1 For instruction: store float %conv1, ptr %arrayidx3, align 4
+; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF 1 For instruction: %cmp = icmp ugt i64 %indvars.iv, 1
+; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF 1 For instruction: %indvars.iv.next = add nsw i64 %indvars.iv, -1
+; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF 1 For instruction: br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit, !llvm.loop !0
+; CHECK-NEXT:  LV: Scalar loop costs: 11.
+; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF 2 For instruction: %indvars.iv = phi i64 [ %0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF 2 For instruction: %i.0.in8 = phi i32 [ %n, %for.body.preheader ], [ %i.0, %for.body ]
+; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF 2 For instruction: %i.0 = add nsw i32 %i.0.in8, -1
+; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF 2 For instruction: %idxprom = zext i32 %i.0 to i64
+; CHECK-NEXT:  LV: Found an estimated cost of 0 for VF 2 For instruction: %arrayidx = getelementptr inbounds float, ptr %B, i64 %idxprom
+; CHECK-NEXT:  LV: Found an estimated cost of 5 for VF 2 For instruction: %1 = load float, ptr %arrayidx, align 4
+; CHECK-NEXT:  LV: Found an estimated cost of 4 for VF 2 For instruction: %conv1 = fadd float %1, 1.000000e+00
+; CHECK-NEXT:  LV: Found an estimated cost of 0 for VF 2 For instruction: %arrayidx3 = getelementptr inbounds float, ptr %A, i64 %idxprom
+; CHECK-NEXT:  LV: Found an estimated cost of 5 for VF 2 For instruction: store float %conv1, ptr %arrayidx3, align 4
+; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF 2 For instruction: %cmp = icmp ugt i64 %indvars.iv, 1
+; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF 2 For instruction: %indvars.iv.next = add nsw i64 %indvars.iv, -1
+; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF 2 For instruction: br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit, !llvm.loop !0
+; CHECK-NEXT:  LV: Vector loop of width 2 costs: 10.
+; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF 4 For instruction: %indvars.iv = phi i64 [ %0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF 4 For instruction: %i.0.in8 = phi i32 [ %n, %for.body.preheader ], [ %i.0, %for.body ]
+; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF 4 For instruction: %i.0 = add nsw i32 %i.0.in8, -1
+; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF 4 For instruction: %idxprom = zext i32 %i.0 to i64
+; CHECK-NEXT:  LV: Found an estimated cost of 0 for VF 4 For instruction: %arrayidx = getelementptr inbounds float, ptr %B, i64 %idxprom
+; CHECK-NEXT:  LV: Found an estimated cost of 9 for VF 4 For instruction: %1 = load float, ptr %arrayidx, align 4
+; CHECK-NEXT:  LV: Found an estimated cost of 4 for VF 4 For instruction: %conv1 = fadd float %1, 1.000000e+00
+; CHECK-NEXT:  LV: Found an estimated cost of 0 for VF 4 For instruction: %arrayidx3 = getelementptr inbounds float, ptr %A, i64 %idxprom
+; CHECK-NEXT:  LV: Found an estimated cost of 9 for VF 4 For instruction: store float %conv1, ptr %arrayidx3, align 4
+; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF 4 For instruction: %cmp = icmp ugt i64 %indvars.iv, 1
+; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF 4 For instruction: %indvars.iv.next = add nsw i64 %indvars.iv, -1
+; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF 4 For instruction: br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit, !llvm.loop !0
+; CHECK-NEXT:  LV: Vector loop of width 4 costs: 7.
+; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF vscale x 1 For instruction: %indvars.iv = phi i64 [ %0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF vscale x 1 For instruction: %i.0.in8 = phi i32 [ %n, %for.body.preheader ], [ %i.0, %for.body ]
+; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF vscale x 1 For instruction: %i.0 = add nsw i32 %i.0.in8, -1
+; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF vscale x 1 For instruction: %idxprom = zext i32 %i.0 to i64
+; CHECK-NEXT:  LV: Found an estimated cost of 0 for VF vscale x 1 For instruction: %arrayidx = getelementptr inbounds float, ptr %B, i64 %idxprom
+; CHECK-NEXT:  LV: Found an estimated cost of Invalid for VF vscale x 1 For instruction: %1 = load float, ptr %arrayidx, align 4
+; CHECK-NEXT:  LV: Found an estimated cost of 2 for VF vscale x 1 For instruction: %conv1 = fadd float %1, 1.000000e+00
+; CHECK-NEXT:  LV: Found an estimated cost of 0 for VF vscale x 1 For instruction: %arrayidx3 = getelementptr inbounds float, ptr %A, i64 %idxprom
+; CHECK-NEXT:  LV: Found an estimated cost of Invalid for VF vscale x 1 For instruction: store float %conv1, ptr %arrayidx3, align 4
+; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF vscale x 1 For instruction: %cmp = icmp ugt i64 %indvars.iv, 1
+; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF vscale x 1 For instruction: %indvars.iv.next = add nsw i64 %indvars.iv, -1
+; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF vscale x 1 For instruction: br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit, !llvm.loop !0
+; CHECK-NEXT:  LV: Vector loop of width vscale x 1 costs: Invalid (assuming a minimum vscale of 1).
+; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF vscale x 2 For instruction: %indvars.iv = phi i64 [ %0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF vscale x 2 For instruction: %i.0.in8 = phi i32 [ %n, %for.body.preheader ], [ %i.0, %for.body ]
+; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF vscale x 2 For instruction: %i.0 = add nsw i32 %i.0.in8, -1
+; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF vscale x 2 For instruction: %idxprom = zext i32 %i.0 to i64
+; CHECK-NEXT:  LV: Found an estimated cost of 0 for VF vscale x 2 For instruction: %arrayidx = getelementptr inbounds float, ptr %B, i64 %idxprom
+; CHECK-NEXT:  LV: Found an estimated cost of Invalid for VF vscale x 2 For instruction: %1 = load float, ptr %arrayidx, align 4
+; CHECK-NEXT:  LV: Found an estimated cost of 2 for VF vscale x 2 For instruction: %conv1 = fadd float %1, 1.000000e+00
+; CHECK-NEXT:  LV: Found an estimated cost of 0 for VF vscale x 2 For instruction: %arrayidx3 = getelementptr inbounds float, ptr %A, i64 %idxprom
+; CHECK-NEXT:  LV: Found an estimated cost of Invalid for VF vscale x 2 For instruction: store float %conv1, ptr %arrayidx3, align 4
+; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF vscale x 2 For instruction: %cmp = icmp ugt i64 %indvars.iv, 1
+; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF vscale x 2 For instruction: %indvars.iv.next = add nsw i64 %indvars.iv, -1
+; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF vscale x 2 For instruction: br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit, !llvm.loop !0
+; CHECK-NEXT:  LV: Vector loop of width vscale x 2 costs: Invalid (assuming a minimum vscale of 1).
+; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: %indvars.iv = phi i64 [ %0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: %i.0.in8 = phi i32 [ %n, %for.body.preheader ], [ %i.0, %for.body ]
+; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: %i.0 = add nsw i32 %i.0.in8, -1
+; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: %idxprom = zext i32 %i.0 to i64
+; CHECK-NEXT:  LV: Found an estimated cost of 0 for VF vscale x 4 For instruction: %arrayidx = getelementptr inbounds float, ptr %B, i64 %idxprom
+; CHECK-NEXT:  LV: Found an estimated cost of Invalid for VF vscale x 4 For instruction: %1 = load float, ptr %arrayidx, align 4
+; CHECK-NEXT:  LV: Found an estimated cost of 2 for VF vscale x 4 For instruction: %conv1 = fadd float %1, 1.000000e+00
+; CHECK-NEXT:  LV: Found an estimated cost of 0 for VF vscale x 4 For instruction: %arrayidx3 = getelementptr inbounds float, ptr %A, i64 %idxprom
+; CHECK-NEXT:  LV: Found an estimated cost of Invalid for VF vscale x 4 For instruction: store float %conv1, ptr %arrayidx3, align 4
+; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: %cmp = icmp ugt i64 %indvars.iv, 1
+; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: %indvars.iv.next = add nsw i64 %indvars.iv, -1
+; CHECK-NEXT:  LV: Found an estimated cost of 1 for VF vscale x 4 For instruction: br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit, !llvm.loop !0
+; CHECK-NEXT:  LV: Vector loop of width vscale x 4 costs: Invalid (assuming a minimum vscale of 1).
+; CHECK-NEXT:  LV: Instruction with invalid costs prevented vectorization at VF=(vscale x 1, vscale x 2, vscale x 4): load %1 = load float, ptr %arrayidx, align 4
+; CHECK-NEXT:  LV: Interleaving disabled by the pass manager
+; CHECK-NEXT:  remark: <unknown>:0:0: Instruction with invalid costs prevented vectorization at VF=(vscale x 1, vscale x 2, vscale x 4): load
+; CHECK-NEXT:  LV: Instruction with invalid costs prevented vectorization at VF=(vscale x 1, vscale x 2, vscale x 4): store store float %conv1, ptr %arrayidx3, align 4
+; CHECK-NEXT:  LV: Interleaving disabled by the pass manager
+; CHECK-NEXT:  remark: <unknown>:0:0: Instruction with invalid costs prevented vectorization at VF=(vscale x 1, vscale x 2, vscale x 4): store
+; CHECK-NEXT:  LV: Vectorization seems to be not beneficial, but was forced by a user.
+; CHECK-NEXT:  LV: Selecting VF: 4.
+; CHECK-NEXT:  LV(REG): Calculating max register usage:
+; CHECK-NEXT:  LV(REG): At #0 Interval # 0
+; CHECK-NEXT:  LV(REG): At #1 Interval # 1
+; CHECK-NEXT:  LV(REG): At #2 Interval # 2
+; CHECK-NEXT:  LV(REG): At #3 Interval # 2
+; CHECK-NEXT:  LV(REG): At #4 Interval # 2
+; CHECK-NEXT:  LV(REG): At #5 Interval # 3
+; CHECK-NEXT:  LV(REG): At #6 Interval # 3
+; CHECK-NEXT:  LV(REG): At #7 Interval # 3
+; CHECK-NEXT:  LV(REG): At #9 Interval # 1
+; CHECK-NEXT:  LV(REG): At #10 Interval # 2
+; CHECK-NEXT:  LV(REG): VF = 4
+; CHECK-NEXT:  LV(REG): Found max usage: 2 item
+; CHECK-NEXT:  LV(REG): RegisterClass: RISCV::GPRRC, 3 registers
+; CHECK-NEXT:  LV(REG): RegisterClass: RISCV::VRRC, 1 registers
+; CHECK-NEXT:  LV(REG): Found invariant usage: 1 item
+; CHECK-NEXT:  LV(REG): RegisterClass: RISCV::VRRC, 2 registers
+; CHECK-NEXT:  LV: The target has 31 registers of RISCV::GPRRC register class
+; CHECK-NEXT:  LV: The target has 32 registers of RISCV::VRRC register class
+; CHECK-NEXT:  LV: Loop cost is 29
+; CHECK-NEXT:  LV: IC is 2
+; CHECK-NEXT:  LV: VF is 4
+; CHECK-NEXT:  LV: Not Interleaving.
+; CHECK-NEXT:  LV: Interleaving is not beneficial.
+; CHECK-NEXT:  LV: Found a vectorizable loop (4) in <stdin>
+; CHECK-NEXT:  LEV: Epilogue vectorization is not profitable for this loop
+; CHECK-NEXT:  Executing best plan with VF=4, UF=1
+; CHECK-NEXT:  LV: Interleaving disabled by the pass manager
+;
 entry:
   %cmp7 = icmp sgt i32 %n, 0
   br i1 %cmp7, label %for.body.preheader, label %for.cond.cleanup