[llvm] [RISCV] Don't vectorize for loops with small trip count (PR #132176)
Pengcheng Wang via llvm-commits
llvm-commits at lists.llvm.org
Thu Mar 20 03:04:30 PDT 2025
https://github.com/wangpc-pp created https://github.com/llvm/llvm-project/pull/132176
Inspired by https://reviews.llvm.org/D130755.
I don't know the logic behind the value 5, it is copied from AArch64.
For some tests, I have to change the trip count so that we don't
break what they are testing.
>From 8d8b74f03cd21b54a96b4df3c3166cac3928d81b Mon Sep 17 00:00:00 2001
From: Wang Pengcheng <wangpengcheng.pp at bytedance.com>
Date: Thu, 20 Mar 2025 18:00:03 +0800
Subject: [PATCH] [RISCV] Don't vectorize for loops with small trip count
Inspired by https://reviews.llvm.org/D130755.
I don't know the logic behind the value 5, it is copied from AArch64.
For some tests, I have to change the trip count so that we don't
break what they are testing.
---
.../Target/RISCV/RISCVTargetTransformInfo.h | 4 +
.../LoopVectorize/RISCV/low-trip-count.ll | 82 ++-------
.../Transforms/LoopVectorize/RISCV/pr88802.ll | 172 +++++++++++++-----
.../LoopVectorize/RISCV/short-trip-count.ll | 58 +-----
.../truncate-to-minimal-bitwidth-cost.ll | 57 ++++--
.../truncate-to-minimal-bitwidth-evl-crash.ll | 54 +++---
6 files changed, 217 insertions(+), 210 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
index 8ffe1b08d1e26..3982f30da01a7 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
@@ -388,6 +388,10 @@ class RISCVTTIImpl : public BasicTTIImplBase<RISCVTTIImpl> {
bool enableInterleavedAccessVectorization() { return true; }
+ unsigned getMinTripCountTailFoldingThreshold() const {
+ return ST->hasVInstructions() ? 5 : 0;
+ }
+
enum RISCVRegisterClass { GPRRC, FPRRC, VRRC };
unsigned getNumberOfRegisters(unsigned ClassID) const {
switch (ClassID) {
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll b/llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll
index 10ac870c112ae..852e7a18ecbb8 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/low-trip-count.ll
@@ -45,47 +45,19 @@ for.end: ; preds = %for.body
define void @trip3_i8(ptr noalias nocapture noundef %dst, ptr noalias nocapture noundef readonly %src) #0 {
; CHECK-LABEL: @trip3_i8(
; CHECK-NEXT: entry:
-; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
-; CHECK: vector.ph:
-; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2
-; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1
-; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 3, [[TMP2]]
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
-; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 2
-; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
-; CHECK: vector.body:
-; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 3)
-; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[DST:%.*]], i64 0
-; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i32 0
-; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 2 x i8> @llvm.masked.load.nxv2i8.p0(ptr [[TMP9]], i32 1, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i8> poison)
-; CHECK-NEXT: [[TMP10:%.*]] = shl <vscale x 2 x i8> [[WIDE_MASKED_LOAD]], splat (i8 1)
-; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[DST1:%.*]], i64 0
-; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP11]], i32 0
-; CHECK-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call <vscale x 2 x i8> @llvm.masked.load.nxv2i8.p0(ptr [[TMP12]], i32 1, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]], <vscale x 2 x i8> poison)
-; CHECK-NEXT: [[TMP13:%.*]] = add <vscale x 2 x i8> [[TMP10]], [[WIDE_MASKED_LOAD1]]
-; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP11]], i32 0
-; CHECK-NEXT: call void @llvm.masked.store.nxv2i8.p0(<vscale x 2 x i8> [[TMP13]], ptr [[TMP14]], i32 1, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
-; CHECK-NEXT: br label [[MIDDLE_BLOCK:%.*]]
-; CHECK: middle.block:
-; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
-; CHECK: scalar.ph:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
-; CHECK-NEXT: [[I_08:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[I_08]]
+; CHECK-NEXT: [[I_08:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[DST:%.*]], i64 [[I_08]]
; CHECK-NEXT: [[TMP15:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; CHECK-NEXT: [[MUL:%.*]] = shl i8 [[TMP15]], 1
-; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[DST1]], i64 [[I_08]]
+; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[DST1:%.*]], i64 [[I_08]]
; CHECK-NEXT: [[TMP16:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
; CHECK-NEXT: [[ADD:%.*]] = add i8 [[MUL]], [[TMP16]]
; CHECK-NEXT: store i8 [[ADD]], ptr [[ARRAYIDX1]], align 1
; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I_08]], 1
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 3
-; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; CHECK: for.end:
; CHECK-NEXT: ret void
;
@@ -112,47 +84,19 @@ for.end: ; preds = %for.body
define void @trip5_i8(ptr noalias nocapture noundef %dst, ptr noalias nocapture noundef readonly %src) #0 {
; CHECK-LABEL: @trip5_i8(
; CHECK-NEXT: entry:
-; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
-; CHECK: vector.ph:
-; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
-; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1
-; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 5, [[TMP2]]
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
-; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 4
-; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
-; CHECK: vector.body:
-; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 5)
-; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[DST:%.*]], i64 0
-; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[TMP8]], i32 0
-; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0(ptr [[TMP9]], i32 1, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i8> poison)
-; CHECK-NEXT: [[TMP10:%.*]] = shl <vscale x 4 x i8> [[WIDE_MASKED_LOAD]], splat (i8 1)
-; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[DST1:%.*]], i64 0
-; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP11]], i32 0
-; CHECK-NEXT: [[WIDE_MASKED_LOAD1:%.*]] = call <vscale x 4 x i8> @llvm.masked.load.nxv4i8.p0(ptr [[TMP12]], i32 1, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i8> poison)
-; CHECK-NEXT: [[TMP13:%.*]] = add <vscale x 4 x i8> [[TMP10]], [[WIDE_MASKED_LOAD1]]
-; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP11]], i32 0
-; CHECK-NEXT: call void @llvm.masked.store.nxv4i8.p0(<vscale x 4 x i8> [[TMP13]], ptr [[TMP14]], i32 1, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]])
-; CHECK-NEXT: br label [[MIDDLE_BLOCK:%.*]]
-; CHECK: middle.block:
-; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
-; CHECK: scalar.ph:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
-; CHECK-NEXT: [[I_08:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
-; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[DST]], i64 [[I_08]]
+; CHECK-NEXT: [[I_08:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[DST:%.*]], i64 [[I_08]]
; CHECK-NEXT: [[TMP15:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
; CHECK-NEXT: [[MUL:%.*]] = shl i8 [[TMP15]], 1
-; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[DST1]], i64 [[I_08]]
+; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[DST1:%.*]], i64 [[I_08]]
; CHECK-NEXT: [[TMP16:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1
; CHECK-NEXT: [[ADD:%.*]] = add i8 [[MUL]], [[TMP16]]
; CHECK-NEXT: store i8 [[ADD]], ptr [[ARRAYIDX1]], align 1
; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I_08]], 1
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 5
-; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; CHECK: for.end:
; CHECK-NEXT: ret void
;
@@ -219,7 +163,7 @@ define void @trip8_i8(ptr noalias nocapture noundef %dst, ptr noalias nocapture
; CHECK-NEXT: store i8 [[ADD]], ptr [[ARRAYIDX1]], align 1
; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I_08]], 1
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 8
-; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: for.end:
; CHECK-NEXT: ret void
;
@@ -277,7 +221,7 @@ define void @trip16_i8(ptr noalias nocapture noundef %dst, ptr noalias nocapture
; CHECK-NEXT: store i8 [[ADD]], ptr [[ARRAYIDX1]], align 1
; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I_08]], 1
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 16
-; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: for.end:
; CHECK-NEXT: ret void
;
@@ -336,7 +280,7 @@ define void @trip32_i8(ptr noalias nocapture noundef %dst, ptr noalias nocapture
; CHECK-NEXT: store i8 [[ADD]], ptr [[ARRAYIDX1]], align 1
; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I_08]], 1
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 32
-; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: for.end:
; CHECK-NEXT: ret void
;
@@ -380,7 +324,7 @@ define void @trip24_i8(ptr noalias nocapture noundef %dst, ptr noalias nocapture
; CHECK-NEXT: store <8 x i8> [[TMP6]], ptr [[TMP5]], align 1
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 24
-; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
@@ -397,7 +341,7 @@ define void @trip24_i8(ptr noalias nocapture noundef %dst, ptr noalias nocapture
; CHECK-NEXT: store i8 [[ADD]], ptr [[ARRAYIDX1]], align 1
; CHECK-NEXT: [[INC]] = add nuw nsw i64 [[I_08]], 1
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], 24
-; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
+; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK: for.end:
; CHECK-NEXT: ret void
;
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll b/llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll
index da859963a7021..4f05292c6fb94 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll
@@ -5,65 +5,149 @@ define void @test(ptr %p, i64 %a, i8 %b) {
; CHECK-LABEL: define void @test(
; CHECK-SAME: ptr [[P:%.*]], i64 [[A:%.*]], i8 [[B:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: entry:
-; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK-NEXT: br i1 false, label [[SCALAR_PH1:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i8> poison, i8 [[B]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT1]], <4 x i8> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[A]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP2:%.*]] = shl <4 x i64> [[BROADCAST_SPLAT]], splat (i64 48)
-; CHECK-NEXT: [[TMP3:%.*]] = ashr <4 x i64> [[TMP2]], splat (i64 52)
-; CHECK-NEXT: [[TMP4:%.*]] = trunc <4 x i64> [[TMP3]] to <4 x i32>
-; CHECK-NEXT: [[TMP6:%.*]] = zext <4 x i8> [[BROADCAST_SPLAT2]] to <4 x i32>
-; CHECK-NEXT: br label [[VECTOR_BODY1:%.*]]
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i8> poison, i8 [[B]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i8> [[BROADCAST_SPLATINSERT]], <16 x i8> poison, <16 x i32> zeroinitializer
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <16 x i64> poison, i64 [[A]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <16 x i64> [[BROADCAST_SPLATINSERT1]], <16 x i64> poison, <16 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP0:%.*]] = shl <16 x i64> [[BROADCAST_SPLAT2]], splat (i64 48)
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i64> [[TMP0]], splat (i64 52)
+; CHECK-NEXT: [[TMP2:%.*]] = trunc <16 x i64> [[TMP1]] to <16 x i32>
+; CHECK-NEXT: [[TMP3:%.*]] = zext <16 x i8> [[BROADCAST_SPLAT]] to <16 x i32>
+; CHECK-NEXT: br label [[FOR_COND:%.*]]
; CHECK: vector.body:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE8:%.*]] ]
-; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE8]] ]
-; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0
-; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[TMP0]], i32 3)
-; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <4 x i32> [[VEC_IND]], splat (i32 2)
-; CHECK-NEXT: [[TMP5:%.*]] = select <4 x i1> [[ACTIVE_LANE_MASK]], <4 x i1> [[TMP1]], <4 x i1> zeroinitializer
-; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP5]], <4 x i32> [[TMP6]], <4 x i32> [[TMP4]]
-; CHECK-NEXT: [[TMP7:%.*]] = shl <4 x i32> [[PREDPHI]], splat (i32 8)
-; CHECK-NEXT: [[TMP8:%.*]] = trunc <4 x i32> [[TMP7]] to <4 x i8>
-; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 0
-; CHECK-NEXT: br i1 [[TMP9]], label [[PRED_STORE_IF:%.*]], label [[VECTOR_BODY:%.*]]
+; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE32:%.*]] ]
+; CHECK-NEXT: [[VEC_IND:%.*]] = phi <16 x i32> [ <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE32]] ]
+; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 0
+; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 [[TMP4]], i32 9)
+; CHECK-NEXT: [[TMP5:%.*]] = icmp slt <16 x i32> [[VEC_IND]], splat (i32 2)
+; CHECK-NEXT: [[TMP6:%.*]] = select <16 x i1> [[ACTIVE_LANE_MASK]], <16 x i1> [[TMP5]], <16 x i1> zeroinitializer
+; CHECK-NEXT: [[PREDPHI:%.*]] = select <16 x i1> [[TMP6]], <16 x i32> [[TMP3]], <16 x i32> [[TMP2]]
+; CHECK-NEXT: [[TMP7:%.*]] = shl <16 x i32> [[PREDPHI]], splat (i32 8)
+; CHECK-NEXT: [[TMP8:%.*]] = trunc <16 x i32> [[TMP7]] to <16 x i8>
+; CHECK-NEXT: [[TMP9:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 0
+; CHECK-NEXT: br i1 [[TMP9]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; CHECK: pred.store.if:
-; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i8> [[TMP8]], i32 0
-; CHECK-NEXT: store i8 [[TMP10]], ptr [[P]], align 1
-; CHECK-NEXT: br label [[VECTOR_BODY]]
+; CHECK-NEXT: [[TMP19:%.*]] = extractelement <16 x i8> [[TMP8]], i32 0
+; CHECK-NEXT: store i8 [[TMP19]], ptr [[P]], align 1
+; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
; CHECK: pred.store.continue:
-; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 1
-; CHECK-NEXT: br i1 [[TMP11]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]]
+; CHECK-NEXT: [[CMP_N:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 1
+; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH:%.*]]
; CHECK: pred.store.if3:
-; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i8> [[TMP8]], i32 1
+; CHECK-NEXT: [[TMP12:%.*]] = extractelement <16 x i8> [[TMP8]], i32 1
; CHECK-NEXT: store i8 [[TMP12]], ptr [[P]], align 1
-; CHECK-NEXT: br label [[PRED_STORE_CONTINUE4]]
+; CHECK-NEXT: br label [[SCALAR_PH]]
; CHECK: pred.store.continue4:
-; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 2
+; CHECK-NEXT: [[TMP13:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 2
; CHECK-NEXT: br i1 [[TMP13]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6:%.*]]
; CHECK: pred.store.if5:
-; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i8> [[TMP8]], i32 2
+; CHECK-NEXT: [[TMP14:%.*]] = extractelement <16 x i8> [[TMP8]], i32 2
; CHECK-NEXT: store i8 [[TMP14]], ptr [[P]], align 1
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE6]]
; CHECK: pred.store.continue6:
-; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 3
-; CHECK-NEXT: br i1 [[TMP15]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8]]
+; CHECK-NEXT: [[TMP15:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 3
+; CHECK-NEXT: br i1 [[TMP15]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8:%.*]]
; CHECK: pred.store.if7:
-; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i8> [[TMP8]], i32 3
+; CHECK-NEXT: [[TMP16:%.*]] = extractelement <16 x i8> [[TMP8]], i32 3
; CHECK-NEXT: store i8 [[TMP16]], ptr [[P]], align 1
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE8]]
; CHECK: pred.store.continue8:
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
-; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY1]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-NEXT: [[TMP17:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 4
+; CHECK-NEXT: br i1 [[TMP17]], label [[PRED_STORE_IF9:%.*]], label [[PRED_STORE_CONTINUE10:%.*]]
+; CHECK: pred.store.if9:
+; CHECK-NEXT: [[TMP18:%.*]] = extractelement <16 x i8> [[TMP8]], i32 4
+; CHECK-NEXT: store i8 [[TMP18]], ptr [[P]], align 1
+; CHECK-NEXT: br label [[PRED_STORE_CONTINUE10]]
+; CHECK: pred.store.continue10:
+; CHECK-NEXT: [[TMP41:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 5
+; CHECK-NEXT: br i1 [[TMP41]], label [[PRED_STORE_IF11:%.*]], label [[PRED_STORE_CONTINUE12:%.*]]
+; CHECK: pred.store.if11:
+; CHECK-NEXT: [[TMP20:%.*]] = extractelement <16 x i8> [[TMP8]], i32 5
+; CHECK-NEXT: store i8 [[TMP20]], ptr [[P]], align 1
+; CHECK-NEXT: br label [[PRED_STORE_CONTINUE12]]
+; CHECK: pred.store.continue12:
+; CHECK-NEXT: [[TMP21:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 6
+; CHECK-NEXT: br i1 [[TMP21]], label [[PRED_STORE_IF13:%.*]], label [[PRED_STORE_CONTINUE14:%.*]]
+; CHECK: pred.store.if13:
+; CHECK-NEXT: [[TMP22:%.*]] = extractelement <16 x i8> [[TMP8]], i32 6
+; CHECK-NEXT: store i8 [[TMP22]], ptr [[P]], align 1
+; CHECK-NEXT: br label [[PRED_STORE_CONTINUE14]]
+; CHECK: pred.store.continue14:
+; CHECK-NEXT: [[TMP23:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 7
+; CHECK-NEXT: br i1 [[TMP23]], label [[PRED_STORE_IF15:%.*]], label [[PRED_STORE_CONTINUE16:%.*]]
+; CHECK: pred.store.if15:
+; CHECK-NEXT: [[TMP24:%.*]] = extractelement <16 x i8> [[TMP8]], i32 7
+; CHECK-NEXT: store i8 [[TMP24]], ptr [[P]], align 1
+; CHECK-NEXT: br label [[PRED_STORE_CONTINUE16]]
+; CHECK: pred.store.continue16:
+; CHECK-NEXT: [[TMP25:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 8
+; CHECK-NEXT: br i1 [[TMP25]], label [[PRED_STORE_IF17:%.*]], label [[PRED_STORE_CONTINUE18:%.*]]
+; CHECK: pred.store.if17:
+; CHECK-NEXT: [[TMP26:%.*]] = extractelement <16 x i8> [[TMP8]], i32 8
+; CHECK-NEXT: store i8 [[TMP26]], ptr [[P]], align 1
+; CHECK-NEXT: br label [[PRED_STORE_CONTINUE18]]
+; CHECK: pred.store.continue18:
+; CHECK-NEXT: [[TMP27:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 9
+; CHECK-NEXT: br i1 [[TMP27]], label [[PRED_STORE_IF19:%.*]], label [[PRED_STORE_CONTINUE20:%.*]]
+; CHECK: pred.store.if19:
+; CHECK-NEXT: [[TMP28:%.*]] = extractelement <16 x i8> [[TMP8]], i32 9
+; CHECK-NEXT: store i8 [[TMP28]], ptr [[P]], align 1
+; CHECK-NEXT: br label [[PRED_STORE_CONTINUE20]]
+; CHECK: pred.store.continue20:
+; CHECK-NEXT: [[TMP29:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 10
+; CHECK-NEXT: br i1 [[TMP29]], label [[PRED_STORE_IF21:%.*]], label [[PRED_STORE_CONTINUE22:%.*]]
+; CHECK: pred.store.if21:
+; CHECK-NEXT: [[TMP30:%.*]] = extractelement <16 x i8> [[TMP8]], i32 10
+; CHECK-NEXT: store i8 [[TMP30]], ptr [[P]], align 1
+; CHECK-NEXT: br label [[PRED_STORE_CONTINUE22]]
+; CHECK: pred.store.continue22:
+; CHECK-NEXT: [[TMP31:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 11
+; CHECK-NEXT: br i1 [[TMP31]], label [[PRED_STORE_IF23:%.*]], label [[PRED_STORE_CONTINUE24:%.*]]
+; CHECK: pred.store.if23:
+; CHECK-NEXT: [[TMP32:%.*]] = extractelement <16 x i8> [[TMP8]], i32 11
+; CHECK-NEXT: store i8 [[TMP32]], ptr [[P]], align 1
+; CHECK-NEXT: br label [[PRED_STORE_CONTINUE24]]
+; CHECK: pred.store.continue24:
+; CHECK-NEXT: [[TMP33:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 12
+; CHECK-NEXT: br i1 [[TMP33]], label [[PRED_STORE_IF25:%.*]], label [[PRED_STORE_CONTINUE26:%.*]]
+; CHECK: pred.store.if25:
+; CHECK-NEXT: [[TMP34:%.*]] = extractelement <16 x i8> [[TMP8]], i32 12
+; CHECK-NEXT: store i8 [[TMP34]], ptr [[P]], align 1
+; CHECK-NEXT: br label [[PRED_STORE_CONTINUE26]]
+; CHECK: pred.store.continue26:
+; CHECK-NEXT: [[TMP35:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 13
+; CHECK-NEXT: br i1 [[TMP35]], label [[PRED_STORE_IF27:%.*]], label [[PRED_STORE_CONTINUE28:%.*]]
+; CHECK: pred.store.if27:
+; CHECK-NEXT: [[TMP36:%.*]] = extractelement <16 x i8> [[TMP8]], i32 13
+; CHECK-NEXT: store i8 [[TMP36]], ptr [[P]], align 1
+; CHECK-NEXT: br label [[PRED_STORE_CONTINUE28]]
+; CHECK: pred.store.continue28:
+; CHECK-NEXT: [[TMP37:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 14
+; CHECK-NEXT: br i1 [[TMP37]], label [[PRED_STORE_IF29:%.*]], label [[PRED_STORE_CONTINUE30:%.*]]
+; CHECK: pred.store.if29:
+; CHECK-NEXT: [[TMP38:%.*]] = extractelement <16 x i8> [[TMP8]], i32 14
+; CHECK-NEXT: store i8 [[TMP38]], ptr [[P]], align 1
+; CHECK-NEXT: br label [[PRED_STORE_CONTINUE30]]
+; CHECK: pred.store.continue30:
+; CHECK-NEXT: [[TMP39:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 15
+; CHECK-NEXT: br i1 [[TMP39]], label [[PRED_STORE_IF31:%.*]], label [[PRED_STORE_CONTINUE32]]
+; CHECK: pred.store.if31:
+; CHECK-NEXT: [[TMP40:%.*]] = extractelement <16 x i8> [[TMP8]], i32 15
+; CHECK-NEXT: store i8 [[TMP40]], ptr [[P]], align 1
+; CHECK-NEXT: br label [[PRED_STORE_CONTINUE32]]
+; CHECK: pred.store.continue32:
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <16 x i32> [[VEC_IND]], splat (i32 16)
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 16
+; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[FOR_COND]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
-; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
+; CHECK-NEXT: br i1 true, label [[EXIT1:%.*]], label [[SCALAR_PH1]]
; CHECK: scalar.ph:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 4, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
-; CHECK-NEXT: br label [[FOR_COND:%.*]]
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 16, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT: br label [[FOR_COND1:%.*]]
; CHECK: for.cond:
-; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY:%.*]] ]
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH1]] ], [ [[ADD:%.*]], [[FOR_BODY:%.*]] ]
; CHECK-NEXT: [[ADD]] = add i32 [[IV]], 1
; CHECK-NEXT: [[CMP_SLT:%.*]] = icmp slt i32 [[IV]], 2
; CHECK-NEXT: [[SHL:%.*]] = shl i64 [[A]], 48
@@ -74,12 +158,12 @@ define void @test(ptr %p, i64 %a, i8 %b) {
; CHECK-NEXT: [[ZEXT:%.*]] = zext i8 [[B]] to i32
; CHECK-NEXT: br label [[FOR_BODY]]
; CHECK: for.body:
-; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[TRUNC_I32]], [[FOR_COND]] ], [ [[ZEXT]], [[COND_FALSE]] ]
+; CHECK-NEXT: [[COND:%.*]] = phi i32 [ [[TRUNC_I32]], [[FOR_COND1]] ], [ [[ZEXT]], [[COND_FALSE]] ]
; CHECK-NEXT: [[SHL_I32:%.*]] = shl i32 [[COND]], 8
; CHECK-NEXT: [[TRUNC:%.*]] = trunc i32 [[SHL_I32]] to i8
; CHECK-NEXT: store i8 [[TRUNC]], ptr [[P]], align 1
-; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[IV]], 2
-; CHECK-NEXT: br i1 [[CMP]], label [[FOR_COND]], label [[EXIT]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[IV]], 8
+; CHECK-NEXT: br i1 [[CMP]], label [[FOR_COND1]], label [[EXIT1]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
@@ -104,7 +188,7 @@ for.body: ; preds = %cond.false, %for.co
%shl.i32 = shl i32 %cond, 8
%trunc = trunc i32 %shl.i32 to i8
store i8 %trunc, ptr %p, align 1
- %cmp = icmp slt i32 %iv, 2
+ %cmp = icmp slt i32 %iv, 8
br i1 %cmp, label %for.cond, label %exit
exit: ; preds = %for.body
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/short-trip-count.ll b/llvm/test/Transforms/LoopVectorize/RISCV/short-trip-count.ll
index 3386a7d3972aa..b623d09535c75 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/short-trip-count.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/short-trip-count.ll
@@ -4,38 +4,16 @@
define void @small_trip_count_min_vlen_128(ptr nocapture %a) nounwind vscale_range(4,1024) {
; CHECK-LABEL: @small_trip_count_min_vlen_128(
; CHECK-NEXT: entry:
-; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
-; CHECK: vector.ph:
-; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.vscale.i32()
-; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[TMP0]], 1
-; CHECK-NEXT: [[N_RND_UP:%.*]] = add i32 4, [[TMP1]]
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], [[TMP0]]
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]]
-; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vscale.i32()
-; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
-; CHECK: vector.body:
-; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 1 x i1> @llvm.get.active.lane.mask.nxv1i1.i32(i32 0, i32 4)
-; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP4:%.*]], i32 0
-; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0
-; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 1 x i32> @llvm.masked.load.nxv1i32.p0(ptr [[TMP7]], i32 4, <vscale x 1 x i1> [[ACTIVE_LANE_MASK]], <vscale x 1 x i32> poison)
-; CHECK-NEXT: [[TMP6:%.*]] = add nsw <vscale x 1 x i32> [[WIDE_MASKED_LOAD]], splat (i32 1)
-; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0
-; CHECK-NEXT: call void @llvm.masked.store.nxv1i32.p0(<vscale x 1 x i32> [[TMP6]], ptr [[TMP8]], i32 4, <vscale x 1 x i1> [[ACTIVE_LANE_MASK]])
-; CHECK-NEXT: br label [[MIDDLE_BLOCK:%.*]]
-; CHECK: middle.block:
-; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
-; CHECK: scalar.ph:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
-; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
-; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 [[IV]]
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[TMP4:%.*]], i32 [[IV]]
; CHECK-NEXT: [[V:%.*]] = load i32, ptr [[GEP]], align 4
; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[V]], 1
; CHECK-NEXT: store i32 [[ADD]], ptr [[GEP]], align 4
; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[IV]], 3
-; CHECK-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-NEXT: br i1 [[COND]], label [[EXIT:%.*]], label [[LOOP]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
@@ -59,40 +37,16 @@ exit:
define void @small_trip_count_min_vlen_32(ptr nocapture %a) nounwind vscale_range(1,1024) {
; CHECK-LABEL: @small_trip_count_min_vlen_32(
; CHECK-NEXT: entry:
-; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
-; CHECK: vector.ph:
-; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.vscale.i32()
-; CHECK-NEXT: [[TMP1:%.*]] = mul i32 [[TMP0]], 4
-; CHECK-NEXT: [[TMP2:%.*]] = sub i32 [[TMP1]], 1
-; CHECK-NEXT: [[N_RND_UP:%.*]] = add i32 4, [[TMP2]]
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], [[TMP1]]
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]]
-; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.vscale.i32()
-; CHECK-NEXT: [[TMP4:%.*]] = mul i32 [[TMP3]], 4
-; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
-; CHECK: vector.body:
-; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 0, i32 4)
-; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6:%.*]], i32 0
-; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0
-; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x i32> @llvm.masked.load.nxv4i32.p0(ptr [[TMP9]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i32> poison)
-; CHECK-NEXT: [[TMP8:%.*]] = add nsw <vscale x 4 x i32> [[WIDE_MASKED_LOAD]], splat (i32 1)
-; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0
-; CHECK-NEXT: call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> [[TMP8]], ptr [[TMP10]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]])
-; CHECK-NEXT: br label [[MIDDLE_BLOCK:%.*]]
-; CHECK: middle.block:
-; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
-; CHECK: scalar.ph:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
-; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
-; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 [[IV]]
+; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[LOOP]] ], [ 0, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[TMP6:%.*]], i32 [[IV]]
; CHECK-NEXT: [[V:%.*]] = load i32, ptr [[GEP]], align 4
; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[V]], 1
; CHECK-NEXT: store i32 [[ADD]], ptr [[GEP]], align 4
; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[IV]], 3
-; CHECK-NEXT: br i1 [[COND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK-NEXT: br i1 [[COND]], label [[EXIT:%.*]], label [[LOOP]]
; CHECK: exit:
; CHECK-NEXT: ret void
;
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-cost.ll b/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-cost.ll
index 94c0036e4c8b6..554fb18f9fa4d 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-cost.ll
@@ -159,22 +159,40 @@ define void @truncate_to_i1_used_by_branch(i8 %x, ptr %dst) #0 {
; CHECK-NEXT: [[ENTRY:.*]]:
; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i8> poison, i8 [[X]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i8> [[BROADCAST_SPLATINSERT]], <2 x i8> poison, <2 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP0:%.*]] = trunc <2 x i8> [[BROADCAST_SPLAT]] to <2 x i1>
-; CHECK-NEXT: [[TMP2:%.*]] = or <2 x i1> splat (i1 true), [[TMP0]]
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x ptr> poison, ptr [[DST]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <2 x ptr> [[BROADCAST_SPLATINSERT1]], <2 x ptr> poison, <2 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT: [[TMP3:%.*]] = mul i32 [[TMP2]], 4
+; CHECK-NEXT: [[TMP12:%.*]] = sub i32 [[TMP3]], 1
+; CHECK-NEXT: [[N_RND_UP:%.*]] = add i32 9, [[TMP12]]
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], [[TMP3]]
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]]
+; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.vscale.i32()
+; CHECK-NEXT: [[TMP5:%.*]] = mul i32 [[TMP10]], 4
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i8> poison, i8 [[X]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i8> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i8> poison, <vscale x 4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP6:%.*]] = trunc i32 [[N_VEC]] to i8
+; CHECK-NEXT: [[TMP7:%.*]] = trunc <vscale x 4 x i8> [[BROADCAST_SPLAT]] to <vscale x 4 x i1>
+; CHECK-NEXT: [[TMP8:%.*]] = or <vscale x 4 x i1> trunc (<vscale x 4 x i8> splat (i8 23) to <vscale x 4 x i1>), [[TMP7]]
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 4 x ptr> poison, ptr [[DST]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 4 x ptr> [[BROADCAST_SPLATINSERT1]], <vscale x 4 x ptr> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i32(i32 0, i32 2)
-; CHECK-NEXT: [[TMP3:%.*]] = select <2 x i1> [[ACTIVE_LANE_MASK]], <2 x i1> [[TMP2]], <2 x i1> zeroinitializer
-; CHECK-NEXT: call void @llvm.masked.scatter.v2i8.v2p0(<2 x i8> zeroinitializer, <2 x ptr> [[BROADCAST_SPLAT4]], i32 1, <2 x i1> [[TMP3]])
-; CHECK-NEXT: br label %[[MIDDLE_BLOCK:.*]]
+; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[INDEX]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT3]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP13:%.*]] = call <vscale x 4 x i32> @llvm.stepvector.nxv4i32()
+; CHECK-NEXT: [[TMP14:%.*]] = add <vscale x 4 x i32> zeroinitializer, [[TMP13]]
+; CHECK-NEXT: [[VEC_IV:%.*]] = add <vscale x 4 x i32> [[BROADCAST_SPLAT4]], [[TMP14]]
+; CHECK-NEXT: [[TMP15:%.*]] = extractelement <vscale x 4 x i32> [[VEC_IV]], i32 0
+; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i32(i32 [[TMP15]], i32 9)
+; CHECK-NEXT: [[TMP11:%.*]] = select <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i1> [[TMP8]], <vscale x 4 x i1> zeroinitializer
+; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i8.nxv4p0(<vscale x 4 x i8> zeroinitializer, <vscale x 4 x ptr> [[BROADCAST_SPLAT2]], i32 1, <vscale x 4 x i1> [[TMP11]])
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP5]]
+; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK: [[SCALAR_PH]]:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ 2, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[TMP6]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
; CHECK: [[LOOP_HEADER]]:
; CHECK-NEXT: [[F_039:%.*]] = phi i8 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[ADD:%.*]], %[[LOOP_LATCH:.*]] ]
@@ -187,8 +205,8 @@ define void @truncate_to_i1_used_by_branch(i8 %x, ptr %dst) #0 {
; CHECK: [[LOOP_LATCH]]:
; CHECK-NEXT: [[ADD]] = add i8 [[F_039]], 1
; CHECK-NEXT: [[CONV:%.*]] = sext i8 [[F_039]] to i32
-; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[CONV]], 1
-; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP_HEADER]], label %[[EXIT]], !llvm.loop [[LOOP6:![0-9]+]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[CONV]], 8
+; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP_HEADER]], label %[[EXIT]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
@@ -208,7 +226,7 @@ then:
loop.latch:
%add = add i8 %f.039, 1
%conv = sext i8 %f.039 to i32
- %cmp = icmp slt i32 %conv, 1
+ %cmp = icmp slt i32 %conv, 8
br i1 %cmp, label %loop.header, label %exit
exit:
@@ -287,7 +305,7 @@ define void @icmp_only_first_op_truncated(ptr noalias %dst, i32 %x, i64 %N, i64
; CHECK-NEXT: call void @llvm.masked.scatter.nxv2f64.nxv2p0(<vscale x 2 x double> [[WIDE_MASKED_GATHER]], <vscale x 2 x ptr> [[BROADCAST_SPLAT6]], i32 8, <vscale x 2 x i1> [[TMP8]])
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP11]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP11]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
@@ -308,7 +326,7 @@ define void @icmp_only_first_op_truncated(ptr noalias %dst, i32 %x, i64 %N, i64
; CHECK: [[LOOP_LATCH]]:
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], [[V]]
-; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP8:![0-9]+]]
+; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP9:![0-9]+]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
@@ -348,7 +366,8 @@ attributes #1 = { "target-features"="+64bit,+v" }
; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]}
-; CHECK: [[LOOP6]] = distinct !{[[LOOP6]], [[META2]], [[META1]]}
-; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META1]], [[META2]]}
-; CHECK: [[LOOP8]] = distinct !{[[LOOP8]], [[META2]], [[META1]]}
+; CHECK: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]}
+; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META2]], [[META1]]}
+; CHECK: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]}
+; CHECK: [[LOOP9]] = distinct !{[[LOOP9]], [[META2]], [[META1]]}
;.
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-evl-crash.ll b/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-evl-crash.ll
index ba7158eb02d90..b0c2ae6563129 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-evl-crash.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-evl-crash.ll
@@ -12,47 +12,49 @@ define void @truncate_to_minimal_bitwidths_widen_cast_recipe(ptr %src) {
; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], 1
-; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 2, [[TMP1]]
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP0]]
+; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 8
+; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], 1
+; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 9, [[TMP2]]
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
-; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 8
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[AVL:%.*]] = sub i64 2, [[EVL_BASED_IV]]
-; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 1, i1 true)
-; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[EVL_BASED_IV]], 0
-; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[TMP4]]
-; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[TMP5]], i32 0
-; CHECK-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 1 x i8> @llvm.vp.load.nxv1i8.p0(ptr align 1 [[TMP6]], <vscale x 1 x i1> splat (i1 true), i32 [[TMP3]])
-; CHECK-NEXT: [[TMP7:%.*]] = zext <vscale x 1 x i8> [[VP_OP_LOAD]] to <vscale x 1 x i16>
-; CHECK-NEXT: [[TMP12:%.*]] = mul <vscale x 1 x i16> zeroinitializer, [[TMP7]]
-; CHECK-NEXT: [[VP_OP1:%.*]] = lshr <vscale x 1 x i16> [[TMP12]], trunc (<vscale x 1 x i32> splat (i32 1) to <vscale x 1 x i16>)
-; CHECK-NEXT: [[TMP8:%.*]] = trunc <vscale x 1 x i16> [[VP_OP1]] to <vscale x 1 x i8>
-; CHECK-NEXT: call void @llvm.vp.scatter.nxv1i8.nxv1p0(<vscale x 1 x i8> [[TMP8]], <vscale x 1 x ptr> align 1 zeroinitializer, <vscale x 1 x i1> splat (i1 true), i32 [[TMP3]])
-; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP3]] to i64
-; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP9]], [[EVL_BASED_IV]]
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP2]]
-; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-NEXT: [[AVL:%.*]] = sub i64 9, [[EVL_BASED_IV]]
+; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 8, i1 true)
+; CHECK-NEXT: [[IV:%.*]] = add i64 [[EVL_BASED_IV]], 0
+; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[IV]]
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[GEP_SRC]], i32 0
+; CHECK-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 8 x i8> @llvm.vp.load.nxv8i8.p0(ptr align 1 [[TMP8]], <vscale x 8 x i1> splat (i1 true), i32 [[TMP5]])
+; CHECK-NEXT: [[TMP9:%.*]] = zext <vscale x 8 x i8> [[VP_OP_LOAD]] to <vscale x 8 x i16>
+; CHECK-NEXT: [[TMP10:%.*]] = mul <vscale x 8 x i16> zeroinitializer, [[TMP9]]
+; CHECK-NEXT: [[TMP15:%.*]] = lshr <vscale x 8 x i16> [[TMP10]], trunc (<vscale x 8 x i32> splat (i32 1) to <vscale x 8 x i16>)
+; CHECK-NEXT: [[TMP12:%.*]] = trunc <vscale x 8 x i16> [[TMP15]] to <vscale x 8 x i8>
+; CHECK-NEXT: call void @llvm.vp.scatter.nxv8i8.nxv8p0(<vscale x 8 x i8> [[TMP12]], <vscale x 8 x ptr> align 1 zeroinitializer, <vscale x 8 x i1> splat (i1 true), i32 [[TMP5]])
+; CHECK-NEXT: [[TMP13:%.*]] = zext i32 [[TMP5]] to i64
+; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP13]], [[EVL_BASED_IV]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]]
+; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
; CHECK: [[SCALAR_PH]]:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
-; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
-; CHECK-NEXT: [[GEP_SRC:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[IV]]
-; CHECK-NEXT: [[TMP11:%.*]] = load i8, ptr [[GEP_SRC]], align 1
+; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[GEP_SRC1:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[IV1]]
+; CHECK-NEXT: [[TMP11:%.*]] = load i8, ptr [[GEP_SRC1]], align 1
; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[TMP11]] to i32
; CHECK-NEXT: [[MUL16:%.*]] = mul i32 0, [[CONV]]
; CHECK-NEXT: [[SHR35:%.*]] = lshr i32 [[MUL16]], 1
; CHECK-NEXT: [[CONV36:%.*]] = trunc i32 [[SHR35]] to i8
; CHECK-NEXT: store i8 [[CONV36]], ptr null, align 1
-; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
-; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], 1
+; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV1]], 1
+; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV1]], 8
; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
@@ -70,7 +72,7 @@ loop: ; preds = %loop, %entry
%conv36 = trunc i32 %shr35 to i8
store i8 %conv36, ptr null, align 1
%iv.next = add i64 %iv, 1
- %ec = icmp eq i64 %iv, 1
+ %ec = icmp eq i64 %iv, 8
br i1 %ec, label %exit, label %loop
exit: ; preds = %loop
More information about the llvm-commits
mailing list