[llvm] [VPlan] Always consider register pressure on RISC-V (PR #156951)
Luke Lau via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 11 22:51:30 PDT 2025
https://github.com/lukel97 updated https://github.com/llvm/llvm-project/pull/156951
>From 77df7b4bc901ddd2c5878a9ddd6c68e2e601c110 Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Fri, 5 Sep 2025 01:25:12 +0800
Subject: [PATCH 1/6] Precommit test
---
.../LoopVectorize/RISCV/reg-usage-prune-vf.ll | 139 ++++++++++++++++++
1 file changed, 139 insertions(+)
create mode 100644 llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-prune-vf.ll
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-prune-vf.ll b/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-prune-vf.ll
new file mode 100644
index 0000000000000..92f0a131e03d4
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-prune-vf.ll
@@ -0,0 +1,139 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 5
+; RUN: opt -p loop-vectorize -mtriple riscv64 -mattr=+v -S < %s | FileCheck %s
+
+define void @f(ptr noalias %p0, ptr noalias %p1, ptr noalias %p2) {
+; CHECK-LABEL: define void @f(
+; CHECK-SAME: ptr noalias [[P0:%.*]], ptr noalias [[P1:%.*]], ptr noalias [[P2:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64()
+; CHECK-NEXT: [[TMP1:%.*]] = mul <vscale x 8 x i64> [[TMP0]], splat (i64 2)
+; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 8 x i64> zeroinitializer, [[TMP1]]
+; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64()
+; CHECK-NEXT: [[TMP3:%.*]] = mul <vscale x 8 x i64> [[TMP2]], splat (i64 3)
+; CHECK-NEXT: [[INDUCTION1:%.*]] = add <vscale x 8 x i64> zeroinitializer, [[TMP3]]
+; CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64()
+; CHECK-NEXT: [[TMP5:%.*]] = mul <vscale x 8 x i64> [[TMP4]], splat (i64 4)
+; CHECK-NEXT: [[INDUCTION2:%.*]] = add <vscale x 8 x i64> zeroinitializer, [[TMP5]]
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 8 x i64> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_IND3:%.*]] = phi <vscale x 8 x i64> [ [[INDUCTION1]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT11:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_IND4:%.*]] = phi <vscale x 8 x i64> [ [[INDUCTION2]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT12:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ 1025, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 8, i1 true)
+; CHECK-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
+; CHECK-NEXT: [[TMP8:%.*]] = mul i64 4, [[TMP7]]
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP8]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP6]] to i64
+; CHECK-NEXT: [[TMP10:%.*]] = mul i64 3, [[TMP9]]
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP10]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT6:%.*]] = shufflevector <vscale x 8 x i64> [[BROADCAST_SPLATINSERT5]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP11:%.*]] = zext i32 [[TMP6]] to i64
+; CHECK-NEXT: [[TMP12:%.*]] = mul i64 2, [[TMP11]]
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT7:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP12]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT8:%.*]] = shufflevector <vscale x 8 x i64> [[BROADCAST_SPLATINSERT7]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP13:%.*]] = sub <vscale x 8 x i64> [[VEC_IND]], splat (i64 1)
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[P0]], <vscale x 8 x i64> [[TMP13]]
+; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 8 x i8> @llvm.vp.gather.nxv8i8.nxv8p0(<vscale x 8 x ptr> align 1 [[TMP14]], <vscale x 8 x i1> splat (i1 true), i32 [[TMP6]])
+; CHECK-NEXT: [[TMP15:%.*]] = sub <vscale x 8 x i64> [[VEC_IND3]], splat (i64 1)
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[P0]], <vscale x 8 x i64> [[TMP15]]
+; CHECK-NEXT: [[WIDE_MASKED_GATHER9:%.*]] = call <vscale x 8 x i8> @llvm.vp.gather.nxv8i8.nxv8p0(<vscale x 8 x ptr> align 1 [[TMP16]], <vscale x 8 x i1> splat (i1 true), i32 [[TMP6]])
+; CHECK-NEXT: [[TMP17:%.*]] = sub <vscale x 8 x i64> [[VEC_IND4]], splat (i64 1)
+; CHECK-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[P0]], <vscale x 8 x i64> [[TMP17]]
+; CHECK-NEXT: [[WIDE_MASKED_GATHER10:%.*]] = call <vscale x 8 x i8> @llvm.vp.gather.nxv8i8.nxv8p0(<vscale x 8 x ptr> align 1 [[TMP18]], <vscale x 8 x i1> splat (i1 true), i32 [[TMP6]])
+; CHECK-NEXT: [[TMP19:%.*]] = mul i64 [[EVL_BASED_IV]], 3
+; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[P1]], i64 [[TMP19]]
+; CHECK-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[TMP20]], i8 0
+; CHECK-NEXT: [[INTERLEAVE_EVL:%.*]] = mul nuw nsw i32 [[TMP6]], 3
+; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = call <vscale x 24 x i8> @llvm.vector.interleave3.nxv24i8(<vscale x 8 x i8> [[WIDE_MASKED_GATHER]], <vscale x 8 x i8> [[WIDE_MASKED_GATHER9]], <vscale x 8 x i8> [[WIDE_MASKED_GATHER10]])
+; CHECK-NEXT: call void @llvm.vp.store.nxv24i8.p0(<vscale x 24 x i8> [[INTERLEAVED_VEC]], ptr align 1 [[TMP21]], <vscale x 24 x i1> splat (i1 true), i32 [[INTERLEAVE_EVL]])
+; CHECK-NEXT: [[TMP22:%.*]] = zext i32 [[TMP6]] to i64
+; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP22]], [[EVL_BASED_IV]]
+; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP22]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 8 x i64> [[VEC_IND]], [[BROADCAST_SPLAT8]]
+; CHECK-NEXT: [[VEC_IND_NEXT11]] = add <vscale x 8 x i64> [[VEC_IND3]], [[BROADCAST_SPLAT6]]
+; CHECK-NEXT: [[VEC_IND_NEXT12]] = add <vscale x 8 x i64> [[VEC_IND4]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT: [[TMP23:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
+; CHECK-NEXT: br i1 [[TMP23]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: br label %[[EXIT:.*]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[WIDE_IV_0:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[WIDE_IV_0_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[WIDE_IV_1:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[WIDE_IV_1_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[WIDE_IV_2:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[WIDE_IV_2_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT: [[WIDE_IV_0_SUB:%.*]] = sub i64 [[WIDE_IV_0]], 1
+; CHECK-NEXT: [[A_GEP0:%.*]] = getelementptr i8, ptr [[P0]], i64 [[WIDE_IV_0_SUB]]
+; CHECK-NEXT: [[A:%.*]] = load i8, ptr [[A_GEP0]], align 1
+; CHECK-NEXT: [[WIDE_IV_1_SUB:%.*]] = sub i64 [[WIDE_IV_1]], 1
+; CHECK-NEXT: [[B_GEP0:%.*]] = getelementptr i8, ptr [[P0]], i64 [[WIDE_IV_1_SUB]]
+; CHECK-NEXT: [[B:%.*]] = load i8, ptr [[B_GEP0]], align 1
+; CHECK-NEXT: [[WIDE_IV_2_SUB:%.*]] = sub i64 [[WIDE_IV_2]], 1
+; CHECK-NEXT: [[C_GEP0:%.*]] = getelementptr i8, ptr [[P0]], i64 [[WIDE_IV_2_SUB]]
+; CHECK-NEXT: [[C:%.*]] = load i8, ptr [[C_GEP0]], align 1
+; CHECK-NEXT: [[IV_MUL:%.*]] = mul i64 [[IV]], 3
+; CHECK-NEXT: [[BASE:%.*]] = getelementptr i8, ptr [[P1]], i64 [[IV_MUL]]
+; CHECK-NEXT: [[A_GEP1:%.*]] = getelementptr i8, ptr [[BASE]], i8 0
+; CHECK-NEXT: store i8 [[A]], ptr [[A_GEP1]], align 1
+; CHECK-NEXT: [[B_GEP1:%.*]] = getelementptr i8, ptr [[BASE]], i8 1
+; CHECK-NEXT: store i8 [[B]], ptr [[B_GEP1]], align 1
+; CHECK-NEXT: [[C_GEP1:%.*]] = getelementptr i8, ptr [[BASE]], i8 2
+; CHECK-NEXT: store i8 [[C]], ptr [[C_GEP1]], align 1
+; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
+; CHECK-NEXT: [[WIDE_IV_0_NEXT]] = add i64 [[WIDE_IV_0]], 2
+; CHECK-NEXT: [[WIDE_IV_1_NEXT]] = add i64 [[WIDE_IV_1]], 3
+; CHECK-NEXT: [[WIDE_IV_2_NEXT]] = add i64 [[WIDE_IV_2]], 4
+; CHECK-NEXT: [[DONE:%.*]] = icmp eq i64 [[IV]], 1024
+; CHECK-NEXT: br i1 [[DONE]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+ %wide.iv.0 = phi i64 [ 0, %entry ], [ %wide.iv.0.next, %loop ]
+ %wide.iv.1 = phi i64 [ 0, %entry ], [ %wide.iv.1.next, %loop ]
+ %wide.iv.2 = phi i64 [ 0, %entry ], [ %wide.iv.2.next, %loop ]
+
+ %wide.iv.0.sub = sub i64 %wide.iv.0, 1
+ %a.gep0 = getelementptr i8, ptr %p0, i64 %wide.iv.0.sub
+ %a = load i8, ptr %a.gep0
+
+ %wide.iv.1.sub = sub i64 %wide.iv.1, 1
+ %b.gep0 = getelementptr i8, ptr %p0, i64 %wide.iv.1.sub
+ %b = load i8, ptr %b.gep0
+
+ %wide.iv.2.sub = sub i64 %wide.iv.2, 1
+ %c.gep0 = getelementptr i8, ptr %p0, i64 %wide.iv.2.sub
+ %c = load i8, ptr %c.gep0
+
+ %iv.mul = mul i64 %iv, 3
+ %base = getelementptr i8, ptr %p1, i64 %iv.mul
+
+ %a.gep1 = getelementptr i8, ptr %base, i8 0
+ store i8 %a, ptr %a.gep1
+
+ %b.gep1 = getelementptr i8, ptr %base, i8 1
+ store i8 %b, ptr %b.gep1
+
+ %c.gep1 = getelementptr i8, ptr %base, i8 2
+ store i8 %c, ptr %c.gep1
+
+ %iv.next = add i64 %iv, 1
+ %wide.iv.0.next = add i64 %wide.iv.0, 2
+ %wide.iv.1.next = add i64 %wide.iv.1, 3
+ %wide.iv.2.next = add i64 %wide.iv.2, 4
+ %done = icmp eq i64 %iv, 1024
+ br i1 %done, label %exit, label %loop
+
+exit:
+ ret void
+}
>From b6a52d8140a491187babb61f46ec4817311dadee Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Fri, 5 Sep 2025 02:53:14 +0800
Subject: [PATCH 2/6] [VPlan] Always consider register pressure on RISC-V
---
.../llvm/Analysis/TargetTransformInfo.h | 4 ++
.../llvm/Analysis/TargetTransformInfoImpl.h | 2 +
llvm/lib/Analysis/TargetTransformInfo.cpp | 4 ++
.../Target/RISCV/RISCVTargetTransformInfo.h | 2 +
.../Transforms/Vectorize/LoopVectorize.cpp | 3 +
.../LoopVectorize/RISCV/reg-usage-bf16.ll | 11 ++-
.../LoopVectorize/RISCV/reg-usage-f16.ll | 21 +++---
.../LoopVectorize/RISCV/reg-usage-prune-vf.ll | 66 +++++++++---------
.../LoopVectorize/RISCV/reg-usage.ll | 68 +++++++++++--------
9 files changed, 100 insertions(+), 81 deletions(-)
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 1f95d13815f8e..86ecce56f3128 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -1843,6 +1843,10 @@ class TargetTransformInfo {
/// otherwise scalar epilogue loop.
LLVM_ABI bool preferEpilogueVectorization() const;
+ /// \returns True if the loop vectorizer should discard any VFs that may have
+ /// too high of a register pressure as determined by getNumberOfRegisters.
+ LLVM_ABI bool shouldConsiderVectorizationRegPressure() const;
+
/// \returns True if the target wants to expand the given reduction intrinsic
/// into a shuffle sequence.
LLVM_ABI bool shouldExpandReduction(const IntrinsicInst *II) const;
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index b58386b94bba4..566e1cf51631a 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -1105,6 +1105,8 @@ class TargetTransformInfoImplBase {
virtual bool preferEpilogueVectorization() const { return true; }
+ virtual bool shouldConsiderVectorizationRegPressure() const { return false; }
+
virtual bool shouldExpandReduction(const IntrinsicInst *II) const {
return true;
}
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index 899806bf37348..09b50c5270e57 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -1425,6 +1425,10 @@ bool TargetTransformInfo::preferEpilogueVectorization() const {
return TTIImpl->preferEpilogueVectorization();
}
+bool TargetTransformInfo::shouldConsiderVectorizationRegPressure() const {
+ return TTIImpl->shouldConsiderVectorizationRegPressure();
+}
+
TargetTransformInfo::VPLegalization
TargetTransformInfo::getVPLegalizationStrategy(const VPIntrinsic &VPI) const {
return TTIImpl->getVPLegalizationStrategy(VPI);
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
index 6bd7d51daff69..47e0a250d285a 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
@@ -141,6 +141,8 @@ class RISCVTTIImpl final : public BasicTTIImplBase<RISCVTTIImpl> {
return false;
}
+ bool shouldConsiderVectorizationRegPressure() const override { return true; }
+
InstructionCost
getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
unsigned AddressSpace,
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 5a1d1e75e2d5d..c0b1d7421af31 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -3702,6 +3702,9 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
bool LoopVectorizationCostModel::shouldConsiderRegPressureForVF(
ElementCount VF) {
+ if (TTI.shouldConsiderVectorizationRegPressure())
+ return true;
+
if (!useMaxBandwidth(VF.isScalable()
? TargetTransformInfo::RGK_ScalableVector
: TargetTransformInfo::RGK_FixedWidthVector))
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-bf16.ll b/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-bf16.ll
index 346f1cbcc7e3d..097f05d222cf6 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-bf16.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-bf16.ll
@@ -1,14 +1,11 @@
; REQUIRES: asserts
-; RUN: opt -passes=loop-vectorize -mtriple riscv64 -mattr=+v,+zvfbfmin -prefer-predicate-over-epilogue=scalar-epilogue -debug-only=loop-vectorize,vplan --disable-output -riscv-v-register-bit-width-lmul=1 -S < %s 2>&1 | FileCheck %s
-
-; TODO: -prefer-predicate-over-epilogue=scalar-epilogue was added to allow
-; unrolling. Calculate register pressure for all VPlans, not just unrolled ones,
-; and remove.
+; RUN: opt -passes=loop-vectorize -mtriple riscv64 -mattr=+v,+zvfbfmin -debug-only=loop-vectorize,vplan --disable-output -riscv-v-register-bit-width-lmul=1 -S < %s 2>&1 | FileCheck %s
define void @add(ptr noalias nocapture readonly %src1, ptr noalias nocapture readonly %src2, i32 signext %size, ptr noalias nocapture writeonly %result) {
; CHECK-LABEL: add
-; CHECK: LV(REG): Found max usage: 2 item
-; CHECK-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 3 registers
+; CHECK: LV(REG): VF = vscale x 4
+; CHECK-NEXT: LV(REG): Found max usage: 2 item
+; CHECK-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 6 registers
; CHECK-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 4 registers
; CHECK-NEXT: LV(REG): Found invariant usage: 1 item
; CHECK-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-f16.ll b/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-f16.ll
index b25bc485a9ca7..8bbfdf39a0624 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-f16.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-f16.ll
@@ -1,20 +1,19 @@
; REQUIRES: asserts
-; RUN: opt -passes=loop-vectorize -mtriple riscv64 -mattr=+v,+zvfh -prefer-predicate-over-epilogue=scalar-epilogue -debug-only=loop-vectorize,vplan --disable-output -riscv-v-register-bit-width-lmul=1 -S < %s 2>&1 | FileCheck %s --check-prefix=ZVFH
-; RUN: opt -passes=loop-vectorize -mtriple riscv64 -mattr=+v,+zvfhmin -prefer-predicate-over-epilogue=scalar-epilogue -debug-only=loop-vectorize,vplan --disable-output -riscv-v-register-bit-width-lmul=1 -S < %s 2>&1 | FileCheck %s --check-prefix=ZVFHMIN
-
-; TODO: -prefer-predicate-over-epilogue=scalar-epilogue was added to allow
-; unrolling. Calculate register pressure for all VPlans, not just unrolled ones,
-; and remove.
+; RUN: opt -passes=loop-vectorize -mtriple riscv64 -mattr=+v,+zvfh -debug-only=loop-vectorize,vplan --disable-output -riscv-v-register-bit-width-lmul=1 -S < %s 2>&1 | FileCheck %s --check-prefix=ZVFH
+; RUN: opt -passes=loop-vectorize -mtriple riscv64 -mattr=+v,+zvfhmin -debug-only=loop-vectorize,vplan --disable-output -riscv-v-register-bit-width-lmul=1 -S < %s 2>&1 | FileCheck %s --check-prefix=ZVFHMIN
define void @add(ptr noalias nocapture readonly %src1, ptr noalias nocapture readonly %src2, i32 signext %size, ptr noalias nocapture writeonly %result) {
-; CHECK-LABEL: add
-; ZVFH: LV(REG): Found max usage: 2 item
-; ZVFH-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 3 registers
+; ZVFH-LABEL: add
+; ZVFH: LV(REG): VF = vscale x 4
+; ZVFH-NEXT: LV(REG): Found max usage: 2 item
+; ZVFH-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 6 registers
; ZVFH-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 2 registers
; ZVFH-NEXT: LV(REG): Found invariant usage: 1 item
; ZVFH-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers
-; ZVFHMIN: LV(REG): Found max usage: 2 item
-; ZVFHMIN-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 3 registers
+; ZVFHMIN-LABEL: add
+; ZVFHMIN: LV(REG): VF = vscale x 4
+; ZVFHMIN-NEXT: LV(REG): Found max usage: 2 item
+; ZVFHMIN-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 6 registers
; ZVFHMIN-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 4 registers
; ZVFHMIN-NEXT: LV(REG): Found invariant usage: 1 item
; ZVFHMIN-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-prune-vf.ll b/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-prune-vf.ll
index 92f0a131e03d4..72acffeb60c6c 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-prune-vf.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-prune-vf.ll
@@ -7,56 +7,56 @@ define void @f(ptr noalias %p0, ptr noalias %p1, ptr noalias %p2) {
; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
-; CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64()
-; CHECK-NEXT: [[TMP1:%.*]] = mul <vscale x 8 x i64> [[TMP0]], splat (i64 2)
-; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 8 x i64> zeroinitializer, [[TMP1]]
-; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64()
-; CHECK-NEXT: [[TMP3:%.*]] = mul <vscale x 8 x i64> [[TMP2]], splat (i64 3)
-; CHECK-NEXT: [[INDUCTION1:%.*]] = add <vscale x 8 x i64> zeroinitializer, [[TMP3]]
-; CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64()
-; CHECK-NEXT: [[TMP5:%.*]] = mul <vscale x 8 x i64> [[TMP4]], splat (i64 4)
-; CHECK-NEXT: [[INDUCTION2:%.*]] = add <vscale x 8 x i64> zeroinitializer, [[TMP5]]
+; CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
+; CHECK-NEXT: [[TMP1:%.*]] = mul <vscale x 4 x i64> [[TMP0]], splat (i64 2)
+; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP1]]
+; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
+; CHECK-NEXT: [[TMP3:%.*]] = mul <vscale x 4 x i64> [[TMP2]], splat (i64 3)
+; CHECK-NEXT: [[INDUCTION1:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP3]]
+; CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
+; CHECK-NEXT: [[TMP5:%.*]] = mul <vscale x 4 x i64> [[TMP4]], splat (i64 4)
+; CHECK-NEXT: [[INDUCTION2:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP5]]
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 8 x i64> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_IND3:%.*]] = phi <vscale x 8 x i64> [ [[INDUCTION1]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT11:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_IND4:%.*]] = phi <vscale x 8 x i64> [ [[INDUCTION2]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT12:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x i64> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_IND3:%.*]] = phi <vscale x 4 x i64> [ [[INDUCTION1]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT11:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_IND4:%.*]] = phi <vscale x 4 x i64> [ [[INDUCTION2]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT12:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[AVL:%.*]] = phi i64 [ 1025, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 8, i1 true)
+; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
; CHECK-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
; CHECK-NEXT: [[TMP8:%.*]] = mul i64 4, [[TMP7]]
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP8]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP8]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP6]] to i64
; CHECK-NEXT: [[TMP10:%.*]] = mul i64 3, [[TMP9]]
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP10]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT6:%.*]] = shufflevector <vscale x 8 x i64> [[BROADCAST_SPLATINSERT5]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP10]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT6:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT5]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP11:%.*]] = zext i32 [[TMP6]] to i64
; CHECK-NEXT: [[TMP12:%.*]] = mul i64 2, [[TMP11]]
-; CHECK-NEXT: [[BROADCAST_SPLATINSERT7:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP12]], i64 0
-; CHECK-NEXT: [[BROADCAST_SPLAT8:%.*]] = shufflevector <vscale x 8 x i64> [[BROADCAST_SPLATINSERT7]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP13:%.*]] = sub <vscale x 8 x i64> [[VEC_IND]], splat (i64 1)
-; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[P0]], <vscale x 8 x i64> [[TMP13]]
-; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 8 x i8> @llvm.vp.gather.nxv8i8.nxv8p0(<vscale x 8 x ptr> align 1 [[TMP14]], <vscale x 8 x i1> splat (i1 true), i32 [[TMP6]])
-; CHECK-NEXT: [[TMP15:%.*]] = sub <vscale x 8 x i64> [[VEC_IND3]], splat (i64 1)
-; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[P0]], <vscale x 8 x i64> [[TMP15]]
-; CHECK-NEXT: [[WIDE_MASKED_GATHER9:%.*]] = call <vscale x 8 x i8> @llvm.vp.gather.nxv8i8.nxv8p0(<vscale x 8 x ptr> align 1 [[TMP16]], <vscale x 8 x i1> splat (i1 true), i32 [[TMP6]])
-; CHECK-NEXT: [[TMP17:%.*]] = sub <vscale x 8 x i64> [[VEC_IND4]], splat (i64 1)
-; CHECK-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[P0]], <vscale x 8 x i64> [[TMP17]]
-; CHECK-NEXT: [[WIDE_MASKED_GATHER10:%.*]] = call <vscale x 8 x i8> @llvm.vp.gather.nxv8i8.nxv8p0(<vscale x 8 x ptr> align 1 [[TMP18]], <vscale x 8 x i1> splat (i1 true), i32 [[TMP6]])
+; CHECK-NEXT: [[BROADCAST_SPLATINSERT7:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP12]], i64 0
+; CHECK-NEXT: [[BROADCAST_SPLAT8:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT7]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP13:%.*]] = sub <vscale x 4 x i64> [[VEC_IND]], splat (i64 1)
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[P0]], <vscale x 4 x i64> [[TMP13]]
+; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x i8> @llvm.vp.gather.nxv4i8.nxv4p0(<vscale x 4 x ptr> align 1 [[TMP14]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP6]])
+; CHECK-NEXT: [[TMP15:%.*]] = sub <vscale x 4 x i64> [[VEC_IND3]], splat (i64 1)
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[P0]], <vscale x 4 x i64> [[TMP15]]
+; CHECK-NEXT: [[WIDE_MASKED_GATHER9:%.*]] = call <vscale x 4 x i8> @llvm.vp.gather.nxv4i8.nxv4p0(<vscale x 4 x ptr> align 1 [[TMP16]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP6]])
+; CHECK-NEXT: [[TMP17:%.*]] = sub <vscale x 4 x i64> [[VEC_IND4]], splat (i64 1)
+; CHECK-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[P0]], <vscale x 4 x i64> [[TMP17]]
+; CHECK-NEXT: [[WIDE_MASKED_GATHER10:%.*]] = call <vscale x 4 x i8> @llvm.vp.gather.nxv4i8.nxv4p0(<vscale x 4 x ptr> align 1 [[TMP18]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP6]])
; CHECK-NEXT: [[TMP19:%.*]] = mul i64 [[EVL_BASED_IV]], 3
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[P1]], i64 [[TMP19]]
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[TMP20]], i8 0
; CHECK-NEXT: [[INTERLEAVE_EVL:%.*]] = mul nuw nsw i32 [[TMP6]], 3
-; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = call <vscale x 24 x i8> @llvm.vector.interleave3.nxv24i8(<vscale x 8 x i8> [[WIDE_MASKED_GATHER]], <vscale x 8 x i8> [[WIDE_MASKED_GATHER9]], <vscale x 8 x i8> [[WIDE_MASKED_GATHER10]])
-; CHECK-NEXT: call void @llvm.vp.store.nxv24i8.p0(<vscale x 24 x i8> [[INTERLEAVED_VEC]], ptr align 1 [[TMP21]], <vscale x 24 x i1> splat (i1 true), i32 [[INTERLEAVE_EVL]])
+; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = call <vscale x 12 x i8> @llvm.vector.interleave3.nxv12i8(<vscale x 4 x i8> [[WIDE_MASKED_GATHER]], <vscale x 4 x i8> [[WIDE_MASKED_GATHER9]], <vscale x 4 x i8> [[WIDE_MASKED_GATHER10]])
+; CHECK-NEXT: call void @llvm.vp.store.nxv12i8.p0(<vscale x 12 x i8> [[INTERLEAVED_VEC]], ptr align 1 [[TMP21]], <vscale x 12 x i1> splat (i1 true), i32 [[INTERLEAVE_EVL]])
; CHECK-NEXT: [[TMP22:%.*]] = zext i32 [[TMP6]] to i64
; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP22]], [[EVL_BASED_IV]]
; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP22]]
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 8 x i64> [[VEC_IND]], [[BROADCAST_SPLAT8]]
-; CHECK-NEXT: [[VEC_IND_NEXT11]] = add <vscale x 8 x i64> [[VEC_IND3]], [[BROADCAST_SPLAT6]]
-; CHECK-NEXT: [[VEC_IND_NEXT12]] = add <vscale x 8 x i64> [[VEC_IND4]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[BROADCAST_SPLAT8]]
+; CHECK-NEXT: [[VEC_IND_NEXT11]] = add <vscale x 4 x i64> [[VEC_IND3]], [[BROADCAST_SPLAT6]]
+; CHECK-NEXT: [[VEC_IND_NEXT12]] = add <vscale x 4 x i64> [[VEC_IND4]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP23:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
; CHECK-NEXT: br i1 [[TMP23]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage.ll b/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage.ll
index 116ccc9961795..99139da67bb78 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage.ll
@@ -5,50 +5,54 @@
; RUN: -S < %s 2>&1 | FileCheck %s --check-prefix=CHECK-SCALAR
; RUN: opt -passes=loop-vectorize -mtriple riscv64-linux-gnu \
; RUN: -mattr=+v,+d -debug-only=loop-vectorize,vplan --disable-output \
-; RUN: -riscv-v-register-bit-width-lmul=1 -prefer-predicate-over-epilogue=scalar-epilogue \
+; RUN: -riscv-v-register-bit-width-lmul=1 \
; RUN: -S < %s 2>&1 | FileCheck %s --check-prefix=CHECK-LMUL1
; RUN: opt -passes=loop-vectorize -mtriple riscv64-linux-gnu \
; RUN: -mattr=+v,+d -debug-only=loop-vectorize,vplan --disable-output \
-; RUN: -riscv-v-register-bit-width-lmul=2 -prefer-predicate-over-epilogue=scalar-epilogue \
+; RUN: -riscv-v-register-bit-width-lmul=2 \
; RUN: -S < %s 2>&1 | FileCheck %s --check-prefix=CHECK-LMUL2
; RUN: opt -passes=loop-vectorize -mtriple riscv64-linux-gnu \
; RUN: -mattr=+v,+d -debug-only=loop-vectorize,vplan --disable-output \
-; RUN: -riscv-v-register-bit-width-lmul=4 -prefer-predicate-over-epilogue=scalar-epilogue \
+; RUN: -riscv-v-register-bit-width-lmul=4 \
; RUN: -S < %s 2>&1 | FileCheck %s --check-prefix=CHECK-LMUL4
; RUN: opt -passes=loop-vectorize -mtriple riscv64-linux-gnu \
; RUN: -mattr=+v,+d -debug-only=loop-vectorize,vplan --disable-output \
-; RUN: -riscv-v-register-bit-width-lmul=8 -prefer-predicate-over-epilogue=scalar-epilogue \
+; RUN: -riscv-v-register-bit-width-lmul=8 \
; RUN: -S < %s 2>&1 | FileCheck %s --check-prefix=CHECK-LMUL8
-; TODO: -prefer-predicate-over-epilogue=scalar-epilogue was added to allow
-; unrolling. Calculate register pressure for all VPlans, not just unrolled ones,
-; and remove.
-
define void @add(ptr noalias nocapture readonly %src1, ptr noalias nocapture readonly %src2, i32 signext %size, ptr noalias nocapture writeonly %result) {
-; CHECK-LABEL: add
+; CHECK-SCALAR-LABEL: add
; CHECK-SCALAR: LV(REG): VF = 1
; CHECK-SCALAR-NEXT: LV(REG): Found max usage: 2 item
; CHECK-SCALAR-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 3 registers
; CHECK-SCALAR-NEXT: LV(REG): RegisterClass: RISCV::FPRRC, 2 registers
; CHECK-SCALAR-NEXT: LV(REG): Found invariant usage: 1 item
; CHECK-SCALAR-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers
-; CHECK-LMUL1: LV(REG): Found max usage: 2 item
-; CHECK-LMUL1-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 3 registers
+; CHECK-LMUL1-LABEL: add
+; CHECK-LMUL1: LV(REG): VF = vscale x 2
+; CHECK-LMUL1-NEXT: LV(REG): Found max usage: 2 item
+; CHECK-LMUL1-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 6 registers
; CHECK-LMUL1-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 2 registers
; CHECK-LMUL1-NEXT: LV(REG): Found invariant usage: 1 item
; CHECK-LMUL1-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers
-; CHECK-LMUL2: LV(REG): Found max usage: 2 item
-; CHECK-LMUL2-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 3 registers
+; CHECK-LMUL2-LABEL: add
+; CHECK-LMUL2: LV(REG): VF = vscale x 4
+; CHECK-LMUL2-NEXT: LV(REG): Found max usage: 2 item
+; CHECK-LMUL2-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 6 registers
; CHECK-LMUL2-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 4 registers
; CHECK-LMUL2-NEXT: LV(REG): Found invariant usage: 1 item
; CHECK-LMUL2-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers
-; CHECK-LMUL4: LV(REG): Found max usage: 2 item
-; CHECK-LMUL4-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 3 registers
+; CHECK-LMUL4-LABEL: add
+; CHECK-LMUL4: LV(REG): VF = vscale x 8
+; CHECK-LMUL4-NEXT: LV(REG): Found max usage: 2 item
+; CHECK-LMUL4-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 6 registers
; CHECK-LMUL4-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 8 registers
; CHECK-LMUL4-NEXT: LV(REG): Found invariant usage: 1 item
; CHECK-LMUL4-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers
-; CHECK-LMUL8: LV(REG): Found max usage: 2 item
-; CHECK-LMUL8-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 3 registers
+; CHECK-LMUL8-LABEL: add
+; CHECK-LMUL8: LV(REG): VF = vscale x 16
+; CHECK-LMUL8-NEXT: LV(REG): Found max usage: 2 item
+; CHECK-LMUL8-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 6 registers
; CHECK-LMUL8-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 16 registers
; CHECK-LMUL8-NEXT: LV(REG): Found invariant usage: 1 item
; CHECK-LMUL8-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers
@@ -76,22 +80,26 @@ for.body:
}
define void @goo(ptr nocapture noundef %a, i32 noundef signext %n) {
-; CHECK-LABEL: goo
+; CHECK-SCALAR-LABEL: goo
; CHECK-SCALAR: LV(REG): VF = 1
; CHECK-SCALAR-NEXT: LV(REG): Found max usage: 1 item
; CHECK-SCALAR-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 3 registers
-; CHECK-LMUL1: LV(REG): Found max usage: 2 item
-; CHECK-LMUL1-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 2 registers
-; CHECK-LMUL1-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 1 registers
-; CHECK-LMUL2: LV(REG): Found max usage: 2 item
-; CHECK-LMUL2-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 2 registers
-; CHECK-LMUL2-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 2 registers
-; CHECK-LMUL4: LV(REG): Found max usage: 2 item
-; CHECK-LMUL4-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 2 registers
-; CHECK-LMUL4-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 4 registers
-; CHECK-LMUL8: LV(REG): Found max usage: 2 item
-; CHECK-LMUL8-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 2 registers
-; CHECK-LMUL8-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 8 registers
+; CHECK-LMUL1: LV(REG): VF = vscale x 2
+; CHECK-LMUL1-NEXT: LV(REG): Found max usage: 2 item
+; CHECK-LMUL1-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 6 registers
+; CHECK-LMUL1-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 2 registers
+; CHECK-LMUL2: LV(REG): VF = vscale x 4
+; CHECK-LMUL2-NEXT: LV(REG): Found max usage: 2 item
+; CHECK-LMUL2-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 6 registers
+; CHECK-LMUL2-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 4 registers
+; CHECK-LMUL4: LV(REG): VF = vscale x 8
+; CHECK-LMUL4-NEXT: LV(REG): Found max usage: 2 item
+; CHECK-LMUL4-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 6 registers
+; CHECK-LMUL4-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 8 registers
+; CHECK-LMUL8: LV(REG): VF = vscale x 16
+; CHECK-LMUL8-NEXT: LV(REG): Found max usage: 2 item
+; CHECK-LMUL8-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 6 registers
+; CHECK-LMUL8-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 16 registers
entry:
%cmp3 = icmp sgt i32 %n, 0
br i1 %cmp3, label %for.body.preheader, label %for.cond.cleanup
>From 57d26c53251cfb2437cc265037b4906c2843af37 Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Tue, 9 Sep 2025 12:33:00 +0800
Subject: [PATCH 3/6] Add flag to override setting
---
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index c0b1d7421af31..e9071a98683c2 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -393,6 +393,10 @@ static cl::opt<bool> EnableEarlyExitVectorization(
cl::desc(
"Enable vectorization of early exit loops with uncountable exits."));
+static cl::opt<bool> ConsiderRegPressure(
+ "vectorizer-consider-reg-pressure", cl::init(true), cl::Hidden,
+ cl::desc("Discard VFs if their register pressure is too high."));
+
// Likelyhood of bypassing the vectorized loop because there are zero trips left
// after prolog. See `emitIterationCountCheck`.
static constexpr uint32_t MinItersBypassWeights[] = {1, 127};
@@ -3702,6 +3706,9 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
bool LoopVectorizationCostModel::shouldConsiderRegPressureForVF(
ElementCount VF) {
+ if (ConsiderRegPressure.getNumOccurrences())
+ return ConsiderRegPressure;
+
if (TTI.shouldConsiderVectorizationRegPressure())
return true;
>From 9c2cc07923bd3398269ba644cf10919ba7c3663c Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Wed, 10 Sep 2025 22:17:07 +0800
Subject: [PATCH 4/6] Set flag default to false, add TODO
---
llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index aa461d36edc72..c328ff7881d0e 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -394,7 +394,7 @@ static cl::opt<bool> EnableEarlyExitVectorization(
"Enable vectorization of early exit loops with uncountable exits."));
static cl::opt<bool> ConsiderRegPressure(
- "vectorizer-consider-reg-pressure", cl::init(true), cl::Hidden,
+ "vectorizer-consider-reg-pressure", cl::init(false), cl::Hidden,
cl::desc("Discard VFs if their register pressure is too high."));
// Likelyhood of bypassing the vectorized loop because there are zero trips left
@@ -3700,6 +3700,8 @@ bool LoopVectorizationCostModel::shouldConsiderRegPressureForVF(
if (ConsiderRegPressure.getNumOccurrences())
return ConsiderRegPressure;
+ // TODO: We should eventually consider register pressure for all targets. The
+ // TTI hook is temporary whilst target-specific issues are being fixed.
if (TTI.shouldConsiderVectorizationRegPressure())
return true;
>From 9f620faf550390cdc175c07af4f5d13b672e57a2 Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Thu, 11 Sep 2025 11:36:52 +0800
Subject: [PATCH 5/6] Add tests for flag
---
.../LoopVectorize/RISCV/reg-usage-prune-vf.ll | 94 +++++++++++++++++++
1 file changed, 94 insertions(+)
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-prune-vf.ll b/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-prune-vf.ll
index 72acffeb60c6c..42f12ec2e4859 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-prune-vf.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-prune-vf.ll
@@ -1,5 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 5
; RUN: opt -p loop-vectorize -mtriple riscv64 -mattr=+v -S < %s | FileCheck %s
+; RUN: opt -p loop-vectorize -mtriple riscv64 -mattr=+v -vectorizer-consider-reg-pressure=true -S < %s | FileCheck %s
+; RUN: opt -p loop-vectorize -mtriple riscv64 -mattr=+v -vectorizer-consider-reg-pressure=false -S < %s | FileCheck %s --check-prefix=NO-REG-PRESSURE-CHECK
define void @f(ptr noalias %p0, ptr noalias %p1, ptr noalias %p2) {
; CHECK-LABEL: define void @f(
@@ -94,6 +96,98 @@ define void @f(ptr noalias %p0, ptr noalias %p1, ptr noalias %p2) {
; CHECK: [[EXIT]]:
; CHECK-NEXT: ret void
;
+; NO-REG-PRESSURE-CHECK-LABEL: define void @f(
+; NO-REG-PRESSURE-CHECK-SAME: ptr noalias [[P0:%.*]], ptr noalias [[P1:%.*]], ptr noalias [[P2:%.*]]) #[[ATTR0:[0-9]+]] {
+; NO-REG-PRESSURE-CHECK-NEXT: [[ENTRY:.*:]]
+; NO-REG-PRESSURE-CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; NO-REG-PRESSURE-CHECK: [[VECTOR_PH]]:
+; NO-REG-PRESSURE-CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64()
+; NO-REG-PRESSURE-CHECK-NEXT: [[TMP1:%.*]] = mul <vscale x 8 x i64> [[TMP0]], splat (i64 2)
+; NO-REG-PRESSURE-CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 8 x i64> zeroinitializer, [[TMP1]]
+; NO-REG-PRESSURE-CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64()
+; NO-REG-PRESSURE-CHECK-NEXT: [[TMP3:%.*]] = mul <vscale x 8 x i64> [[TMP2]], splat (i64 3)
+; NO-REG-PRESSURE-CHECK-NEXT: [[INDUCTION1:%.*]] = add <vscale x 8 x i64> zeroinitializer, [[TMP3]]
+; NO-REG-PRESSURE-CHECK-NEXT: [[TMP4:%.*]] = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64()
+; NO-REG-PRESSURE-CHECK-NEXT: [[TMP5:%.*]] = mul <vscale x 8 x i64> [[TMP4]], splat (i64 4)
+; NO-REG-PRESSURE-CHECK-NEXT: [[INDUCTION2:%.*]] = add <vscale x 8 x i64> zeroinitializer, [[TMP5]]
+; NO-REG-PRESSURE-CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; NO-REG-PRESSURE-CHECK: [[VECTOR_BODY]]:
+; NO-REG-PRESSURE-CHECK-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; NO-REG-PRESSURE-CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 8 x i64> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; NO-REG-PRESSURE-CHECK-NEXT: [[VEC_IND3:%.*]] = phi <vscale x 8 x i64> [ [[INDUCTION1]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT11:%.*]], %[[VECTOR_BODY]] ]
+; NO-REG-PRESSURE-CHECK-NEXT: [[VEC_IND4:%.*]] = phi <vscale x 8 x i64> [ [[INDUCTION2]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT12:%.*]], %[[VECTOR_BODY]] ]
+; NO-REG-PRESSURE-CHECK-NEXT: [[AVL:%.*]] = phi i64 [ 1025, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; NO-REG-PRESSURE-CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 8, i1 true)
+; NO-REG-PRESSURE-CHECK-NEXT: [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
+; NO-REG-PRESSURE-CHECK-NEXT: [[TMP8:%.*]] = mul i64 4, [[TMP7]]
+; NO-REG-PRESSURE-CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP8]], i64 0
+; NO-REG-PRESSURE-CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+; NO-REG-PRESSURE-CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP6]] to i64
+; NO-REG-PRESSURE-CHECK-NEXT: [[TMP10:%.*]] = mul i64 3, [[TMP9]]
+; NO-REG-PRESSURE-CHECK-NEXT: [[BROADCAST_SPLATINSERT5:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP10]], i64 0
+; NO-REG-PRESSURE-CHECK-NEXT: [[BROADCAST_SPLAT6:%.*]] = shufflevector <vscale x 8 x i64> [[BROADCAST_SPLATINSERT5]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+; NO-REG-PRESSURE-CHECK-NEXT: [[TMP11:%.*]] = zext i32 [[TMP6]] to i64
+; NO-REG-PRESSURE-CHECK-NEXT: [[TMP12:%.*]] = mul i64 2, [[TMP11]]
+; NO-REG-PRESSURE-CHECK-NEXT: [[BROADCAST_SPLATINSERT7:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP12]], i64 0
+; NO-REG-PRESSURE-CHECK-NEXT: [[BROADCAST_SPLAT8:%.*]] = shufflevector <vscale x 8 x i64> [[BROADCAST_SPLATINSERT7]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+; NO-REG-PRESSURE-CHECK-NEXT: [[TMP13:%.*]] = sub <vscale x 8 x i64> [[VEC_IND]], splat (i64 1)
+; NO-REG-PRESSURE-CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[P0]], <vscale x 8 x i64> [[TMP13]]
+; NO-REG-PRESSURE-CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 8 x i8> @llvm.vp.gather.nxv8i8.nxv8p0(<vscale x 8 x ptr> align 1 [[TMP14]], <vscale x 8 x i1> splat (i1 true), i32 [[TMP6]])
+; NO-REG-PRESSURE-CHECK-NEXT: [[TMP15:%.*]] = sub <vscale x 8 x i64> [[VEC_IND3]], splat (i64 1)
+; NO-REG-PRESSURE-CHECK-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[P0]], <vscale x 8 x i64> [[TMP15]]
+; NO-REG-PRESSURE-CHECK-NEXT: [[WIDE_MASKED_GATHER9:%.*]] = call <vscale x 8 x i8> @llvm.vp.gather.nxv8i8.nxv8p0(<vscale x 8 x ptr> align 1 [[TMP16]], <vscale x 8 x i1> splat (i1 true), i32 [[TMP6]])
+; NO-REG-PRESSURE-CHECK-NEXT: [[TMP17:%.*]] = sub <vscale x 8 x i64> [[VEC_IND4]], splat (i64 1)
+; NO-REG-PRESSURE-CHECK-NEXT: [[TMP18:%.*]] = getelementptr i8, ptr [[P0]], <vscale x 8 x i64> [[TMP17]]
+; NO-REG-PRESSURE-CHECK-NEXT: [[WIDE_MASKED_GATHER10:%.*]] = call <vscale x 8 x i8> @llvm.vp.gather.nxv8i8.nxv8p0(<vscale x 8 x ptr> align 1 [[TMP18]], <vscale x 8 x i1> splat (i1 true), i32 [[TMP6]])
+; NO-REG-PRESSURE-CHECK-NEXT: [[TMP19:%.*]] = mul i64 [[EVL_BASED_IV]], 3
+; NO-REG-PRESSURE-CHECK-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[P1]], i64 [[TMP19]]
+; NO-REG-PRESSURE-CHECK-NEXT: [[TMP21:%.*]] = getelementptr i8, ptr [[TMP20]], i8 0
+; NO-REG-PRESSURE-CHECK-NEXT: [[INTERLEAVE_EVL:%.*]] = mul nuw nsw i32 [[TMP6]], 3
+; NO-REG-PRESSURE-CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = call <vscale x 24 x i8> @llvm.vector.interleave3.nxv24i8(<vscale x 8 x i8> [[WIDE_MASKED_GATHER]], <vscale x 8 x i8> [[WIDE_MASKED_GATHER9]], <vscale x 8 x i8> [[WIDE_MASKED_GATHER10]])
+; NO-REG-PRESSURE-CHECK-NEXT: call void @llvm.vp.store.nxv24i8.p0(<vscale x 24 x i8> [[INTERLEAVED_VEC]], ptr align 1 [[TMP21]], <vscale x 24 x i1> splat (i1 true), i32 [[INTERLEAVE_EVL]])
+; NO-REG-PRESSURE-CHECK-NEXT: [[TMP22:%.*]] = zext i32 [[TMP6]] to i64
+; NO-REG-PRESSURE-CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP22]], [[EVL_BASED_IV]]
+; NO-REG-PRESSURE-CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP22]]
+; NO-REG-PRESSURE-CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 8 x i64> [[VEC_IND]], [[BROADCAST_SPLAT8]]
+; NO-REG-PRESSURE-CHECK-NEXT: [[VEC_IND_NEXT11]] = add <vscale x 8 x i64> [[VEC_IND3]], [[BROADCAST_SPLAT6]]
+; NO-REG-PRESSURE-CHECK-NEXT: [[VEC_IND_NEXT12]] = add <vscale x 8 x i64> [[VEC_IND4]], [[BROADCAST_SPLAT]]
+; NO-REG-PRESSURE-CHECK-NEXT: [[TMP23:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
+; NO-REG-PRESSURE-CHECK-NEXT: br i1 [[TMP23]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; NO-REG-PRESSURE-CHECK: [[MIDDLE_BLOCK]]:
+; NO-REG-PRESSURE-CHECK-NEXT: br label %[[EXIT:.*]]
+; NO-REG-PRESSURE-CHECK: [[SCALAR_PH]]:
+; NO-REG-PRESSURE-CHECK-NEXT: br label %[[LOOP:.*]]
+; NO-REG-PRESSURE-CHECK: [[LOOP]]:
+; NO-REG-PRESSURE-CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; NO-REG-PRESSURE-CHECK-NEXT: [[WIDE_IV_0:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[WIDE_IV_0_NEXT:%.*]], %[[LOOP]] ]
+; NO-REG-PRESSURE-CHECK-NEXT: [[WIDE_IV_1:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[WIDE_IV_1_NEXT:%.*]], %[[LOOP]] ]
+; NO-REG-PRESSURE-CHECK-NEXT: [[WIDE_IV_2:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[WIDE_IV_2_NEXT:%.*]], %[[LOOP]] ]
+; NO-REG-PRESSURE-CHECK-NEXT: [[WIDE_IV_0_SUB:%.*]] = sub i64 [[WIDE_IV_0]], 1
+; NO-REG-PRESSURE-CHECK-NEXT: [[A_GEP0:%.*]] = getelementptr i8, ptr [[P0]], i64 [[WIDE_IV_0_SUB]]
+; NO-REG-PRESSURE-CHECK-NEXT: [[A:%.*]] = load i8, ptr [[A_GEP0]], align 1
+; NO-REG-PRESSURE-CHECK-NEXT: [[WIDE_IV_1_SUB:%.*]] = sub i64 [[WIDE_IV_1]], 1
+; NO-REG-PRESSURE-CHECK-NEXT: [[B_GEP0:%.*]] = getelementptr i8, ptr [[P0]], i64 [[WIDE_IV_1_SUB]]
+; NO-REG-PRESSURE-CHECK-NEXT: [[B:%.*]] = load i8, ptr [[B_GEP0]], align 1
+; NO-REG-PRESSURE-CHECK-NEXT: [[WIDE_IV_2_SUB:%.*]] = sub i64 [[WIDE_IV_2]], 1
+; NO-REG-PRESSURE-CHECK-NEXT: [[C_GEP0:%.*]] = getelementptr i8, ptr [[P0]], i64 [[WIDE_IV_2_SUB]]
+; NO-REG-PRESSURE-CHECK-NEXT: [[C:%.*]] = load i8, ptr [[C_GEP0]], align 1
+; NO-REG-PRESSURE-CHECK-NEXT: [[IV_MUL:%.*]] = mul i64 [[IV]], 3
+; NO-REG-PRESSURE-CHECK-NEXT: [[BASE:%.*]] = getelementptr i8, ptr [[P1]], i64 [[IV_MUL]]
+; NO-REG-PRESSURE-CHECK-NEXT: [[A_GEP1:%.*]] = getelementptr i8, ptr [[BASE]], i8 0
+; NO-REG-PRESSURE-CHECK-NEXT: store i8 [[A]], ptr [[A_GEP1]], align 1
+; NO-REG-PRESSURE-CHECK-NEXT: [[B_GEP1:%.*]] = getelementptr i8, ptr [[BASE]], i8 1
+; NO-REG-PRESSURE-CHECK-NEXT: store i8 [[B]], ptr [[B_GEP1]], align 1
+; NO-REG-PRESSURE-CHECK-NEXT: [[C_GEP1:%.*]] = getelementptr i8, ptr [[BASE]], i8 2
+; NO-REG-PRESSURE-CHECK-NEXT: store i8 [[C]], ptr [[C_GEP1]], align 1
+; NO-REG-PRESSURE-CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
+; NO-REG-PRESSURE-CHECK-NEXT: [[WIDE_IV_0_NEXT]] = add i64 [[WIDE_IV_0]], 2
+; NO-REG-PRESSURE-CHECK-NEXT: [[WIDE_IV_1_NEXT]] = add i64 [[WIDE_IV_1]], 3
+; NO-REG-PRESSURE-CHECK-NEXT: [[WIDE_IV_2_NEXT]] = add i64 [[WIDE_IV_2]], 4
+; NO-REG-PRESSURE-CHECK-NEXT: [[DONE:%.*]] = icmp eq i64 [[IV]], 1024
+; NO-REG-PRESSURE-CHECK-NEXT: br i1 [[DONE]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP4:![0-9]+]]
+; NO-REG-PRESSURE-CHECK: [[EXIT]]:
+; NO-REG-PRESSURE-CHECK-NEXT: ret void
+;
entry:
br label %loop
>From e11d350814adf920c4cfb36856d1d3f671da9f75 Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Fri, 12 Sep 2025 13:49:37 +0800
Subject: [PATCH 6/6] Update comment
---
llvm/include/llvm/Analysis/TargetTransformInfo.h | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index eb2013fd1f329..a6f4e51e258ab 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -1847,8 +1847,8 @@ class TargetTransformInfo {
/// otherwise scalar epilogue loop.
LLVM_ABI bool preferEpilogueVectorization() const;
- /// \returns True if the loop vectorizer should discard any VFs that may have
- /// too high of a register pressure as determined by getNumberOfRegisters.
+ /// \returns True if the loop vectorizer should discard any VFs where the
+ /// maximum register pressure exceeds getNumberOfRegisters.
LLVM_ABI bool shouldConsiderVectorizationRegPressure() const;
/// \returns True if the target wants to expand the given reduction intrinsic
More information about the llvm-commits
mailing list