[llvm] [VPlan] Always consider register pressure on RISC-V (PR #156951)

Thu Sep 11 22:51:30 PDT 2025

https://github.com/lukel97 updated https://github.com/llvm/llvm-project/pull/156951

>From 77df7b4bc901ddd2c5878a9ddd6c68e2e601c110 Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Fri, 5 Sep 2025 01:25:12 +0800
Subject: [PATCH 1/6] Precommit test

---
 .../LoopVectorize/RISCV/reg-usage-prune-vf.ll | 139 ++++++++++++++++++
 1 file changed, 139 insertions(+)
 create mode 100644 llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-prune-vf.ll

diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-prune-vf.ll b/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-prune-vf.ll
new file mode 100644
index 0000000000000..92f0a131e03d4
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-prune-vf.ll
@@ -0,0 +1,139 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 5
+; RUN: opt -p loop-vectorize -mtriple riscv64 -mattr=+v -S < %s | FileCheck %s
+
+define void @f(ptr noalias %p0, ptr noalias %p1, ptr noalias %p2) {
+; CHECK-LABEL: define void @f(
+; CHECK-SAME: ptr noalias [[P0:%.*]], ptr noalias [[P1:%.*]], ptr noalias [[P2:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK:       [[VECTOR_PH]]:
+; CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64()
+; CHECK-NEXT:    [[TMP1:%.*]] = mul <vscale x 8 x i64> [[TMP0]], splat (i64 2)
+; CHECK-NEXT:    [[INDUCTION:%.*]] = add <vscale x 8 x i64> zeroinitializer, [[TMP1]]
+; CHECK-NEXT:    [[TMP2:%.*]] = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64()
+; CHECK-NEXT:    [[TMP3:%.*]] = mul <vscale x 8 x i64> [[TMP2]], splat (i64 3)
+; CHECK-NEXT:    [[INDUCTION1:%.*]] = add <vscale x 8 x i64> zeroinitializer, [[TMP3]]
+; CHECK-NEXT:    [[TMP4:%.*]] = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64()
+; CHECK-NEXT:    [[TMP5:%.*]] = mul <vscale x 8 x i64> [[TMP4]], splat (i64 4)
+; CHECK-NEXT:    [[INDUCTION2:%.*]] = add <vscale x 8 x i64> zeroinitializer, [[TMP5]]
+; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
+; CHECK:       [[VECTOR_BODY]]:
+; CHECK-NEXT:    [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <vscale x 8 x i64> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_IND3:%.*]] = phi <vscale x 8 x i64> [ [[INDUCTION1]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT11:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_IND4:%.*]] = phi <vscale x 8 x i64> [ [[INDUCTION2]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT12:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[AVL:%.*]] = phi i64 [ 1025, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[TMP6:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 8, i1 true)
+; CHECK-NEXT:    [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
+; CHECK-NEXT:    [[TMP8:%.*]] = mul i64 4, [[TMP7]]
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP8]], i64 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP9:%.*]] = zext i32 [[TMP6]] to i64
+; CHECK-NEXT:    [[TMP10:%.*]] = mul i64 3, [[TMP9]]
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT5:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP10]], i64 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT6:%.*]] = shufflevector <vscale x 8 x i64> [[BROADCAST_SPLATINSERT5]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP11:%.*]] = zext i32 [[TMP6]] to i64
+; CHECK-NEXT:    [[TMP12:%.*]] = mul i64 2, [[TMP11]]
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT7:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP12]], i64 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT8:%.*]] = shufflevector <vscale x 8 x i64> [[BROADCAST_SPLATINSERT7]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP13:%.*]] = sub <vscale x 8 x i64> [[VEC_IND]], splat (i64 1)
+; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr i8, ptr [[P0]], <vscale x 8 x i64> [[TMP13]]
+; CHECK-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 8 x i8> @llvm.vp.gather.nxv8i8.nxv8p0(<vscale x 8 x ptr> align 1 [[TMP14]], <vscale x 8 x i1> splat (i1 true), i32 [[TMP6]])
+; CHECK-NEXT:    [[TMP15:%.*]] = sub <vscale x 8 x i64> [[VEC_IND3]], splat (i64 1)
+; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr i8, ptr [[P0]], <vscale x 8 x i64> [[TMP15]]
+; CHECK-NEXT:    [[WIDE_MASKED_GATHER9:%.*]] = call <vscale x 8 x i8> @llvm.vp.gather.nxv8i8.nxv8p0(<vscale x 8 x ptr> align 1 [[TMP16]], <vscale x 8 x i1> splat (i1 true), i32 [[TMP6]])
+; CHECK-NEXT:    [[TMP17:%.*]] = sub <vscale x 8 x i64> [[VEC_IND4]], splat (i64 1)
+; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr i8, ptr [[P0]], <vscale x 8 x i64> [[TMP17]]
+; CHECK-NEXT:    [[WIDE_MASKED_GATHER10:%.*]] = call <vscale x 8 x i8> @llvm.vp.gather.nxv8i8.nxv8p0(<vscale x 8 x ptr> align 1 [[TMP18]], <vscale x 8 x i1> splat (i1 true), i32 [[TMP6]])
+; CHECK-NEXT:    [[TMP19:%.*]] = mul i64 [[EVL_BASED_IV]], 3
+; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr i8, ptr [[P1]], i64 [[TMP19]]
+; CHECK-NEXT:    [[TMP21:%.*]] = getelementptr i8, ptr [[TMP20]], i8 0
+; CHECK-NEXT:    [[INTERLEAVE_EVL:%.*]] = mul nuw nsw i32 [[TMP6]], 3
+; CHECK-NEXT:    [[INTERLEAVED_VEC:%.*]] = call <vscale x 24 x i8> @llvm.vector.interleave3.nxv24i8(<vscale x 8 x i8> [[WIDE_MASKED_GATHER]], <vscale x 8 x i8> [[WIDE_MASKED_GATHER9]], <vscale x 8 x i8> [[WIDE_MASKED_GATHER10]])
+; CHECK-NEXT:    call void @llvm.vp.store.nxv24i8.p0(<vscale x 24 x i8> [[INTERLEAVED_VEC]], ptr align 1 [[TMP21]], <vscale x 24 x i1> splat (i1 true), i32 [[INTERLEAVE_EVL]])
+; CHECK-NEXT:    [[TMP22:%.*]] = zext i32 [[TMP6]] to i64
+; CHECK-NEXT:    [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP22]], [[EVL_BASED_IV]]
+; CHECK-NEXT:    [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP22]]
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <vscale x 8 x i64> [[VEC_IND]], [[BROADCAST_SPLAT8]]
+; CHECK-NEXT:    [[VEC_IND_NEXT11]] = add <vscale x 8 x i64> [[VEC_IND3]], [[BROADCAST_SPLAT6]]
+; CHECK-NEXT:    [[VEC_IND_NEXT12]] = add <vscale x 8 x i64> [[VEC_IND4]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT:    [[TMP23:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
+; CHECK-NEXT:    br i1 [[TMP23]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK:       [[MIDDLE_BLOCK]]:
+; CHECK-NEXT:    br label %[[EXIT:.*]]
+; CHECK:       [[SCALAR_PH]]:
+; CHECK-NEXT:    br label %[[LOOP:.*]]
+; CHECK:       [[LOOP]]:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT:    [[WIDE_IV_0:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[WIDE_IV_0_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT:    [[WIDE_IV_1:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[WIDE_IV_1_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT:    [[WIDE_IV_2:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[WIDE_IV_2_NEXT:%.*]], %[[LOOP]] ]
+; CHECK-NEXT:    [[WIDE_IV_0_SUB:%.*]] = sub i64 [[WIDE_IV_0]], 1
+; CHECK-NEXT:    [[A_GEP0:%.*]] = getelementptr i8, ptr [[P0]], i64 [[WIDE_IV_0_SUB]]
+; CHECK-NEXT:    [[A:%.*]] = load i8, ptr [[A_GEP0]], align 1
+; CHECK-NEXT:    [[WIDE_IV_1_SUB:%.*]] = sub i64 [[WIDE_IV_1]], 1
+; CHECK-NEXT:    [[B_GEP0:%.*]] = getelementptr i8, ptr [[P0]], i64 [[WIDE_IV_1_SUB]]
+; CHECK-NEXT:    [[B:%.*]] = load i8, ptr [[B_GEP0]], align 1
+; CHECK-NEXT:    [[WIDE_IV_2_SUB:%.*]] = sub i64 [[WIDE_IV_2]], 1
+; CHECK-NEXT:    [[C_GEP0:%.*]] = getelementptr i8, ptr [[P0]], i64 [[WIDE_IV_2_SUB]]
+; CHECK-NEXT:    [[C:%.*]] = load i8, ptr [[C_GEP0]], align 1
+; CHECK-NEXT:    [[IV_MUL:%.*]] = mul i64 [[IV]], 3
+; CHECK-NEXT:    [[BASE:%.*]] = getelementptr i8, ptr [[P1]], i64 [[IV_MUL]]
+; CHECK-NEXT:    [[A_GEP1:%.*]] = getelementptr i8, ptr [[BASE]], i8 0
+; CHECK-NEXT:    store i8 [[A]], ptr [[A_GEP1]], align 1
+; CHECK-NEXT:    [[B_GEP1:%.*]] = getelementptr i8, ptr [[BASE]], i8 1
+; CHECK-NEXT:    store i8 [[B]], ptr [[B_GEP1]], align 1
+; CHECK-NEXT:    [[C_GEP1:%.*]] = getelementptr i8, ptr [[BASE]], i8 2
+; CHECK-NEXT:    store i8 [[C]], ptr [[C_GEP1]], align 1
+; CHECK-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
+; CHECK-NEXT:    [[WIDE_IV_0_NEXT]] = add i64 [[WIDE_IV_0]], 2
+; CHECK-NEXT:    [[WIDE_IV_1_NEXT]] = add i64 [[WIDE_IV_1]], 3
+; CHECK-NEXT:    [[WIDE_IV_2_NEXT]] = add i64 [[WIDE_IV_2]], 4
+; CHECK-NEXT:    [[DONE:%.*]] = icmp eq i64 [[IV]], 1024
+; CHECK-NEXT:    br i1 [[DONE]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK:       [[EXIT]]:
+; CHECK-NEXT:    ret void
+;
+entry:
+  br label %loop
+
+loop:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+  %wide.iv.0 = phi i64 [ 0, %entry ], [ %wide.iv.0.next, %loop ]
+  %wide.iv.1 = phi i64 [ 0, %entry ], [ %wide.iv.1.next, %loop ]
+  %wide.iv.2 = phi i64 [ 0, %entry ], [ %wide.iv.2.next, %loop ]
+
+  %wide.iv.0.sub = sub i64 %wide.iv.0, 1
+  %a.gep0 = getelementptr i8, ptr %p0, i64 %wide.iv.0.sub
+  %a = load i8, ptr %a.gep0
+
+  %wide.iv.1.sub = sub i64 %wide.iv.1, 1
+  %b.gep0 = getelementptr i8, ptr %p0, i64 %wide.iv.1.sub
+  %b = load i8, ptr %b.gep0
+
+  %wide.iv.2.sub = sub i64 %wide.iv.2, 1
+  %c.gep0 = getelementptr i8, ptr %p0, i64 %wide.iv.2.sub
+  %c = load i8, ptr %c.gep0
+
+  %iv.mul = mul i64 %iv, 3
+  %base = getelementptr i8, ptr %p1, i64 %iv.mul
+
+  %a.gep1 = getelementptr i8, ptr %base, i8 0
+  store i8 %a, ptr %a.gep1
+
+  %b.gep1 = getelementptr i8, ptr %base, i8 1
+  store i8 %b, ptr %b.gep1
+
+  %c.gep1 = getelementptr i8, ptr %base, i8 2
+  store i8 %c, ptr %c.gep1
+
+  %iv.next = add i64 %iv, 1
+  %wide.iv.0.next = add i64 %wide.iv.0, 2
+  %wide.iv.1.next = add i64 %wide.iv.1, 3
+  %wide.iv.2.next = add i64 %wide.iv.2, 4
+  %done = icmp eq i64 %iv, 1024
+  br i1 %done, label %exit, label %loop
+
+exit:
+  ret void
+}

>From b6a52d8140a491187babb61f46ec4817311dadee Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Fri, 5 Sep 2025 02:53:14 +0800
Subject: [PATCH 2/6] [VPlan] Always consider register pressure on RISC-V

---
 .../llvm/Analysis/TargetTransformInfo.h       |  4 ++
 .../llvm/Analysis/TargetTransformInfoImpl.h   |  2 +
 llvm/lib/Analysis/TargetTransformInfo.cpp     |  4 ++
 .../Target/RISCV/RISCVTargetTransformInfo.h   |  2 +
 .../Transforms/Vectorize/LoopVectorize.cpp    |  3 +
 .../LoopVectorize/RISCV/reg-usage-bf16.ll     | 11 ++-
 .../LoopVectorize/RISCV/reg-usage-f16.ll      | 21 +++---
 .../LoopVectorize/RISCV/reg-usage-prune-vf.ll | 66 +++++++++---------
 .../LoopVectorize/RISCV/reg-usage.ll          | 68 +++++++++++--------
 9 files changed, 100 insertions(+), 81 deletions(-)

diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 1f95d13815f8e..86ecce56f3128 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -1843,6 +1843,10 @@ class TargetTransformInfo {
   /// otherwise scalar epilogue loop.
   LLVM_ABI bool preferEpilogueVectorization() const;
 
+  /// \returns True if the loop vectorizer should discard any VFs that may have
+  /// too high of a register pressure as determined by getNumberOfRegisters.
+  LLVM_ABI bool shouldConsiderVectorizationRegPressure() const;
+
   /// \returns True if the target wants to expand the given reduction intrinsic
   /// into a shuffle sequence.
   LLVM_ABI bool shouldExpandReduction(const IntrinsicInst *II) const;
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index b58386b94bba4..566e1cf51631a 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -1105,6 +1105,8 @@ class TargetTransformInfoImplBase {
 
   virtual bool preferEpilogueVectorization() const { return true; }
 
+  virtual bool shouldConsiderVectorizationRegPressure() const { return false; }
+
   virtual bool shouldExpandReduction(const IntrinsicInst *II) const {
     return true;
   }
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index 899806bf37348..09b50c5270e57 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -1425,6 +1425,10 @@ bool TargetTransformInfo::preferEpilogueVectorization() const {
   return TTIImpl->preferEpilogueVectorization();
 }
 
+bool TargetTransformInfo::shouldConsiderVectorizationRegPressure() const {
+  return TTIImpl->shouldConsiderVectorizationRegPressure();
+}
+
 TargetTransformInfo::VPLegalization
 TargetTransformInfo::getVPLegalizationStrategy(const VPIntrinsic &VPI) const {
   return TTIImpl->getVPLegalizationStrategy(VPI);
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
index 6bd7d51daff69..47e0a250d285a 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h
@@ -141,6 +141,8 @@ class RISCVTTIImpl final : public BasicTTIImplBase<RISCVTTIImpl> {
     return false;
   }
 
+  bool shouldConsiderVectorizationRegPressure() const override { return true; }
+
   InstructionCost
   getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
                         unsigned AddressSpace,
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 5a1d1e75e2d5d..c0b1d7421af31 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -3702,6 +3702,9 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
 
 bool LoopVectorizationCostModel::shouldConsiderRegPressureForVF(
     ElementCount VF) {
+  if (TTI.shouldConsiderVectorizationRegPressure())
+    return true;
+
   if (!useMaxBandwidth(VF.isScalable()
                            ? TargetTransformInfo::RGK_ScalableVector
                            : TargetTransformInfo::RGK_FixedWidthVector))
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-bf16.ll b/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-bf16.ll
index 346f1cbcc7e3d..097f05d222cf6 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-bf16.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-bf16.ll
@@ -1,14 +1,11 @@
 ; REQUIRES: asserts
-; RUN: opt -passes=loop-vectorize -mtriple riscv64 -mattr=+v,+zvfbfmin -prefer-predicate-over-epilogue=scalar-epilogue -debug-only=loop-vectorize,vplan --disable-output -riscv-v-register-bit-width-lmul=1 -S < %s 2>&1 | FileCheck %s
-
-; TODO: -prefer-predicate-over-epilogue=scalar-epilogue was added to allow
-; unrolling. Calculate register pressure for all VPlans, not just unrolled ones,
-; and remove.
+; RUN: opt -passes=loop-vectorize -mtriple riscv64 -mattr=+v,+zvfbfmin -debug-only=loop-vectorize,vplan --disable-output -riscv-v-register-bit-width-lmul=1 -S < %s 2>&1 | FileCheck %s
 
 define void @add(ptr noalias nocapture readonly %src1, ptr noalias nocapture readonly %src2, i32 signext %size, ptr noalias nocapture writeonly %result) {
 ; CHECK-LABEL: add
-; CHECK:  LV(REG): Found max usage: 2 item
-; CHECK-NEXT:  LV(REG): RegisterClass: RISCV::GPRRC, 3 registers
+; CHECK:       LV(REG): VF = vscale x 4
+; CHECK-NEXT:  LV(REG): Found max usage: 2 item
+; CHECK-NEXT:  LV(REG): RegisterClass: RISCV::GPRRC, 6 registers
 ; CHECK-NEXT:  LV(REG): RegisterClass: RISCV::VRRC, 4 registers
 ; CHECK-NEXT:  LV(REG): Found invariant usage: 1 item
 ; CHECK-NEXT:  LV(REG): RegisterClass: RISCV::GPRRC, 1 registers
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-f16.ll b/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-f16.ll
index b25bc485a9ca7..8bbfdf39a0624 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-f16.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-f16.ll
@@ -1,20 +1,19 @@
 ; REQUIRES: asserts
-; RUN: opt -passes=loop-vectorize -mtriple riscv64 -mattr=+v,+zvfh -prefer-predicate-over-epilogue=scalar-epilogue -debug-only=loop-vectorize,vplan --disable-output -riscv-v-register-bit-width-lmul=1 -S < %s 2>&1 | FileCheck %s --check-prefix=ZVFH
-; RUN: opt -passes=loop-vectorize -mtriple riscv64 -mattr=+v,+zvfhmin -prefer-predicate-over-epilogue=scalar-epilogue -debug-only=loop-vectorize,vplan --disable-output -riscv-v-register-bit-width-lmul=1 -S < %s 2>&1 | FileCheck %s --check-prefix=ZVFHMIN
-
-; TODO: -prefer-predicate-over-epilogue=scalar-epilogue was added to allow
-; unrolling. Calculate register pressure for all VPlans, not just unrolled ones,
-; and remove.
+; RUN: opt -passes=loop-vectorize -mtriple riscv64 -mattr=+v,+zvfh -debug-only=loop-vectorize,vplan --disable-output -riscv-v-register-bit-width-lmul=1 -S < %s 2>&1 | FileCheck %s --check-prefix=ZVFH
+; RUN: opt -passes=loop-vectorize -mtriple riscv64 -mattr=+v,+zvfhmin -debug-only=loop-vectorize,vplan --disable-output -riscv-v-register-bit-width-lmul=1 -S < %s 2>&1 | FileCheck %s --check-prefix=ZVFHMIN
 
 define void @add(ptr noalias nocapture readonly %src1, ptr noalias nocapture readonly %src2, i32 signext %size, ptr noalias nocapture writeonly %result) {
-; CHECK-LABEL: add
-; ZVFH:  LV(REG): Found max usage: 2 item
-; ZVFH-NEXT:  LV(REG): RegisterClass: RISCV::GPRRC, 3 registers
+; ZVFH-LABEL: add
+; ZVFH:       LV(REG): VF = vscale x 4
+; ZVFH-NEXT:  LV(REG): Found max usage: 2 item
+; ZVFH-NEXT:  LV(REG): RegisterClass: RISCV::GPRRC, 6 registers
 ; ZVFH-NEXT:  LV(REG): RegisterClass: RISCV::VRRC, 2 registers
 ; ZVFH-NEXT:  LV(REG): Found invariant usage: 1 item
 ; ZVFH-NEXT:  LV(REG): RegisterClass: RISCV::GPRRC, 1 registers
-; ZVFHMIN:  LV(REG): Found max usage: 2 item
-; ZVFHMIN-NEXT:  LV(REG): RegisterClass: RISCV::GPRRC, 3 registers
+; ZVFHMIN-LABEL: add
+; ZVFHMIN:       LV(REG): VF = vscale x 4
+; ZVFHMIN-NEXT:  LV(REG): Found max usage: 2 item
+; ZVFHMIN-NEXT:  LV(REG): RegisterClass: RISCV::GPRRC, 6 registers
 ; ZVFHMIN-NEXT:  LV(REG): RegisterClass: RISCV::VRRC, 4 registers
 ; ZVFHMIN-NEXT:  LV(REG): Found invariant usage: 1 item
 ; ZVFHMIN-NEXT:  LV(REG): RegisterClass: RISCV::GPRRC, 1 registers
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-prune-vf.ll b/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-prune-vf.ll
index 92f0a131e03d4..72acffeb60c6c 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-prune-vf.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-prune-vf.ll
@@ -7,56 +7,56 @@ define void @f(ptr noalias %p0, ptr noalias %p1, ptr noalias %p2) {
 ; CHECK-NEXT:  [[ENTRY:.*:]]
 ; CHECK-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
 ; CHECK:       [[VECTOR_PH]]:
-; CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64()
-; CHECK-NEXT:    [[TMP1:%.*]] = mul <vscale x 8 x i64> [[TMP0]], splat (i64 2)
-; CHECK-NEXT:    [[INDUCTION:%.*]] = add <vscale x 8 x i64> zeroinitializer, [[TMP1]]
-; CHECK-NEXT:    [[TMP2:%.*]] = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64()
-; CHECK-NEXT:    [[TMP3:%.*]] = mul <vscale x 8 x i64> [[TMP2]], splat (i64 3)
-; CHECK-NEXT:    [[INDUCTION1:%.*]] = add <vscale x 8 x i64> zeroinitializer, [[TMP3]]
-; CHECK-NEXT:    [[TMP4:%.*]] = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64()
-; CHECK-NEXT:    [[TMP5:%.*]] = mul <vscale x 8 x i64> [[TMP4]], splat (i64 4)
-; CHECK-NEXT:    [[INDUCTION2:%.*]] = add <vscale x 8 x i64> zeroinitializer, [[TMP5]]
+; CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
+; CHECK-NEXT:    [[TMP1:%.*]] = mul <vscale x 4 x i64> [[TMP0]], splat (i64 2)
+; CHECK-NEXT:    [[INDUCTION:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP1]]
+; CHECK-NEXT:    [[TMP2:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
+; CHECK-NEXT:    [[TMP3:%.*]] = mul <vscale x 4 x i64> [[TMP2]], splat (i64 3)
+; CHECK-NEXT:    [[INDUCTION1:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP3]]
+; CHECK-NEXT:    [[TMP4:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
+; CHECK-NEXT:    [[TMP5:%.*]] = mul <vscale x 4 x i64> [[TMP4]], splat (i64 4)
+; CHECK-NEXT:    [[INDUCTION2:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP5]]
 ; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
 ; CHECK:       [[VECTOR_BODY]]:
 ; CHECK-NEXT:    [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <vscale x 8 x i64> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[VEC_IND3:%.*]] = phi <vscale x 8 x i64> [ [[INDUCTION1]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT11:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[VEC_IND4:%.*]] = phi <vscale x 8 x i64> [ [[INDUCTION2]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT12:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <vscale x 4 x i64> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_IND3:%.*]] = phi <vscale x 4 x i64> [ [[INDUCTION1]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT11:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_IND4:%.*]] = phi <vscale x 4 x i64> [ [[INDUCTION2]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT12:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[AVL:%.*]] = phi i64 [ 1025, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP6:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 8, i1 true)
+; CHECK-NEXT:    [[TMP6:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 4, i1 true)
 ; CHECK-NEXT:    [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
 ; CHECK-NEXT:    [[TMP8:%.*]] = mul i64 4, [[TMP7]]
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP8]], i64 0
-; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP8]], i64 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP9:%.*]] = zext i32 [[TMP6]] to i64
 ; CHECK-NEXT:    [[TMP10:%.*]] = mul i64 3, [[TMP9]]
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT5:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP10]], i64 0
-; CHECK-NEXT:    [[BROADCAST_SPLAT6:%.*]] = shufflevector <vscale x 8 x i64> [[BROADCAST_SPLATINSERT5]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT5:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP10]], i64 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT6:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT5]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
 ; CHECK-NEXT:    [[TMP11:%.*]] = zext i32 [[TMP6]] to i64
 ; CHECK-NEXT:    [[TMP12:%.*]] = mul i64 2, [[TMP11]]
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT7:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP12]], i64 0
-; CHECK-NEXT:    [[BROADCAST_SPLAT8:%.*]] = shufflevector <vscale x 8 x i64> [[BROADCAST_SPLATINSERT7]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP13:%.*]] = sub <vscale x 8 x i64> [[VEC_IND]], splat (i64 1)
-; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr i8, ptr [[P0]], <vscale x 8 x i64> [[TMP13]]
-; CHECK-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 8 x i8> @llvm.vp.gather.nxv8i8.nxv8p0(<vscale x 8 x ptr> align 1 [[TMP14]], <vscale x 8 x i1> splat (i1 true), i32 [[TMP6]])
-; CHECK-NEXT:    [[TMP15:%.*]] = sub <vscale x 8 x i64> [[VEC_IND3]], splat (i64 1)
-; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr i8, ptr [[P0]], <vscale x 8 x i64> [[TMP15]]
-; CHECK-NEXT:    [[WIDE_MASKED_GATHER9:%.*]] = call <vscale x 8 x i8> @llvm.vp.gather.nxv8i8.nxv8p0(<vscale x 8 x ptr> align 1 [[TMP16]], <vscale x 8 x i1> splat (i1 true), i32 [[TMP6]])
-; CHECK-NEXT:    [[TMP17:%.*]] = sub <vscale x 8 x i64> [[VEC_IND4]], splat (i64 1)
-; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr i8, ptr [[P0]], <vscale x 8 x i64> [[TMP17]]
-; CHECK-NEXT:    [[WIDE_MASKED_GATHER10:%.*]] = call <vscale x 8 x i8> @llvm.vp.gather.nxv8i8.nxv8p0(<vscale x 8 x ptr> align 1 [[TMP18]], <vscale x 8 x i1> splat (i1 true), i32 [[TMP6]])
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT7:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP12]], i64 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT8:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT7]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
+; CHECK-NEXT:    [[TMP13:%.*]] = sub <vscale x 4 x i64> [[VEC_IND]], splat (i64 1)
+; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr i8, ptr [[P0]], <vscale x 4 x i64> [[TMP13]]
+; CHECK-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x i8> @llvm.vp.gather.nxv4i8.nxv4p0(<vscale x 4 x ptr> align 1 [[TMP14]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP6]])
+; CHECK-NEXT:    [[TMP15:%.*]] = sub <vscale x 4 x i64> [[VEC_IND3]], splat (i64 1)
+; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr i8, ptr [[P0]], <vscale x 4 x i64> [[TMP15]]
+; CHECK-NEXT:    [[WIDE_MASKED_GATHER9:%.*]] = call <vscale x 4 x i8> @llvm.vp.gather.nxv4i8.nxv4p0(<vscale x 4 x ptr> align 1 [[TMP16]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP6]])
+; CHECK-NEXT:    [[TMP17:%.*]] = sub <vscale x 4 x i64> [[VEC_IND4]], splat (i64 1)
+; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr i8, ptr [[P0]], <vscale x 4 x i64> [[TMP17]]
+; CHECK-NEXT:    [[WIDE_MASKED_GATHER10:%.*]] = call <vscale x 4 x i8> @llvm.vp.gather.nxv4i8.nxv4p0(<vscale x 4 x ptr> align 1 [[TMP18]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP6]])
 ; CHECK-NEXT:    [[TMP19:%.*]] = mul i64 [[EVL_BASED_IV]], 3
 ; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr i8, ptr [[P1]], i64 [[TMP19]]
 ; CHECK-NEXT:    [[TMP21:%.*]] = getelementptr i8, ptr [[TMP20]], i8 0
 ; CHECK-NEXT:    [[INTERLEAVE_EVL:%.*]] = mul nuw nsw i32 [[TMP6]], 3
-; CHECK-NEXT:    [[INTERLEAVED_VEC:%.*]] = call <vscale x 24 x i8> @llvm.vector.interleave3.nxv24i8(<vscale x 8 x i8> [[WIDE_MASKED_GATHER]], <vscale x 8 x i8> [[WIDE_MASKED_GATHER9]], <vscale x 8 x i8> [[WIDE_MASKED_GATHER10]])
-; CHECK-NEXT:    call void @llvm.vp.store.nxv24i8.p0(<vscale x 24 x i8> [[INTERLEAVED_VEC]], ptr align 1 [[TMP21]], <vscale x 24 x i1> splat (i1 true), i32 [[INTERLEAVE_EVL]])
+; CHECK-NEXT:    [[INTERLEAVED_VEC:%.*]] = call <vscale x 12 x i8> @llvm.vector.interleave3.nxv12i8(<vscale x 4 x i8> [[WIDE_MASKED_GATHER]], <vscale x 4 x i8> [[WIDE_MASKED_GATHER9]], <vscale x 4 x i8> [[WIDE_MASKED_GATHER10]])
+; CHECK-NEXT:    call void @llvm.vp.store.nxv12i8.p0(<vscale x 12 x i8> [[INTERLEAVED_VEC]], ptr align 1 [[TMP21]], <vscale x 12 x i1> splat (i1 true), i32 [[INTERLEAVE_EVL]])
 ; CHECK-NEXT:    [[TMP22:%.*]] = zext i32 [[TMP6]] to i64
 ; CHECK-NEXT:    [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP22]], [[EVL_BASED_IV]]
 ; CHECK-NEXT:    [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP22]]
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <vscale x 8 x i64> [[VEC_IND]], [[BROADCAST_SPLAT8]]
-; CHECK-NEXT:    [[VEC_IND_NEXT11]] = add <vscale x 8 x i64> [[VEC_IND3]], [[BROADCAST_SPLAT6]]
-; CHECK-NEXT:    [[VEC_IND_NEXT12]] = add <vscale x 8 x i64> [[VEC_IND4]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[BROADCAST_SPLAT8]]
+; CHECK-NEXT:    [[VEC_IND_NEXT11]] = add <vscale x 4 x i64> [[VEC_IND3]], [[BROADCAST_SPLAT6]]
+; CHECK-NEXT:    [[VEC_IND_NEXT12]] = add <vscale x 4 x i64> [[VEC_IND4]], [[BROADCAST_SPLAT]]
 ; CHECK-NEXT:    [[TMP23:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
 ; CHECK-NEXT:    br i1 [[TMP23]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       [[MIDDLE_BLOCK]]:
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage.ll b/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage.ll
index 116ccc9961795..99139da67bb78 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage.ll
@@ -5,50 +5,54 @@
 ; RUN:   -S < %s 2>&1 | FileCheck %s --check-prefix=CHECK-SCALAR
 ; RUN: opt -passes=loop-vectorize -mtriple riscv64-linux-gnu \
 ; RUN:   -mattr=+v,+d -debug-only=loop-vectorize,vplan --disable-output \
-; RUN:   -riscv-v-register-bit-width-lmul=1 -prefer-predicate-over-epilogue=scalar-epilogue \
+; RUN:   -riscv-v-register-bit-width-lmul=1 \
 ; RUN:   -S < %s 2>&1 | FileCheck %s --check-prefix=CHECK-LMUL1
 ; RUN: opt -passes=loop-vectorize -mtriple riscv64-linux-gnu \
 ; RUN:   -mattr=+v,+d -debug-only=loop-vectorize,vplan --disable-output \
-; RUN:   -riscv-v-register-bit-width-lmul=2 -prefer-predicate-over-epilogue=scalar-epilogue \
+; RUN:   -riscv-v-register-bit-width-lmul=2 \
 ; RUN:   -S < %s 2>&1 | FileCheck %s --check-prefix=CHECK-LMUL2
 ; RUN: opt -passes=loop-vectorize -mtriple riscv64-linux-gnu \
 ; RUN:   -mattr=+v,+d -debug-only=loop-vectorize,vplan --disable-output \
-; RUN:   -riscv-v-register-bit-width-lmul=4 -prefer-predicate-over-epilogue=scalar-epilogue \
+; RUN:   -riscv-v-register-bit-width-lmul=4 \
 ; RUN:   -S < %s 2>&1 | FileCheck %s --check-prefix=CHECK-LMUL4
 ; RUN: opt -passes=loop-vectorize -mtriple riscv64-linux-gnu \
 ; RUN:   -mattr=+v,+d -debug-only=loop-vectorize,vplan --disable-output \
-; RUN:   -riscv-v-register-bit-width-lmul=8 -prefer-predicate-over-epilogue=scalar-epilogue \
+; RUN:   -riscv-v-register-bit-width-lmul=8 \
 ; RUN:   -S < %s 2>&1 | FileCheck %s --check-prefix=CHECK-LMUL8
 
-; TODO: -prefer-predicate-over-epilogue=scalar-epilogue was added to allow
-; unrolling. Calculate register pressure for all VPlans, not just unrolled ones,
-; and remove.
-
 define void @add(ptr noalias nocapture readonly %src1, ptr noalias nocapture readonly %src2, i32 signext %size, ptr noalias nocapture writeonly %result) {
-; CHECK-LABEL: add
+; CHECK-SCALAR-LABEL: add
 ; CHECK-SCALAR:      LV(REG): VF = 1
 ; CHECK-SCALAR-NEXT: LV(REG): Found max usage: 2 item
 ; CHECK-SCALAR-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 3 registers
 ; CHECK-SCALAR-NEXT: LV(REG): RegisterClass: RISCV::FPRRC, 2 registers
 ; CHECK-SCALAR-NEXT: LV(REG): Found invariant usage: 1 item
 ; CHECK-SCALAR-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers
-; CHECK-LMUL1:       LV(REG): Found max usage: 2 item
-; CHECK-LMUL1-NEXT:  LV(REG): RegisterClass: RISCV::GPRRC, 3 registers
+; CHECK-LMUL1-LABEL: add
+; CHECK-LMUL1:       LV(REG): VF = vscale x 2
+; CHECK-LMUL1-NEXT:  LV(REG): Found max usage: 2 item
+; CHECK-LMUL1-NEXT:  LV(REG): RegisterClass: RISCV::GPRRC, 6 registers
 ; CHECK-LMUL1-NEXT:  LV(REG): RegisterClass: RISCV::VRRC, 2 registers
 ; CHECK-LMUL1-NEXT:  LV(REG): Found invariant usage: 1 item
 ; CHECK-LMUL1-NEXT:  LV(REG): RegisterClass: RISCV::GPRRC, 1 registers
-; CHECK-LMUL2:       LV(REG): Found max usage: 2 item
-; CHECK-LMUL2-NEXT:  LV(REG): RegisterClass: RISCV::GPRRC, 3 registers
+; CHECK-LMUL2-LABEL: add
+; CHECK-LMUL2:       LV(REG): VF = vscale x 4
+; CHECK-LMUL2-NEXT:  LV(REG): Found max usage: 2 item
+; CHECK-LMUL2-NEXT:  LV(REG): RegisterClass: RISCV::GPRRC, 6 registers
 ; CHECK-LMUL2-NEXT:  LV(REG): RegisterClass: RISCV::VRRC, 4 registers
 ; CHECK-LMUL2-NEXT:  LV(REG): Found invariant usage: 1 item
 ; CHECK-LMUL2-NEXT:  LV(REG): RegisterClass: RISCV::GPRRC, 1 registers
-; CHECK-LMUL4:       LV(REG): Found max usage: 2 item
-; CHECK-LMUL4-NEXT:  LV(REG): RegisterClass: RISCV::GPRRC, 3 registers
+; CHECK-LMUL4-LABEL: add
+; CHECK-LMUL4:       LV(REG): VF = vscale x 8
+; CHECK-LMUL4-NEXT:  LV(REG): Found max usage: 2 item
+; CHECK-LMUL4-NEXT:  LV(REG): RegisterClass: RISCV::GPRRC, 6 registers
 ; CHECK-LMUL4-NEXT:  LV(REG): RegisterClass: RISCV::VRRC, 8 registers
 ; CHECK-LMUL4-NEXT:  LV(REG): Found invariant usage: 1 item
 ; CHECK-LMUL4-NEXT:  LV(REG): RegisterClass: RISCV::GPRRC, 1 registers
-; CHECK-LMUL8:       LV(REG): Found max usage: 2 item
-; CHECK-LMUL8-NEXT:  LV(REG): RegisterClass: RISCV::GPRRC, 3 registers
+; CHECK-LMUL8-LABEL: add
+; CHECK-LMUL8:       LV(REG): VF = vscale x 16
+; CHECK-LMUL8-NEXT:  LV(REG): Found max usage: 2 item
+; CHECK-LMUL8-NEXT:  LV(REG): RegisterClass: RISCV::GPRRC, 6 registers
 ; CHECK-LMUL8-NEXT:  LV(REG): RegisterClass: RISCV::VRRC, 16 registers
 ; CHECK-LMUL8-NEXT:  LV(REG): Found invariant usage: 1 item
 ; CHECK-LMUL8-NEXT:  LV(REG): RegisterClass: RISCV::GPRRC, 1 registers
@@ -76,22 +80,26 @@ for.body:
 }
 
 define void @goo(ptr nocapture noundef %a, i32 noundef signext %n) {
-; CHECK-LABEL: goo
+; CHECK-SCALAR-LABEL: goo
 ; CHECK-SCALAR:      LV(REG): VF = 1
 ; CHECK-SCALAR-NEXT: LV(REG): Found max usage: 1 item
 ; CHECK-SCALAR-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 3 registers
-; CHECK-LMUL1:       LV(REG): Found max usage: 2 item
-; CHECK-LMUL1-NEXT:  LV(REG): RegisterClass: RISCV::GPRRC, 2 registers
-; CHECK-LMUL1-NEXT:  LV(REG): RegisterClass: RISCV::VRRC, 1 registers
-; CHECK-LMUL2:       LV(REG): Found max usage: 2 item
-; CHECK-LMUL2-NEXT:  LV(REG): RegisterClass: RISCV::GPRRC, 2 registers
-; CHECK-LMUL2-NEXT:  LV(REG): RegisterClass: RISCV::VRRC, 2 registers
-; CHECK-LMUL4:       LV(REG): Found max usage: 2 item
-; CHECK-LMUL4-NEXT:  LV(REG): RegisterClass: RISCV::GPRRC, 2 registers
-; CHECK-LMUL4-NEXT:  LV(REG): RegisterClass: RISCV::VRRC, 4 registers
-; CHECK-LMUL8:       LV(REG): Found max usage: 2 item
-; CHECK-LMUL8-NEXT:  LV(REG): RegisterClass: RISCV::GPRRC, 2 registers
-; CHECK-LMUL8-NEXT:  LV(REG): RegisterClass: RISCV::VRRC, 8 registers
+; CHECK-LMUL1:       LV(REG): VF = vscale x 2
+; CHECK-LMUL1-NEXT:  LV(REG): Found max usage: 2 item
+; CHECK-LMUL1-NEXT:  LV(REG): RegisterClass: RISCV::GPRRC, 6 registers
+; CHECK-LMUL1-NEXT:  LV(REG): RegisterClass: RISCV::VRRC, 2 registers
+; CHECK-LMUL2:       LV(REG): VF = vscale x 4
+; CHECK-LMUL2-NEXT:  LV(REG): Found max usage: 2 item
+; CHECK-LMUL2-NEXT:  LV(REG): RegisterClass: RISCV::GPRRC, 6 registers
+; CHECK-LMUL2-NEXT:  LV(REG): RegisterClass: RISCV::VRRC, 4 registers
+; CHECK-LMUL4:       LV(REG): VF = vscale x 8
+; CHECK-LMUL4-NEXT:  LV(REG): Found max usage: 2 item
+; CHECK-LMUL4-NEXT:  LV(REG): RegisterClass: RISCV::GPRRC, 6 registers
+; CHECK-LMUL4-NEXT:  LV(REG): RegisterClass: RISCV::VRRC, 8 registers
+; CHECK-LMUL8:       LV(REG): VF = vscale x 16
+; CHECK-LMUL8-NEXT:  LV(REG): Found max usage: 2 item
+; CHECK-LMUL8-NEXT:  LV(REG): RegisterClass: RISCV::GPRRC, 6 registers
+; CHECK-LMUL8-NEXT:  LV(REG): RegisterClass: RISCV::VRRC, 16 registers
 entry:
   %cmp3 = icmp sgt i32 %n, 0
   br i1 %cmp3, label %for.body.preheader, label %for.cond.cleanup

>From 57d26c53251cfb2437cc265037b4906c2843af37 Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Tue, 9 Sep 2025 12:33:00 +0800
Subject: [PATCH 3/6] Add flag to override setting

---
 llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index c0b1d7421af31..e9071a98683c2 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -393,6 +393,10 @@ static cl::opt<bool> EnableEarlyExitVectorization(
     cl::desc(
         "Enable vectorization of early exit loops with uncountable exits."));
 
+static cl::opt<bool> ConsiderRegPressure(
+    "vectorizer-consider-reg-pressure", cl::init(true), cl::Hidden,
+    cl::desc("Discard VFs if their register pressure is too high."));
+
 // Likelyhood of bypassing the vectorized loop because there are zero trips left
 // after prolog. See `emitIterationCountCheck`.
 static constexpr uint32_t MinItersBypassWeights[] = {1, 127};
@@ -3702,6 +3706,9 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
 
 bool LoopVectorizationCostModel::shouldConsiderRegPressureForVF(
     ElementCount VF) {
+  if (ConsiderRegPressure.getNumOccurrences())
+    return ConsiderRegPressure;
+
   if (TTI.shouldConsiderVectorizationRegPressure())
     return true;
 

>From 9c2cc07923bd3398269ba644cf10919ba7c3663c Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Wed, 10 Sep 2025 22:17:07 +0800
Subject: [PATCH 4/6] Set flag default to false, add TODO

---
 llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index aa461d36edc72..c328ff7881d0e 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -394,7 +394,7 @@ static cl::opt<bool> EnableEarlyExitVectorization(
         "Enable vectorization of early exit loops with uncountable exits."));
 
 static cl::opt<bool> ConsiderRegPressure(
-    "vectorizer-consider-reg-pressure", cl::init(true), cl::Hidden,
+    "vectorizer-consider-reg-pressure", cl::init(false), cl::Hidden,
     cl::desc("Discard VFs if their register pressure is too high."));
 
 // Likelyhood of bypassing the vectorized loop because there are zero trips left
@@ -3700,6 +3700,8 @@ bool LoopVectorizationCostModel::shouldConsiderRegPressureForVF(
   if (ConsiderRegPressure.getNumOccurrences())
     return ConsiderRegPressure;
 
+  // TODO: We should eventually consider register pressure for all targets. The
+  // TTI hook is temporary whilst target-specific issues are being fixed.
   if (TTI.shouldConsiderVectorizationRegPressure())
     return true;
 

>From 9f620faf550390cdc175c07af4f5d13b672e57a2 Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Thu, 11 Sep 2025 11:36:52 +0800
Subject: [PATCH 5/6] Add tests for flag

---
 .../LoopVectorize/RISCV/reg-usage-prune-vf.ll | 94 +++++++++++++++++++
 1 file changed, 94 insertions(+)

diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-prune-vf.ll b/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-prune-vf.ll
index 72acffeb60c6c..42f12ec2e4859 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-prune-vf.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-prune-vf.ll
@@ -1,5 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 5
 ; RUN: opt -p loop-vectorize -mtriple riscv64 -mattr=+v -S < %s | FileCheck %s
+; RUN: opt -p loop-vectorize -mtriple riscv64 -mattr=+v -vectorizer-consider-reg-pressure=true -S < %s | FileCheck %s
+; RUN: opt -p loop-vectorize -mtriple riscv64 -mattr=+v -vectorizer-consider-reg-pressure=false -S < %s | FileCheck %s --check-prefix=NO-REG-PRESSURE-CHECK
 
 define void @f(ptr noalias %p0, ptr noalias %p1, ptr noalias %p2) {
 ; CHECK-LABEL: define void @f(
@@ -94,6 +96,98 @@ define void @f(ptr noalias %p0, ptr noalias %p1, ptr noalias %p2) {
 ; CHECK:       [[EXIT]]:
 ; CHECK-NEXT:    ret void
 ;
+; NO-REG-PRESSURE-CHECK-LABEL: define void @f(
+; NO-REG-PRESSURE-CHECK-SAME: ptr noalias [[P0:%.*]], ptr noalias [[P1:%.*]], ptr noalias [[P2:%.*]]) #[[ATTR0:[0-9]+]] {
+; NO-REG-PRESSURE-CHECK-NEXT:  [[ENTRY:.*:]]
+; NO-REG-PRESSURE-CHECK-NEXT:    br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; NO-REG-PRESSURE-CHECK:       [[VECTOR_PH]]:
+; NO-REG-PRESSURE-CHECK-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64()
+; NO-REG-PRESSURE-CHECK-NEXT:    [[TMP1:%.*]] = mul <vscale x 8 x i64> [[TMP0]], splat (i64 2)
+; NO-REG-PRESSURE-CHECK-NEXT:    [[INDUCTION:%.*]] = add <vscale x 8 x i64> zeroinitializer, [[TMP1]]
+; NO-REG-PRESSURE-CHECK-NEXT:    [[TMP2:%.*]] = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64()
+; NO-REG-PRESSURE-CHECK-NEXT:    [[TMP3:%.*]] = mul <vscale x 8 x i64> [[TMP2]], splat (i64 3)
+; NO-REG-PRESSURE-CHECK-NEXT:    [[INDUCTION1:%.*]] = add <vscale x 8 x i64> zeroinitializer, [[TMP3]]
+; NO-REG-PRESSURE-CHECK-NEXT:    [[TMP4:%.*]] = call <vscale x 8 x i64> @llvm.stepvector.nxv8i64()
+; NO-REG-PRESSURE-CHECK-NEXT:    [[TMP5:%.*]] = mul <vscale x 8 x i64> [[TMP4]], splat (i64 4)
+; NO-REG-PRESSURE-CHECK-NEXT:    [[INDUCTION2:%.*]] = add <vscale x 8 x i64> zeroinitializer, [[TMP5]]
+; NO-REG-PRESSURE-CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
+; NO-REG-PRESSURE-CHECK:       [[VECTOR_BODY]]:
+; NO-REG-PRESSURE-CHECK-NEXT:    [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; NO-REG-PRESSURE-CHECK-NEXT:    [[VEC_IND:%.*]] = phi <vscale x 8 x i64> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; NO-REG-PRESSURE-CHECK-NEXT:    [[VEC_IND3:%.*]] = phi <vscale x 8 x i64> [ [[INDUCTION1]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT11:%.*]], %[[VECTOR_BODY]] ]
+; NO-REG-PRESSURE-CHECK-NEXT:    [[VEC_IND4:%.*]] = phi <vscale x 8 x i64> [ [[INDUCTION2]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT12:%.*]], %[[VECTOR_BODY]] ]
+; NO-REG-PRESSURE-CHECK-NEXT:    [[AVL:%.*]] = phi i64 [ 1025, %[[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; NO-REG-PRESSURE-CHECK-NEXT:    [[TMP6:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 8, i1 true)
+; NO-REG-PRESSURE-CHECK-NEXT:    [[TMP7:%.*]] = zext i32 [[TMP6]] to i64
+; NO-REG-PRESSURE-CHECK-NEXT:    [[TMP8:%.*]] = mul i64 4, [[TMP7]]
+; NO-REG-PRESSURE-CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP8]], i64 0
+; NO-REG-PRESSURE-CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+; NO-REG-PRESSURE-CHECK-NEXT:    [[TMP9:%.*]] = zext i32 [[TMP6]] to i64
+; NO-REG-PRESSURE-CHECK-NEXT:    [[TMP10:%.*]] = mul i64 3, [[TMP9]]
+; NO-REG-PRESSURE-CHECK-NEXT:    [[BROADCAST_SPLATINSERT5:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP10]], i64 0
+; NO-REG-PRESSURE-CHECK-NEXT:    [[BROADCAST_SPLAT6:%.*]] = shufflevector <vscale x 8 x i64> [[BROADCAST_SPLATINSERT5]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+; NO-REG-PRESSURE-CHECK-NEXT:    [[TMP11:%.*]] = zext i32 [[TMP6]] to i64
+; NO-REG-PRESSURE-CHECK-NEXT:    [[TMP12:%.*]] = mul i64 2, [[TMP11]]
+; NO-REG-PRESSURE-CHECK-NEXT:    [[BROADCAST_SPLATINSERT7:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP12]], i64 0
+; NO-REG-PRESSURE-CHECK-NEXT:    [[BROADCAST_SPLAT8:%.*]] = shufflevector <vscale x 8 x i64> [[BROADCAST_SPLATINSERT7]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
+; NO-REG-PRESSURE-CHECK-NEXT:    [[TMP13:%.*]] = sub <vscale x 8 x i64> [[VEC_IND]], splat (i64 1)
+; NO-REG-PRESSURE-CHECK-NEXT:    [[TMP14:%.*]] = getelementptr i8, ptr [[P0]], <vscale x 8 x i64> [[TMP13]]
+; NO-REG-PRESSURE-CHECK-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 8 x i8> @llvm.vp.gather.nxv8i8.nxv8p0(<vscale x 8 x ptr> align 1 [[TMP14]], <vscale x 8 x i1> splat (i1 true), i32 [[TMP6]])
+; NO-REG-PRESSURE-CHECK-NEXT:    [[TMP15:%.*]] = sub <vscale x 8 x i64> [[VEC_IND3]], splat (i64 1)
+; NO-REG-PRESSURE-CHECK-NEXT:    [[TMP16:%.*]] = getelementptr i8, ptr [[P0]], <vscale x 8 x i64> [[TMP15]]
+; NO-REG-PRESSURE-CHECK-NEXT:    [[WIDE_MASKED_GATHER9:%.*]] = call <vscale x 8 x i8> @llvm.vp.gather.nxv8i8.nxv8p0(<vscale x 8 x ptr> align 1 [[TMP16]], <vscale x 8 x i1> splat (i1 true), i32 [[TMP6]])
+; NO-REG-PRESSURE-CHECK-NEXT:    [[TMP17:%.*]] = sub <vscale x 8 x i64> [[VEC_IND4]], splat (i64 1)
+; NO-REG-PRESSURE-CHECK-NEXT:    [[TMP18:%.*]] = getelementptr i8, ptr [[P0]], <vscale x 8 x i64> [[TMP17]]
+; NO-REG-PRESSURE-CHECK-NEXT:    [[WIDE_MASKED_GATHER10:%.*]] = call <vscale x 8 x i8> @llvm.vp.gather.nxv8i8.nxv8p0(<vscale x 8 x ptr> align 1 [[TMP18]], <vscale x 8 x i1> splat (i1 true), i32 [[TMP6]])
+; NO-REG-PRESSURE-CHECK-NEXT:    [[TMP19:%.*]] = mul i64 [[EVL_BASED_IV]], 3
+; NO-REG-PRESSURE-CHECK-NEXT:    [[TMP20:%.*]] = getelementptr i8, ptr [[P1]], i64 [[TMP19]]
+; NO-REG-PRESSURE-CHECK-NEXT:    [[TMP21:%.*]] = getelementptr i8, ptr [[TMP20]], i8 0
+; NO-REG-PRESSURE-CHECK-NEXT:    [[INTERLEAVE_EVL:%.*]] = mul nuw nsw i32 [[TMP6]], 3
+; NO-REG-PRESSURE-CHECK-NEXT:    [[INTERLEAVED_VEC:%.*]] = call <vscale x 24 x i8> @llvm.vector.interleave3.nxv24i8(<vscale x 8 x i8> [[WIDE_MASKED_GATHER]], <vscale x 8 x i8> [[WIDE_MASKED_GATHER9]], <vscale x 8 x i8> [[WIDE_MASKED_GATHER10]])
+; NO-REG-PRESSURE-CHECK-NEXT:    call void @llvm.vp.store.nxv24i8.p0(<vscale x 24 x i8> [[INTERLEAVED_VEC]], ptr align 1 [[TMP21]], <vscale x 24 x i1> splat (i1 true), i32 [[INTERLEAVE_EVL]])
+; NO-REG-PRESSURE-CHECK-NEXT:    [[TMP22:%.*]] = zext i32 [[TMP6]] to i64
+; NO-REG-PRESSURE-CHECK-NEXT:    [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP22]], [[EVL_BASED_IV]]
+; NO-REG-PRESSURE-CHECK-NEXT:    [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP22]]
+; NO-REG-PRESSURE-CHECK-NEXT:    [[VEC_IND_NEXT]] = add <vscale x 8 x i64> [[VEC_IND]], [[BROADCAST_SPLAT8]]
+; NO-REG-PRESSURE-CHECK-NEXT:    [[VEC_IND_NEXT11]] = add <vscale x 8 x i64> [[VEC_IND3]], [[BROADCAST_SPLAT6]]
+; NO-REG-PRESSURE-CHECK-NEXT:    [[VEC_IND_NEXT12]] = add <vscale x 8 x i64> [[VEC_IND4]], [[BROADCAST_SPLAT]]
+; NO-REG-PRESSURE-CHECK-NEXT:    [[TMP23:%.*]] = icmp eq i64 [[AVL_NEXT]], 0
+; NO-REG-PRESSURE-CHECK-NEXT:    br i1 [[TMP23]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; NO-REG-PRESSURE-CHECK:       [[MIDDLE_BLOCK]]:
+; NO-REG-PRESSURE-CHECK-NEXT:    br label %[[EXIT:.*]]
+; NO-REG-PRESSURE-CHECK:       [[SCALAR_PH]]:
+; NO-REG-PRESSURE-CHECK-NEXT:    br label %[[LOOP:.*]]
+; NO-REG-PRESSURE-CHECK:       [[LOOP]]:
+; NO-REG-PRESSURE-CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; NO-REG-PRESSURE-CHECK-NEXT:    [[WIDE_IV_0:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[WIDE_IV_0_NEXT:%.*]], %[[LOOP]] ]
+; NO-REG-PRESSURE-CHECK-NEXT:    [[WIDE_IV_1:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[WIDE_IV_1_NEXT:%.*]], %[[LOOP]] ]
+; NO-REG-PRESSURE-CHECK-NEXT:    [[WIDE_IV_2:%.*]] = phi i64 [ 0, %[[SCALAR_PH]] ], [ [[WIDE_IV_2_NEXT:%.*]], %[[LOOP]] ]
+; NO-REG-PRESSURE-CHECK-NEXT:    [[WIDE_IV_0_SUB:%.*]] = sub i64 [[WIDE_IV_0]], 1
+; NO-REG-PRESSURE-CHECK-NEXT:    [[A_GEP0:%.*]] = getelementptr i8, ptr [[P0]], i64 [[WIDE_IV_0_SUB]]
+; NO-REG-PRESSURE-CHECK-NEXT:    [[A:%.*]] = load i8, ptr [[A_GEP0]], align 1
+; NO-REG-PRESSURE-CHECK-NEXT:    [[WIDE_IV_1_SUB:%.*]] = sub i64 [[WIDE_IV_1]], 1
+; NO-REG-PRESSURE-CHECK-NEXT:    [[B_GEP0:%.*]] = getelementptr i8, ptr [[P0]], i64 [[WIDE_IV_1_SUB]]
+; NO-REG-PRESSURE-CHECK-NEXT:    [[B:%.*]] = load i8, ptr [[B_GEP0]], align 1
+; NO-REG-PRESSURE-CHECK-NEXT:    [[WIDE_IV_2_SUB:%.*]] = sub i64 [[WIDE_IV_2]], 1
+; NO-REG-PRESSURE-CHECK-NEXT:    [[C_GEP0:%.*]] = getelementptr i8, ptr [[P0]], i64 [[WIDE_IV_2_SUB]]
+; NO-REG-PRESSURE-CHECK-NEXT:    [[C:%.*]] = load i8, ptr [[C_GEP0]], align 1
+; NO-REG-PRESSURE-CHECK-NEXT:    [[IV_MUL:%.*]] = mul i64 [[IV]], 3
+; NO-REG-PRESSURE-CHECK-NEXT:    [[BASE:%.*]] = getelementptr i8, ptr [[P1]], i64 [[IV_MUL]]
+; NO-REG-PRESSURE-CHECK-NEXT:    [[A_GEP1:%.*]] = getelementptr i8, ptr [[BASE]], i8 0
+; NO-REG-PRESSURE-CHECK-NEXT:    store i8 [[A]], ptr [[A_GEP1]], align 1
+; NO-REG-PRESSURE-CHECK-NEXT:    [[B_GEP1:%.*]] = getelementptr i8, ptr [[BASE]], i8 1
+; NO-REG-PRESSURE-CHECK-NEXT:    store i8 [[B]], ptr [[B_GEP1]], align 1
+; NO-REG-PRESSURE-CHECK-NEXT:    [[C_GEP1:%.*]] = getelementptr i8, ptr [[BASE]], i8 2
+; NO-REG-PRESSURE-CHECK-NEXT:    store i8 [[C]], ptr [[C_GEP1]], align 1
+; NO-REG-PRESSURE-CHECK-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 1
+; NO-REG-PRESSURE-CHECK-NEXT:    [[WIDE_IV_0_NEXT]] = add i64 [[WIDE_IV_0]], 2
+; NO-REG-PRESSURE-CHECK-NEXT:    [[WIDE_IV_1_NEXT]] = add i64 [[WIDE_IV_1]], 3
+; NO-REG-PRESSURE-CHECK-NEXT:    [[WIDE_IV_2_NEXT]] = add i64 [[WIDE_IV_2]], 4
+; NO-REG-PRESSURE-CHECK-NEXT:    [[DONE:%.*]] = icmp eq i64 [[IV]], 1024
+; NO-REG-PRESSURE-CHECK-NEXT:    br i1 [[DONE]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP4:![0-9]+]]
+; NO-REG-PRESSURE-CHECK:       [[EXIT]]:
+; NO-REG-PRESSURE-CHECK-NEXT:    ret void
+;
 entry:
   br label %loop
 

>From e11d350814adf920c4cfb36856d1d3f671da9f75 Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Fri, 12 Sep 2025 13:49:37 +0800
Subject: [PATCH 6/6] Update comment

---
 llvm/include/llvm/Analysis/TargetTransformInfo.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index eb2013fd1f329..a6f4e51e258ab 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -1847,8 +1847,8 @@ class TargetTransformInfo {
   /// otherwise scalar epilogue loop.
   LLVM_ABI bool preferEpilogueVectorization() const;
 
-  /// \returns True if the loop vectorizer should discard any VFs that may have
-  /// too high of a register pressure as determined by getNumberOfRegisters.
+  /// \returns True if the loop vectorizer should discard any VFs where the
+  /// maximum register pressure exceeds getNumberOfRegisters.
   LLVM_ABI bool shouldConsiderVectorizationRegPressure() const;
 
   /// \returns True if the target wants to expand the given reduction intrinsic