[llvm] [LV][VPlan] Reduce register usage of VPEVLBasedIVPHIRecipe. (PR #154482)

Wed Aug 20 15:56:37 PDT 2025

https://github.com/ElvisWang123 updated https://github.com/llvm/llvm-project/pull/154482

>From 3990d82c8e6eff13cfebf58832c2e4f9b0be9617 Mon Sep 17 00:00:00 2001
From: Elvis Wang <elvis.wang at sifive.com>
Date: Tue, 19 Aug 2025 23:19:59 -0700
Subject: [PATCH 1/4] Precommit test case.

---
 .../RISCV/maxbandwidth-regpressure.ll         | 38 +++++++++++++++++++
 1 file changed, 38 insertions(+)
 create mode 100644 llvm/test/Transforms/LoopVectorize/RISCV/maxbandwidth-regpressure.ll

diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/maxbandwidth-regpressure.ll b/llvm/test/Transforms/LoopVectorize/RISCV/maxbandwidth-regpressure.ll
new file mode 100644
index 0000000000000..af4d76d853bfd
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/maxbandwidth-regpressure.ll
@@ -0,0 +1,38 @@
+; REQUIRES: asserts
+; RUN: opt -passes=loop-vectorize -mtriple riscv64 -mattr=+v -vectorizer-maximize-bandwidth -debug-only=loop-vectorize,vplan -disable-output -force-vector-interleave=1 -enable-epilogue-vectorization=false -S < %s 2>&1 | FileCheck %s --check-prefixes=CHECK-REGS-VP
+; RUN: opt -passes=loop-vectorize -mtriple riscv64 -mattr=+v -vectorizer-maximize-bandwidth -debug-only=loop-vectorize -disable-output -force-target-num-vector-regs=1 -force-vector-interleave=1 -enable-epilogue-vectorization=false -S < %s 2>&1 | FileCheck %s --check-prefixes=CHECK-NOREGS-VP
+define i32 @dotp(ptr %a, ptr %b) {
+; CHECK-REGS-VP:      LV(REG): VF = vscale x 16
+; CHECK-REGS-VP-NEXT: LV(REG): Found max usage: 2 item
+; CHECK-REGS-VP-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 5 registers
+; CHECK-REGS-VP-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 40 registers
+; CHECK-REGS-VP-NEXT: LV(REG): Found invariant usage: 1 item
+; CHECK-REGS-VP-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers
+; CHECK-REGS-VP: LV(REG): Not considering vector loop of width vscale x 16 because it uses too many registers
+; CHECK-REGS-VP: LV: Selecting VF: vscale x 8.
+;
+; CHECK-NOREGS-VP: LV(REG): Not considering vector loop of width vscale x 8 because it uses too many registers
+; CHECK-NOREGS-VP: LV(REG): Not considering vector loop of width vscale x 16 because it uses too many registers
+; CHECK-NOREGS-VP: LV: Selecting VF: vscale x 4.
+entry:
+  br label %for.body
+
+for.body:                                         ; preds = %for.body, %entry
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %accum = phi i32 [ 0, %entry ], [ %add, %for.body ]
+  %gep.a = getelementptr i8, ptr %a, i64 %iv
+  %load.a = load i8, ptr %gep.a, align 1
+  %ext.a = zext i8 %load.a to i32
+  %gep.b = getelementptr i8, ptr %b, i64 %iv
+  %load.b = load i8, ptr %gep.b, align 1
+  %ext.b = zext i8 %load.b to i32
+  %mul = mul i32 %ext.b, %ext.a
+  %sub = sub i32 0, %mul
+  %add = add i32 %accum, %sub
+  %iv.next = add i64 %iv, 1
+  %exitcond.not = icmp eq i64 %iv.next, 1024
+  br i1 %exitcond.not, label %for.exit, label %for.body
+
+for.exit:                        ; preds = %for.body
+  ret i32 %add
+}

>From ec2e9aa7a494a1c18a56e9d87b756d0594595140 Mon Sep 17 00:00:00 2001
From: Elvis Wang <elvis.wang at sifive.com>
Date: Tue, 19 Aug 2025 23:33:55 -0700
Subject: [PATCH 2/4] [LV][VPlan] Reduce register usage of
 VPEVLBasedIVPHIRecipe.

VPEVLBasedIVPHIRecipe is lowered to a scalar-phi VPInstruction and
generates a scalar phi. This recipe therefore only uses a scalar
register, just like other phi recipes.

This patch fixes the register usage of VPEVLBasedIVPHIRecipe, changing
it from vector to scalar, which is closer to the generated vector IR.

https://godbolt.org/z/6Mzd6W6ha shows that there are no register spills
when choosing <vscale x 16>.
---
 llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp            | 2 +-
 .../LoopVectorize/RISCV/maxbandwidth-regpressure.ll        | 7 +++----
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
index b39231f106300..b46d99052a1dd 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
@@ -555,7 +555,7 @@ SmallVector<VPRegisterUsage, 8> llvm::calculateRegisterUsageForPlan(
 
         if (VFs[J].isScalar() ||
             isa<VPCanonicalIVPHIRecipe, VPReplicateRecipe, VPDerivedIVRecipe,
-                VPScalarIVStepsRecipe>(R) ||
+                VPEVLBasedIVPHIRecipe, VPScalarIVStepsRecipe>(R) ||
             (isa<VPInstruction>(R) &&
              all_of(cast<VPSingleDefRecipe>(R)->users(),
                     [&](VPUser *U) {
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/maxbandwidth-regpressure.ll b/llvm/test/Transforms/LoopVectorize/RISCV/maxbandwidth-regpressure.ll
index af4d76d853bfd..71b26aa77ce88 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/maxbandwidth-regpressure.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/maxbandwidth-regpressure.ll
@@ -4,12 +4,11 @@
 define i32 @dotp(ptr %a, ptr %b) {
 ; CHECK-REGS-VP:      LV(REG): VF = vscale x 16
 ; CHECK-REGS-VP-NEXT: LV(REG): Found max usage: 2 item
-; CHECK-REGS-VP-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 5 registers
-; CHECK-REGS-VP-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 40 registers
+; CHECK-REGS-VP-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 6 registers
+; CHECK-REGS-VP-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 24 registers
 ; CHECK-REGS-VP-NEXT: LV(REG): Found invariant usage: 1 item
 ; CHECK-REGS-VP-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers
-; CHECK-REGS-VP: LV(REG): Not considering vector loop of width vscale x 16 because it uses too many registers
-; CHECK-REGS-VP: LV: Selecting VF: vscale x 8.
+; CHECK-REGS-VP: LV: Selecting VF: vscale x 16.
 ;
 ; CHECK-NOREGS-VP: LV(REG): Not considering vector loop of width vscale x 8 because it uses too many registers
 ; CHECK-NOREGS-VP: LV(REG): Not considering vector loop of width vscale x 16 because it uses too many registers

>From c6eb21b7f8cfb9fa060a467d2a03eee02037f2c0 Mon Sep 17 00:00:00 2001
From: Elvis Wang <elvis.wang at sifive.com>
Date: Wed, 20 Aug 2025 00:59:16 -0700
Subject: [PATCH 3/4] address comments.

---
 ...{maxbandwidth-regpressure.ll => reg-usage-maxbandwidth.ll} | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)
 rename llvm/test/Transforms/LoopVectorize/RISCV/{maxbandwidth-regpressure.ll => reg-usage-maxbandwidth.ll} (87%)

diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/maxbandwidth-regpressure.ll b/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-maxbandwidth.ll
similarity index 87%
rename from llvm/test/Transforms/LoopVectorize/RISCV/maxbandwidth-regpressure.ll
rename to llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-maxbandwidth.ll
index 71b26aa77ce88..8f9f543149285 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/maxbandwidth-regpressure.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-maxbandwidth.ll
@@ -1,6 +1,6 @@
 ; REQUIRES: asserts
-; RUN: opt -passes=loop-vectorize -mtriple riscv64 -mattr=+v -vectorizer-maximize-bandwidth -debug-only=loop-vectorize,vplan -disable-output -force-vector-interleave=1 -enable-epilogue-vectorization=false -S < %s 2>&1 | FileCheck %s --check-prefixes=CHECK-REGS-VP
-; RUN: opt -passes=loop-vectorize -mtriple riscv64 -mattr=+v -vectorizer-maximize-bandwidth -debug-only=loop-vectorize -disable-output -force-target-num-vector-regs=1 -force-vector-interleave=1 -enable-epilogue-vectorization=false -S < %s 2>&1 | FileCheck %s --check-prefixes=CHECK-NOREGS-VP
+; RUN: opt -passes=loop-vectorize -mtriple riscv64 -mattr=+v -vectorizer-maximize-bandwidth -debug-only=loop-vectorize,vplan -disable-output -S < %s 2>&1 | FileCheck %s --check-prefixes=CHECK-REGS-VP
+; RUN: opt -passes=loop-vectorize -mtriple riscv64 -mattr=+v -vectorizer-maximize-bandwidth -debug-only=loop-vectorize -disable-output -force-target-num-vector-regs=1 -S < %s 2>&1 | FileCheck %s --check-prefixes=CHECK-NOREGS-VP
 define i32 @dotp(ptr %a, ptr %b) {
 ; CHECK-REGS-VP:      LV(REG): VF = vscale x 16
 ; CHECK-REGS-VP-NEXT: LV(REG): Found max usage: 2 item

>From 0a10d125c0f7138368fa4b641e2458a7f7aa03c3 Mon Sep 17 00:00:00 2001
From: Elvis Wang <elvis.wang at sifive.com>
Date: Wed, 20 Aug 2025 02:16:39 -0700
Subject: [PATCH 4/4] !fixup, add newline and remove unneeded runs.

---
 .../LoopVectorize/RISCV/reg-usage-maxbandwidth.ll         | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-maxbandwidth.ll b/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-maxbandwidth.ll
index 8f9f543149285..6bb0d64314d3e 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-maxbandwidth.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/reg-usage-maxbandwidth.ll
@@ -1,6 +1,6 @@
 ; REQUIRES: asserts
 ; RUN: opt -passes=loop-vectorize -mtriple riscv64 -mattr=+v -vectorizer-maximize-bandwidth -debug-only=loop-vectorize,vplan -disable-output -S < %s 2>&1 | FileCheck %s --check-prefixes=CHECK-REGS-VP
-; RUN: opt -passes=loop-vectorize -mtriple riscv64 -mattr=+v -vectorizer-maximize-bandwidth -debug-only=loop-vectorize -disable-output -force-target-num-vector-regs=1 -S < %s 2>&1 | FileCheck %s --check-prefixes=CHECK-NOREGS-VP
+
 define i32 @dotp(ptr %a, ptr %b) {
 ; CHECK-REGS-VP:      LV(REG): VF = vscale x 16
 ; CHECK-REGS-VP-NEXT: LV(REG): Found max usage: 2 item
@@ -8,11 +8,7 @@ define i32 @dotp(ptr %a, ptr %b) {
 ; CHECK-REGS-VP-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 24 registers
 ; CHECK-REGS-VP-NEXT: LV(REG): Found invariant usage: 1 item
 ; CHECK-REGS-VP-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers
-; CHECK-REGS-VP: LV: Selecting VF: vscale x 16.
-;
-; CHECK-NOREGS-VP: LV(REG): Not considering vector loop of width vscale x 8 because it uses too many registers
-; CHECK-NOREGS-VP: LV(REG): Not considering vector loop of width vscale x 16 because it uses too many registers
-; CHECK-NOREGS-VP: LV: Selecting VF: vscale x 4.
+; CHECK-REGS-VP:      LV: Selecting VF: vscale x 16.
 entry:
   br label %for.body