[llvm] [LV][VPlan] Reduce register usage of VPEVLBasedIVPHIRecipe. (PR #154482)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Aug 19 23:52:26 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-risc-v
Author: Elvis Wang (ElvisWang123)
<details>
<summary>Changes</summary>
`VPEVLBasedIVPHIRecipe` will lower to VPInstruction scalar phi and
generate scalar phi. This recipe will only occupy a scalar register just
like other phi recipes.
This patch fix the register usage for `VPEVLBasedIVPHIRecipe` from vector
to scalar which is close to generated vector IR.
https://godbolt.org/z/6Mzd6W6ha shows that no register spills when
choosing `<vscale x 16>`.
Note that this test is basically copied from AArch64.
---
Full diff: https://github.com/llvm/llvm-project/pull/154482.diff
2 Files Affected:
- (modified) llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp (+1-1)
- (added) llvm/test/Transforms/LoopVectorize/RISCV/maxbandwidth-regpressure.ll (+37)
``````````diff
diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
index b39231f106300..b46d99052a1dd 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
@@ -555,7 +555,7 @@ SmallVector<VPRegisterUsage, 8> llvm::calculateRegisterUsageForPlan(
if (VFs[J].isScalar() ||
isa<VPCanonicalIVPHIRecipe, VPReplicateRecipe, VPDerivedIVRecipe,
- VPScalarIVStepsRecipe>(R) ||
+ VPEVLBasedIVPHIRecipe, VPScalarIVStepsRecipe>(R) ||
(isa<VPInstruction>(R) &&
all_of(cast<VPSingleDefRecipe>(R)->users(),
[&](VPUser *U) {
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/maxbandwidth-regpressure.ll b/llvm/test/Transforms/LoopVectorize/RISCV/maxbandwidth-regpressure.ll
new file mode 100644
index 0000000000000..71b26aa77ce88
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/maxbandwidth-regpressure.ll
@@ -0,0 +1,37 @@
+; REQUIRES: asserts
+; RUN: opt -passes=loop-vectorize -mtriple riscv64 -mattr=+v -vectorizer-maximize-bandwidth -debug-only=loop-vectorize,vplan -disable-output -force-vector-interleave=1 -enable-epilogue-vectorization=false -S < %s 2>&1 | FileCheck %s --check-prefixes=CHECK-REGS-VP
+; RUN: opt -passes=loop-vectorize -mtriple riscv64 -mattr=+v -vectorizer-maximize-bandwidth -debug-only=loop-vectorize -disable-output -force-target-num-vector-regs=1 -force-vector-interleave=1 -enable-epilogue-vectorization=false -S < %s 2>&1 | FileCheck %s --check-prefixes=CHECK-NOREGS-VP
+define i32 @dotp(ptr %a, ptr %b) {
+; CHECK-REGS-VP: LV(REG): VF = vscale x 16
+; CHECK-REGS-VP-NEXT: LV(REG): Found max usage: 2 item
+; CHECK-REGS-VP-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 6 registers
+; CHECK-REGS-VP-NEXT: LV(REG): RegisterClass: RISCV::VRRC, 24 registers
+; CHECK-REGS-VP-NEXT: LV(REG): Found invariant usage: 1 item
+; CHECK-REGS-VP-NEXT: LV(REG): RegisterClass: RISCV::GPRRC, 1 registers
+; CHECK-REGS-VP: LV: Selecting VF: vscale x 16.
+;
+; CHECK-NOREGS-VP: LV(REG): Not considering vector loop of width vscale x 8 because it uses too many registers
+; CHECK-NOREGS-VP: LV(REG): Not considering vector loop of width vscale x 16 because it uses too many registers
+; CHECK-NOREGS-VP: LV: Selecting VF: vscale x 4.
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %accum = phi i32 [ 0, %entry ], [ %add, %for.body ]
+ %gep.a = getelementptr i8, ptr %a, i64 %iv
+ %load.a = load i8, ptr %gep.a, align 1
+ %ext.a = zext i8 %load.a to i32
+ %gep.b = getelementptr i8, ptr %b, i64 %iv
+ %load.b = load i8, ptr %gep.b, align 1
+ %ext.b = zext i8 %load.b to i32
+ %mul = mul i32 %ext.b, %ext.a
+ %sub = sub i32 0, %mul
+ %add = add i32 %accum, %sub
+ %iv.next = add i64 %iv, 1
+ %exitcond.not = icmp eq i64 %iv.next, 1024
+ br i1 %exitcond.not, label %for.exit, label %for.body
+
+for.exit: ; preds = %for.body
+ ret i32 %add
+}
``````````
</details>
https://github.com/llvm/llvm-project/pull/154482
More information about the llvm-commits
mailing list