[llvm] [RISCV][VLOPT] Peek through copies in checkUsers (PR #127656)
Luke Lau via llvm-commits
llvm-commits at lists.llvm.org
Tue Feb 18 07:44:42 PST 2025
https://github.com/lukel97 created https://github.com/llvm/llvm-project/pull/127656
Currently, if a user of an instruction isn't a vector pseudo, we bail. For simple non-subreg virtual COPYs, we can peek through their uses by using a worklist.
This is extracted from a loop in TSVC2 (s273) that contains an fcmp + select, which produces a copy that doesn't seem to get coalesced away.
>From f337bb1b56f942f03662b34574f899f643c3ecd7 Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Tue, 18 Feb 2025 23:33:29 +0800
Subject: [PATCH 1/2] Precommit tests
---
llvm/test/CodeGen/RISCV/rvv/vl-opt.ll | 34 +++++++++++++++++++++++
llvm/test/CodeGen/RISCV/rvv/vl-opt.mir | 38 ++++++++++++++++++++++++++
2 files changed, 72 insertions(+)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt.ll b/llvm/test/CodeGen/RISCV/rvv/vl-opt.ll
index f0b05d2420b1a..0b65246765c1f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vl-opt.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt.ll
@@ -217,3 +217,37 @@ define void @optimize_ternary_use(<vscale x 4 x i16> %a, <vscale x 4 x i32> %b,
call void @llvm.riscv.vse(<vscale x 4 x i32> %3, ptr %p, iXLen %vl)
ret void
}
+
+; This function has a copy between two vrm2 virtual registers, make sure we can
+; reduce vl between it.
+define void @fadd_fcmp_select_copy(<vscale x 4 x float> %v, <vscale x 4 x i1> %c, ptr %p, iXLen %vl) {
+; NOVLOPT-LABEL: fadd_fcmp_select_copy:
+; NOVLOPT: # %bb.0:
+; NOVLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT: vfadd.vv v8, v8, v8
+; NOVLOPT-NEXT: fmv.w.x fa5, zero
+; NOVLOPT-NEXT: vmflt.vf v10, v8, fa5
+; NOVLOPT-NEXT: vmand.mm v10, v0, v10
+; NOVLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; NOVLOPT-NEXT: vse32.v v8, (a0)
+; NOVLOPT-NEXT: vsm.v v10, (a0)
+; NOVLOPT-NEXT: ret
+;
+; VLOPT-LABEL: fadd_fcmp_select_copy:
+; VLOPT: # %bb.0:
+; VLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; VLOPT-NEXT: vfadd.vv v8, v8, v8
+; VLOPT-NEXT: fmv.w.x fa5, zero
+; VLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
+; VLOPT-NEXT: vmflt.vf v10, v8, fa5
+; VLOPT-NEXT: vmand.mm v10, v0, v10
+; VLOPT-NEXT: vse32.v v8, (a0)
+; VLOPT-NEXT: vsm.v v10, (a0)
+; VLOPT-NEXT: ret
+ %fadd = fadd <vscale x 4 x float> %v, %v
+ %fcmp = fcmp olt <vscale x 4 x float> %fadd, zeroinitializer
+ %select = select <vscale x 4 x i1> %c, <vscale x 4 x i1> %fcmp, <vscale x 4 x i1> zeroinitializer
+ call void @llvm.riscv.vse(<vscale x 4 x float> %fadd, ptr %p, iXLen %vl)
+ call void @llvm.riscv.vsm(<vscale x 4 x i1> %select, ptr %p, iXLen %vl)
+ ret void
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt.mir b/llvm/test/CodeGen/RISCV/rvv/vl-opt.mir
index 0475a988e9851..eaad479b59c1b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vl-opt.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt.mir
@@ -290,3 +290,41 @@ body: |
%x:vr = PseudoVSADDU_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */, implicit-def $vxsat
%y:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
...
+---
+name: copy
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: copy
+ ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */
+ ; CHECK-NEXT: %y:vr = COPY %x
+ ; CHECK-NEXT: %z:vr = PseudoVADD_VV_M1 $noreg, %y, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
+ %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */
+ %y:vr = COPY %x
+ %z:vr = PseudoVADD_VV_M1 $noreg, %y, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
+...
+---
+name: copy_multiple_users
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: copy_multiple_users
+ ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */
+ ; CHECK-NEXT: %y:vr = COPY %x
+ ; CHECK-NEXT: %z0:vr = PseudoVADD_VV_M1 $noreg, %y, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
+ ; CHECK-NEXT: %z1:vr = PseudoVADD_VV_M1 $noreg, %y, $noreg, 3, 3 /* e8 */, 0 /* tu, mu */
+ %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */
+ %y:vr = COPY %x
+ %z0:vr = PseudoVADD_VV_M1 $noreg, %y, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
+ %z1:vr = PseudoVADD_VV_M1 $noreg, %y, $noreg, 3, 3 /* e8 */, 0 /* tu, mu */
+...
+---
+name: copy_user_invalid_sew
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: copy_user_invalid_sew
+ ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */
+ ; CHECK-NEXT: %y:vr = COPY %x
+ ; CHECK-NEXT: %z:vr = PseudoVADD_VV_M1 $noreg, %y, $noreg, 1, 4 /* e16 */, 0 /* tu, mu */
+ %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */
+ %y:vr = COPY %x
+ %z:vr = PseudoVADD_VV_M1 $noreg, %y, $noreg, 1, 4 /* e16 */, 0 /* tu, mu */
+...
>From 7e0aecd45ff993d08c18a9abcb53bc7a2bfe9f9e Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Tue, 18 Feb 2025 23:39:12 +0800
Subject: [PATCH 2/2] [RISCV][VLOPT] Peek through copies in checkUsers
Currently, if a user of an instruction isn't a vector pseudo, we bail. For simple non-subreg virtual COPYs, we can peek through their uses by using a worklist.
This is extracted from a loop in TSVC2 (s273) that contains an fcmp + select, which produces a copy that doesn't seem to get coalesced away.
---
llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp | 17 ++++++++++++++++-
.../RISCV/rvv/rvv-peephole-vmerge-vops.ll | 4 ++--
llvm/test/CodeGen/RISCV/rvv/vl-opt.ll | 3 +--
llvm/test/CodeGen/RISCV/rvv/vl-opt.mir | 4 ++--
4 files changed, 21 insertions(+), 7 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
index 1ba7f0b522a2b..67024d720ccfa 100644
--- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
@@ -1311,9 +1311,24 @@ RISCVVLOptimizer::getMinimumVLForUser(MachineOperand &UserOp) {
std::optional<MachineOperand> RISCVVLOptimizer::checkUsers(MachineInstr &MI) {
std::optional<MachineOperand> CommonVL;
- for (auto &UserOp : MRI->use_operands(MI.getOperand(0).getReg())) {
+ SmallSetVector<MachineOperand *, 8> Worklist;
+ for (auto &UserOp : MRI->use_operands(MI.getOperand(0).getReg()))
+ Worklist.insert(&UserOp);
+
+ while (!Worklist.empty()) {
+ MachineOperand &UserOp = *Worklist.pop_back_val();
const MachineInstr &UserMI = *UserOp.getParent();
LLVM_DEBUG(dbgs() << " Checking user: " << UserMI << "\n");
+
+ if (UserMI.isCopy() && UserMI.getOperand(0).getReg().isVirtual() &&
+ UserMI.getOperand(0).getSubReg() == RISCV::NoSubRegister &&
+ UserMI.getOperand(1).getSubReg() == RISCV::NoSubRegister) {
+ LLVM_DEBUG(dbgs() << " Peeking through uses of COPY\n");
+ for (auto &CopyUse : MRI->use_operands(UserMI.getOperand(0).getReg()))
+ Worklist.insert(&CopyUse);
+ continue;
+ }
+
if (mayReadPastVL(UserMI)) {
LLVM_DEBUG(dbgs() << " Abort because used by unsafe instruction\n");
return std::nullopt;
diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll
index 403cc0eb9dce1..c249b3c5376fc 100644
--- a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll
@@ -901,9 +901,9 @@ define void @test_dag_loop() {
; CHECK-NEXT: vssubu.vx v12, v8, zero, v0.t
; CHECK-NEXT: vsetvli zero, zero, e8, m4, ta, ma
; CHECK-NEXT: vmseq.vv v0, v12, v8
-; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, ma
+; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma
; CHECK-NEXT: vmv.v.i v8, 0
-; CHECK-NEXT: vsetivli zero, 0, e16, m8, tu, mu
+; CHECK-NEXT: vsetvli zero, zero, e16, m8, tu, mu
; CHECK-NEXT: vle16.v v8, (zero), v0.t
; CHECK-NEXT: vse16.v v8, (zero)
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt.ll b/llvm/test/CodeGen/RISCV/rvv/vl-opt.ll
index 0b65246765c1f..823c2bbd0c968 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vl-opt.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt.ll
@@ -235,10 +235,9 @@ define void @fadd_fcmp_select_copy(<vscale x 4 x float> %v, <vscale x 4 x i1> %c
;
; VLOPT-LABEL: fadd_fcmp_select_copy:
; VLOPT: # %bb.0:
-; VLOPT-NEXT: vsetvli a2, zero, e32, m2, ta, ma
+; VLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
; VLOPT-NEXT: vfadd.vv v8, v8, v8
; VLOPT-NEXT: fmv.w.x fa5, zero
-; VLOPT-NEXT: vsetvli zero, a1, e32, m2, ta, ma
; VLOPT-NEXT: vmflt.vf v10, v8, fa5
; VLOPT-NEXT: vmand.mm v10, v0, v10
; VLOPT-NEXT: vse32.v v8, (a0)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt.mir b/llvm/test/CodeGen/RISCV/rvv/vl-opt.mir
index eaad479b59c1b..abf4faa59a98e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vl-opt.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt.mir
@@ -295,7 +295,7 @@ name: copy
body: |
bb.0:
; CHECK-LABEL: name: copy
- ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */
+ ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
; CHECK-NEXT: %y:vr = COPY %x
; CHECK-NEXT: %z:vr = PseudoVADD_VV_M1 $noreg, %y, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
%x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */
@@ -307,7 +307,7 @@ name: copy_multiple_users
body: |
bb.0:
; CHECK-LABEL: name: copy_multiple_users
- ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */
+ ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, 3, 3 /* e8 */, 0 /* tu, mu */
; CHECK-NEXT: %y:vr = COPY %x
; CHECK-NEXT: %z0:vr = PseudoVADD_VV_M1 $noreg, %y, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
; CHECK-NEXT: %z1:vr = PseudoVADD_VV_M1 $noreg, %y, $noreg, 3, 3 /* e8 */, 0 /* tu, mu */
More information about the llvm-commits
mailing list