[llvm] [RISCV][VLOPT] Peek through copies in checkUsers (PR #127656)

Luke Lau via llvm-commits llvm-commits at lists.llvm.org
Tue Feb 18 07:44:42 PST 2025


https://github.com/lukel97 created https://github.com/llvm/llvm-project/pull/127656

Currently, if a user of an instruction isn't a vector pseudo, we bail. For simple non-subreg virtual COPYs, we can instead peek through the COPY to its users by adding them to a worklist.

This case is extracted from a loop in TSVC2 (s273) that contains an fcmp + select, which produces a COPY that doesn't seem to be coalesced away.
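
For reference, here is a small standalone C++ sketch of the worklist traversal. It is deliberately not LLVM code (the actual change operates on MachineOperands via MRI->use_operands and a SmallSetVector, as in the diff below); the tiny Inst model and the names in it are only illustrative.

// Standalone sketch: starting from the users of a defining instruction,
// simple COPYs are not treated as users themselves; instead their own users
// are pushed onto the worklist, so the VL analysis sees the instructions
// behind the COPY.
#include <cstdio>
#include <set>
#include <string>
#include <vector>

struct Inst {
  std::string Name;
  bool IsSimpleCopy = false;     // stands in for a non-subreg virtual COPY
  std::vector<Inst *> Users;     // stands in for MRI->use_operands(...)
};

// Collect the "real" users of Def, looking through simple copies.
std::vector<Inst *> collectUsersThroughCopies(Inst &Def) {
  std::vector<Inst *> Worklist(Def.Users.begin(), Def.Users.end());
  std::set<Inst *> Visited;
  std::vector<Inst *> RealUsers;
  while (!Worklist.empty()) {
    Inst *U = Worklist.back();
    Worklist.pop_back();
    if (!Visited.insert(U).second)
      continue;                    // SmallSetVector-style deduplication
    if (U->IsSimpleCopy) {
      // Peek through: queue the copy's users instead of giving up on it.
      Worklist.insert(Worklist.end(), U->Users.begin(), U->Users.end());
      continue;
    }
    RealUsers.push_back(U);        // a user the VL optimizer can inspect
  }
  return RealUsers;
}

int main() {
  Inst Add{"vadd"}, Copy{"copy", true}, Use0{"vse"}, Use1{"vmand"};
  Copy.Users = {&Use0, &Use1};
  Add.Users = {&Copy};
  for (Inst *U : collectUsersThroughCopies(Add))
    std::printf("user: %s\n", U->Name.c_str()); // prints vse and vmand
}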



>From f337bb1b56f942f03662b34574f899f643c3ecd7 Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Tue, 18 Feb 2025 23:33:29 +0800
Subject: [PATCH 1/2] Precommit tests

---
 llvm/test/CodeGen/RISCV/rvv/vl-opt.ll  | 34 +++++++++++++++++++++++
 llvm/test/CodeGen/RISCV/rvv/vl-opt.mir | 38 ++++++++++++++++++++++++++
 2 files changed, 72 insertions(+)

diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt.ll b/llvm/test/CodeGen/RISCV/rvv/vl-opt.ll
index f0b05d2420b1a..0b65246765c1f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vl-opt.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt.ll
@@ -217,3 +217,37 @@ define void @optimize_ternary_use(<vscale x 4 x i16> %a, <vscale x 4 x i32> %b,
   call void @llvm.riscv.vse(<vscale x 4 x i32> %3, ptr %p, iXLen %vl)
   ret void
 }
+
+; This function has a copy between two vrm2 virtual registers; make sure we
+; can still reduce the VL across it.
+define void @fadd_fcmp_select_copy(<vscale x 4 x float> %v, <vscale x 4 x i1> %c, ptr %p, iXLen %vl) {
+; NOVLOPT-LABEL: fadd_fcmp_select_copy:
+; NOVLOPT:       # %bb.0:
+; NOVLOPT-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vfadd.vv v8, v8, v8
+; NOVLOPT-NEXT:    fmv.w.x fa5, zero
+; NOVLOPT-NEXT:    vmflt.vf v10, v8, fa5
+; NOVLOPT-NEXT:    vmand.mm v10, v0, v10
+; NOVLOPT-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; NOVLOPT-NEXT:    vse32.v v8, (a0)
+; NOVLOPT-NEXT:    vsm.v v10, (a0)
+; NOVLOPT-NEXT:    ret
+;
+; VLOPT-LABEL: fadd_fcmp_select_copy:
+; VLOPT:       # %bb.0:
+; VLOPT-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; VLOPT-NEXT:    vfadd.vv v8, v8, v8
+; VLOPT-NEXT:    fmv.w.x fa5, zero
+; VLOPT-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; VLOPT-NEXT:    vmflt.vf v10, v8, fa5
+; VLOPT-NEXT:    vmand.mm v10, v0, v10
+; VLOPT-NEXT:    vse32.v v8, (a0)
+; VLOPT-NEXT:    vsm.v v10, (a0)
+; VLOPT-NEXT:    ret
+  %fadd = fadd <vscale x 4 x float> %v, %v
+  %fcmp = fcmp olt <vscale x 4 x float> %fadd, zeroinitializer
+  %select = select <vscale x 4 x i1> %c, <vscale x 4 x i1> %fcmp, <vscale x 4 x i1> zeroinitializer
+  call void @llvm.riscv.vse(<vscale x 4 x float> %fadd, ptr %p, iXLen %vl)
+  call void @llvm.riscv.vsm(<vscale x 4 x i1> %select, ptr %p, iXLen %vl)
+  ret void
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt.mir b/llvm/test/CodeGen/RISCV/rvv/vl-opt.mir
index 0475a988e9851..eaad479b59c1b 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vl-opt.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt.mir
@@ -290,3 +290,41 @@ body: |
     %x:vr = PseudoVSADDU_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */, implicit-def $vxsat
     %y:vr = PseudoVADD_VV_M1 $noreg, %x, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
 ...
+---
+name: copy
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: copy
+    ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */
+    ; CHECK-NEXT: %y:vr = COPY %x
+    ; CHECK-NEXT: %z:vr = PseudoVADD_VV_M1 $noreg, %y, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
+    %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */
+    %y:vr = COPY %x
+    %z:vr = PseudoVADD_VV_M1 $noreg, %y, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
+...
+---
+name: copy_multiple_users
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: copy_multiple_users
+    ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */
+    ; CHECK-NEXT: %y:vr = COPY %x
+    ; CHECK-NEXT: %z0:vr = PseudoVADD_VV_M1 $noreg, %y, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
+    ; CHECK-NEXT: %z1:vr = PseudoVADD_VV_M1 $noreg, %y, $noreg, 3, 3 /* e8 */, 0 /* tu, mu */
+    %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */
+    %y:vr = COPY %x
+    %z0:vr = PseudoVADD_VV_M1 $noreg, %y, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
+    %z1:vr = PseudoVADD_VV_M1 $noreg, %y, $noreg, 3, 3 /* e8 */, 0 /* tu, mu */
+...
+---
+name: copy_user_invalid_sew
+body: |
+  bb.0:
+    ; CHECK-LABEL: name: copy_user_invalid_sew
+    ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */
+    ; CHECK-NEXT: %y:vr = COPY %x
+    ; CHECK-NEXT: %z:vr = PseudoVADD_VV_M1 $noreg, %y, $noreg, 1, 4 /* e16 */, 0 /* tu, mu */
+    %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */
+    %y:vr = COPY %x
+    %z:vr = PseudoVADD_VV_M1 $noreg, %y, $noreg, 1, 4 /* e16 */, 0 /* tu, mu */
+...

>From 7e0aecd45ff993d08c18a9abcb53bc7a2bfe9f9e Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Tue, 18 Feb 2025 23:39:12 +0800
Subject: [PATCH 2/2] [RISCV][VLOPT] Peek through copies in checkUsers

Currently, if a user of an instruction isn't a vector pseudo, we bail. For simple non-subreg virtual COPYs, we can instead peek through the COPY to its users by adding them to a worklist.

This case is extracted from a loop in TSVC2 (s273) that contains an fcmp + select, which produces a COPY that doesn't seem to be coalesced away.
---
 llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp      | 17 ++++++++++++++++-
 .../RISCV/rvv/rvv-peephole-vmerge-vops.ll       |  4 ++--
 llvm/test/CodeGen/RISCV/rvv/vl-opt.ll           |  3 +--
 llvm/test/CodeGen/RISCV/rvv/vl-opt.mir          |  4 ++--
 4 files changed, 21 insertions(+), 7 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
index 1ba7f0b522a2b..67024d720ccfa 100644
--- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
@@ -1311,9 +1311,24 @@ RISCVVLOptimizer::getMinimumVLForUser(MachineOperand &UserOp) {
 
 std::optional<MachineOperand> RISCVVLOptimizer::checkUsers(MachineInstr &MI) {
   std::optional<MachineOperand> CommonVL;
-  for (auto &UserOp : MRI->use_operands(MI.getOperand(0).getReg())) {
+  SmallSetVector<MachineOperand *, 8> Worklist;
+  for (auto &UserOp : MRI->use_operands(MI.getOperand(0).getReg()))
+    Worklist.insert(&UserOp);
+
+  while (!Worklist.empty()) {
+    MachineOperand &UserOp = *Worklist.pop_back_val();
     const MachineInstr &UserMI = *UserOp.getParent();
     LLVM_DEBUG(dbgs() << "  Checking user: " << UserMI << "\n");
+
+    if (UserMI.isCopy() && UserMI.getOperand(0).getReg().isVirtual() &&
+        UserMI.getOperand(0).getSubReg() == RISCV::NoSubRegister &&
+        UserMI.getOperand(1).getSubReg() == RISCV::NoSubRegister) {
+      LLVM_DEBUG(dbgs() << "    Peeking through uses of COPY\n");
+      for (auto &CopyUse : MRI->use_operands(UserMI.getOperand(0).getReg()))
+        Worklist.insert(&CopyUse);
+      continue;
+    }
+
     if (mayReadPastVL(UserMI)) {
       LLVM_DEBUG(dbgs() << "    Abort because used by unsafe instruction\n");
       return std::nullopt;
diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll
index 403cc0eb9dce1..c249b3c5376fc 100644
--- a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll
@@ -901,9 +901,9 @@ define void @test_dag_loop() {
 ; CHECK-NEXT:    vssubu.vx v12, v8, zero, v0.t
 ; CHECK-NEXT:    vsetvli zero, zero, e8, m4, ta, ma
 ; CHECK-NEXT:    vmseq.vv v0, v12, v8
-; CHECK-NEXT:    vsetvli a0, zero, e16, m8, ta, ma
+; CHECK-NEXT:    vsetvli zero, zero, e16, m8, ta, ma
 ; CHECK-NEXT:    vmv.v.i v8, 0
-; CHECK-NEXT:    vsetivli zero, 0, e16, m8, tu, mu
+; CHECK-NEXT:    vsetvli zero, zero, e16, m8, tu, mu
 ; CHECK-NEXT:    vle16.v v8, (zero), v0.t
 ; CHECK-NEXT:    vse16.v v8, (zero)
 ; CHECK-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt.ll b/llvm/test/CodeGen/RISCV/rvv/vl-opt.ll
index 0b65246765c1f..823c2bbd0c968 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vl-opt.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt.ll
@@ -235,10 +235,9 @@ define void @fadd_fcmp_select_copy(<vscale x 4 x float> %v, <vscale x 4 x i1> %c
 ;
 ; VLOPT-LABEL: fadd_fcmp_select_copy:
 ; VLOPT:       # %bb.0:
-; VLOPT-NEXT:    vsetvli a2, zero, e32, m2, ta, ma
+; VLOPT-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
 ; VLOPT-NEXT:    vfadd.vv v8, v8, v8
 ; VLOPT-NEXT:    fmv.w.x fa5, zero
-; VLOPT-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
 ; VLOPT-NEXT:    vmflt.vf v10, v8, fa5
 ; VLOPT-NEXT:    vmand.mm v10, v0, v10
 ; VLOPT-NEXT:    vse32.v v8, (a0)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt.mir b/llvm/test/CodeGen/RISCV/rvv/vl-opt.mir
index eaad479b59c1b..abf4faa59a98e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vl-opt.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt.mir
@@ -295,7 +295,7 @@ name: copy
 body: |
   bb.0:
     ; CHECK-LABEL: name: copy
-    ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */
+    ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
     ; CHECK-NEXT: %y:vr = COPY %x
     ; CHECK-NEXT: %z:vr = PseudoVADD_VV_M1 $noreg, %y, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
     %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */
@@ -307,7 +307,7 @@ name: copy_multiple_users
 body: |
   bb.0:
     ; CHECK-LABEL: name: copy_multiple_users
-    ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */
+    ; CHECK: %x:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, 3, 3 /* e8 */, 0 /* tu, mu */
     ; CHECK-NEXT: %y:vr = COPY %x
     ; CHECK-NEXT: %z0:vr = PseudoVADD_VV_M1 $noreg, %y, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
     ; CHECK-NEXT: %z1:vr = PseudoVADD_VV_M1 $noreg, %y, $noreg, 3, 3 /* e8 */, 0 /* tu, mu */


