[llvm] [RISCV][InsertVSETVLI] Remove redundant vsetvli by repeating the coalesce phase (PR #141298)

Min-Yih Hsu via llvm-commits llvm-commits at lists.llvm.org
Tue May 27 11:19:09 PDT 2025


https://github.com/mshockwave updated https://github.com/llvm/llvm-project/pull/141298

>From 7cf8a9dae6d37fd29a66746a1e3ea32b80565f9b Mon Sep 17 00:00:00 2001
From: Min-Yih Hsu <min.hsu at sifive.com>
Date: Fri, 23 May 2025 14:43:33 -0700
Subject: [PATCH 1/3] Pre-commit test

---
 .../RISCV/rvv/vsetvli-insert-coalesce.mir     | 85 +++++++++++++++++++
 1 file changed, 85 insertions(+)
 create mode 100644 llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-coalesce.mir

diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-coalesce.mir b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-coalesce.mir
new file mode 100644
index 0000000000000..afa0a24224c87
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-coalesce.mir
@@ -0,0 +1,85 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=riscv64 -mattr=+v,+m,+b -run-pass=liveintervals,riscv-insert-vsetvli %s -o - | FileCheck %s
+
+--- |
+  target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128"
+  target triple = "riscv64"
+
+  @g0 = external global ptr
+  @coalesce.g1 = external constant [16 x ptr]
+
+  define fastcc i32 @coalesce() #0 {
+    ret i32 4
+  }
+
+  attributes #0 = { "target-features"="+v,+m,+b" }
+...
+---
+name:            coalesce
+alignment:       2
+tracksRegLiveness: true
+noPhis:          true
+tracksDebugUserValues: true
+body:             |
+  ; CHECK-LABEL: name: coalesce
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[DEF:%[0-9]+]]:gprnox0 = IMPLICIT_DEF
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.2(0x80000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   dead [[PseudoVSETVLI:%[0-9]+]]:gprnox0 = PseudoVSETVLI [[DEF]], 199 /* e8, mf2, ta, ma */, implicit-def $vl, implicit-def $vtype
+  ; CHECK-NEXT:   dead [[PseudoVSETVLIX0_:%[0-9]+]]:gpr = PseudoVSETVLIX0 killed $x0, 209 /* e32, m2, ta, ma */, implicit-def $vl, implicit-def $vtype
+  ; CHECK-NEXT:   renamable $v10m2 = PseudoVMV_V_I_M2 undef renamable $v10m2, 0, -1, 5 /* e32 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   successors: %bb.3(0x04000000), %bb.2(0x7c000000)
+  ; CHECK-NEXT:   liveins: $v10m2, $v12m2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   BEQ undef %2:gpr, $x0, %bb.2
+  ; CHECK-NEXT:   PseudoBR %bb.3
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.3:
+  ; CHECK-NEXT:   successors: %bb.1(0x7c000000), %bb.4(0x04000000)
+  ; CHECK-NEXT:   liveins: $v8m2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   $x0 = PseudoVSETVLI [[DEF]], 209 /* e32, m2, ta, ma */, implicit-def $vl, implicit-def $vtype
+  ; CHECK-NEXT:   renamable $v10 = PseudoVMV_S_X undef renamable $v10, undef %2:gpr, $noreg, 5 /* e32 */, implicit $vl, implicit $vtype
+  ; CHECK-NEXT:   dead renamable $v8 = PseudoVREDSUM_VS_M2_E32 undef renamable $v8, killed undef renamable $v8m2, killed undef renamable $v10, $noreg, 5 /* e32 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
+  ; CHECK-NEXT:   BNE undef %3:gpr, $x0, %bb.1
+  ; CHECK-NEXT:   PseudoBR %bb.4
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.4:
+  ; CHECK-NEXT:   PseudoRET
+  bb.0:
+    successors: %bb.1(0x80000000)
+
+    %78:gprnox0 = IMPLICIT_DEF
+
+  bb.1:
+    successors: %bb.2(0x80000000)
+
+    %46:gprnox0 = PseudoVSETVLI %78, 199 /* e8, mf2, ta, ma */, implicit-def dead $vl, implicit-def dead $vtype
+    renamable $v10m2 = PseudoVMV_V_I_M2 undef renamable $v10m2, 0, -1, 5 /* e32 */, 0 /* tu, mu */
+
+  bb.2:
+    successors: %bb.3(0x04000000), %bb.2(0x7c000000)
+    liveins: $v10m2, $v12m2
+
+    BEQ undef %54:gpr, $x0, %bb.2
+    PseudoBR %bb.3
+
+  bb.3:
+    successors: %bb.1(0x7c000000), %bb.4(0x04000000)
+    liveins: $v8m2
+
+    renamable $v10 = PseudoVMV_S_X undef renamable $v10, undef %54:gpr, %46, 5 /* e32 */
+    dead renamable $v8 = PseudoVREDSUM_VS_M2_E32 undef renamable $v8, killed undef renamable $v8m2, killed undef renamable $v10, %46, 5 /* e32 */, 0 /* tu, mu */
+    BNE undef %29:gpr, $x0, %bb.1
+    PseudoBR %bb.4
+
+  bb.4:
+    PseudoRET
+...

>From e74254a9a8c035e56d40f878af2d2d916b8791f7 Mon Sep 17 00:00:00 2001
From: Min-Yih Hsu <min.hsu at sifive.com>
Date: Fri, 23 May 2025 14:12:48 -0700
Subject: [PATCH 2/3] [RISCV] Remove redundant vsetvli by repeating the
 coalesce phase

---
 llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp  | 32 +++++++++++++++----
 .../RISCV/rvv/vsetvli-insert-coalesce.mir     |  1 -
 2 files changed, 25 insertions(+), 8 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
index 2d79ced1cc163..97a415a3b0c6f 100644
--- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
@@ -26,6 +26,7 @@
 
 #include "RISCV.h"
 #include "RISCVSubtarget.h"
+#include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/LiveDebugVariables.h"
 #include "llvm/CodeGen/LiveIntervals.h"
@@ -895,7 +896,8 @@ class RISCVInsertVSETVLI : public MachineFunctionPass {
 
   bool canMutatePriorConfig(const MachineInstr &PrevMI, const MachineInstr &MI,
                             const DemandedFields &Used) const;
-  void coalesceVSETVLIs(MachineBasicBlock &MBB) const;
+  void coalesceVSETVLIs(SetVector<MachineBasicBlock *> &Worklist,
+                        MachineBasicBlock &MBB) const;
 
   VSETVLIInfo getInfoForVSETVLI(const MachineInstr &MI) const;
   VSETVLIInfo computeInfoForInstr(const MachineInstr &MI) const;
@@ -1642,7 +1644,8 @@ bool RISCVInsertVSETVLI::canMutatePriorConfig(
   return areCompatibleVTYPEs(PriorVType, VType, Used);
 }
 
-void RISCVInsertVSETVLI::coalesceVSETVLIs(MachineBasicBlock &MBB) const {
+void RISCVInsertVSETVLI::coalesceVSETVLIs(
+    SetVector<MachineBasicBlock *> &Worklist, MachineBasicBlock &MBB) const {
   MachineInstr *NextMI = nullptr;
   // We can have arbitrary code in successors, so VL and VTYPE
   // must be considered demanded.
@@ -1661,9 +1664,18 @@ void RISCVInsertVSETVLI::coalesceVSETVLIs(MachineBasicBlock &MBB) const {
       LIS->shrinkToUses(&LIS->getInterval(OldVLReg));
 
     MachineInstr *VLOpDef = MRI->getUniqueVRegDef(OldVLReg);
-    if (VLOpDef && TII->isAddImmediate(*VLOpDef, OldVLReg) &&
-        MRI->use_nodbg_empty(OldVLReg))
-      ToDelete.push_back(VLOpDef);
+    if (VLOpDef && MRI->use_nodbg_empty(OldVLReg)) {
+      if (TII->isAddImmediate(*VLOpDef, OldVLReg))
+        ToDelete.push_back(VLOpDef);
+      // If the destination register of a vset* instruction becomes dead because
+      // of this, there might be a chance to eliminate it. Put into the worklist
+      // so that we can revisit it.
+      // Note that since this is a virtual register, the definition instruction
+      // is always placed earlier in the program order. Thus, we avoid
+      // enqueuing blocks in cycle and therefore guarantee to terminate.
+      if (RISCVInstrInfo::isVectorConfigInstr(*VLOpDef))
+        Worklist.insert(VLOpDef->getParent());
+    }
   };
 
   for (MachineInstr &MI : make_early_inc_range(reverse(MBB))) {
@@ -1840,8 +1852,14 @@ bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) {
   // any cross block analysis within the dataflow.  We can't have both
   // demanded fields based mutation and non-local analysis in the
   // dataflow at the same time without introducing inconsistencies.
-  for (MachineBasicBlock &MBB : MF)
-    coalesceVSETVLIs(MBB);
+  using BBPtrIterator = pointer_iterator<MachineFunction::iterator>;
+  SetVector<MachineBasicBlock *> Worklist(BBPtrIterator(MF.begin()),
+                                          BBPtrIterator(MF.end()));
+  while (!Worklist.empty()) {
+    MachineBasicBlock *MBB = Worklist.front();
+    Worklist.erase(Worklist.begin());
+    coalesceVSETVLIs(Worklist, *MBB);
+  }
 
   // Insert PseudoReadVL after VLEFF/VLSEGFF and replace it with the vl output
   // of VLEFF/VLSEGFF.
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-coalesce.mir b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-coalesce.mir
index afa0a24224c87..d750421038519 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-coalesce.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-coalesce.mir
@@ -30,7 +30,6 @@ body:             |
   ; CHECK-NEXT: bb.1:
   ; CHECK-NEXT:   successors: %bb.2(0x80000000)
   ; CHECK-NEXT: {{  $}}
-  ; CHECK-NEXT:   dead [[PseudoVSETVLI:%[0-9]+]]:gprnox0 = PseudoVSETVLI [[DEF]], 199 /* e8, mf2, ta, ma */, implicit-def $vl, implicit-def $vtype
   ; CHECK-NEXT:   dead [[PseudoVSETVLIX0_:%[0-9]+]]:gpr = PseudoVSETVLIX0 killed $x0, 209 /* e32, m2, ta, ma */, implicit-def $vl, implicit-def $vtype
   ; CHECK-NEXT:   renamable $v10m2 = PseudoVMV_V_I_M2 undef renamable $v10m2, 0, -1, 5 /* e32 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
   ; CHECK-NEXT: {{  $}}

>From 5f1cfb6735eff1733dc15c77a7c3bf991b672146 Mon Sep 17 00:00:00 2001
From: Min-Yih Hsu <min.hsu at sifive.com>
Date: Tue, 27 May 2025 10:13:59 -0700
Subject: [PATCH 3/3] fixup! Visit the blocks in post-order instead

Co-Authored-By: Luke Lau <luke at igalia.com>
---
 llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp | 36 ++++++--------------
 1 file changed, 11 insertions(+), 25 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
index 97a415a3b0c6f..8fe8dfabee297 100644
--- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
@@ -26,7 +26,7 @@
 
 #include "RISCV.h"
 #include "RISCVSubtarget.h"
-#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/PostOrderIterator.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/CodeGen/LiveDebugVariables.h"
 #include "llvm/CodeGen/LiveIntervals.h"
@@ -896,8 +896,7 @@ class RISCVInsertVSETVLI : public MachineFunctionPass {
 
   bool canMutatePriorConfig(const MachineInstr &PrevMI, const MachineInstr &MI,
                             const DemandedFields &Used) const;
-  void coalesceVSETVLIs(SetVector<MachineBasicBlock *> &Worklist,
-                        MachineBasicBlock &MBB) const;
+  void coalesceVSETVLIs(MachineBasicBlock &MBB) const;
 
   VSETVLIInfo getInfoForVSETVLI(const MachineInstr &MI) const;
   VSETVLIInfo computeInfoForInstr(const MachineInstr &MI) const;
@@ -1644,8 +1643,7 @@ bool RISCVInsertVSETVLI::canMutatePriorConfig(
   return areCompatibleVTYPEs(PriorVType, VType, Used);
 }
 
-void RISCVInsertVSETVLI::coalesceVSETVLIs(
-    SetVector<MachineBasicBlock *> &Worklist, MachineBasicBlock &MBB) const {
+void RISCVInsertVSETVLI::coalesceVSETVLIs(MachineBasicBlock &MBB) const {
   MachineInstr *NextMI = nullptr;
   // We can have arbitrary code in successors, so VL and VTYPE
   // must be considered demanded.
@@ -1664,18 +1662,9 @@ void RISCVInsertVSETVLI::coalesceVSETVLIs(
       LIS->shrinkToUses(&LIS->getInterval(OldVLReg));
 
     MachineInstr *VLOpDef = MRI->getUniqueVRegDef(OldVLReg);
-    if (VLOpDef && MRI->use_nodbg_empty(OldVLReg)) {
-      if (TII->isAddImmediate(*VLOpDef, OldVLReg))
-        ToDelete.push_back(VLOpDef);
-      // If the destination register of a vset* instruction becomes dead because
-      // of this, there might be a chance to eliminate it. Put into the worklist
-      // so that we can revisit it.
-      // Note that since this is a virtual register, the definition instruction
-      // is always placed earlier in the program order. Thus, we avoid
-      // enqueuing blocks in cycle and therefore guarantee to terminate.
-      if (RISCVInstrInfo::isVectorConfigInstr(*VLOpDef))
-        Worklist.insert(VLOpDef->getParent());
-    }
+    if (VLOpDef && TII->isAddImmediate(*VLOpDef, OldVLReg) &&
+        MRI->use_nodbg_empty(OldVLReg))
+      ToDelete.push_back(VLOpDef);
   };
 
   for (MachineInstr &MI : make_early_inc_range(reverse(MBB))) {
@@ -1852,14 +1841,11 @@ bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) {
   // any cross block analysis within the dataflow.  We can't have both
   // demanded fields based mutation and non-local analysis in the
   // dataflow at the same time without introducing inconsistencies.
-  using BBPtrIterator = pointer_iterator<MachineFunction::iterator>;
-  SetVector<MachineBasicBlock *> Worklist(BBPtrIterator(MF.begin()),
-                                          BBPtrIterator(MF.end()));
-  while (!Worklist.empty()) {
-    MachineBasicBlock *MBB = Worklist.front();
-    Worklist.erase(Worklist.begin());
-    coalesceVSETVLIs(Worklist, *MBB);
-  }
+  // We're visiting blocks from the bottom up because a VSETVLI in the
+  // earlier block might become dead when its uses in later blocks are
+  // optimized away.
+  for (MachineBasicBlock *MBB : post_order(&MF))
+    coalesceVSETVLIs(*MBB);
 
   // Insert PseudoReadVL after VLEFF/VLSEGFF and replace it with the vl output
   // of VLEFF/VLSEGFF.



More information about the llvm-commits mailing list