[llvm] [RISCV][InsertVSETVLI] Remove redundant vsetvli by repeating the coalesce phase (PR #141298)
Min-Yih Hsu via llvm-commits
llvm-commits at lists.llvm.org
Tue May 27 11:19:09 PDT 2025
https://github.com/mshockwave updated https://github.com/llvm/llvm-project/pull/141298
>From 7cf8a9dae6d37fd29a66746a1e3ea32b80565f9b Mon Sep 17 00:00:00 2001
From: Min-Yih Hsu <min.hsu at sifive.com>
Date: Fri, 23 May 2025 14:43:33 -0700
Subject: [PATCH 1/3] Pre-commit test
---
.../RISCV/rvv/vsetvli-insert-coalesce.mir | 85 +++++++++++++++++++
1 file changed, 85 insertions(+)
create mode 100644 llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-coalesce.mir
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-coalesce.mir b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-coalesce.mir
new file mode 100644
index 0000000000000..afa0a24224c87
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-coalesce.mir
@@ -0,0 +1,85 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=riscv64 -mattr=+v,+m,+b -run-pass=liveintervals,riscv-insert-vsetvli %s -o - | FileCheck %s
+
+--- |
+ target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128"
+ target triple = "riscv64"
+
+ @g0 = external global ptr
+ @coalesce.g1 = external constant [16 x ptr]
+
+ define fastcc i32 @coalesce() #0 {
+ ret i32 4
+ }
+
+ attributes #0 = { "target-features"="+v,+m,+b" }
+...
+---
+name: coalesce
+alignment: 2
+tracksRegLiveness: true
+noPhis: true
+tracksDebugUserValues: true
+body: |
+ ; CHECK-LABEL: name: coalesce
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:gprnox0 = IMPLICIT_DEF
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1:
+ ; CHECK-NEXT: successors: %bb.2(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: dead [[PseudoVSETVLI:%[0-9]+]]:gprnox0 = PseudoVSETVLI [[DEF]], 199 /* e8, mf2, ta, ma */, implicit-def $vl, implicit-def $vtype
+ ; CHECK-NEXT: dead [[PseudoVSETVLIX0_:%[0-9]+]]:gpr = PseudoVSETVLIX0 killed $x0, 209 /* e32, m2, ta, ma */, implicit-def $vl, implicit-def $vtype
+ ; CHECK-NEXT: renamable $v10m2 = PseudoVMV_V_I_M2 undef renamable $v10m2, 0, -1, 5 /* e32 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2:
+ ; CHECK-NEXT: successors: %bb.3(0x04000000), %bb.2(0x7c000000)
+ ; CHECK-NEXT: liveins: $v10m2, $v12m2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: BEQ undef %2:gpr, $x0, %bb.2
+ ; CHECK-NEXT: PseudoBR %bb.3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3:
+ ; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.4(0x04000000)
+ ; CHECK-NEXT: liveins: $v8m2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: $x0 = PseudoVSETVLI [[DEF]], 209 /* e32, m2, ta, ma */, implicit-def $vl, implicit-def $vtype
+ ; CHECK-NEXT: renamable $v10 = PseudoVMV_S_X undef renamable $v10, undef %2:gpr, $noreg, 5 /* e32 */, implicit $vl, implicit $vtype
+ ; CHECK-NEXT: dead renamable $v8 = PseudoVREDSUM_VS_M2_E32 undef renamable $v8, killed undef renamable $v8m2, killed undef renamable $v10, $noreg, 5 /* e32 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
+ ; CHECK-NEXT: BNE undef %3:gpr, $x0, %bb.1
+ ; CHECK-NEXT: PseudoBR %bb.4
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.4:
+ ; CHECK-NEXT: PseudoRET
+ bb.0:
+ successors: %bb.1(0x80000000)
+
+ %78:gprnox0 = IMPLICIT_DEF
+
+ bb.1:
+ successors: %bb.2(0x80000000)
+
+ %46:gprnox0 = PseudoVSETVLI %78, 199 /* e8, mf2, ta, ma */, implicit-def dead $vl, implicit-def dead $vtype
+ renamable $v10m2 = PseudoVMV_V_I_M2 undef renamable $v10m2, 0, -1, 5 /* e32 */, 0 /* tu, mu */
+
+ bb.2:
+ successors: %bb.3(0x04000000), %bb.2(0x7c000000)
+ liveins: $v10m2, $v12m2
+
+ BEQ undef %54:gpr, $x0, %bb.2
+ PseudoBR %bb.3
+
+ bb.3:
+ successors: %bb.1(0x7c000000), %bb.4(0x04000000)
+ liveins: $v8m2
+
+ renamable $v10 = PseudoVMV_S_X undef renamable $v10, undef %54:gpr, %46, 5 /* e32 */
+ dead renamable $v8 = PseudoVREDSUM_VS_M2_E32 undef renamable $v8, killed undef renamable $v8m2, killed undef renamable $v10, %46, 5 /* e32 */, 0 /* tu, mu */
+ BNE undef %29:gpr, $x0, %bb.1
+ PseudoBR %bb.4
+
+ bb.4:
+ PseudoRET
+...
>From e74254a9a8c035e56d40f878af2d2d916b8791f7 Mon Sep 17 00:00:00 2001
From: Min-Yih Hsu <min.hsu at sifive.com>
Date: Fri, 23 May 2025 14:12:48 -0700
Subject: [PATCH 2/3] [RISCV] Remove redundant vsetvli by repeating the
coalesce phase
---
llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp | 32 +++++++++++++++----
.../RISCV/rvv/vsetvli-insert-coalesce.mir | 1 -
2 files changed, 25 insertions(+), 8 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
index 2d79ced1cc163..97a415a3b0c6f 100644
--- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
@@ -26,6 +26,7 @@
#include "RISCV.h"
#include "RISCVSubtarget.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LiveDebugVariables.h"
#include "llvm/CodeGen/LiveIntervals.h"
@@ -895,7 +896,8 @@ class RISCVInsertVSETVLI : public MachineFunctionPass {
bool canMutatePriorConfig(const MachineInstr &PrevMI, const MachineInstr &MI,
const DemandedFields &Used) const;
- void coalesceVSETVLIs(MachineBasicBlock &MBB) const;
+ void coalesceVSETVLIs(SetVector<MachineBasicBlock *> &Worklist,
+ MachineBasicBlock &MBB) const;
VSETVLIInfo getInfoForVSETVLI(const MachineInstr &MI) const;
VSETVLIInfo computeInfoForInstr(const MachineInstr &MI) const;
@@ -1642,7 +1644,8 @@ bool RISCVInsertVSETVLI::canMutatePriorConfig(
return areCompatibleVTYPEs(PriorVType, VType, Used);
}
-void RISCVInsertVSETVLI::coalesceVSETVLIs(MachineBasicBlock &MBB) const {
+void RISCVInsertVSETVLI::coalesceVSETVLIs(
+ SetVector<MachineBasicBlock *> &Worklist, MachineBasicBlock &MBB) const {
MachineInstr *NextMI = nullptr;
// We can have arbitrary code in successors, so VL and VTYPE
// must be considered demanded.
@@ -1661,9 +1664,18 @@ void RISCVInsertVSETVLI::coalesceVSETVLIs(MachineBasicBlock &MBB) const {
LIS->shrinkToUses(&LIS->getInterval(OldVLReg));
MachineInstr *VLOpDef = MRI->getUniqueVRegDef(OldVLReg);
- if (VLOpDef && TII->isAddImmediate(*VLOpDef, OldVLReg) &&
- MRI->use_nodbg_empty(OldVLReg))
- ToDelete.push_back(VLOpDef);
+ if (VLOpDef && MRI->use_nodbg_empty(OldVLReg)) {
+ if (TII->isAddImmediate(*VLOpDef, OldVLReg))
+ ToDelete.push_back(VLOpDef);
+ // If the destination register of a vset* instruction becomes dead because
+ // of this, there might be a chance to eliminate it. Put into the worklist
+ // so that we can revisit it.
+ // Note that since this is a virtual register, the definition instruction
+ // is always placed earlier in the program order. Thus, we avoid
+ // enqueuing blocks in cycle and therefore guarantee to terminate.
+ if (RISCVInstrInfo::isVectorConfigInstr(*VLOpDef))
+ Worklist.insert(VLOpDef->getParent());
+ }
};
for (MachineInstr &MI : make_early_inc_range(reverse(MBB))) {
@@ -1840,8 +1852,14 @@ bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) {
// any cross block analysis within the dataflow. We can't have both
// demanded fields based mutation and non-local analysis in the
// dataflow at the same time without introducing inconsistencies.
- for (MachineBasicBlock &MBB : MF)
- coalesceVSETVLIs(MBB);
+ using BBPtrIterator = pointer_iterator<MachineFunction::iterator>;
+ SetVector<MachineBasicBlock *> Worklist(BBPtrIterator(MF.begin()),
+ BBPtrIterator(MF.end()));
+ while (!Worklist.empty()) {
+ MachineBasicBlock *MBB = Worklist.front();
+ Worklist.erase(Worklist.begin());
+ coalesceVSETVLIs(Worklist, *MBB);
+ }
// Insert PseudoReadVL after VLEFF/VLSEGFF and replace it with the vl output
// of VLEFF/VLSEGFF.
diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-coalesce.mir b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-coalesce.mir
index afa0a24224c87..d750421038519 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-coalesce.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-coalesce.mir
@@ -30,7 +30,6 @@ body: |
; CHECK-NEXT: bb.1:
; CHECK-NEXT: successors: %bb.2(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: dead [[PseudoVSETVLI:%[0-9]+]]:gprnox0 = PseudoVSETVLI [[DEF]], 199 /* e8, mf2, ta, ma */, implicit-def $vl, implicit-def $vtype
; CHECK-NEXT: dead [[PseudoVSETVLIX0_:%[0-9]+]]:gpr = PseudoVSETVLIX0 killed $x0, 209 /* e32, m2, ta, ma */, implicit-def $vl, implicit-def $vtype
; CHECK-NEXT: renamable $v10m2 = PseudoVMV_V_I_M2 undef renamable $v10m2, 0, -1, 5 /* e32 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
; CHECK-NEXT: {{ $}}
>From 5f1cfb6735eff1733dc15c77a7c3bf991b672146 Mon Sep 17 00:00:00 2001
From: Min-Yih Hsu <min.hsu at sifive.com>
Date: Tue, 27 May 2025 10:13:59 -0700
Subject: [PATCH 3/3] fixup! Visit the blocks in post-order instead
Co-Authored-By: Luke Lau <luke at igalia.com>
---
llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp | 36 ++++++--------------
1 file changed, 11 insertions(+), 25 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
index 97a415a3b0c6f..8fe8dfabee297 100644
--- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
@@ -26,7 +26,7 @@
#include "RISCV.h"
#include "RISCVSubtarget.h"
-#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LiveDebugVariables.h"
#include "llvm/CodeGen/LiveIntervals.h"
@@ -896,8 +896,7 @@ class RISCVInsertVSETVLI : public MachineFunctionPass {
bool canMutatePriorConfig(const MachineInstr &PrevMI, const MachineInstr &MI,
const DemandedFields &Used) const;
- void coalesceVSETVLIs(SetVector<MachineBasicBlock *> &Worklist,
- MachineBasicBlock &MBB) const;
+ void coalesceVSETVLIs(MachineBasicBlock &MBB) const;
VSETVLIInfo getInfoForVSETVLI(const MachineInstr &MI) const;
VSETVLIInfo computeInfoForInstr(const MachineInstr &MI) const;
@@ -1644,8 +1643,7 @@ bool RISCVInsertVSETVLI::canMutatePriorConfig(
return areCompatibleVTYPEs(PriorVType, VType, Used);
}
-void RISCVInsertVSETVLI::coalesceVSETVLIs(
- SetVector<MachineBasicBlock *> &Worklist, MachineBasicBlock &MBB) const {
+void RISCVInsertVSETVLI::coalesceVSETVLIs(MachineBasicBlock &MBB) const {
MachineInstr *NextMI = nullptr;
// We can have arbitrary code in successors, so VL and VTYPE
// must be considered demanded.
@@ -1664,18 +1662,9 @@ void RISCVInsertVSETVLI::coalesceVSETVLIs(
LIS->shrinkToUses(&LIS->getInterval(OldVLReg));
MachineInstr *VLOpDef = MRI->getUniqueVRegDef(OldVLReg);
- if (VLOpDef && MRI->use_nodbg_empty(OldVLReg)) {
- if (TII->isAddImmediate(*VLOpDef, OldVLReg))
- ToDelete.push_back(VLOpDef);
- // If the destination register of a vset* instruction becomes dead because
- // of this, there might be a chance to eliminate it. Put into the worklist
- // so that we can revisit it.
- // Note that since this is a virtual register, the definition instruction
- // is always placed earlier in the program order. Thus, we avoid
- // enqueuing blocks in cycle and therefore guarantee to terminate.
- if (RISCVInstrInfo::isVectorConfigInstr(*VLOpDef))
- Worklist.insert(VLOpDef->getParent());
- }
+ if (VLOpDef && TII->isAddImmediate(*VLOpDef, OldVLReg) &&
+ MRI->use_nodbg_empty(OldVLReg))
+ ToDelete.push_back(VLOpDef);
};
for (MachineInstr &MI : make_early_inc_range(reverse(MBB))) {
@@ -1852,14 +1841,11 @@ bool RISCVInsertVSETVLI::runOnMachineFunction(MachineFunction &MF) {
// any cross block analysis within the dataflow. We can't have both
// demanded fields based mutation and non-local analysis in the
// dataflow at the same time without introducing inconsistencies.
- using BBPtrIterator = pointer_iterator<MachineFunction::iterator>;
- SetVector<MachineBasicBlock *> Worklist(BBPtrIterator(MF.begin()),
- BBPtrIterator(MF.end()));
- while (!Worklist.empty()) {
- MachineBasicBlock *MBB = Worklist.front();
- Worklist.erase(Worklist.begin());
- coalesceVSETVLIs(Worklist, *MBB);
- }
+ // We're visiting blocks from the bottom up because a VSETVLI in the
+ // earlier block might become dead when its uses in later blocks are
+ // optimized away.
+ for (MachineBasicBlock *MBB : post_order(&MF))
+ coalesceVSETVLIs(*MBB);
// Insert PseudoReadVL after VLEFF/VLSEGFF and replace it with the vl output
// of VLEFF/VLSEGFF.
More information about the llvm-commits
mailing list