[llvm] [RISCV] Only reduce VLs of instructions with demanded VLs (PR #168693)
Luke Lau via llvm-commits
llvm-commits at lists.llvm.org
Wed Nov 19 19:05:22 PST 2025
https://github.com/lukel97 updated https://github.com/llvm/llvm-project/pull/168693
>From d6da6bc58e1fb76e1829fcafafc303ef4b2dad91 Mon Sep 17 00:00:00 2001
From: Luke Lau <luke at igalia.com>
Date: Wed, 19 Nov 2025 18:44:32 +0800
Subject: [PATCH] [RISCV] Only reduce VLs of instructions with demanded VLs
In RISCVVLOptimizer we first compute all the demanded VLs, then we walk backwards through the function and try to reduce any VLs.
We don't actually need to walk backwards anymore, since after #124530 the order in which we modify the instructions doesn't matter.
This patch changes it to iterate only over the instructions for which a demanded VL was computed, which means we no longer visit scalar instructions etc.; the new driver loop looks roughly like the sketch below.
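A minimal sketch of the new top-level loop (names taken from the patch further down; the body is simplified):

    bool MadeChange = false;
    // Only instructions that the dataflow analysis actually reached have an
    // entry in DemandedVLs, so scalar and unreachable instructions are never
    // visited here.
    for (auto &[MI, VL] : DemandedVLs) {
      if (!isCandidate(*MI))
        continue;
      if (tryReduceVL(*const_cast<MachineInstr *>(MI), VL.VL))
        MadeChange = true;
    }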
This also fixes #168665, where we triggered an assert on instructions with a dead $vxsat implicit-def:
dead %x:vr = PseudoVSADDU_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */, implicit-def dead $vxsat
Because $vxsat is a reserved register, DeadMachineInstructionElim won't remove the instruction, and it reaches RISCVVLOptimizer.
Because the def of %x is dead, the dataflow analysis never reaches this instruction. The instruction still returns true for isCandidate, so we would try to look up its demanded VL, which doesn't exist, and hit the assert. With this patch we no longer try to reduce instructions that aren't in DemandedVLs, which fixes the crash.
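For reference, the old driver walked every block and looked up every candidate unconditionally, roughly (simplified from the removed code below):

    for (MachineBasicBlock &MBB : MF) {
      if (!MDT->isReachableFromEntry(&MBB))
        continue;
      for (MachineInstr &MI : reverse(MBB)) {
        if (!isCandidate(MI))
          continue;
        // tryReduceVL then did DemandedVLs.at(&MI), which asserts when the
        // dataflow analysis never reached MI, e.g. the dead PseudoVSADDU above.
        if (tryReduceVL(MI))
          MadeChange = true;
      }
    }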
---
llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp | 53 ++++++++++------------
llvm/test/CodeGen/RISCV/rvv/vl-opt.ll | 25 ++++++++++
llvm/test/CodeGen/RISCV/rvv/vl-opt.mir | 12 +++++
3 files changed, 60 insertions(+), 30 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
index 0a8838cbd45c7..d6c3b117e3d9c 100644
--- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
@@ -85,7 +85,7 @@ class RISCVVLOptimizer : public MachineFunctionPass {
DemandedVL getMinimumVLForUser(const MachineOperand &UserOp) const;
/// Returns true if the users of \p MI have compatible EEWs and SEWs.
bool checkUsers(const MachineInstr &MI) const;
- bool tryReduceVL(MachineInstr &MI) const;
+ bool tryReduceVL(MachineInstr &MI, MachineOperand VL) const;
bool isCandidate(const MachineInstr &MI) const;
void transfer(const MachineInstr &MI);
@@ -1568,7 +1568,8 @@ bool RISCVVLOptimizer::checkUsers(const MachineInstr &MI) const {
return true;
}
-bool RISCVVLOptimizer::tryReduceVL(MachineInstr &MI) const {
+bool RISCVVLOptimizer::tryReduceVL(MachineInstr &MI,
+ MachineOperand CommonVL) const {
LLVM_DEBUG(dbgs() << "Trying to reduce VL for " << MI);
unsigned VLOpNum = RISCVII::getVLOpNum(MI.getDesc());
@@ -1581,49 +1582,46 @@ bool RISCVVLOptimizer::tryReduceVL(MachineInstr &MI) const {
return false;
}
- auto *CommonVL = &DemandedVLs.at(&MI).VL;
-
- assert((CommonVL->isImm() || CommonVL->getReg().isVirtual()) &&
+ assert((CommonVL.isImm() || CommonVL.getReg().isVirtual()) &&
"Expected VL to be an Imm or virtual Reg");
// If the VL is defined by a vleff that doesn't dominate MI, try using the
// vleff's AVL. It will be greater than or equal to the output VL.
- if (CommonVL->isReg()) {
- const MachineInstr *VLMI = MRI->getVRegDef(CommonVL->getReg());
+ if (CommonVL.isReg()) {
+ const MachineInstr *VLMI = MRI->getVRegDef(CommonVL.getReg());
if (RISCVInstrInfo::isFaultOnlyFirstLoad(*VLMI) &&
!MDT->dominates(VLMI, &MI))
- CommonVL = &VLMI->getOperand(RISCVII::getVLOpNum(VLMI->getDesc()));
+ CommonVL = VLMI->getOperand(RISCVII::getVLOpNum(VLMI->getDesc()));
}
- if (!RISCV::isVLKnownLE(*CommonVL, VLOp)) {
+ if (!RISCV::isVLKnownLE(CommonVL, VLOp)) {
LLVM_DEBUG(dbgs() << " Abort due to CommonVL not <= VLOp.\n");
return false;
}
- if (CommonVL->isIdenticalTo(VLOp)) {
+ if (CommonVL.isIdenticalTo(VLOp)) {
LLVM_DEBUG(
dbgs() << " Abort due to CommonVL == VLOp, no point in reducing.\n");
return false;
}
- if (CommonVL->isImm()) {
+ if (CommonVL.isImm()) {
LLVM_DEBUG(dbgs() << " Reduce VL from " << VLOp << " to "
- << CommonVL->getImm() << " for " << MI << "\n");
- VLOp.ChangeToImmediate(CommonVL->getImm());
+ << CommonVL.getImm() << " for " << MI << "\n");
+ VLOp.ChangeToImmediate(CommonVL.getImm());
return true;
}
- const MachineInstr *VLMI = MRI->getVRegDef(CommonVL->getReg());
+ const MachineInstr *VLMI = MRI->getVRegDef(CommonVL.getReg());
if (!MDT->dominates(VLMI, &MI)) {
LLVM_DEBUG(dbgs() << " Abort due to VL not dominating.\n");
return false;
}
- LLVM_DEBUG(
- dbgs() << " Reduce VL from " << VLOp << " to "
- << printReg(CommonVL->getReg(), MRI->getTargetRegisterInfo())
- << " for " << MI << "\n");
+ LLVM_DEBUG(dbgs() << " Reduce VL from " << VLOp << " to "
+ << printReg(CommonVL.getReg(), MRI->getTargetRegisterInfo())
+ << " for " << MI << "\n");
// All our checks passed. We can reduce VL.
- VLOp.ChangeToRegister(CommonVL->getReg(), false);
+ VLOp.ChangeToRegister(CommonVL.getReg(), false);
return true;
}
@@ -1678,18 +1676,13 @@ bool RISCVVLOptimizer::runOnMachineFunction(MachineFunction &MF) {
// Then go through and see if we can reduce the VL of any instructions to
// only what's demanded.
bool MadeChange = false;
- for (MachineBasicBlock &MBB : MF) {
- // Avoid unreachable blocks as they have degenerate dominance
- if (!MDT->isReachableFromEntry(&MBB))
+ for (auto &[MI, VL] : DemandedVLs) {
+ assert(MDT->isReachableFromEntry(MI->getParent()));
+ if (!isCandidate(*MI))
continue;
-
- for (auto &MI : reverse(MBB)) {
- if (!isCandidate(MI))
- continue;
- if (!tryReduceVL(MI))
- continue;
- MadeChange = true;
- }
+ if (!tryReduceVL(*const_cast<MachineInstr *>(MI), VL.VL))
+ continue;
+ MadeChange = true;
}
DemandedVLs.clear();
diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt.ll b/llvm/test/CodeGen/RISCV/rvv/vl-opt.ll
index 3844b984455c4..359601150cb98 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vl-opt.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt.ll
@@ -325,3 +325,28 @@ bar:
%c = call <vscale x 4 x i32> @llvm.riscv.vadd(<vscale x 4 x i32> poison, <vscale x 4 x i32> %a, iXLen 2, iXLen 2)
ret <vscale x 4 x i32> %c
}
+
+; The vsmul.vx ends up dead, but doesn't get deleted before RISCVVLOptimizer. Make
+; sure we don't crash when we handle it.
+; TODO: DeadMachineInstructionElim should remove the dead vsmul.vx.
+define <vscale x 2 x i64> @dead_vsmul() {
+; CHECK-LABEL: dead_vsmul:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetivli zero, 0, e16, mf2, ta, ma
+; CHECK-NEXT: vmv.v.i v10, 0
+; CHECK-NEXT: csrwi vxrm, 0
+; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma
+; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: vsetivli zero, 0, e64, m2, tu, ma
+; CHECK-NEXT: vmv.v.v v8, v8
+; CHECK-NEXT: vsetvli zero, zero, e16, mf2, tu, ma
+; CHECK-NEXT: vsmul.vx v10, v10, zero
+; CHECK-NEXT: ret
+entry:
+ %0 = call <vscale x 2 x i16> @llvm.riscv.vsmul(<vscale x 2 x i16> zeroinitializer, <vscale x 2 x i16> zeroinitializer, i16 0, iXLen 0, iXLen 0)
+ %1 = call <vscale x 2 x i32> @llvm.riscv.vwmacc(<vscale x 2 x i32> zeroinitializer, i16 0, <vscale x 2 x i16> %0, iXLen 0, iXLen 0)
+ %2 = call <vscale x 2 x i64> @llvm.riscv.vwmul(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i32> %1, <vscale x 2 x i32> zeroinitializer, iXLen 0)
+ %3 = call <vscale x 2 x i64> @llvm.riscv.vmerge(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i64> %2, <vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> splat (i1 true), iXLen 0)
+
+ ret <vscale x 2 x i64> %3
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt.mir b/llvm/test/CodeGen/RISCV/rvv/vl-opt.mir
index 4d6d0e122b1cf..4d573c98615f9 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vl-opt.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt.mir
@@ -393,6 +393,18 @@ body: |
$v8 = COPY %y
...
---
+name: vxsat_instr_dead
+body: |
+ bb.0:
+ ; CHECK-LABEL: name: vxsat_instr_dead
+ ; CHECK: %x:vr = PseudoVSADDU_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */, implicit-def dead $vxsat
+ ; CHECK-NEXT: %y:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
+ ; CHECK-NEXT: $v8 = COPY %y
+ %x:vr = PseudoVSADDU_VV_M1 $noreg, $noreg, $noreg, -1, 3 /* e8 */, 0 /* tu, mu */, implicit-def dead $vxsat
+ %y:vr = PseudoVADD_VV_M1 $noreg, $noreg, $noreg, 1, 3 /* e8 */, 0 /* tu, mu */
+ $v8 = COPY %y
+...
+---
name: copy
body: |
bb.0: