[llvm] a029ece - [RISCV] Fix coalescing vsetvlis when AVL and vl registers are the same (#141941)

via llvm-commits llvm-commits at lists.llvm.org
Fri Jun 6 08:34:30 PDT 2025


Author: Luke Lau
Date: 2025-06-06T17:34:27+02:00
New Revision: a029ece7b0077ef7417362891b32a53a825adb32

URL: https://github.com/llvm/llvm-project/commit/a029ece7b0077ef7417362891b32a53a825adb32
DIFF: https://github.com/llvm/llvm-project/commit/a029ece7b0077ef7417362891b32a53a825adb32.diff

LOG: [RISCV] Fix coalescing vsetvlis when AVL and vl registers are the same (#141941)

With EVL tail folding we can end up with vsetvlis where the output vl
and the input AVL are the same register. When we try to coalesce it we
crashed because we tried to move the def's live interval before the
kill's live interval, e.g. in this example:

    (vn0 def)
dead $x0 = PseudoVSETIVLI 1, 192, implicit-def $vl, implicit-def $vtype
    renamable $v9 = COPY killed renamable $v8
(vn1 def) %23:gprnox0 = PseudoVSETVLI killed (vn0) %23:gprnox0, 197,
implicit-def $vl, implicit-def $vtype

We would try to move the vn1 def VNInfo up to the previous VSETVLI, in
the middle of vn0's segment.

However separately, we were also assuming that the vl would only have
one definition and thus were just taking the VNInfo from beginIndex(),
so we ended up with a backwards segment and got the error "Cannot create
empty or backwards segment".

This fixes these two issues, the first one by moving the AVL operand +
live interval up first, and the second by taking the VNInfo from
NextMI's slot index.

Fixes #141907

Added: 
    llvm/test/CodeGen/RISCV/rvv/pr141907.ll

Modified: 
    llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
    llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.mir

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
index e1c69c69a99ca..53192e9dfe6c6 100644
--- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp
@@ -1698,13 +1698,24 @@ void RISCVInsertVSETVLI::coalesceVSETVLIs(MachineBasicBlock &MBB) const {
           MI.getOperand(0).setReg(DefReg);
           MI.getOperand(0).setIsDead(false);
 
+          // Move the AVL from NextMI to MI
+          dropAVLUse(MI.getOperand(1));
+          if (NextMI->getOperand(1).isImm())
+            MI.getOperand(1).ChangeToImmediate(NextMI->getOperand(1).getImm());
+          else
+            MI.getOperand(1).ChangeToRegister(NextMI->getOperand(1).getReg(),
+                                              false);
+          dropAVLUse(NextMI->getOperand(1));
+
           // The def of DefReg moved to MI, so extend the LiveInterval up to
           // it.
           if (DefReg.isVirtual() && LIS) {
             LiveInterval &DefLI = LIS->getInterval(DefReg);
             SlotIndex MISlot = LIS->getInstructionIndex(MI).getRegSlot();
-            VNInfo *DefVNI = DefLI.getVNInfoAt(DefLI.beginIndex());
-            LiveInterval::Segment S(MISlot, DefLI.beginIndex(), DefVNI);
+            SlotIndex NextMISlot =
+                LIS->getInstructionIndex(*NextMI).getRegSlot();
+            VNInfo *DefVNI = DefLI.getVNInfoAt(NextMISlot);
+            LiveInterval::Segment S(MISlot, NextMISlot, DefVNI);
             DefLI.addSegment(S);
             DefVNI->def = MISlot;
             // Mark DefLI as spillable if it was previously unspillable
@@ -1715,13 +1726,6 @@ void RISCVInsertVSETVLI::coalesceVSETVLIs(MachineBasicBlock &MBB) const {
             LIS->shrinkToUses(&DefLI);
           }
 
-          dropAVLUse(MI.getOperand(1));
-          if (NextMI->getOperand(1).isImm())
-            MI.getOperand(1).ChangeToImmediate(NextMI->getOperand(1).getImm());
-          else
-            MI.getOperand(1).ChangeToRegister(NextMI->getOperand(1).getReg(),
-                                              false);
-
           MI.setDesc(NextMI->getDesc());
         }
         MI.getOperand(2).setImm(NextMI->getOperand(2).getImm());

diff  --git a/llvm/test/CodeGen/RISCV/rvv/pr141907.ll b/llvm/test/CodeGen/RISCV/rvv/pr141907.ll
new file mode 100644
index 0000000000000..648b47dc440c3
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/pr141907.ll
@@ -0,0 +1,56 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=riscv64 -mcpu=sifive-p670 | FileCheck %s
+
+define void @pr141907(ptr %0) nounwind {
+; CHECK-LABEL: pr141907:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi sp, sp, -16
+; CHECK-NEXT:    csrr a1, vlenb
+; CHECK-NEXT:    slli a1, a1, 2
+; CHECK-NEXT:    sub sp, sp, a1
+; CHECK-NEXT:    vsetivli zero, 0, e32, m1, ta, ma
+; CHECK-NEXT:    vmv.v.i v9, 0
+; CHECK-NEXT:    vmclr.m v0
+; CHECK-NEXT:    li a1, 0
+; CHECK-NEXT:    vsetvli a3, zero, e16, mf2, ta, ma
+; CHECK-NEXT:    vmv.v.i v12, 0
+; CHECK-NEXT:    addi a2, sp, 16
+; CHECK-NEXT:  .LBB0_1: # %vector.body
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    vs4r.v v8, (a2)
+; CHECK-NEXT:    vsetvli a1, a1, e8, mf8, ta, ma
+; CHECK-NEXT:    vsetivli zero, 0, e16, mf2, ta, ma
+; CHECK-NEXT:    vnsrl.wi v11, v9, 0, v0.t
+; CHECK-NEXT:    vsetvli a3, zero, e32, m1, ta, ma
+; CHECK-NEXT:    vlseg3e32.v v8, (a2)
+; CHECK-NEXT:    vsetivli zero, 0, e16, mf2, ta, ma
+; CHECK-NEXT:    vsseg2e16.v v11, (zero)
+; CHECK-NEXT:    bnez a1, .LBB0_1
+; CHECK-NEXT:  .LBB0_2: # %while.body5
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; CHECK-NEXT:    vse16.v v9, (a0)
+; CHECK-NEXT:    j .LBB0_2
+entry:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %entry
+  %evl.based.iv = phi i64 [ 0, %entry ], [ %2, %vector.body ]
+  %vector.recur = phi <vscale x 2 x i32> [ zeroinitializer, %entry ], [ %3, %vector.body ]
+  %1 = call i32 @llvm.experimental.get.vector.length.i64(i64 %evl.based.iv, i32 1, i1 true)
+  %2 = zext i32 %1 to i64
+  %wide.masked.load = call <vscale x 6 x i32> @llvm.vp.load.nxv6i32.p0(ptr null, <vscale x 6 x i1> zeroinitializer, i32 0)
+  %deinterleaved.results = call { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } @llvm.vector.deinterleave3.nxv6i32(<vscale x 6 x i32> %wide.masked.load)
+  %3 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %deinterleaved.results, 1
+  %vp.cast65 = call <vscale x 2 x i16> @llvm.vp.trunc.nxv2i16.nxv2i32(<vscale x 2 x i32> %vector.recur, <vscale x 2 x i1> zeroinitializer, i32 0)
+  %interleaved.vec = call <vscale x 4 x i16> @llvm.vector.interleave2.nxv4i16(<vscale x 2 x i16> %vp.cast65, <vscale x 2 x i16> zeroinitializer)
+  call void @llvm.vp.store.nxv4i16.p0(<vscale x 4 x i16> %interleaved.vec, ptr null, <vscale x 4 x i1> splat (i1 true), i32 0)
+  %4 = icmp eq i32 %1, 0
+  br i1 %4, label %while.body5, label %vector.body
+
+while.body5:                                      ; preds = %while.body5, %vector.body
+  %5 = bitcast <vscale x 2 x i32> %3 to <vscale x 4 x i16>
+  %cond52 = extractelement <vscale x 4 x i16> %5, i64 0
+  store i16 %cond52, ptr %0, align 2
+  br label %while.body5
+}

diff  --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.mir b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.mir
index 266f35f6dd62e..6bd03eb0c2226 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.mir
@@ -96,6 +96,10 @@
     ret void
   }
 
+  define void @coalesce_vl_avl_same_reg() {
+    ret void
+  }
+
   declare <vscale x 1 x i64> @llvm.riscv.vadd.nxv1i64.nxv1i64.i64(<vscale x 1 x i64>, <vscale x 1 x i64>, <vscale x 1 x i64>, i64) #1
 
   declare <vscale x 1 x i64> @llvm.riscv.vle.nxv1i64.i64(<vscale x 1 x i64>, ptr nocapture, i64) #4
@@ -408,7 +412,7 @@ body:             |
     ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr = COPY $x10
     ; CHECK-NEXT: dead $x0 = PseudoVSETVLI [[COPY]], 216 /* e64, m1, ta, ma */, implicit-def $vl, implicit-def $vtype
     ; CHECK-NEXT: [[PseudoVLE64_V_M1_:%[0-9]+]]:vr = PseudoVLE64_V_M1 undef $noreg, [[COPY2]], $noreg, 6 /* e64 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
-    ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */
+    ; CHECK-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit-def $vl, implicit-def $vtype
     ; CHECK-NEXT: dead $x0 = PseudoVSETVLI [[COPY]], 216 /* e64, m1, ta, ma */, implicit-def $vl, implicit-def $vtype
     ; CHECK-NEXT: [[PseudoVADD_VV_M1_:%[0-9]+]]:vr = PseudoVADD_VV_M1 undef $noreg, [[PseudoVLE64_V_M1_]], [[COPY1]], $noreg, 6 /* e64 */, 0 /* tu, mu */, implicit $vl, implicit $vtype
     ; CHECK-NEXT: $v8 = COPY [[PseudoVADD_VV_M1_]], implicit $vtype
@@ -601,3 +605,20 @@ body: |
     %x:gpr = COPY $x10
     renamable $v8 = PseudoVMV_S_X undef renamable $v8, killed renamable %x, 1, 5
     PseudoRET implicit $v8
+...
+---
+name: coalesce_vl_avl_same_reg
+tracksRegLiveness: true
+body: |
+  bb.0:
+    liveins: $x8, $v8
+    ; CHECK-LABEL: name: coalesce_vl_avl_same_reg
+    ; CHECK: liveins: $x8, $v8
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: %x:gprnox0 = COPY $x8
+    ; CHECK-NEXT: dead %x:gprnox0 = PseudoVSETVLI %x, 208 /* e32, m1, ta, ma */, implicit-def $vl, implicit-def $vtype
+    ; CHECK-NEXT: dead %v:vr = COPY $v8, implicit $vtype
+    %x:gprnox0 = COPY $x8
+    dead $x0 = PseudoVSETIVLI 1, 208, implicit-def $vl, implicit-def $vtype
+    %v:vr = COPY $v8, implicit $vtype
+    %x = PseudoVSETVLI %x, 208, implicit-def $vl, implicit-def $vtype


        


More information about the llvm-commits mailing list