[llvm] [RISCV][Peephole] Clear kill flags for registers after fold VMV (PR #138847)

Piyou Chen via llvm-commits llvm-commits at lists.llvm.org
Wed May 7 03:49:28 PDT 2025


https://github.com/BeMg created https://github.com/llvm/llvm-project/pull/138847

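Without clearing the kill flags of the register that replaces the folded vmv.v.v result, a stale `killed` marker can remain on a use that is no longer the last use of that register, and the machine verifier then reports bad machine code.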

```
*** Bad machine code: Using a killed virtual register ***
- function:    main
- basic block: %bb.0 entry (0x437ef928)
- instruction: %12:vrn7m1 = INSERT_SUBREG %11:vrn7m1(tied-def 0), %0:vr, %subreg.sub_vrm1_0
- operand 2:   %0:vr
```



>From b21e1a1f5aea08d991be06f3755d78ac87e7578b Mon Sep 17 00:00:00 2001
From: Piyou Chen <piyou.chen at sifive.com>
Date: Wed, 7 May 2025 03:43:36 -0700
Subject: [PATCH 1/2] precommit

---
 .../rvv/rvv-peephole-vmv-with-killed-reg.mir  | 136 ++++++++++++++++++
 1 file changed, 136 insertions(+)
 create mode 100644 llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmv-with-killed-reg.mir

diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmv-with-killed-reg.mir b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmv-with-killed-reg.mir
new file mode 100644
index 0000000000000..cf380e7f35ebe
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmv-with-killed-reg.mir
@@ -0,0 +1,136 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3
+# RUN: llc %s -o - -mtriple=riscv64 -mattr=+v -run-pass=riscv-vector-peephole \
+# RUN:  | FileCheck %s
+
+--- |
+  source_filename = "reduced.ll"
+  target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128"
+  target triple = "riscv64-unknown-linux-gnu"
+
+  define i32 @main() #0 {
+  entry:
+    %0 = tail call <vscale x 4 x i16> @llvm.riscv.vmv.v.v.nxv4i16.i64(<vscale x 4 x i16> poison, <vscale x 4 x i16> zeroinitializer, i64 0)
+    %1 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_7t.nxv4i16(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) zeroinitializer, <vscale x 4 x i16> %0, i32 0)
+    %2 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_7t.nxv4i16(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %1, <vscale x 4 x i16> zeroinitializer, i32 0)
+    call void @llvm.riscv.vsseg7.triscv.vector.tuple_nxv8i8_7t.i64(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %2, ptr null, i64 0, i64 4)
+    ret i32 0
+  }
+
+  declare <vscale x 4 x i16> @llvm.riscv.vmv.v.v.nxv4i16.i64(<vscale x 4 x i16>, <vscale x 4 x i16>, i64) #1
+
+  declare target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_7t.nxv4i16(target("riscv.vector.tuple", <vscale x 8 x i8>, 7), <vscale x 4 x i16>, i32 immarg) #2
+
+  declare void @llvm.riscv.vsseg7.triscv.vector.tuple_nxv8i8_7t.i64(target("riscv.vector.tuple", <vscale x 8 x i8>, 7), ptr captures(none), i64, i64 immarg) #3
+
+  attributes #0 = { "target-features"="+v" }
+  attributes #1 = { nocallback nofree nosync nounwind willreturn memory(none) "target-features"="+v" }
+  attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "target-features"="+v" }
+  attributes #3 = { nocallback nofree nosync nounwind willreturn memory(argmem: write) "target-features"="+v" }
+
+...
+---
+name:            main
+alignment:       4
+exposesReturnsTwice: false
+legalized:       false
+regBankSelected: false
+selected:        false
+failedISel:      false
+tracksRegLiveness: true
+hasWinCFI:       false
+noPhis:          false
+isSSA:           true
+noVRegs:         false
+hasFakeUses:     false
+callsEHReturn:   false
+callsUnwindInit: false
+hasEHScopes:     false
+hasEHFunclets:   false
+isOutlined:      false
+debugInstrRef:   false
+failsVerification: false
+tracksDebugUserValues: false
+registers:
+  - { id: 0, class: vr, preferred-register: '', flags: [  ] }
+  - { id: 1, class: vr, preferred-register: '', flags: [  ] }
+  - { id: 2, class: vr, preferred-register: '', flags: [  ] }
+  - { id: 3, class: vrn7m1, preferred-register: '', flags: [  ] }
+  - { id: 4, class: vrn7m1, preferred-register: '', flags: [  ] }
+  - { id: 5, class: vrn7m1, preferred-register: '', flags: [  ] }
+  - { id: 6, class: vrn7m1, preferred-register: '', flags: [  ] }
+  - { id: 7, class: vrn7m1, preferred-register: '', flags: [  ] }
+  - { id: 8, class: vrn7m1, preferred-register: '', flags: [  ] }
+  - { id: 9, class: vrn7m1, preferred-register: '', flags: [  ] }
+  - { id: 10, class: vrn7m1, preferred-register: '', flags: [  ] }
+  - { id: 11, class: vrn7m1, preferred-register: '', flags: [  ] }
+  - { id: 12, class: vrn7m1, preferred-register: '', flags: [  ] }
+  - { id: 13, class: gpr, preferred-register: '', flags: [  ] }
+liveins:         []
+frameInfo:
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap:     false
+  hasPatchPoint:   false
+  stackSize:       0
+  offsetAdjustment: 0
+  maxAlignment:    1
+  adjustsStack:    false
+  hasCalls:        false
+  stackProtector:  ''
+  functionContext: ''
+  maxCallFrameSize: 4294967295
+  cvBytesOfCalleeSavedRegisters: 0
+  hasOpaqueSPAdjustment: false
+  hasVAStart:      false
+  hasMustTailInVarArgFunc: false
+  hasTailCall:     false
+  isCalleeSavedInfoValid: false
+  localFrameSize:  0
+  savePoint:       ''
+  restorePoint:    ''
+fixedStack:      []
+stack:           []
+entry_values:    []
+callSites:       []
+debugValueSubstitutions: []
+constants:       []
+machineFunctionInfo:
+  varArgsFrameIndex: 0
+  varArgsSaveSize: 0
+body:             |
+  bb.0.entry:
+    ; CHECK-LABEL: name: main
+    ; CHECK: [[PseudoVMV_V_I_M1_:%[0-9]+]]:vr = PseudoVMV_V_I_M1 $noreg, 0, -1, 4 /* e16 */, 0 /* tu, mu */
+    ; CHECK-NEXT: [[PseudoVMV_V_I_M1_1:%[0-9]+]]:vr = PseudoVMV_V_I_M1 $noreg, 0, -1, 3 /* e8 */, 0 /* tu, mu */
+    ; CHECK-NEXT: [[DEF:%[0-9]+]]:vrn7m1 = IMPLICIT_DEF
+    ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:vrn7m1 = INSERT_SUBREG [[DEF]], [[PseudoVMV_V_I_M1_1]], %subreg.sub_vrm1_0
+    ; CHECK-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:vrn7m1 = INSERT_SUBREG [[INSERT_SUBREG]], [[PseudoVMV_V_I_M1_1]], %subreg.sub_vrm1_1
+    ; CHECK-NEXT: [[INSERT_SUBREG2:%[0-9]+]]:vrn7m1 = INSERT_SUBREG [[INSERT_SUBREG1]], [[PseudoVMV_V_I_M1_1]], %subreg.sub_vrm1_2
+    ; CHECK-NEXT: [[INSERT_SUBREG3:%[0-9]+]]:vrn7m1 = INSERT_SUBREG [[INSERT_SUBREG2]], [[PseudoVMV_V_I_M1_1]], %subreg.sub_vrm1_3
+    ; CHECK-NEXT: [[INSERT_SUBREG4:%[0-9]+]]:vrn7m1 = INSERT_SUBREG [[INSERT_SUBREG3]], [[PseudoVMV_V_I_M1_1]], %subreg.sub_vrm1_4
+    ; CHECK-NEXT: [[INSERT_SUBREG5:%[0-9]+]]:vrn7m1 = INSERT_SUBREG [[INSERT_SUBREG4]], [[PseudoVMV_V_I_M1_1]], %subreg.sub_vrm1_5
+    ; CHECK-NEXT: [[INSERT_SUBREG6:%[0-9]+]]:vrn7m1 = INSERT_SUBREG [[INSERT_SUBREG5]], [[PseudoVMV_V_I_M1_1]], %subreg.sub_vrm1_6
+    ; CHECK-NEXT: [[INSERT_SUBREG7:%[0-9]+]]:vrn7m1 = INSERT_SUBREG [[INSERT_SUBREG6]], killed [[PseudoVMV_V_I_M1_]], %subreg.sub_vrm1_0
+    ; CHECK-NEXT: [[INSERT_SUBREG8:%[0-9]+]]:vrn7m1 = INSERT_SUBREG [[INSERT_SUBREG7]], [[PseudoVMV_V_I_M1_]], %subreg.sub_vrm1_0
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x0
+    ; CHECK-NEXT: PseudoVSSEG7E16_V_M1 killed [[INSERT_SUBREG8]], [[COPY]], 0, 4 /* e16 */ :: (store unknown-size into `ptr null`, align 2)
+    ; CHECK-NEXT: $x10 = COPY [[COPY]]
+    ; CHECK-NEXT: PseudoRET implicit $x10
+    %0:vr = PseudoVMV_V_I_M1 $noreg, 0, -1, 4 /* e16 */, 0 /* tu, mu */
+    %1:vr = PseudoVMV_V_V_M1 $noreg, %0, 0, 4 /* e16 */, 0 /* tu, mu */
+    %2:vr = PseudoVMV_V_I_M1 $noreg, 0, -1, 3 /* e8 */, 0 /* tu, mu */
+    %4:vrn7m1 = IMPLICIT_DEF
+    %3:vrn7m1 = INSERT_SUBREG %4, %2, %subreg.sub_vrm1_0
+    %5:vrn7m1 = INSERT_SUBREG %3, %2, %subreg.sub_vrm1_1
+    %6:vrn7m1 = INSERT_SUBREG %5, %2, %subreg.sub_vrm1_2
+    %7:vrn7m1 = INSERT_SUBREG %6, %2, %subreg.sub_vrm1_3
+    %8:vrn7m1 = INSERT_SUBREG %7, %2, %subreg.sub_vrm1_4
+    %9:vrn7m1 = INSERT_SUBREG %8, %2, %subreg.sub_vrm1_5
+    %10:vrn7m1 = INSERT_SUBREG %9, %2, %subreg.sub_vrm1_6
+    %11:vrn7m1 = INSERT_SUBREG %10, killed %1, %subreg.sub_vrm1_0
+    %12:vrn7m1 = INSERT_SUBREG %11, %0, %subreg.sub_vrm1_0
+    %13:gpr = COPY $x0
+    PseudoVSSEG7E16_V_M1 killed %12, %13, 0, 4 /* e16 */ :: (store unknown-size into `ptr null`, align 2)
+    $x10 = COPY %13
+    PseudoRET implicit $x10
+...

>From ede944841bd6a850a259d576ab810d96a8dd440f Mon Sep 17 00:00:00 2001
From: Piyou Chen <piyou.chen at sifive.com>
Date: Wed, 7 May 2025 03:47:38 -0700
Subject: [PATCH 2/2] [RISCV][Peephole] Clear kill flags for registers after
 fold VMV

---
 llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp                 | 2 ++
 .../CodeGen/RISCV/rvv/rvv-peephole-vmv-with-killed-reg.mir    | 4 ++--
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp b/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp
index 7c05ff1f1a70e..a7f763bd0c64a 100644
--- a/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp
@@ -585,6 +585,7 @@ bool RISCVVectorPeephole::foldUndefPassthruVMV_V_V(MachineInstr &MI) {
 
   MRI->replaceRegWith(MI.getOperand(0).getReg(), MI.getOperand(2).getReg());
+  MRI->clearKillFlags(MI.getOperand(2).getReg()); // Before MI is erased.
   MI.eraseFromParent();
   return true;
 }
 
@@ -655,6 +656,7 @@ bool RISCVVectorPeephole::foldVMV_V_V(MachineInstr &MI) {
   MRI->replaceRegWith(MI.getOperand(0).getReg(), Src->getOperand(0).getReg());
+  MRI->clearKillFlags(Src->getOperand(0).getReg()); // Before MI is erased.
   MI.eraseFromParent();
 
   return true;
 }
 
diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmv-with-killed-reg.mir b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmv-with-killed-reg.mir
index cf380e7f35ebe..1b22d63836e08 100644
--- a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmv-with-killed-reg.mir
+++ b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmv-with-killed-reg.mir
@@ -1,6 +1,6 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3
 # RUN: llc %s -o - -mtriple=riscv64 -mattr=+v -run-pass=riscv-vector-peephole \
-# RUN:  | FileCheck %s
+# RUN:  -verify-machineinstrs | FileCheck %s
 
 --- |
   source_filename = "reduced.ll"
@@ -110,7 +110,7 @@ body:             |
     ; CHECK-NEXT: [[INSERT_SUBREG4:%[0-9]+]]:vrn7m1 = INSERT_SUBREG [[INSERT_SUBREG3]], [[PseudoVMV_V_I_M1_1]], %subreg.sub_vrm1_4
     ; CHECK-NEXT: [[INSERT_SUBREG5:%[0-9]+]]:vrn7m1 = INSERT_SUBREG [[INSERT_SUBREG4]], [[PseudoVMV_V_I_M1_1]], %subreg.sub_vrm1_5
     ; CHECK-NEXT: [[INSERT_SUBREG6:%[0-9]+]]:vrn7m1 = INSERT_SUBREG [[INSERT_SUBREG5]], [[PseudoVMV_V_I_M1_1]], %subreg.sub_vrm1_6
-    ; CHECK-NEXT: [[INSERT_SUBREG7:%[0-9]+]]:vrn7m1 = INSERT_SUBREG [[INSERT_SUBREG6]], killed [[PseudoVMV_V_I_M1_]], %subreg.sub_vrm1_0
+    ; CHECK-NEXT: [[INSERT_SUBREG7:%[0-9]+]]:vrn7m1 = INSERT_SUBREG [[INSERT_SUBREG6]], [[PseudoVMV_V_I_M1_]], %subreg.sub_vrm1_0
     ; CHECK-NEXT: [[INSERT_SUBREG8:%[0-9]+]]:vrn7m1 = INSERT_SUBREG [[INSERT_SUBREG7]], [[PseudoVMV_V_I_M1_]], %subreg.sub_vrm1_0
     ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x0
     ; CHECK-NEXT: PseudoVSSEG7E16_V_M1 killed [[INSERT_SUBREG8]], [[COPY]], 0, 4 /* e16 */ :: (store unknown-size into `ptr null`, align 2)



More information about the llvm-commits mailing list