[llvm] [RISCV][Peephole] Clear kill flags for registers after folding VMV (PR #138847)
via llvm-commits
llvm-commits at lists.llvm.org
Wed May 7 03:49:59 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-risc-v
Author: Piyou Chen (BeMg)
<details>
<summary>Changes</summary>
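When a vmv.v.v is folded away, every use of its result is rewritten to use its source register, but a `killed` flag on one of those rewritten uses is kept. If the source register has a later use, that flag is now stale; without clearing the kill flags, the pass generates machine code that the verifier rejects: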
```
*** Bad machine code: Using a killed virtual register ***
- function: main
- basic block: %bb.0 entry (0x437ef928)
- instruction: %12:vrn7m1 = INSERT_SUBREG %11:vrn7m1(tied-def 0), %0:vr, %subreg.sub_vrm1_0
- operand 2: %0:vr
```
---
Full diff: https://github.com/llvm/llvm-project/pull/138847.diff
2 Files Affected:
- (modified) llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp (+2)
- (added) llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmv-with-killed-reg.mir (+136)
``````````diff
diff --git a/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp b/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp
index 7c05ff1f1a70e..a7f763bd0c64a 100644
--- a/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVectorPeephole.cpp
@@ -585,6 +585,7 @@ bool RISCVVectorPeephole::foldUndefPassthruVMV_V_V(MachineInstr &MI) {
MRI->replaceRegWith(MI.getOperand(0).getReg(), MI.getOperand(2).getReg());
MI.eraseFromParent();
+ MRI->clearKillFlags(MI.getOperand(2).getReg());
return true;
}
@@ -655,6 +656,7 @@ bool RISCVVectorPeephole::foldVMV_V_V(MachineInstr &MI) {
MRI->replaceRegWith(MI.getOperand(0).getReg(), Src->getOperand(0).getReg());
MI.eraseFromParent();
+ MRI->clearKillFlags(MI.getOperand(2).getReg());
return true;
}
diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmv-with-killed-reg.mir b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmv-with-killed-reg.mir
new file mode 100644
index 0000000000000..1b22d63836e08
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmv-with-killed-reg.mir
@@ -0,0 +1,136 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3
+# RUN: llc %s -o - -mtriple=riscv64 -mattr=+v -run-pass=riscv-vector-peephole \
+# RUN: -verify-machineinstrs | FileCheck %s
+
+--- |
+ source_filename = "reduced.ll"
+ target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128"
+ target triple = "riscv64-unknown-linux-gnu"
+
+ define i32 @main() #0 {
+ entry:
+ %0 = tail call <vscale x 4 x i16> @llvm.riscv.vmv.v.v.nxv4i16.i64(<vscale x 4 x i16> poison, <vscale x 4 x i16> zeroinitializer, i64 0)
+ %1 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_7t.nxv4i16(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) zeroinitializer, <vscale x 4 x i16> %0, i32 0)
+ %2 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_7t.nxv4i16(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %1, <vscale x 4 x i16> zeroinitializer, i32 0)
+ call void @llvm.riscv.vsseg7.triscv.vector.tuple_nxv8i8_7t.i64(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %2, ptr null, i64 0, i64 4)
+ ret i32 0
+ }
+
+ declare <vscale x 4 x i16> @llvm.riscv.vmv.v.v.nxv4i16.i64(<vscale x 4 x i16>, <vscale x 4 x i16>, i64) #1
+
+ declare target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv8i8_7t.nxv4i16(target("riscv.vector.tuple", <vscale x 8 x i8>, 7), <vscale x 4 x i16>, i32 immarg) #2
+
+ declare void @llvm.riscv.vsseg7.triscv.vector.tuple_nxv8i8_7t.i64(target("riscv.vector.tuple", <vscale x 8 x i8>, 7), ptr captures(none), i64, i64 immarg) #3
+
+ attributes #0 = { "target-features"="+v" }
+ attributes #1 = { nocallback nofree nosync nounwind willreturn memory(none) "target-features"="+v" }
+ attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "target-features"="+v" }
+ attributes #3 = { nocallback nofree nosync nounwind willreturn memory(argmem: write) "target-features"="+v" }
+
+...
+---
+name: main
+alignment: 4
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+failedISel: false
+tracksRegLiveness: true
+hasWinCFI: false
+noPhis: false
+isSSA: true
+noVRegs: false
+hasFakeUses: false
+callsEHReturn: false
+callsUnwindInit: false
+hasEHScopes: false
+hasEHFunclets: false
+isOutlined: false
+debugInstrRef: false
+failsVerification: false
+tracksDebugUserValues: false
+registers:
+ - { id: 0, class: vr, preferred-register: '', flags: [ ] }
+ - { id: 1, class: vr, preferred-register: '', flags: [ ] }
+ - { id: 2, class: vr, preferred-register: '', flags: [ ] }
+ - { id: 3, class: vrn7m1, preferred-register: '', flags: [ ] }
+ - { id: 4, class: vrn7m1, preferred-register: '', flags: [ ] }
+ - { id: 5, class: vrn7m1, preferred-register: '', flags: [ ] }
+ - { id: 6, class: vrn7m1, preferred-register: '', flags: [ ] }
+ - { id: 7, class: vrn7m1, preferred-register: '', flags: [ ] }
+ - { id: 8, class: vrn7m1, preferred-register: '', flags: [ ] }
+ - { id: 9, class: vrn7m1, preferred-register: '', flags: [ ] }
+ - { id: 10, class: vrn7m1, preferred-register: '', flags: [ ] }
+ - { id: 11, class: vrn7m1, preferred-register: '', flags: [ ] }
+ - { id: 12, class: vrn7m1, preferred-register: '', flags: [ ] }
+ - { id: 13, class: gpr, preferred-register: '', flags: [ ] }
+liveins: []
+frameInfo:
+ isFrameAddressTaken: false
+ isReturnAddressTaken: false
+ hasStackMap: false
+ hasPatchPoint: false
+ stackSize: 0
+ offsetAdjustment: 0
+ maxAlignment: 1
+ adjustsStack: false
+ hasCalls: false
+ stackProtector: ''
+ functionContext: ''
+ maxCallFrameSize: 4294967295
+ cvBytesOfCalleeSavedRegisters: 0
+ hasOpaqueSPAdjustment: false
+ hasVAStart: false
+ hasMustTailInVarArgFunc: false
+ hasTailCall: false
+ isCalleeSavedInfoValid: false
+ localFrameSize: 0
+ savePoint: ''
+ restorePoint: ''
+fixedStack: []
+stack: []
+entry_values: []
+callSites: []
+debugValueSubstitutions: []
+constants: []
+machineFunctionInfo:
+ varArgsFrameIndex: 0
+ varArgsSaveSize: 0
+body: |
+ bb.0.entry:
+ ; CHECK-LABEL: name: main
+ ; CHECK: [[PseudoVMV_V_I_M1_:%[0-9]+]]:vr = PseudoVMV_V_I_M1 $noreg, 0, -1, 4 /* e16 */, 0 /* tu, mu */
+ ; CHECK-NEXT: [[PseudoVMV_V_I_M1_1:%[0-9]+]]:vr = PseudoVMV_V_I_M1 $noreg, 0, -1, 3 /* e8 */, 0 /* tu, mu */
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:vrn7m1 = IMPLICIT_DEF
+ ; CHECK-NEXT: [[INSERT_SUBREG:%[0-9]+]]:vrn7m1 = INSERT_SUBREG [[DEF]], [[PseudoVMV_V_I_M1_1]], %subreg.sub_vrm1_0
+ ; CHECK-NEXT: [[INSERT_SUBREG1:%[0-9]+]]:vrn7m1 = INSERT_SUBREG [[INSERT_SUBREG]], [[PseudoVMV_V_I_M1_1]], %subreg.sub_vrm1_1
+ ; CHECK-NEXT: [[INSERT_SUBREG2:%[0-9]+]]:vrn7m1 = INSERT_SUBREG [[INSERT_SUBREG1]], [[PseudoVMV_V_I_M1_1]], %subreg.sub_vrm1_2
+ ; CHECK-NEXT: [[INSERT_SUBREG3:%[0-9]+]]:vrn7m1 = INSERT_SUBREG [[INSERT_SUBREG2]], [[PseudoVMV_V_I_M1_1]], %subreg.sub_vrm1_3
+ ; CHECK-NEXT: [[INSERT_SUBREG4:%[0-9]+]]:vrn7m1 = INSERT_SUBREG [[INSERT_SUBREG3]], [[PseudoVMV_V_I_M1_1]], %subreg.sub_vrm1_4
+ ; CHECK-NEXT: [[INSERT_SUBREG5:%[0-9]+]]:vrn7m1 = INSERT_SUBREG [[INSERT_SUBREG4]], [[PseudoVMV_V_I_M1_1]], %subreg.sub_vrm1_5
+ ; CHECK-NEXT: [[INSERT_SUBREG6:%[0-9]+]]:vrn7m1 = INSERT_SUBREG [[INSERT_SUBREG5]], [[PseudoVMV_V_I_M1_1]], %subreg.sub_vrm1_6
+ ; CHECK-NEXT: [[INSERT_SUBREG7:%[0-9]+]]:vrn7m1 = INSERT_SUBREG [[INSERT_SUBREG6]], [[PseudoVMV_V_I_M1_]], %subreg.sub_vrm1_0
+ ; CHECK-NEXT: [[INSERT_SUBREG8:%[0-9]+]]:vrn7m1 = INSERT_SUBREG [[INSERT_SUBREG7]], [[PseudoVMV_V_I_M1_]], %subreg.sub_vrm1_0
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x0
+ ; CHECK-NEXT: PseudoVSSEG7E16_V_M1 killed [[INSERT_SUBREG8]], [[COPY]], 0, 4 /* e16 */ :: (store unknown-size into `ptr null`, align 2)
+ ; CHECK-NEXT: $x10 = COPY [[COPY]]
+ ; CHECK-NEXT: PseudoRET implicit $x10
+ %0:vr = PseudoVMV_V_I_M1 $noreg, 0, -1, 4 /* e16 */, 0 /* tu, mu */
+ %1:vr = PseudoVMV_V_V_M1 $noreg, %0, 0, 4 /* e16 */, 0 /* tu, mu */
+ %2:vr = PseudoVMV_V_I_M1 $noreg, 0, -1, 3 /* e8 */, 0 /* tu, mu */
+ %4:vrn7m1 = IMPLICIT_DEF
+ %3:vrn7m1 = INSERT_SUBREG %4, %2, %subreg.sub_vrm1_0
+ %5:vrn7m1 = INSERT_SUBREG %3, %2, %subreg.sub_vrm1_1
+ %6:vrn7m1 = INSERT_SUBREG %5, %2, %subreg.sub_vrm1_2
+ %7:vrn7m1 = INSERT_SUBREG %6, %2, %subreg.sub_vrm1_3
+ %8:vrn7m1 = INSERT_SUBREG %7, %2, %subreg.sub_vrm1_4
+ %9:vrn7m1 = INSERT_SUBREG %8, %2, %subreg.sub_vrm1_5
+ %10:vrn7m1 = INSERT_SUBREG %9, %2, %subreg.sub_vrm1_6
+ %11:vrn7m1 = INSERT_SUBREG %10, killed %1, %subreg.sub_vrm1_0
+ %12:vrn7m1 = INSERT_SUBREG %11, %0, %subreg.sub_vrm1_0
+ %13:gpr = COPY $x0
+ PseudoVSSEG7E16_V_M1 killed %12, %13, 0, 4 /* e16 */ :: (store unknown-size into `ptr null`, align 2)
+ $x10 = COPY %13
+ PseudoRET implicit $x10
+...
``````````
</details>
https://github.com/llvm/llvm-project/pull/138847
More information about the llvm-commits mailing list