[llvm] [ARM] Fix VBSL Pseudo kill flags. (PR #109629)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Sep 23 01:03:59 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-arm
Author: David Green (davemgreen)
<details>
<summary>Changes</summary>
When expanding a VBSP pseudo into VMOV; VBSL, if the first reg was killed in the VBSP then the kill flags could be incorrectly copied to the mov (vorr) and the vbsl. Drop the kill flags.
Note that this sometimes comes up when all the operands of the VBSP are the same, which can be optimized separately.
---
Full diff: https://github.com/llvm/llvm-project/pull/109629.diff
3 Files Affected:
- (modified) llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp (+3-2)
- (modified) llvm/test/CodeGen/ARM/expand-pseudos.mir (+21)
- (modified) llvm/test/CodeGen/ARM/vbsl.ll (+29-1)
``````````diff
diff --git a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index fe26d6c2dd090f..5be9d73022a6ee 100644
--- a/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -2178,12 +2178,13 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
} else {
// Use move to satisfy constraints
unsigned MoveOpc = Opcode == ARM::VBSPd ? ARM::VORRd : ARM::VORRq;
+ unsigned MO1Flags = getRegState(MI.getOperand(1)) & ~RegState::Kill;
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(MoveOpc))
.addReg(DstReg,
RegState::Define |
getRenamableRegState(MI.getOperand(0).isRenamable()))
- .add(MI.getOperand(1))
- .add(MI.getOperand(1))
+ .addReg(MI.getOperand(1).getReg(), MO1Flags)
+ .addReg(MI.getOperand(1).getReg(), MO1Flags)
.addImm(MI.getOperand(4).getImm())
.add(MI.getOperand(5));
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc))
diff --git a/llvm/test/CodeGen/ARM/expand-pseudos.mir b/llvm/test/CodeGen/ARM/expand-pseudos.mir
index 8aada5442536e4..bafcce280c8424 100644
--- a/llvm/test/CodeGen/ARM/expand-pseudos.mir
+++ b/llvm/test/CodeGen/ARM/expand-pseudos.mir
@@ -24,6 +24,9 @@
entry:
unreachable
}
+ define i32 @vbsl_kill_flags(i32 %x) {
+ unreachable
+ }
...
---
name: test1
@@ -141,3 +144,21 @@ body: |
BX_RET 14, $noreg, implicit $r0
...
+---
+name: vbsl_kill_flags
+alignment: 4
+tracksRegLiveness: true
+body: |
+ bb.0 (%ir-block.0):
+ liveins: $d1
+
+ ; CHECK-LABEL: name: vbsl_kill_flags
+ ; CHECK: liveins: $d1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: renamable $d0 = VORRd renamable $d1, renamable $d1, 14 /* CC::al */, $noreg
+ ; CHECK-NEXT: renamable $d0 = VBSLd killed renamable $d0, renamable $d1, renamable $d1, 14 /* CC::al */, $noreg
+ ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $d0
+ renamable $d0 = VBSPd killed renamable $d1, renamable $d1, renamable $d1, 14 /* CC::al */, $noreg
+ BX_RET 14 /* CC::al */, $noreg, implicit $d0
+
+...
diff --git a/llvm/test/CodeGen/ARM/vbsl.ll b/llvm/test/CodeGen/ARM/vbsl.ll
index 8564a48fbc3dbc..d5aaf3e6f30bd3 100644
--- a/llvm/test/CodeGen/ARM/vbsl.ll
+++ b/llvm/test/CodeGen/ARM/vbsl.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=armv7-eabihf -mattr=+neon %s -o - | FileCheck %s
+; RUN: llc -mtriple=armv7-eabihf -mattr=+neon -verify-machineinstrs %s -o - | FileCheck %s
define <8 x i8> @v_bsli8(ptr %A, ptr %B, ptr %C) nounwind {
; CHECK-LABEL: v_bsli8:
@@ -261,6 +261,34 @@ define <2 x i64> @test_vbslq_u64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) nounw
ret <2 x i64> %vbsl3.i
}
+define <8 x i8> @same_param_all(<8 x i8> %a, <8 x i8> %b) {
+; CHECK-LABEL: same_param_all:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vorr d0, d1, d1
+; CHECK-NEXT: vbsl d0, d1, d1
+; CHECK-NEXT: bx lr
+ %vbsl.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %b, <8 x i8> %b, <8 x i8> %b)
+ ret <8 x i8> %vbsl.i
+}
+
+define <8 x i8> @same_param_12(<8 x i8> %a, <8 x i8> %b) {
+; CHECK-LABEL: same_param_12:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vbsl d0, d1, d1
+; CHECK-NEXT: bx lr
+ %vbsl.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %b)
+ ret <8 x i8> %vbsl.i
+}
+
+define <8 x i8> @same_param_01(<8 x i8> %a, <8 x i8> %b) {
+; CHECK-LABEL: same_param_01:
+; CHECK: @ %bb.0:
+; CHECK-NEXT: vbif d0, d1, d0
+; CHECK-NEXT: bx lr
+ %vbsl.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %a, <8 x i8> %a, <8 x i8> %b)
+ ret <8 x i8> %vbsl.i
+}
+
declare <4 x i32> @llvm.arm.neon.vbsl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) nounwind readnone
declare <8 x i16> @llvm.arm.neon.vbsl.v8i16(<8 x i16>, <8 x i16>, <8 x i16>) nounwind readnone
declare <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone
``````````
</details>
https://github.com/llvm/llvm-project/pull/109629
More information about the llvm-commits
mailing list