[llvm] [ARM] Lower arm_neon_vbsl to ARMISD::VBSP and fold (vbsl x, y, y) to y (PR #109761)
David Green via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 24 00:27:20 PDT 2024
https://github.com/davemgreen created https://github.com/llvm/llvm-project/pull/109761
This helps clean up the patterns a little and lets combines be shared between the arm_neon_vbsl intrinsic and ARMISD::VBSP nodes. A combine is then added to fold away the VBSP when both of the selected operands are the same, i.e. (vbsp x, y, y) -> y.
>From 2bfc2d2c07bfe958dd52b8c3489567ae5023bf6f Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Tue, 24 Sep 2024 08:23:52 +0100
Subject: [PATCH] [ARM] Lower arm_neon_vbsl to ARMISD::VBSP and fold (vbsl x,
y, y) to y
This helps clean up the patterns a little and will help share combines on both
the intrinsic and VBSP. A combine is then added to fold away the VBSP if both
the selected operands are the same.
---
llvm/lib/Target/ARM/ARMISelLowering.cpp | 9 +++++
llvm/lib/Target/ARM/ARMInstrNEON.td | 54 +++++++++++--------------
llvm/test/CodeGen/ARM/vbsl.ll | 5 +--
3 files changed, 35 insertions(+), 33 deletions(-)
diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
index a03928b618df03..f891aece26848c 100644
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -17653,6 +17653,11 @@ SDValue ARMTargetLowering::PerformIntrinsicCombine(SDNode *N,
// No immediate versions of these to check for.
break;
+ case Intrinsic::arm_neon_vbsl: {
+ SDLoc dl(N);
+ return DAG.getNode(ARMISD::VBSP, dl, N->getValueType(0), N->getOperand(1),
+ N->getOperand(2), N->getOperand(3));
+ }
case Intrinsic::arm_mve_vqdmlah:
case Intrinsic::arm_mve_vqdmlash:
case Intrinsic::arm_mve_vqrdmlah:
@@ -19072,6 +19077,10 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
return SDValue();
break;
}
+ case ARMISD::VBSP:
+ if (N->getOperand(1) == N->getOperand(2))
+ return N->getOperand(1);
+ return SDValue();
case ISD::INTRINSIC_VOID:
case ISD::INTRINSIC_W_CHAIN:
switch (N->getConstantOperandVal(1)) {
diff --git a/llvm/lib/Target/ARM/ARMInstrNEON.td b/llvm/lib/Target/ARM/ARMInstrNEON.td
index fcabc9076e4d30..48dcbdb137123a 100644
--- a/llvm/lib/Target/ARM/ARMInstrNEON.td
+++ b/llvm/lib/Target/ARM/ARMInstrNEON.td
@@ -5524,26 +5524,23 @@ def : Pat<(v16i8 (vnotq QPR:$src)),
// with different register constraints; it just inserts copies.
// That is why pseudo VBSP implemented. Is is expanded later into
// VBIT/VBIF/VBSL taking into account register constraints to avoid copies.
-def VBSPd
- : PseudoNeonI<(outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
- IIC_VBINiD, "",
- [(set DPR:$Vd,
- (v2i32 (NEONvbsp DPR:$src1, DPR:$Vn, DPR:$Vm)))]>;
+def VBSPd : PseudoNeonI<(outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
+ IIC_VBINiD, "", []>;
let Predicates = [HasNEON] in {
-def : Pat<(v8i8 (int_arm_neon_vbsl (v8i8 DPR:$src1),
- (v8i8 DPR:$Vn), (v8i8 DPR:$Vm))),
+def : Pat<(v8i8 (NEONvbsp (v8i8 DPR:$src1),
+ (v8i8 DPR:$Vn), (v8i8 DPR:$Vm))),
(VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
-def : Pat<(v4i16 (int_arm_neon_vbsl (v4i16 DPR:$src1),
- (v4i16 DPR:$Vn), (v4i16 DPR:$Vm))),
+def : Pat<(v4i16 (NEONvbsp (v4i16 DPR:$src1),
+ (v4i16 DPR:$Vn), (v4i16 DPR:$Vm))),
(VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
-def : Pat<(v2i32 (int_arm_neon_vbsl (v2i32 DPR:$src1),
- (v2i32 DPR:$Vn), (v2i32 DPR:$Vm))),
+def : Pat<(v2i32 (NEONvbsp (v2i32 DPR:$src1),
+ (v2i32 DPR:$Vn), (v2i32 DPR:$Vm))),
(VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
-def : Pat<(v2f32 (int_arm_neon_vbsl (v2f32 DPR:$src1),
- (v2f32 DPR:$Vn), (v2f32 DPR:$Vm))),
+def : Pat<(v2f32 (NEONvbsp (v2f32 DPR:$src1),
+ (v2f32 DPR:$Vn), (v2f32 DPR:$Vm))),
(VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
-def : Pat<(v1i64 (int_arm_neon_vbsl (v1i64 DPR:$src1),
- (v1i64 DPR:$Vn), (v1i64 DPR:$Vm))),
+def : Pat<(v1i64 (NEONvbsp (v1i64 DPR:$src1),
+ (v1i64 DPR:$Vn), (v1i64 DPR:$Vm))),
(VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
def : Pat<(v8i8 (or (and DPR:$Vn, DPR:$Vd),
@@ -5560,26 +5557,23 @@ def : Pat<(v1i64 (or (and DPR:$Vn, DPR:$Vd),
(VBSPd DPR:$Vd, DPR:$Vn, DPR:$Vm)>;
}
-def VBSPq
- : PseudoNeonI<(outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
- IIC_VBINiQ, "",
- [(set QPR:$Vd,
- (v4i32 (NEONvbsp QPR:$src1, QPR:$Vn, QPR:$Vm)))]>;
+def VBSPq : PseudoNeonI<(outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
+ IIC_VBINiQ, "", []>;
let Predicates = [HasNEON] in {
-def : Pat<(v16i8 (int_arm_neon_vbsl (v16i8 QPR:$src1),
- (v16i8 QPR:$Vn), (v16i8 QPR:$Vm))),
+def : Pat<(v16i8 (NEONvbsp (v16i8 QPR:$src1),
+ (v16i8 QPR:$Vn), (v16i8 QPR:$Vm))),
(VBSPq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
-def : Pat<(v8i16 (int_arm_neon_vbsl (v8i16 QPR:$src1),
- (v8i16 QPR:$Vn), (v8i16 QPR:$Vm))),
+def : Pat<(v8i16 (NEONvbsp (v8i16 QPR:$src1),
+ (v8i16 QPR:$Vn), (v8i16 QPR:$Vm))),
(VBSPq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
-def : Pat<(v4i32 (int_arm_neon_vbsl (v4i32 QPR:$src1),
- (v4i32 QPR:$Vn), (v4i32 QPR:$Vm))),
+def : Pat<(v4i32 (NEONvbsp (v4i32 QPR:$src1),
+ (v4i32 QPR:$Vn), (v4i32 QPR:$Vm))),
(VBSPq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
-def : Pat<(v4f32 (int_arm_neon_vbsl (v4f32 QPR:$src1),
- (v4f32 QPR:$Vn), (v4f32 QPR:$Vm))),
+def : Pat<(v4f32 (NEONvbsp (v4f32 QPR:$src1),
+ (v4f32 QPR:$Vn), (v4f32 QPR:$Vm))),
(VBSPq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
-def : Pat<(v2i64 (int_arm_neon_vbsl (v2i64 QPR:$src1),
- (v2i64 QPR:$Vn), (v2i64 QPR:$Vm))),
+def : Pat<(v2i64 (NEONvbsp (v2i64 QPR:$src1),
+ (v2i64 QPR:$Vn), (v2i64 QPR:$Vm))),
(VBSPq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
def : Pat<(v16i8 (or (and QPR:$Vn, QPR:$Vd),
diff --git a/llvm/test/CodeGen/ARM/vbsl.ll b/llvm/test/CodeGen/ARM/vbsl.ll
index d5aaf3e6f30bd3..0ef725fc91b547 100644
--- a/llvm/test/CodeGen/ARM/vbsl.ll
+++ b/llvm/test/CodeGen/ARM/vbsl.ll
@@ -264,8 +264,7 @@ define <2 x i64> @test_vbslq_u64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) nounw
define <8 x i8> @same_param_all(<8 x i8> %a, <8 x i8> %b) {
; CHECK-LABEL: same_param_all:
; CHECK: @ %bb.0:
-; CHECK-NEXT: vorr d0, d1, d1
-; CHECK-NEXT: vbsl d0, d1, d1
+; CHECK-NEXT: vmov.f64 d0, d1
; CHECK-NEXT: bx lr
%vbsl.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %b, <8 x i8> %b, <8 x i8> %b)
ret <8 x i8> %vbsl.i
@@ -274,7 +273,7 @@ define <8 x i8> @same_param_all(<8 x i8> %a, <8 x i8> %b) {
define <8 x i8> @same_param_12(<8 x i8> %a, <8 x i8> %b) {
; CHECK-LABEL: same_param_12:
; CHECK: @ %bb.0:
-; CHECK-NEXT: vbsl d0, d1, d1
+; CHECK-NEXT: vmov.f64 d0, d1
; CHECK-NEXT: bx lr
%vbsl.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %b)
ret <8 x i8> %vbsl.i
More information about the llvm-commits
mailing list