[llvm] [AArch64][GlobalISel] Add lowering for constant BIT/BIF/BSP (PR #65897)
David Green via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 12 00:13:57 PDT 2023
https://github.com/davemgreen updated https://github.com/llvm/llvm-project/pull/65897:
From ff25643779a34417d7686431b07ec7d131409186 Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Sat, 9 Sep 2023 09:53:36 +0100
Subject: [PATCH] [AArch64][GlobalISel] Add lowering for constant BIT/BIF/BSP
The non-constant versions already work through tablegen patterns; this mirrors
the basic support for or(and(X, C), and(Y, ~C)) from the SDAG ISel
tryCombineToBSL. The BSP is expanded to BIT, BIF or BSL depending on which best
suits register allocation. G_BIT is replaced with G_BSP as the more general
alternative.
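For illustration, this is the IR shape the new combine targets (taken from
the bsl2xi32_const test updated below). With complementary constant masks,
the and/and/or sequence now selects to a single bif plus one constant-pool
load instead of two loads, two ands and an orr:

  define <2 x i32> @bsl2xi32_const(<2 x i32> %a, <2 x i32> %b) {
    %tmp1 = and <2 x i32> %a, <i32 -1, i32 0>
    %tmp2 = and <2 x i32> %b, <i32 0, i32 -1>
    %tmp3 = or <2 x i32> %tmp1, %tmp2
    ret <2 x i32> %tmp3
  }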
---
llvm/lib/Target/AArch64/AArch64Combine.td | 10 ++-
llvm/lib/Target/AArch64/AArch64InstrGISel.td | 7 +-
.../AArch64/GISel/AArch64LegalizerInfo.cpp | 2 +-
.../GISel/AArch64PostLegalizerCombiner.cpp | 41 +++++++++
.../AArch64/GlobalISel/legalize-fcopysign.mir | 50 ++++++-----
.../CodeGen/AArch64/GlobalISel/select-bit.mir | 90 ++++++++++---------
.../AArch64/neon-bitwise-instructions.ll | 83 +++++------------
7 files changed, 153 insertions(+), 130 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64Combine.td b/llvm/lib/Target/AArch64/AArch64Combine.td
index e31afe7a0f4af52..6a02c6d5388869b 100644
--- a/llvm/lib/Target/AArch64/AArch64Combine.td
+++ b/llvm/lib/Target/AArch64/AArch64Combine.td
@@ -215,6 +215,14 @@ def unmerge_ext_to_unmerge : GICombineRule<
(apply [{ applyUnmergeExtToUnmerge(*${d}, MRI, B, Observer, ${matchinfo}); }])
>;
+def regtriple_matchdata : GIDefMatchData<"std::tuple<Register, Register, Register>">;
+def or_to_bsp: GICombineRule <
+ (defs root:$root, regtriple_matchdata:$matchinfo),
+ (match (wip_match_opcode G_OR):$root,
+ [{ return matchOrToBSP(*${root}, MRI, ${matchinfo}); }]),
+ (apply [{ applyOrToBSP(*${root}, MRI, B, ${matchinfo}); }])
+>;
+
// Post-legalization combines which should happen at all optimization levels.
// (E.g. ones that facilitate matching for the selector) For example, matching
// pseudos.
@@ -242,5 +250,5 @@ def AArch64PostLegalizerCombiner
constant_fold_binops, identity_combines,
ptr_add_immed_chain, overlapping_and,
split_store_zero_128, undef_combines,
- select_to_minmax]> {
+ select_to_minmax, or_to_bsp]> {
}
diff --git a/llvm/lib/Target/AArch64/AArch64InstrGISel.td b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
index f9f860607b5877c..0b3509cf02d68a5 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrGISel.td
+++ b/llvm/lib/Target/AArch64/AArch64InstrGISel.td
@@ -215,8 +215,9 @@ def G_PREFETCH : AArch64GenericInstruction {
let hasSideEffects = 1;
}
-// Generic bitwise insert if true.
-def G_BIT : AArch64GenericInstruction {
+// Generic instruction for the BSP pseudo. It is expanded into BSP, which
+// expands into BSL/BIT/BIF after register allocation.
+def G_BSP : AArch64GenericInstruction {
let OutOperandList = (outs type0:$dst);
let InOperandList = (ins type0:$src1, type0:$src2, type0:$src3);
let hasSideEffects = 0;
@@ -252,7 +253,7 @@ def : GINodeEquiv<G_FCMGTZ, AArch64fcmgtz>;
def : GINodeEquiv<G_FCMLEZ, AArch64fcmlez>;
def : GINodeEquiv<G_FCMLTZ, AArch64fcmltz>;
-def : GINodeEquiv<G_BIT, AArch64bit>;
+def : GINodeEquiv<G_BSP, AArch64bsp>;
def : GINodeEquiv<G_EXTRACT_VECTOR_ELT, vector_extract>;
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index e2df8fb1321df83..71e3b91f6d802f0 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -1791,7 +1791,7 @@ bool AArch64LegalizerInfo::legalizeFCopySign(MachineInstr &MI,
if (DstSize == 64)
Mask = MIRBuilder.buildFNeg(VecTy, Mask);
- auto Sel = MIRBuilder.buildInstr(AArch64::G_BIT, {VecTy}, {Ins1, Ins2, Mask});
+ auto Sel = MIRBuilder.buildInstr(AArch64::G_BSP, {VecTy}, {Mask, Ins1, Ins2});
// Build an unmerge whose 0th elt is the original G_FCOPYSIGN destination. We
// want this to eventually become an EXTRACT_SUBREG.
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
index 5e248f568effcb7..91c261888df9892 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp
@@ -338,6 +338,47 @@ void applySplitStoreZero128(MachineInstr &MI, MachineRegisterInfo &MRI,
Store.eraseFromParent();
}
+bool matchOrToBSP(MachineInstr &MI, MachineRegisterInfo &MRI,
+ std::tuple<Register, Register, Register> &MatchInfo) {
+ const LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+ if (!DstTy.isVector())
+ return false;
+
+ Register AO1, AO2, BVO1, BVO2;
+ if (!mi_match(
+ MI, MRI,
+ m_GOr(m_GAnd(m_Reg(AO1), m_Reg(BVO1)), m_GAnd(m_Reg(AO2), m_Reg(BVO2)))))
+ return false;
+
+ auto *BV1 = getOpcodeDef<GBuildVector>(BVO1, MRI);
+ auto *BV2 = getOpcodeDef<GBuildVector>(BVO2, MRI);
+ if (!BV1 || !BV2)
+ return false;
+
+ for (int I = 0, E = DstTy.getNumElements(); I < E; I++) {
+ auto ValAndVReg1 =
+ getIConstantVRegValWithLookThrough(BV1->getSourceReg(I), MRI);
+ auto ValAndVReg2 =
+ getIConstantVRegValWithLookThrough(BV2->getSourceReg(I), MRI);
+ if (!ValAndVReg1 || !ValAndVReg2 ||
+ ValAndVReg1->Value != ~ValAndVReg2->Value)
+ return false;
+ }
+
+ MatchInfo = {AO1, AO2, BVO2};
+ return true;
+}
+
+void applyOrToBSP(MachineInstr &MI, MachineRegisterInfo &MRI,
+ MachineIRBuilder &B,
+ std::tuple<Register, Register, Register> &MatchInfo) {
+ B.setInstrAndDebugLoc(MI);
+ B.buildInstr(
+ AArch64::G_BSP, {MI.getOperand(0).getReg()},
+ {std::get<2>(MatchInfo), std::get<0>(MatchInfo), std::get<1>(MatchInfo)});
+ MI.eraseFromParent();
+}
+
class AArch64PostLegalizerCombinerImpl : public Combiner {
protected:
// TODO: Make CombinerHelper methods const.
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fcopysign.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fcopysign.mir
index 912daad7d60b096..cae2c06e44c5a1f 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fcopysign.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fcopysign.mir
@@ -10,18 +10,19 @@ body: |
liveins: $s0, $s1
; CHECK-LABEL: name: legalize_s32
; CHECK: liveins: $s0, $s1
- ; CHECK: %val:_(s32) = COPY $s0
- ; CHECK: %sign:_(s32) = COPY $s1
- ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
- ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; CHECK: [[IVEC:%[0-9]+]]:_(<4 x s32>) = G_INSERT_VECTOR_ELT [[DEF]], %val(s32), [[C]](s32)
- ; CHECK: [[IVEC1:%[0-9]+]]:_(<4 x s32>) = G_INSERT_VECTOR_ELT [[DEF]], %sign(s32), [[C]](s32)
- ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648
- ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C1]](s32), [[C1]](s32), [[C1]](s32), [[C1]](s32)
- ; CHECK: [[BIT:%[0-9]+]]:_(<4 x s32>) = G_BIT [[IVEC]], [[IVEC1]], [[BUILD_VECTOR]]
- ; CHECK: %fcopysign:_(s32), %10:_(s32), %11:_(s32), %12:_(s32) = G_UNMERGE_VALUES [[BIT]](<4 x s32>)
- ; CHECK: $s0 = COPY %fcopysign(s32)
- ; CHECK: RET_ReallyLR implicit $s0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %val:_(s32) = COPY $s0
+ ; CHECK-NEXT: %sign:_(s32) = COPY $s1
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<4 x s32>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<4 x s32>) = G_INSERT_VECTOR_ELT [[DEF]], %val(s32), [[C]](s32)
+ ; CHECK-NEXT: [[IVEC1:%[0-9]+]]:_(<4 x s32>) = G_INSERT_VECTOR_ELT [[DEF]], %sign(s32), [[C]](s32)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -2147483648
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C1]](s32), [[C1]](s32), [[C1]](s32), [[C1]](s32)
+ ; CHECK-NEXT: [[BSP:%[0-9]+]]:_(<4 x s32>) = G_BSP [[BUILD_VECTOR]], [[IVEC]], [[IVEC1]]
+ ; CHECK-NEXT: %fcopysign:_(s32), %10:_(s32), %11:_(s32), %12:_(s32) = G_UNMERGE_VALUES [[BSP]](<4 x s32>)
+ ; CHECK-NEXT: $s0 = COPY %fcopysign(s32)
+ ; CHECK-NEXT: RET_ReallyLR implicit $s0
%val:_(s32) = COPY $s0
%sign:_(s32) = COPY $s1
%fcopysign:_(s32) = G_FCOPYSIGN %val, %sign(s32)
@@ -37,18 +38,19 @@ body: |
liveins: $d0, $d1
; CHECK-LABEL: name: legalize_s64
; CHECK: liveins: $d0, $d1
- ; CHECK: %val:_(s64) = COPY $d0
- ; CHECK: %sign:_(s64) = COPY $d1
- ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s64>) = G_IMPLICIT_DEF
- ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; CHECK: [[IVEC:%[0-9]+]]:_(<2 x s64>) = G_INSERT_VECTOR_ELT [[DEF]], %val(s64), [[C]](s64)
- ; CHECK: [[IVEC1:%[0-9]+]]:_(<2 x s64>) = G_INSERT_VECTOR_ELT [[DEF]], %sign(s64), [[C]](s64)
- ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C]](s64)
- ; CHECK: [[FNEG:%[0-9]+]]:_(<2 x s64>) = G_FNEG [[BUILD_VECTOR]]
- ; CHECK: [[BIT:%[0-9]+]]:_(<2 x s64>) = G_BIT [[IVEC]], [[IVEC1]], [[FNEG]]
- ; CHECK: %fcopysign:_(s64), %10:_(s64) = G_UNMERGE_VALUES [[BIT]](<2 x s64>)
- ; CHECK: $d0 = COPY %fcopysign(s64)
- ; CHECK: RET_ReallyLR implicit $d0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %val:_(s64) = COPY $d0
+ ; CHECK-NEXT: %sign:_(s64) = COPY $d1
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(<2 x s64>) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<2 x s64>) = G_INSERT_VECTOR_ELT [[DEF]], %val(s64), [[C]](s64)
+ ; CHECK-NEXT: [[IVEC1:%[0-9]+]]:_(<2 x s64>) = G_INSERT_VECTOR_ELT [[DEF]], %sign(s64), [[C]](s64)
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[C]](s64), [[C]](s64)
+ ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(<2 x s64>) = G_FNEG [[BUILD_VECTOR]]
+ ; CHECK-NEXT: [[BSP:%[0-9]+]]:_(<2 x s64>) = G_BSP [[FNEG]], [[IVEC]], [[IVEC1]]
+ ; CHECK-NEXT: %fcopysign:_(s64), %10:_(s64) = G_UNMERGE_VALUES [[BSP]](<2 x s64>)
+ ; CHECK-NEXT: $d0 = COPY %fcopysign(s64)
+ ; CHECK-NEXT: RET_ReallyLR implicit $d0
%val:_(s64) = COPY $d0
%sign:_(s64) = COPY $d1
%fcopysign:_(s64) = G_FCOPYSIGN %val, %sign(s64)
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-bit.mir b/llvm/test/CodeGen/AArch64/GlobalISel/select-bit.mir
index 843810619c5c5d0..e3edb628097740f 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/select-bit.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-bit.mir
@@ -13,16 +13,17 @@ body: |
; CHECK-LABEL: name: BITv8i8_v2s32
; CHECK: liveins: $d0, $d1, $d2
- ; CHECK: %lhs:fpr64 = COPY $d0
- ; CHECK: %mhs:fpr64 = COPY $d1
- ; CHECK: %rhs:fpr64 = COPY $d2
- ; CHECK: %bit:fpr64 = BITv8i8 %lhs, %mhs, %rhs
- ; CHECK: $d0 = COPY %bit
- ; CHECK: RET_ReallyLR implicit $d0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %lhs:fpr64 = COPY $d0
+ ; CHECK-NEXT: %mhs:fpr64 = COPY $d1
+ ; CHECK-NEXT: %rhs:fpr64 = COPY $d2
+ ; CHECK-NEXT: %bit:fpr64 = BSPv8i8 %lhs, %mhs, %rhs
+ ; CHECK-NEXT: $d0 = COPY %bit
+ ; CHECK-NEXT: RET_ReallyLR implicit $d0
%lhs:fpr(<2 x s32>) = COPY $d0
%mhs:fpr(<2 x s32>) = COPY $d1
%rhs:fpr(<2 x s32>) = COPY $d2
- %bit:fpr(<2 x s32>) = G_BIT %lhs, %mhs, %rhs
+ %bit:fpr(<2 x s32>) = G_BSP %lhs, %mhs, %rhs
$d0 = COPY %bit(<2 x s32>)
RET_ReallyLR implicit $d0
@@ -37,16 +38,17 @@ body: |
liveins: $d0, $d1, $d2
; CHECK-LABEL: name: BITv8i8_v4s16
; CHECK: liveins: $d0, $d1, $d2
- ; CHECK: %lhs:fpr64 = COPY $d0
- ; CHECK: %mhs:fpr64 = COPY $d1
- ; CHECK: %rhs:fpr64 = COPY $d2
- ; CHECK: %bit:fpr64 = BITv8i8 %lhs, %mhs, %rhs
- ; CHECK: $d0 = COPY %bit
- ; CHECK: RET_ReallyLR implicit $d0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %lhs:fpr64 = COPY $d0
+ ; CHECK-NEXT: %mhs:fpr64 = COPY $d1
+ ; CHECK-NEXT: %rhs:fpr64 = COPY $d2
+ ; CHECK-NEXT: %bit:fpr64 = BSPv8i8 %lhs, %mhs, %rhs
+ ; CHECK-NEXT: $d0 = COPY %bit
+ ; CHECK-NEXT: RET_ReallyLR implicit $d0
%lhs:fpr(<4 x s16>) = COPY $d0
%mhs:fpr(<4 x s16>) = COPY $d1
%rhs:fpr(<4 x s16>) = COPY $d2
- %bit:fpr(<4 x s16>) = G_BIT %lhs, %mhs, %rhs
+ %bit:fpr(<4 x s16>) = G_BSP %lhs, %mhs, %rhs
$d0 = COPY %bit(<4 x s16>)
RET_ReallyLR implicit $d0
@@ -62,16 +64,17 @@ body: |
; CHECK-LABEL: name: BITv16i8_v2s64
; CHECK: liveins: $q0, $q1, $q2
- ; CHECK: %lhs:fpr128 = COPY $q0
- ; CHECK: %mhs:fpr128 = COPY $q1
- ; CHECK: %rhs:fpr128 = COPY $q2
- ; CHECK: %bit:fpr128 = BITv16i8 %lhs, %mhs, %rhs
- ; CHECK: $q0 = COPY %bit
- ; CHECK: RET_ReallyLR implicit $q0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %lhs:fpr128 = COPY $q0
+ ; CHECK-NEXT: %mhs:fpr128 = COPY $q1
+ ; CHECK-NEXT: %rhs:fpr128 = COPY $q2
+ ; CHECK-NEXT: %bit:fpr128 = BSPv16i8 %lhs, %mhs, %rhs
+ ; CHECK-NEXT: $q0 = COPY %bit
+ ; CHECK-NEXT: RET_ReallyLR implicit $q0
%lhs:fpr(<2 x s64>) = COPY $q0
%mhs:fpr(<2 x s64>) = COPY $q1
%rhs:fpr(<2 x s64>) = COPY $q2
- %bit:fpr(<2 x s64>) = G_BIT %lhs, %mhs, %rhs
+ %bit:fpr(<2 x s64>) = G_BSP %lhs, %mhs, %rhs
$q0 = COPY %bit(<2 x s64>)
RET_ReallyLR implicit $q0
@@ -87,16 +90,17 @@ body: |
; CHECK-LABEL: name: BITv16i8_v4s32
; CHECK: liveins: $q0, $q1, $q2
- ; CHECK: %lhs:fpr128 = COPY $q0
- ; CHECK: %mhs:fpr128 = COPY $q1
- ; CHECK: %rhs:fpr128 = COPY $q2
- ; CHECK: %bit:fpr128 = BITv16i8 %lhs, %mhs, %rhs
- ; CHECK: $q0 = COPY %bit
- ; CHECK: RET_ReallyLR implicit $q0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %lhs:fpr128 = COPY $q0
+ ; CHECK-NEXT: %mhs:fpr128 = COPY $q1
+ ; CHECK-NEXT: %rhs:fpr128 = COPY $q2
+ ; CHECK-NEXT: %bit:fpr128 = BSPv16i8 %lhs, %mhs, %rhs
+ ; CHECK-NEXT: $q0 = COPY %bit
+ ; CHECK-NEXT: RET_ReallyLR implicit $q0
%lhs:fpr(<4 x s32>) = COPY $q0
%mhs:fpr(<4 x s32>) = COPY $q1
%rhs:fpr(<4 x s32>) = COPY $q2
- %bit:fpr(<4 x s32>) = G_BIT %lhs, %mhs, %rhs
+ %bit:fpr(<4 x s32>) = G_BSP %lhs, %mhs, %rhs
$q0 = COPY %bit(<4 x s32>)
RET_ReallyLR implicit $q0
@@ -112,16 +116,17 @@ body: |
; CHECK-LABEL: name: BITv16i8_v8s16
; CHECK: liveins: $q0, $q1, $q2
- ; CHECK: %lhs:fpr128 = COPY $q0
- ; CHECK: %mhs:fpr128 = COPY $q1
- ; CHECK: %rhs:fpr128 = COPY $q2
- ; CHECK: %bit:fpr128 = BITv16i8 %lhs, %mhs, %rhs
- ; CHECK: $q0 = COPY %bit
- ; CHECK: RET_ReallyLR implicit $q0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %lhs:fpr128 = COPY $q0
+ ; CHECK-NEXT: %mhs:fpr128 = COPY $q1
+ ; CHECK-NEXT: %rhs:fpr128 = COPY $q2
+ ; CHECK-NEXT: %bit:fpr128 = BSPv16i8 %lhs, %mhs, %rhs
+ ; CHECK-NEXT: $q0 = COPY %bit
+ ; CHECK-NEXT: RET_ReallyLR implicit $q0
%lhs:fpr(<8 x s16>) = COPY $q0
%mhs:fpr(<8 x s16>) = COPY $q1
%rhs:fpr(<8 x s16>) = COPY $q2
- %bit:fpr(<8 x s16>) = G_BIT %lhs, %mhs, %rhs
+ %bit:fpr(<8 x s16>) = G_BSP %lhs, %mhs, %rhs
$q0 = COPY %bit(<8 x s16>)
RET_ReallyLR implicit $q0
@@ -137,15 +142,16 @@ body: |
; CHECK-LABEL: name: BITv16i8_v16s8
; CHECK: liveins: $q0, $q1, $q2
- ; CHECK: %lhs:fpr128 = COPY $q0
- ; CHECK: %mhs:fpr128 = COPY $q1
- ; CHECK: %rhs:fpr128 = COPY $q2
- ; CHECK: %bit:fpr128 = BITv16i8 %lhs, %mhs, %rhs
- ; CHECK: $q0 = COPY %bit
- ; CHECK: RET_ReallyLR implicit $q0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: %lhs:fpr128 = COPY $q0
+ ; CHECK-NEXT: %mhs:fpr128 = COPY $q1
+ ; CHECK-NEXT: %rhs:fpr128 = COPY $q2
+ ; CHECK-NEXT: %bit:fpr128 = BSPv16i8 %lhs, %mhs, %rhs
+ ; CHECK-NEXT: $q0 = COPY %bit
+ ; CHECK-NEXT: RET_ReallyLR implicit $q0
%lhs:fpr(<16 x s8>) = COPY $q0
%mhs:fpr(<16 x s8>) = COPY $q1
%rhs:fpr(<16 x s8>) = COPY $q2
- %bit:fpr(<16 x s8>) = G_BIT %lhs, %mhs, %rhs
+ %bit:fpr(<16 x s8>) = G_BSP %lhs, %mhs, %rhs
$q0 = COPY %bit(<16 x s8>)
RET_ReallyLR implicit $q0
diff --git a/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll b/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll
index 47de57a68be96a4..4f13b78d6c169a1 100644
--- a/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll
+++ b/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll
@@ -67,13 +67,9 @@ define <8 x i8> @bsl8xi8_const(<8 x i8> %a, <8 x i8> %b) {
;
; CHECK-GI-LABEL: bsl8xi8_const:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: adrp x8, .LCPI6_1
-; CHECK-GI-NEXT: adrp x9, .LCPI6_0
-; CHECK-GI-NEXT: ldr d2, [x8, :lo12:.LCPI6_1]
-; CHECK-GI-NEXT: ldr d3, [x9, :lo12:.LCPI6_0]
-; CHECK-GI-NEXT: and v0.8b, v0.8b, v2.8b
-; CHECK-GI-NEXT: and v1.8b, v1.8b, v3.8b
-; CHECK-GI-NEXT: orr v0.8b, v0.8b, v1.8b
+; CHECK-GI-NEXT: adrp x8, .LCPI6_0
+; CHECK-GI-NEXT: ldr d2, [x8, :lo12:.LCPI6_0]
+; CHECK-GI-NEXT: bif v0.8b, v1.8b, v2.8b
; CHECK-GI-NEXT: ret
%tmp1 = and <8 x i8> %a, < i8 -1, i8 -1, i8 0, i8 0, i8 -1, i8 -1, i8 0, i8 0 >
%tmp2 = and <8 x i8> %b, < i8 0, i8 0, i8 -1, i8 -1, i8 0, i8 0, i8 -1, i8 -1 >
@@ -90,13 +86,9 @@ define <16 x i8> @bsl16xi8_const(<16 x i8> %a, <16 x i8> %b) {
;
; CHECK-GI-LABEL: bsl16xi8_const:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: adrp x8, .LCPI7_1
-; CHECK-GI-NEXT: adrp x9, .LCPI7_0
-; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI7_1]
-; CHECK-GI-NEXT: ldr q3, [x9, :lo12:.LCPI7_0]
-; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
-; CHECK-GI-NEXT: and v1.16b, v1.16b, v3.16b
-; CHECK-GI-NEXT: orr v0.16b, v0.16b, v1.16b
+; CHECK-GI-NEXT: adrp x8, .LCPI7_0
+; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI7_0]
+; CHECK-GI-NEXT: bif v0.16b, v1.16b, v2.16b
; CHECK-GI-NEXT: ret
%tmp1 = and <16 x i8> %a, < i8 -1, i8 -1, i8 -1, i8 -1, i8 0, i8 0, i8 0, i8 0, i8 -1, i8 -1, i8 -1, i8 -1, i8 0, i8 0, i8 0, i8 0 >
%tmp2 = and <16 x i8> %b, < i8 0, i8 0, i8 0, i8 0, i8 -1, i8 -1, i8 -1, i8 -1, i8 0, i8 0, i8 0, i8 0, i8 -1, i8 -1, i8 -1, i8 -1 >
@@ -926,13 +918,9 @@ define <2 x i32> @bsl2xi32_const(<2 x i32> %a, <2 x i32> %b) {
;
; CHECK-GI-LABEL: bsl2xi32_const:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: adrp x8, .LCPI70_1
-; CHECK-GI-NEXT: adrp x9, .LCPI70_0
-; CHECK-GI-NEXT: ldr d2, [x8, :lo12:.LCPI70_1]
-; CHECK-GI-NEXT: ldr d3, [x9, :lo12:.LCPI70_0]
-; CHECK-GI-NEXT: and v0.8b, v0.8b, v2.8b
-; CHECK-GI-NEXT: and v1.8b, v1.8b, v3.8b
-; CHECK-GI-NEXT: orr v0.8b, v0.8b, v1.8b
+; CHECK-GI-NEXT: adrp x8, .LCPI70_0
+; CHECK-GI-NEXT: ldr d2, [x8, :lo12:.LCPI70_0]
+; CHECK-GI-NEXT: bif v0.8b, v1.8b, v2.8b
; CHECK-GI-NEXT: ret
%tmp1 = and <2 x i32> %a, < i32 -1, i32 0 >
%tmp2 = and <2 x i32> %b, < i32 0, i32 -1 >
@@ -950,13 +938,9 @@ define <4 x i16> @bsl4xi16_const(<4 x i16> %a, <4 x i16> %b) {
;
; CHECK-GI-LABEL: bsl4xi16_const:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: adrp x8, .LCPI71_1
-; CHECK-GI-NEXT: adrp x9, .LCPI71_0
-; CHECK-GI-NEXT: ldr d2, [x8, :lo12:.LCPI71_1]
-; CHECK-GI-NEXT: ldr d3, [x9, :lo12:.LCPI71_0]
-; CHECK-GI-NEXT: and v0.8b, v0.8b, v2.8b
-; CHECK-GI-NEXT: and v1.8b, v1.8b, v3.8b
-; CHECK-GI-NEXT: orr v0.8b, v0.8b, v1.8b
+; CHECK-GI-NEXT: adrp x8, .LCPI71_0
+; CHECK-GI-NEXT: ldr d2, [x8, :lo12:.LCPI71_0]
+; CHECK-GI-NEXT: bif v0.8b, v1.8b, v2.8b
; CHECK-GI-NEXT: ret
%tmp1 = and <4 x i16> %a, < i16 -1, i16 0, i16 -1,i16 0 >
%tmp2 = and <4 x i16> %b, < i16 0, i16 -1,i16 0, i16 -1 >
@@ -995,13 +979,9 @@ define <4 x i32> @bsl4xi32_const(<4 x i32> %a, <4 x i32> %b) {
;
; CHECK-GI-LABEL: bsl4xi32_const:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: adrp x8, .LCPI73_1
-; CHECK-GI-NEXT: adrp x9, .LCPI73_0
-; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI73_1]
-; CHECK-GI-NEXT: ldr q3, [x9, :lo12:.LCPI73_0]
-; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
-; CHECK-GI-NEXT: and v1.16b, v1.16b, v3.16b
-; CHECK-GI-NEXT: orr v0.16b, v0.16b, v1.16b
+; CHECK-GI-NEXT: adrp x8, .LCPI73_0
+; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI73_0]
+; CHECK-GI-NEXT: bif v0.16b, v1.16b, v2.16b
; CHECK-GI-NEXT: ret
%tmp1 = and <4 x i32> %a, < i32 -1, i32 0, i32 -1, i32 0 >
%tmp2 = and <4 x i32> %b, < i32 0, i32 -1, i32 0, i32 -1 >
@@ -1018,13 +998,9 @@ define <8 x i16> @bsl8xi16_const(<8 x i16> %a, <8 x i16> %b) {
;
; CHECK-GI-LABEL: bsl8xi16_const:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: adrp x8, .LCPI74_1
-; CHECK-GI-NEXT: adrp x9, .LCPI74_0
-; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI74_1]
-; CHECK-GI-NEXT: ldr q3, [x9, :lo12:.LCPI74_0]
-; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
-; CHECK-GI-NEXT: and v1.16b, v1.16b, v3.16b
-; CHECK-GI-NEXT: orr v0.16b, v0.16b, v1.16b
+; CHECK-GI-NEXT: adrp x8, .LCPI74_0
+; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI74_0]
+; CHECK-GI-NEXT: bif v0.16b, v1.16b, v2.16b
; CHECK-GI-NEXT: ret
%tmp1 = and <8 x i16> %a, < i16 -1, i16 -1, i16 0,i16 0, i16 -1, i16 -1, i16 0,i16 0 >
%tmp2 = and <8 x i16> %b, < i16 0, i16 0, i16 -1, i16 -1, i16 0, i16 0, i16 -1, i16 -1 >
@@ -1033,23 +1009,12 @@ define <8 x i16> @bsl8xi16_const(<8 x i16> %a, <8 x i16> %b) {
}
define <2 x i64> @bsl2xi64_const(<2 x i64> %a, <2 x i64> %b) {
-; CHECK-SD-LABEL: bsl2xi64_const:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: adrp x8, .LCPI75_0
-; CHECK-SD-NEXT: ldr q2, [x8, :lo12:.LCPI75_0]
-; CHECK-SD-NEXT: bif v0.16b, v1.16b, v2.16b
-; CHECK-SD-NEXT: ret
-;
-; CHECK-GI-LABEL: bsl2xi64_const:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: adrp x8, .LCPI75_1
-; CHECK-GI-NEXT: adrp x9, .LCPI75_0
-; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI75_1]
-; CHECK-GI-NEXT: ldr q3, [x9, :lo12:.LCPI75_0]
-; CHECK-GI-NEXT: and v0.16b, v0.16b, v2.16b
-; CHECK-GI-NEXT: and v1.16b, v1.16b, v3.16b
-; CHECK-GI-NEXT: orr v0.16b, v0.16b, v1.16b
-; CHECK-GI-NEXT: ret
+; CHECK-LABEL: bsl2xi64_const:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, .LCPI75_0
+; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI75_0]
+; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b
+; CHECK-NEXT: ret
%tmp1 = and <2 x i64> %a, < i64 -1, i64 0 >
%tmp2 = and <2 x i64> %b, < i64 0, i64 -1 >
%tmp3 = or <2 x i64> %tmp1, %tmp2