[llvm] [AArch64][GlobalISel] Legalize vector boolean bitcasts to scalars by lowering via stack. (PR #121171)
Amara Emerson via llvm-commits
llvm-commits at lists.llvm.org
Sun Jan 5 21:32:32 PST 2025
https://github.com/aemerson updated https://github.com/llvm/llvm-project/pull/121171
>From 0be38ccf5c865b4fddc357b33c378c70a20532b9 Mon Sep 17 00:00:00 2001
From: Amara Emerson <amara at apple.com>
Date: Thu, 26 Dec 2024 16:13:55 -0800
Subject: [PATCH 1/4] [𝘀𝗽𝗿] changes to main this commit is based on
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Created using spr 1.3.5
[skip ci]
---
.../CodeGen/GlobalISel/LegalizerHelper.cpp | 14 ++++++--
.../AArch64/GISel/AArch64LegalizerInfo.cpp | 1 +
.../legalize-store-vector-bools.mir | 32 +++++++++++++++++++
3 files changed, 45 insertions(+), 2 deletions(-)
create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/legalize-store-vector-bools.mir
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index e2247f76098e97..a931123638ffb9 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -3022,8 +3022,18 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
return UnableToLegalize;
LLT Ty = MRI.getType(MI.getOperand(0).getReg());
- if (!Ty.isScalar())
- return UnableToLegalize;
+ if (!Ty.isScalar()) {
+ // We need to widen the vector element type.
+ Observer.changingInstr(MI);
+ widenScalarSrc(MI, WideTy, 0, TargetOpcode::G_ANYEXT);
+ // We also need to adjust the MMO to turn this into a truncating store.
+ MachineMemOperand &MMO = **MI.memoperands_begin();
+ MachineFunction &MF = MIRBuilder.getMF();
+ auto *NewMMO = MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), Ty);
+ MI.setMemRefs(MF, {NewMMO});
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
Observer.changingInstr(MI);
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 4b7d4158faf069..2c35482b7c9e5f 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -454,6 +454,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
{nxv2s64, p0, nxv2s64, 8},
})
.clampScalar(0, s8, s64)
+ .minScalarOrElt(0, s8)
.lowerIf([=](const LegalityQuery &Query) {
return Query.Types[0].isScalar() &&
Query.Types[0] != Query.MMODescrs[0].MemoryTy;
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-store-vector-bools.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-store-vector-bools.mir
new file mode 100644
index 00000000000000..de70f89461780b
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-store-vector-bools.mir
@@ -0,0 +1,32 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -O0 -mtriple=aarch64 -run-pass=legalizer -global-isel-abort=2 %s -o - | FileCheck %s
+# This test currently is expected to fall back after reaching truncstore of <8 x s8> as <8 x s1>.
+---
+name: store_8xs1
+tracksRegLiveness: true
+body: |
+ bb.1:
+ liveins: $q0, $q1, $x0
+ ; CHECK-LABEL: name: store_8xs1
+ ; CHECK: liveins: $q0, $q1, $x0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
+ ; CHECK-NEXT: %ptr:_(p0) = COPY $x0
+ ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[COPY]](<4 x s32>), [[COPY1]](<4 x s32>)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32)
+ ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<8 x s1>) = G_ICMP intpred(slt), [[CONCAT_VECTORS]](<8 x s32>), [[BUILD_VECTOR]]
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(<8 x s8>) = G_ANYEXT [[ICMP]](<8 x s1>)
+ ; CHECK-NEXT: G_STORE [[ANYEXT]](<8 x s8>), %ptr(p0) :: (store (<8 x s1>))
+ ; CHECK-NEXT: RET_ReallyLR
+ %1:_(<4 x s32>) = COPY $q0
+ %2:_(<4 x s32>) = COPY $q1
+ %ptr:_(p0) = COPY $x0
+ %0:_(<8 x s32>) = G_CONCAT_VECTORS %1(<4 x s32>), %2(<4 x s32>)
+ %4:_(s32) = G_CONSTANT i32 0
+ %3:_(<8 x s32>) = G_BUILD_VECTOR %4(s32), %4(s32), %4(s32), %4(s32), %4(s32), %4(s32), %4(s32), %4(s32)
+ %5:_(<8 x s1>) = G_ICMP intpred(slt), %0(<8 x s32>), %3
+ G_STORE %5(<8 x s1>), %ptr(p0) :: (store (<8 x s1>))
+ RET_ReallyLR
+...
>From 18da0bff65252d4ef62f7dcefa73b7b508d10bec Mon Sep 17 00:00:00 2001
From: Amara Emerson <amara at apple.com>
Date: Fri, 27 Dec 2024 10:49:17 -0800
Subject: [PATCH 2/4] [𝘀𝗽𝗿] changes introduced through rebase
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Created using spr 1.3.5
[skip ci]
---
llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp | 1 +
1 file changed, 1 insertion(+)
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index a931123638ffb9..7af074a37e13c3 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -3022,6 +3022,7 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
return UnableToLegalize;
LLT Ty = MRI.getType(MI.getOperand(0).getReg());
+ assert(!Ty.isPointerOrPointerVector() && "Can't widen type");
if (!Ty.isScalar()) {
// We need to widen the vector element type.
Observer.changingInstr(MI);
>From b9214baba592d4c7860d714b6d0dffd519a48400 Mon Sep 17 00:00:00 2001
From: Amara Emerson <amara at apple.com>
Date: Fri, 27 Dec 2024 17:34:25 -0800
Subject: [PATCH 3/4] Factor out into funct.
Created using spr 1.3.5
---
.../llvm/CodeGen/GlobalISel/LegalizerHelper.h | 3 +
.../CodeGen/GlobalISel/LegalizerHelper.cpp | 47 +-
.../AArch64/GISel/AArch64LegalizerInfo.cpp | 3 +-
.../AArch64/GlobalISel/legalize-bitcast.mir | 59 +-
.../legalize-store-vector-bools.mir | 68 +-
.../AArch64/vec-combine-compare-to-bitmask.ll | 605 ++++++++++++++----
6 files changed, 640 insertions(+), 145 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
index fac059803b9489..4e18f5cc913a7e 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
@@ -302,6 +302,9 @@ class LegalizerHelper {
/// same type as \p Res.
MachineInstrBuilder createStackStoreLoad(const DstOp &Res, const SrcOp &Val);
+ /// Given a store of a boolean vector, scalarize it.
+ LegalizeResult scalarizeVectorBooleanStore(GStore &MI);
+
/// Get a pointer to vector element \p Index located in memory for a vector of
/// type \p VecTy starting at a base address of \p VecPtr. If \p Index is out
/// of bounds the returned pointer is unspecified, but will be within the
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 7dece931e8e0eb..0bfa897ecf4047 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -4143,9 +4143,8 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerStore(GStore &StoreMI) {
}
if (MemTy.isVector()) {
- // TODO: Handle vector trunc stores
if (MemTy != SrcTy)
- return UnableToLegalize;
+ return scalarizeVectorBooleanStore(StoreMI);
// TODO: We can do better than scalarizing the vector and at least split it
// in half.
@@ -4200,6 +4199,50 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerStore(GStore &StoreMI) {
return Legalized;
}
+LegalizerHelper::LegalizeResult
+LegalizerHelper::scalarizeVectorBooleanStore(GStore &StoreMI) {
+ Register SrcReg = StoreMI.getValueReg();
+ Register PtrReg = StoreMI.getPointerReg();
+ LLT SrcTy = MRI.getType(SrcReg);
+ MachineMemOperand &MMO = **StoreMI.memoperands_begin();
+ LLT MemTy = MMO.getMemoryType();
+ LLT MemScalarTy = MemTy.getElementType();
+ MachineFunction &MF = MIRBuilder.getMF();
+
+ assert(SrcTy.isVector() && "Expect a vector store type");
+
+ if (!MemScalarTy.isByteSized()) {
+ // We need to build an integer scalar of the vector bit pattern.
+ // It's not legal for us to add padding when storing a vector.
+ unsigned NumBits = MemTy.getSizeInBits();
+ LLT IntTy = LLT::scalar(NumBits);
+ auto CurrVal = MIRBuilder.buildConstant(IntTy, 0);
+ LLT IdxTy = getLLTForMVT(TLI.getVectorIdxTy(MF.getDataLayout()));
+
+ for (unsigned I = 0, E = MemTy.getNumElements(); I < E; ++I) {
+ auto Elt = MIRBuilder.buildExtractVectorElement(
+ SrcTy.getElementType(), SrcReg, MIRBuilder.buildConstant(IdxTy, I));
+ auto Trunc = MIRBuilder.buildTrunc(MemScalarTy, Elt);
+ auto ZExt = MIRBuilder.buildZExt(IntTy, Trunc);
+ unsigned ShiftIntoIdx = MF.getDataLayout().isBigEndian()
+ ? (MemTy.getNumElements() - 1) - I
+ : I;
+ auto ShiftAmt = MIRBuilder.buildConstant(
+ IntTy, ShiftIntoIdx * MemScalarTy.getSizeInBits());
+ auto Shifted = MIRBuilder.buildShl(IntTy, ZExt, ShiftAmt);
+ CurrVal = MIRBuilder.buildOr(IntTy, CurrVal, Shifted);
+ }
+ auto PtrInfo = MMO.getPointerInfo();
+ auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, IntTy);
+ MIRBuilder.buildStore(CurrVal, PtrReg, *NewMMO);
+ StoreMI.eraseFromParent();
+ return Legalized;
+ }
+
+ // TODO: implement simple scalarization.
+ return UnableToLegalize;
+}
+
LegalizerHelper::LegalizeResult
LegalizerHelper::bitcast(MachineInstr &MI, unsigned TypeIdx, LLT CastTy) {
switch (MI.getOpcode()) {
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 2fac100f81519a..641f06530a5c23 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -474,7 +474,8 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
})
.customIf(IsPtrVecPred)
.scalarizeIf(typeInSet(0, {v2s16, v2s8}), 0)
- .scalarizeIf(scalarOrEltWiderThan(0, 64), 0);
+ .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
+ .lower();
getActionDefinitionsBuilder(G_INDEXED_STORE)
// Idx 0 == Ptr, Idx 1 == Val
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-bitcast.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-bitcast.mir
index 7b24bb1227fa26..4d461c971d3320 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-bitcast.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-bitcast.mir
@@ -1,5 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
-# RUN: llc -mtriple=aarch64 -run-pass=legalizer -global-isel-abort=2 %s -o - | FileCheck %s
+# RUN: llc -mtriple=aarch64 -run-pass=legalizer -global-isel-abort=1 %s -o - | FileCheck %s
---
name: scalar_to_oversize_vector
tracksRegLiveness: true
@@ -48,17 +48,66 @@ body: |
G_BR %bb.2
...
-# This test currently is expected to fall back after reaching truncstore of <8 x s8> as <8 x s1>.
---
name: boolean_vector_to_scalar
tracksRegLiveness: true
body: |
bb.1:
; CHECK-LABEL: name: boolean_vector_to_scalar
- ; CHECK: %vec:_(<8 x s1>) = G_IMPLICIT_DEF
+ ; CHECK: [[DEF:%[0-9]+]]:_(<8 x s8>) = G_IMPLICIT_DEF
; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
- ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(<8 x s8>) = G_ANYEXT %vec(<8 x s1>)
- ; CHECK-NEXT: G_STORE [[ANYEXT]](<8 x s8>), [[FRAME_INDEX]](p0) :: (store (<8 x s1>) into %stack.0)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](<8 x s8>), [[C]](s64)
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EVEC]](s8)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C1]]
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s64)
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL]]
+ ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+ ; CHECK-NEXT: [[EVEC1:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](<8 x s8>), [[C3]](s64)
+ ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[EVEC1]](s8)
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C1]]
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s64)
+ ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+ ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+ ; CHECK-NEXT: [[EVEC2:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](<8 x s8>), [[C4]](s64)
+ ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[EVEC2]](s8)
+ ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C1]]
+ ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C4]](s64)
+ ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+ ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+ ; CHECK-NEXT: [[EVEC3:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](<8 x s8>), [[C5]](s64)
+ ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[EVEC3]](s8)
+ ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ANYEXT3]], [[C1]]
+ ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C5]](s64)
+ ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[OR2]], [[SHL3]]
+ ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+ ; CHECK-NEXT: [[EVEC4:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](<8 x s8>), [[C6]](s64)
+ ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[EVEC4]](s8)
+ ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ANYEXT4]], [[C1]]
+ ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C6]](s64)
+ ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+ ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
+ ; CHECK-NEXT: [[EVEC5:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](<8 x s8>), [[C7]](s64)
+ ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[EVEC5]](s8)
+ ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ANYEXT5]], [[C1]]
+ ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s64)
+ ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+ ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+ ; CHECK-NEXT: [[EVEC6:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](<8 x s8>), [[C8]](s64)
+ ; CHECK-NEXT: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[EVEC6]](s8)
+ ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ANYEXT6]], [[C1]]
+ ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C8]](s64)
+ ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[OR5]], [[SHL6]]
+ ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
+ ; CHECK-NEXT: [[EVEC7:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](<8 x s8>), [[C9]](s64)
+ ; CHECK-NEXT: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[EVEC7]](s8)
+ ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ANYEXT7]], [[C1]]
+ ; CHECK-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C9]](s64)
+ ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[OR7]](s32)
+ ; CHECK-NEXT: G_STORE [[TRUNC]](s8), [[FRAME_INDEX]](p0) :: (store (s8) into %stack.0)
; CHECK-NEXT: %bc:_(s8) = G_LOAD [[FRAME_INDEX]](p0) :: (load (s8) from %stack.0)
; CHECK-NEXT: %ext:_(s32) = G_ANYEXT %bc(s8)
; CHECK-NEXT: $w0 = COPY %ext(s32)
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-store-vector-bools.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-store-vector-bools.mir
index de70f89461780b..1df6297e363833 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-store-vector-bools.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-store-vector-bools.mir
@@ -1,6 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
-# RUN: llc -O0 -mtriple=aarch64 -run-pass=legalizer -global-isel-abort=2 %s -o - | FileCheck %s
-# This test currently is expected to fall back after reaching truncstore of <8 x s8> as <8 x s1>.
+# RUN: llc -O0 -mtriple=aarch64 -run-pass=legalizer -global-isel-abort=1 %s -o - | FileCheck %s
---
name: store_8xs1
tracksRegLiveness: true
@@ -13,12 +12,67 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
; CHECK-NEXT: %ptr:_(p0) = COPY $x0
- ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[COPY]](<4 x s32>), [[COPY1]](<4 x s32>)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32)
- ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<8 x s1>) = G_ICMP intpred(slt), [[CONCAT_VECTORS]](<8 x s32>), [[BUILD_VECTOR]]
- ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(<8 x s8>) = G_ANYEXT [[ICMP]](<8 x s1>)
- ; CHECK-NEXT: G_STORE [[ANYEXT]](<8 x s8>), %ptr(p0) :: (store (<8 x s1>))
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32)
+ ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32)
+ ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<4 x s32>) = G_ICMP intpred(slt), [[COPY]](<4 x s32>), [[BUILD_VECTOR]]
+ ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(<4 x s32>) = G_ICMP intpred(slt), [[COPY1]](<4 x s32>), [[BUILD_VECTOR1]]
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[ICMP]](<4 x s32>)
+ ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[ICMP1]](<4 x s32>)
+ ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s16>), [[TRUNC1]](<4 x s16>)
+ ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(<8 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<8 x s16>)
+ ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[TRUNC2]](<8 x s8>), [[C1]](s64)
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EVEC]](s8)
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C2]]
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C1]](s64)
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY2]], [[SHL]]
+ ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
+ ; CHECK-NEXT: [[EVEC1:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[TRUNC2]](<8 x s8>), [[C3]](s64)
+ ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[EVEC1]](s8)
+ ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C2]]
+ ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s64)
+ ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
+ ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
+ ; CHECK-NEXT: [[EVEC2:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[TRUNC2]](<8 x s8>), [[C4]](s64)
+ ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[EVEC2]](s8)
+ ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C2]]
+ ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C4]](s64)
+ ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
+ ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
+ ; CHECK-NEXT: [[EVEC3:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[TRUNC2]](<8 x s8>), [[C5]](s64)
+ ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[EVEC3]](s8)
+ ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ANYEXT3]], [[C2]]
+ ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C5]](s64)
+ ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[OR2]], [[SHL3]]
+ ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
+ ; CHECK-NEXT: [[EVEC4:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[TRUNC2]](<8 x s8>), [[C6]](s64)
+ ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[EVEC4]](s8)
+ ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ANYEXT4]], [[C2]]
+ ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C6]](s64)
+ ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
+ ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
+ ; CHECK-NEXT: [[EVEC5:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[TRUNC2]](<8 x s8>), [[C7]](s64)
+ ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[EVEC5]](s8)
+ ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ANYEXT5]], [[C2]]
+ ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s64)
+ ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
+ ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
+ ; CHECK-NEXT: [[EVEC6:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[TRUNC2]](<8 x s8>), [[C8]](s64)
+ ; CHECK-NEXT: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[EVEC6]](s8)
+ ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ANYEXT6]], [[C2]]
+ ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C8]](s64)
+ ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[OR5]], [[SHL6]]
+ ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
+ ; CHECK-NEXT: [[EVEC7:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[TRUNC2]](<8 x s8>), [[C9]](s64)
+ ; CHECK-NEXT: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[EVEC7]](s8)
+ ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ANYEXT7]], [[C2]]
+ ; CHECK-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C9]](s64)
+ ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
+ ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[OR7]](s32)
+ ; CHECK-NEXT: G_STORE [[TRUNC3]](s8), %ptr(p0) :: (store (s8))
; CHECK-NEXT: RET_ReallyLR
%1:_(<4 x s32>) = COPY $q0
%2:_(<4 x s32>) = COPY $q1
diff --git a/llvm/test/CodeGen/AArch64/vec-combine-compare-to-bitmask.ll b/llvm/test/CodeGen/AArch64/vec-combine-compare-to-bitmask.ll
index 7f2eefe5ed72f6..1fa96979f45530 100644
--- a/llvm/test/CodeGen/AArch64/vec-combine-compare-to-bitmask.ll
+++ b/llvm/test/CodeGen/AArch64/vec-combine-compare-to-bitmask.ll
@@ -1,26 +1,97 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
-; RUN: llc -mtriple=aarch64-apple-darwin -mattr=+neon -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-apple-darwin -mattr=+neon -aarch64-enable-collect-loh=false -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,SDAG
+; RUN: llc -mtriple=aarch64-apple-darwin -mattr=+neon -aarch64-enable-collect-loh=false -global-isel -global-isel-abort=2 -verify-machineinstrs < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,GISEL
; Basic tests from input vector to bitmask
; IR generated from clang for:
; __builtin_convertvector + reinterpret_cast<uint16&>
+; GISEL: warning: Instruction selection used fallback path for convert_to_bitmask4
+; GISEL-NEXT: warning: Instruction selection used fallback path for convert_to_bitmask2
+; GISEL-NEXT: warning: Instruction selection used fallback path for clang_builtins_undef_concat_convert_to_bitmask4
+; GISEL-NEXT: warning: Instruction selection used fallback path for convert_to_bitmask_no_compare
+; GISEL-NEXT: warning: Instruction selection used fallback path for convert_to_bitmask_with_compare_chain
+; GISEL-NEXT: warning: Instruction selection used fallback path for convert_to_bitmask_with_trunc_in_chain
+; GISEL-NEXT: warning: Instruction selection used fallback path for convert_to_bitmask_with_unknown_type_in_long_chain
+; GISEL-NEXT: warning: Instruction selection used fallback path for convert_to_bitmask_with_different_types_in_chain
+; GISEL-NEXT: warning: Instruction selection used fallback path for convert_to_bitmask_2xi32
+; GISEL-NEXT: warning: Instruction selection used fallback path for convert_to_bitmask_4xi8
+; GISEL-NEXT: warning: Instruction selection used fallback path for convert_to_bitmask_8xi2
+; GISEL-NEXT: warning: Instruction selection used fallback path for convert_to_bitmask_float
+; GISEL-NEXT: warning: Instruction selection used fallback path for convert_legalized_illegal_element_size
+; GISEL-NEXT: warning: Instruction selection used fallback path for no_direct_convert_for_bad_concat
+; GISEL-NEXT: warning: Instruction selection used fallback path for no_combine_illegal_num_elements
+
define i16 @convert_to_bitmask16(<16 x i8> %vec) {
; Bits used in mask
-; CHECK-LABEL: convert_to_bitmask16:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: Lloh0:
-; CHECK-NEXT: adrp x8, lCPI0_0 at PAGE
-; CHECK-NEXT: cmeq.16b v0, v0, #0
-; CHECK-NEXT: Lloh1:
-; CHECK-NEXT: ldr q1, [x8, lCPI0_0 at PAGEOFF]
-; CHECK-NEXT: bic.16b v0, v1, v0
-; CHECK-NEXT: ext.16b v1, v0, v0, #8
-; CHECK-NEXT: zip1.16b v0, v0, v1
-; CHECK-NEXT: addv.8h h0, v0
-; CHECK-NEXT: fmov w0, s0
-; CHECK-NEXT: ret
-; CHECK-NEXT: .loh AdrpLdr Lloh0, Lloh1
+; SDAG-LABEL: convert_to_bitmask16:
+; SDAG: ; %bb.0:
+; SDAG-NEXT: adrp x8, lCPI0_0 at PAGE
+; SDAG-NEXT: cmeq.16b v0, v0, #0
+; SDAG-NEXT: ldr q1, [x8, lCPI0_0 at PAGEOFF]
+; SDAG-NEXT: bic.16b v0, v1, v0
+; SDAG-NEXT: ext.16b v1, v0, v0, #8
+; SDAG-NEXT: zip1.16b v0, v0, v1
+; SDAG-NEXT: addv.8h h0, v0
+; SDAG-NEXT: fmov w0, s0
+; SDAG-NEXT: ret
+;
+; GISEL-LABEL: convert_to_bitmask16:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: sub sp, sp, #16
+; GISEL-NEXT: .cfi_def_cfa_offset 16
+; GISEL-NEXT: cmeq.16b v0, v0, #0
+; GISEL-NEXT: mvn.16b v0, v0
+; GISEL-NEXT: umov.b w8, v0[1]
+; GISEL-NEXT: umov.b w9, v0[0]
+; GISEL-NEXT: umov.b w10, v0[2]
+; GISEL-NEXT: umov.b w11, v0[3]
+; GISEL-NEXT: and w8, w8, #0x1
+; GISEL-NEXT: bfi w9, w8, #1, #31
+; GISEL-NEXT: and w8, w10, #0x1
+; GISEL-NEXT: umov.b w10, v0[4]
+; GISEL-NEXT: orr w8, w9, w8, lsl #2
+; GISEL-NEXT: and w9, w11, #0x1
+; GISEL-NEXT: umov.b w11, v0[5]
+; GISEL-NEXT: orr w8, w8, w9, lsl #3
+; GISEL-NEXT: and w9, w10, #0x1
+; GISEL-NEXT: umov.b w10, v0[6]
+; GISEL-NEXT: orr w8, w8, w9, lsl #4
+; GISEL-NEXT: and w9, w11, #0x1
+; GISEL-NEXT: umov.b w11, v0[7]
+; GISEL-NEXT: orr w8, w8, w9, lsl #5
+; GISEL-NEXT: and w9, w10, #0x1
+; GISEL-NEXT: umov.b w10, v0[8]
+; GISEL-NEXT: orr w8, w8, w9, lsl #6
+; GISEL-NEXT: and w9, w11, #0x1
+; GISEL-NEXT: umov.b w11, v0[9]
+; GISEL-NEXT: orr w8, w8, w9, lsl #7
+; GISEL-NEXT: and w9, w10, #0x1
+; GISEL-NEXT: umov.b w10, v0[10]
+; GISEL-NEXT: orr w8, w8, w9, lsl #8
+; GISEL-NEXT: and w9, w11, #0x1
+; GISEL-NEXT: umov.b w11, v0[11]
+; GISEL-NEXT: orr w8, w8, w9, lsl #9
+; GISEL-NEXT: and w9, w10, #0x1
+; GISEL-NEXT: umov.b w10, v0[12]
+; GISEL-NEXT: orr w8, w8, w9, lsl #10
+; GISEL-NEXT: and w9, w11, #0x1
+; GISEL-NEXT: umov.b w11, v0[13]
+; GISEL-NEXT: orr w8, w8, w9, lsl #11
+; GISEL-NEXT: and w9, w10, #0x1
+; GISEL-NEXT: umov.b w10, v0[14]
+; GISEL-NEXT: orr w8, w8, w9, lsl #12
+; GISEL-NEXT: and w9, w11, #0x1
+; GISEL-NEXT: umov.b w11, v0[15]
+; GISEL-NEXT: orr w8, w8, w9, lsl #13
+; GISEL-NEXT: and w9, w10, #0x1
+; GISEL-NEXT: orr w8, w8, w9, lsl #14
+; GISEL-NEXT: and w9, w11, #0x1
+; GISEL-NEXT: orr w8, w8, w9, lsl #15
+; GISEL-NEXT: strh w8, [sp, #14]
+; GISEL-NEXT: and w0, w8, #0xffff
+; GISEL-NEXT: add sp, sp, #16
+; GISEL-NEXT: ret
; Actual conversion
@@ -30,19 +101,50 @@ define i16 @convert_to_bitmask16(<16 x i8> %vec) {
}
define i16 @convert_to_bitmask8(<8 x i16> %vec) {
-; CHECK-LABEL: convert_to_bitmask8:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: Lloh2:
-; CHECK-NEXT: adrp x8, lCPI1_0 at PAGE
-; CHECK-NEXT: cmeq.8h v0, v0, #0
-; CHECK-NEXT: Lloh3:
-; CHECK-NEXT: ldr q1, [x8, lCPI1_0 at PAGEOFF]
-; CHECK-NEXT: bic.16b v0, v1, v0
-; CHECK-NEXT: addv.8h h0, v0
-; CHECK-NEXT: fmov w8, s0
-; CHECK-NEXT: and w0, w8, #0xff
-; CHECK-NEXT: ret
-; CHECK-NEXT: .loh AdrpLdr Lloh2, Lloh3
+; SDAG-LABEL: convert_to_bitmask8:
+; SDAG: ; %bb.0:
+; SDAG-NEXT: adrp x8, lCPI1_0 at PAGE
+; SDAG-NEXT: cmeq.8h v0, v0, #0
+; SDAG-NEXT: ldr q1, [x8, lCPI1_0 at PAGEOFF]
+; SDAG-NEXT: bic.16b v0, v1, v0
+; SDAG-NEXT: addv.8h h0, v0
+; SDAG-NEXT: fmov w8, s0
+; SDAG-NEXT: and w0, w8, #0xff
+; SDAG-NEXT: ret
+;
+; GISEL-LABEL: convert_to_bitmask8:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: sub sp, sp, #16
+; GISEL-NEXT: .cfi_def_cfa_offset 16
+; GISEL-NEXT: cmeq.8h v0, v0, #0
+; GISEL-NEXT: mvn.16b v0, v0
+; GISEL-NEXT: xtn.8b v0, v0
+; GISEL-NEXT: umov.b w8, v0[1]
+; GISEL-NEXT: umov.b w9, v0[0]
+; GISEL-NEXT: umov.b w10, v0[2]
+; GISEL-NEXT: umov.b w11, v0[3]
+; GISEL-NEXT: and w8, w8, #0x1
+; GISEL-NEXT: bfi w9, w8, #1, #31
+; GISEL-NEXT: and w8, w10, #0x1
+; GISEL-NEXT: umov.b w10, v0[4]
+; GISEL-NEXT: orr w8, w9, w8, lsl #2
+; GISEL-NEXT: and w9, w11, #0x1
+; GISEL-NEXT: umov.b w11, v0[5]
+; GISEL-NEXT: orr w8, w8, w9, lsl #3
+; GISEL-NEXT: and w9, w10, #0x1
+; GISEL-NEXT: umov.b w10, v0[6]
+; GISEL-NEXT: orr w8, w8, w9, lsl #4
+; GISEL-NEXT: and w9, w11, #0x1
+; GISEL-NEXT: umov.b w11, v0[7]
+; GISEL-NEXT: orr w8, w8, w9, lsl #5
+; GISEL-NEXT: and w9, w10, #0x1
+; GISEL-NEXT: orr w8, w8, w9, lsl #6
+; GISEL-NEXT: and w9, w11, #0x1
+; GISEL-NEXT: orr w8, w8, w9, lsl #7
+; GISEL-NEXT: strb w8, [sp, #15]
+; GISEL-NEXT: and w0, w8, #0xff
+; GISEL-NEXT: add sp, sp, #16
+; GISEL-NEXT: ret
%cmp_result = icmp ne <8 x i16> %vec, zeroinitializer
@@ -54,16 +156,13 @@ define i16 @convert_to_bitmask8(<8 x i16> %vec) {
define i4 @convert_to_bitmask4(<4 x i32> %vec) {
; CHECK-LABEL: convert_to_bitmask4:
; CHECK: ; %bb.0:
-; CHECK-NEXT: Lloh4:
; CHECK-NEXT: adrp x8, lCPI2_0 at PAGE
; CHECK-NEXT: cmeq.4s v0, v0, #0
-; CHECK-NEXT: Lloh5:
; CHECK-NEXT: ldr q1, [x8, lCPI2_0 at PAGEOFF]
; CHECK-NEXT: bic.16b v0, v1, v0
; CHECK-NEXT: addv.4s s0, v0
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
-; CHECK-NEXT: .loh AdrpLdr Lloh4, Lloh5
%cmp_result = icmp ne <4 x i32> %vec, zeroinitializer
@@ -74,17 +173,14 @@ define i4 @convert_to_bitmask4(<4 x i32> %vec) {
define i8 @convert_to_bitmask2(<2 x i64> %vec) {
; CHECK-LABEL: convert_to_bitmask2:
; CHECK: ; %bb.0:
-; CHECK-NEXT: Lloh6:
; CHECK-NEXT: adrp x8, lCPI3_0 at PAGE
; CHECK-NEXT: cmeq.2d v0, v0, #0
-; CHECK-NEXT: Lloh7:
; CHECK-NEXT: ldr q1, [x8, lCPI3_0 at PAGEOFF]
; CHECK-NEXT: bic.16b v0, v1, v0
; CHECK-NEXT: addp.2d d0, v0
; CHECK-NEXT: fmov w8, s0
; CHECK-NEXT: and w0, w8, #0x3
; CHECK-NEXT: ret
-; CHECK-NEXT: .loh AdrpLdr Lloh6, Lloh7
%cmp_result = icmp ne <2 x i64> %vec, zeroinitializer
@@ -97,16 +193,13 @@ define i8 @convert_to_bitmask2(<2 x i64> %vec) {
define i8 @clang_builtins_undef_concat_convert_to_bitmask4(<4 x i32> %vec) {
; CHECK-LABEL: clang_builtins_undef_concat_convert_to_bitmask4:
; CHECK: ; %bb.0:
-; CHECK-NEXT: Lloh8:
; CHECK-NEXT: adrp x8, lCPI4_0 at PAGE
; CHECK-NEXT: cmeq.4s v0, v0, #0
-; CHECK-NEXT: Lloh9:
; CHECK-NEXT: ldr q1, [x8, lCPI4_0 at PAGEOFF]
; CHECK-NEXT: bic.16b v0, v1, v0
; CHECK-NEXT: addv.4s s0, v0
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
-; CHECK-NEXT: .loh AdrpLdr Lloh8, Lloh9
%cmp_result = icmp ne <4 x i32> %vec, zeroinitializer
@@ -120,9 +213,7 @@ define i4 @convert_to_bitmask_no_compare(<4 x i32> %vec1, <4 x i32> %vec2) {
; CHECK-LABEL: convert_to_bitmask_no_compare:
; CHECK: ; %bb.0:
; CHECK-NEXT: and.16b v0, v0, v1
-; CHECK-NEXT: Lloh10:
; CHECK-NEXT: adrp x8, lCPI5_0 at PAGE
-; CHECK-NEXT: Lloh11:
; CHECK-NEXT: ldr q1, [x8, lCPI5_0 at PAGEOFF]
; CHECK-NEXT: shl.4s v0, v0, #31
; CHECK-NEXT: cmlt.4s v0, v0, #0
@@ -130,7 +221,6 @@ define i4 @convert_to_bitmask_no_compare(<4 x i32> %vec1, <4 x i32> %vec2) {
; CHECK-NEXT: addv.4s s0, v0
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
-; CHECK-NEXT: .loh AdrpLdr Lloh10, Lloh11
%cmp = and <4 x i32> %vec1, %vec2
@@ -144,16 +234,13 @@ define i4 @convert_to_bitmask_with_compare_chain(<4 x i32> %vec1, <4 x i32> %vec
; CHECK: ; %bb.0:
; CHECK-NEXT: cmeq.4s v2, v0, #0
; CHECK-NEXT: cmeq.4s v0, v0, v1
-; CHECK-NEXT: Lloh12:
; CHECK-NEXT: adrp x8, lCPI6_0 at PAGE
-; CHECK-NEXT: Lloh13:
; CHECK-NEXT: ldr q1, [x8, lCPI6_0 at PAGEOFF]
; CHECK-NEXT: bic.16b v0, v0, v2
; CHECK-NEXT: and.16b v0, v0, v1
; CHECK-NEXT: addv.4s s0, v0
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
-; CHECK-NEXT: .loh AdrpLdr Lloh12, Lloh13
%cmp1 = icmp ne <4 x i32> %vec1, zeroinitializer
@@ -167,10 +254,8 @@ define i4 @convert_to_bitmask_with_trunc_in_chain(<4 x i32> %vec1, <4 x i32> %ve
; CHECK-LABEL: convert_to_bitmask_with_trunc_in_chain:
; CHECK: ; %bb.0:
; CHECK-NEXT: cmeq.4s v0, v0, #0
-; CHECK-NEXT: Lloh14:
; CHECK-NEXT: adrp x8, lCPI7_0 at PAGE
; CHECK-NEXT: bic.16b v0, v1, v0
-; CHECK-NEXT: Lloh15:
; CHECK-NEXT: ldr q1, [x8, lCPI7_0 at PAGEOFF]
; CHECK-NEXT: shl.4s v0, v0, #31
; CHECK-NEXT: cmlt.4s v0, v0, #0
@@ -178,7 +263,6 @@ define i4 @convert_to_bitmask_with_trunc_in_chain(<4 x i32> %vec1, <4 x i32> %ve
; CHECK-NEXT: addv.4s s0, v0
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
-; CHECK-NEXT: .loh AdrpLdr Lloh14, Lloh15
%cmp1 = icmp ne <4 x i32> %vec1, zeroinitializer
@@ -193,7 +277,6 @@ define i4 @convert_to_bitmask_with_unknown_type_in_long_chain(<4 x i32> %vec1, <
; CHECK: ; %bb.0:
; CHECK-NEXT: cmeq.4s v0, v0, #0
; CHECK-NEXT: cmeq.4s v1, v1, #0
-; CHECK-NEXT: Lloh16:
; CHECK-NEXT: adrp x8, lCPI8_0 at PAGE
; CHECK-NEXT: movi d2, #0x000000ffffffff
; CHECK-NEXT: movi d3, #0x00ffffffffffff
@@ -207,7 +290,6 @@ define i4 @convert_to_bitmask_with_unknown_type_in_long_chain(<4 x i32> %vec1, <
; CHECK-NEXT: mov.h v1[2], wzr
; CHECK-NEXT: orr.8b v0, v0, v3
; CHECK-NEXT: orr.8b v0, v1, v0
-; CHECK-NEXT: Lloh17:
; CHECK-NEXT: ldr d1, [x8, lCPI8_0 at PAGEOFF]
; CHECK-NEXT: shl.4h v0, v0, #15
; CHECK-NEXT: cmlt.4h v0, v0, #0
@@ -215,7 +297,6 @@ define i4 @convert_to_bitmask_with_unknown_type_in_long_chain(<4 x i32> %vec1, <
; CHECK-NEXT: addv.4h h0, v0
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
-; CHECK-NEXT: .loh AdrpLdr Lloh16, Lloh17
%cmp1 = icmp ne <4 x i32> %vec1, zeroinitializer
@@ -238,17 +319,14 @@ define i4 @convert_to_bitmask_with_different_types_in_chain(<4 x i16> %vec1, <4
; CHECK: ; %bb.0:
; CHECK-NEXT: cmeq.4s v1, v1, #0
; CHECK-NEXT: cmeq.4h v0, v0, #0
-; CHECK-NEXT: Lloh18:
; CHECK-NEXT: adrp x8, lCPI9_0 at PAGE
; CHECK-NEXT: xtn.4h v1, v1
; CHECK-NEXT: orn.8b v0, v1, v0
-; CHECK-NEXT: Lloh19:
; CHECK-NEXT: ldr d1, [x8, lCPI9_0 at PAGEOFF]
; CHECK-NEXT: and.8b v0, v0, v1
; CHECK-NEXT: addv.4h h0, v0
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
-; CHECK-NEXT: .loh AdrpLdr Lloh18, Lloh19
%cmp1 = icmp ne <4 x i16> %vec1, zeroinitializer
@@ -259,21 +337,73 @@ define i4 @convert_to_bitmask_with_different_types_in_chain(<4 x i16> %vec1, <4
}
define i16 @convert_to_bitmask_without_knowing_type(<16 x i1> %vec) {
-; CHECK-LABEL: convert_to_bitmask_without_knowing_type:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: shl.16b v0, v0, #7
-; CHECK-NEXT: Lloh20:
-; CHECK-NEXT: adrp x8, lCPI10_0 at PAGE
-; CHECK-NEXT: Lloh21:
-; CHECK-NEXT: ldr q1, [x8, lCPI10_0 at PAGEOFF]
-; CHECK-NEXT: cmlt.16b v0, v0, #0
-; CHECK-NEXT: and.16b v0, v0, v1
-; CHECK-NEXT: ext.16b v1, v0, v0, #8
-; CHECK-NEXT: zip1.16b v0, v0, v1
-; CHECK-NEXT: addv.8h h0, v0
-; CHECK-NEXT: fmov w0, s0
-; CHECK-NEXT: ret
-; CHECK-NEXT: .loh AdrpLdr Lloh20, Lloh21
+; SDAG-LABEL: convert_to_bitmask_without_knowing_type:
+; SDAG: ; %bb.0:
+; SDAG-NEXT: shl.16b v0, v0, #7
+; SDAG-NEXT: adrp x8, lCPI10_0 at PAGE
+; SDAG-NEXT: ldr q1, [x8, lCPI10_0 at PAGEOFF]
+; SDAG-NEXT: cmlt.16b v0, v0, #0
+; SDAG-NEXT: and.16b v0, v0, v1
+; SDAG-NEXT: ext.16b v1, v0, v0, #8
+; SDAG-NEXT: zip1.16b v0, v0, v1
+; SDAG-NEXT: addv.8h h0, v0
+; SDAG-NEXT: fmov w0, s0
+; SDAG-NEXT: ret
+;
+; GISEL-LABEL: convert_to_bitmask_without_knowing_type:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: sub sp, sp, #16
+; GISEL-NEXT: .cfi_def_cfa_offset 16
+; GISEL-NEXT: umov.b w8, v0[1]
+; GISEL-NEXT: umov.b w9, v0[0]
+; GISEL-NEXT: umov.b w10, v0[2]
+; GISEL-NEXT: umov.b w11, v0[3]
+; GISEL-NEXT: and w8, w8, #0x1
+; GISEL-NEXT: bfi w9, w8, #1, #31
+; GISEL-NEXT: and w8, w10, #0x1
+; GISEL-NEXT: umov.b w10, v0[4]
+; GISEL-NEXT: orr w8, w9, w8, lsl #2
+; GISEL-NEXT: and w9, w11, #0x1
+; GISEL-NEXT: umov.b w11, v0[5]
+; GISEL-NEXT: orr w8, w8, w9, lsl #3
+; GISEL-NEXT: and w9, w10, #0x1
+; GISEL-NEXT: umov.b w10, v0[6]
+; GISEL-NEXT: orr w8, w8, w9, lsl #4
+; GISEL-NEXT: and w9, w11, #0x1
+; GISEL-NEXT: umov.b w11, v0[7]
+; GISEL-NEXT: orr w8, w8, w9, lsl #5
+; GISEL-NEXT: and w9, w10, #0x1
+; GISEL-NEXT: umov.b w10, v0[8]
+; GISEL-NEXT: orr w8, w8, w9, lsl #6
+; GISEL-NEXT: and w9, w11, #0x1
+; GISEL-NEXT: umov.b w11, v0[9]
+; GISEL-NEXT: orr w8, w8, w9, lsl #7
+; GISEL-NEXT: and w9, w10, #0x1
+; GISEL-NEXT: umov.b w10, v0[10]
+; GISEL-NEXT: orr w8, w8, w9, lsl #8
+; GISEL-NEXT: and w9, w11, #0x1
+; GISEL-NEXT: umov.b w11, v0[11]
+; GISEL-NEXT: orr w8, w8, w9, lsl #9
+; GISEL-NEXT: and w9, w10, #0x1
+; GISEL-NEXT: umov.b w10, v0[12]
+; GISEL-NEXT: orr w8, w8, w9, lsl #10
+; GISEL-NEXT: and w9, w11, #0x1
+; GISEL-NEXT: umov.b w11, v0[13]
+; GISEL-NEXT: orr w8, w8, w9, lsl #11
+; GISEL-NEXT: and w9, w10, #0x1
+; GISEL-NEXT: umov.b w10, v0[14]
+; GISEL-NEXT: orr w8, w8, w9, lsl #12
+; GISEL-NEXT: and w9, w11, #0x1
+; GISEL-NEXT: umov.b w11, v0[15]
+; GISEL-NEXT: orr w8, w8, w9, lsl #13
+; GISEL-NEXT: and w9, w10, #0x1
+; GISEL-NEXT: orr w8, w8, w9, lsl #14
+; GISEL-NEXT: and w9, w11, #0x1
+; GISEL-NEXT: orr w8, w8, w9, lsl #15
+; GISEL-NEXT: strh w8, [sp, #14]
+; GISEL-NEXT: and w0, w8, #0xffff
+; GISEL-NEXT: add sp, sp, #16
+; GISEL-NEXT: ret
%bitmask = bitcast <16 x i1> %vec to i16
ret i16 %bitmask
@@ -282,16 +412,13 @@ define i16 @convert_to_bitmask_without_knowing_type(<16 x i1> %vec) {
define i2 @convert_to_bitmask_2xi32(<2 x i32> %vec) {
; CHECK-LABEL: convert_to_bitmask_2xi32:
; CHECK: ; %bb.0:
-; CHECK-NEXT: Lloh22:
; CHECK-NEXT: adrp x8, lCPI11_0 at PAGE
; CHECK-NEXT: cmeq.2s v0, v0, #0
-; CHECK-NEXT: Lloh23:
; CHECK-NEXT: ldr d1, [x8, lCPI11_0 at PAGEOFF]
; CHECK-NEXT: bic.8b v0, v1, v0
; CHECK-NEXT: addp.2s v0, v0, v0
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
-; CHECK-NEXT: .loh AdrpLdr Lloh22, Lloh23
%cmp_result = icmp ne <2 x i32> %vec, zeroinitializer
%bitmask = bitcast <2 x i1> %cmp_result to i2
@@ -302,16 +429,13 @@ define i4 @convert_to_bitmask_4xi8(<4 x i8> %vec) {
; CHECK-LABEL: convert_to_bitmask_4xi8:
; CHECK: ; %bb.0:
; CHECK-NEXT: bic.4h v0, #255, lsl #8
-; CHECK-NEXT: Lloh24:
; CHECK-NEXT: adrp x8, lCPI12_0 at PAGE
-; CHECK-NEXT: Lloh25:
; CHECK-NEXT: ldr d1, [x8, lCPI12_0 at PAGEOFF]
; CHECK-NEXT: cmeq.4h v0, v0, #0
; CHECK-NEXT: bic.8b v0, v1, v0
; CHECK-NEXT: addv.4h h0, v0
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
-; CHECK-NEXT: .loh AdrpLdr Lloh24, Lloh25
%cmp_result = icmp ne <4 x i8> %vec, zeroinitializer
%bitmask = bitcast <4 x i1> %cmp_result to i4
@@ -322,17 +446,14 @@ define i8 @convert_to_bitmask_8xi2(<8 x i2> %vec) {
; CHECK-LABEL: convert_to_bitmask_8xi2:
; CHECK: ; %bb.0:
; CHECK-NEXT: movi.8b v1, #3
-; CHECK-NEXT: Lloh26:
; CHECK-NEXT: adrp x8, lCPI13_0 at PAGE
; CHECK-NEXT: and.8b v0, v0, v1
-; CHECK-NEXT: Lloh27:
; CHECK-NEXT: ldr d1, [x8, lCPI13_0 at PAGEOFF]
; CHECK-NEXT: cmeq.8b v0, v0, #0
; CHECK-NEXT: bic.8b v0, v1, v0
; CHECK-NEXT: addv.8b b0, v0
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
-; CHECK-NEXT: .loh AdrpLdr Lloh26, Lloh27
%cmp_result = icmp ne <8 x i2> %vec, zeroinitializer
%bitmask = bitcast <8 x i1> %cmp_result to i8
@@ -344,16 +465,13 @@ define i4 @convert_to_bitmask_float(<4 x float> %vec) {
; CHECK: ; %bb.0:
; CHECK-NEXT: fcmgt.4s v1, v0, #0.0
; CHECK-NEXT: fcmlt.4s v0, v0, #0.0
-; CHECK-NEXT: Lloh28:
; CHECK-NEXT: adrp x8, lCPI14_0 at PAGE
; CHECK-NEXT: orr.16b v0, v0, v1
-; CHECK-NEXT: Lloh29:
; CHECK-NEXT: ldr q1, [x8, lCPI14_0 at PAGEOFF]
; CHECK-NEXT: and.16b v0, v0, v1
; CHECK-NEXT: addv.4s s0, v0
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
-; CHECK-NEXT: .loh AdrpLdr Lloh28, Lloh29
%cmp_result = fcmp one <4 x float> %vec, zeroinitializer
@@ -364,24 +482,58 @@ define i4 @convert_to_bitmask_float(<4 x float> %vec) {
; Larger vector types don't map directly, but they can be split/truncated and then converted.
; After the comparison against 0, this is truncated to <8 x i16>, which is valid again.
define i8 @convert_large_vector(<8 x i32> %vec) {
-; CHECK-LABEL: convert_large_vector:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: sub sp, sp, #16
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: cmeq.4s v1, v1, #0
-; CHECK-NEXT: cmeq.4s v0, v0, #0
-; CHECK-NEXT: Lloh30:
-; CHECK-NEXT: adrp x8, lCPI15_0 at PAGE
-; CHECK-NEXT: uzp1.8h v0, v0, v1
-; CHECK-NEXT: Lloh31:
-; CHECK-NEXT: ldr q1, [x8, lCPI15_0 at PAGEOFF]
-; CHECK-NEXT: bic.16b v0, v1, v0
-; CHECK-NEXT: addv.8h h0, v0
-; CHECK-NEXT: fmov w8, s0
-; CHECK-NEXT: and w0, w8, #0xff
-; CHECK-NEXT: add sp, sp, #16
-; CHECK-NEXT: ret
-; CHECK-NEXT: .loh AdrpLdr Lloh30, Lloh31
+; SDAG-LABEL: convert_large_vector:
+; SDAG: ; %bb.0:
+; SDAG-NEXT: sub sp, sp, #16
+; SDAG-NEXT: .cfi_def_cfa_offset 16
+; SDAG-NEXT: cmeq.4s v1, v1, #0
+; SDAG-NEXT: cmeq.4s v0, v0, #0
+; SDAG-NEXT: adrp x8, lCPI15_0 at PAGE
+; SDAG-NEXT: uzp1.8h v0, v0, v1
+; SDAG-NEXT: ldr q1, [x8, lCPI15_0 at PAGEOFF]
+; SDAG-NEXT: bic.16b v0, v1, v0
+; SDAG-NEXT: addv.8h h0, v0
+; SDAG-NEXT: fmov w8, s0
+; SDAG-NEXT: and w0, w8, #0xff
+; SDAG-NEXT: add sp, sp, #16
+; SDAG-NEXT: ret
+;
+; GISEL-LABEL: convert_large_vector:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: sub sp, sp, #16
+; GISEL-NEXT: .cfi_def_cfa_offset 16
+; GISEL-NEXT: cmeq.4s v0, v0, #0
+; GISEL-NEXT: cmeq.4s v1, v1, #0
+; GISEL-NEXT: mvn.16b v0, v0
+; GISEL-NEXT: mvn.16b v1, v1
+; GISEL-NEXT: uzp1.8h v0, v0, v1
+; GISEL-NEXT: xtn.8b v0, v0
+; GISEL-NEXT: umov.b w8, v0[1]
+; GISEL-NEXT: umov.b w9, v0[0]
+; GISEL-NEXT: umov.b w10, v0[2]
+; GISEL-NEXT: umov.b w11, v0[3]
+; GISEL-NEXT: and w8, w8, #0x1
+; GISEL-NEXT: bfi w9, w8, #1, #31
+; GISEL-NEXT: and w8, w10, #0x1
+; GISEL-NEXT: umov.b w10, v0[4]
+; GISEL-NEXT: orr w8, w9, w8, lsl #2
+; GISEL-NEXT: and w9, w11, #0x1
+; GISEL-NEXT: umov.b w11, v0[5]
+; GISEL-NEXT: orr w8, w8, w9, lsl #3
+; GISEL-NEXT: and w9, w10, #0x1
+; GISEL-NEXT: umov.b w10, v0[6]
+; GISEL-NEXT: orr w8, w8, w9, lsl #4
+; GISEL-NEXT: and w9, w11, #0x1
+; GISEL-NEXT: umov.b w11, v0[7]
+; GISEL-NEXT: orr w8, w8, w9, lsl #5
+; GISEL-NEXT: and w9, w10, #0x1
+; GISEL-NEXT: orr w8, w8, w9, lsl #6
+; GISEL-NEXT: and w9, w11, #0x1
+; GISEL-NEXT: orr w8, w8, w9, lsl #7
+; GISEL-NEXT: strb w8, [sp, #15]
+; GISEL-NEXT: and w0, w8, #0xff
+; GISEL-NEXT: add sp, sp, #16
+; GISEL-NEXT: ret
%cmp_result = icmp ne <8 x i32> %vec, zeroinitializer
@@ -393,17 +545,14 @@ define i4 @convert_legalized_illegal_element_size(<4 x i22> %vec) {
; CHECK-LABEL: convert_legalized_illegal_element_size:
; CHECK: ; %bb.0:
; CHECK-NEXT: movi.4s v1, #63, msl #16
-; CHECK-NEXT: Lloh32:
; CHECK-NEXT: adrp x8, lCPI16_0 at PAGE
; CHECK-NEXT: cmtst.4s v0, v0, v1
-; CHECK-NEXT: Lloh33:
; CHECK-NEXT: ldr d1, [x8, lCPI16_0 at PAGEOFF]
; CHECK-NEXT: xtn.4h v0, v0
; CHECK-NEXT: and.8b v0, v0, v1
; CHECK-NEXT: addv.4h h0, v0
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
-; CHECK-NEXT: .loh AdrpLdr Lloh32, Lloh33
%cmp_result = icmp ne <4 x i22> %vec, zeroinitializer
%bitmask = bitcast <4 x i1> %cmp_result to i4
@@ -415,7 +564,6 @@ define i8 @no_direct_convert_for_bad_concat(<4 x i32> %vec) {
; CHECK-LABEL: no_direct_convert_for_bad_concat:
; CHECK: ; %bb.0:
; CHECK-NEXT: cmtst.4s v0, v0, v0
-; CHECK-NEXT: Lloh34:
; CHECK-NEXT: adrp x8, lCPI17_0 at PAGE
; CHECK-NEXT: xtn.4h v0, v0
; CHECK-NEXT: umov.h w9, v0[0]
@@ -427,14 +575,12 @@ define i8 @no_direct_convert_for_bad_concat(<4 x i32> %vec) {
; CHECK-NEXT: umov.h w9, v0[3]
; CHECK-NEXT: mov.b v1[7], w9
; CHECK-NEXT: shl.8b v0, v1, #7
-; CHECK-NEXT: Lloh35:
; CHECK-NEXT: ldr d1, [x8, lCPI17_0 at PAGEOFF]
; CHECK-NEXT: cmlt.8b v0, v0, #0
; CHECK-NEXT: and.8b v0, v0, v1
; CHECK-NEXT: addv.8b b0, v0
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
-; CHECK-NEXT: .loh AdrpLdr Lloh34, Lloh35
%cmp_result = icmp ne <4 x i32> %vec, zeroinitializer
%vector_pad = shufflevector <4 x i1> poison, <4 x i1> %cmp_result, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 4, i32 5, i32 6, i32 7>
@@ -443,11 +589,18 @@ define i8 @no_direct_convert_for_bad_concat(<4 x i32> %vec) {
}
define <8 x i1> @no_convert_without_direct_bitcast(<8 x i16> %vec) {
-; CHECK-LABEL: no_convert_without_direct_bitcast:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: cmtst.8h v0, v0, v0
-; CHECK-NEXT: xtn.8b v0, v0
-; CHECK-NEXT: ret
+; SDAG-LABEL: no_convert_without_direct_bitcast:
+; SDAG: ; %bb.0:
+; SDAG-NEXT: cmtst.8h v0, v0, v0
+; SDAG-NEXT: xtn.8b v0, v0
+; SDAG-NEXT: ret
+;
+; GISEL-LABEL: no_convert_without_direct_bitcast:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: cmeq.8h v0, v0, #0
+; GISEL-NEXT: mvn.16b v0, v0
+; GISEL-NEXT: xtn.8b v0, v0
+; GISEL-NEXT: ret
%cmp_result = icmp ne <8 x i16> %vec, zeroinitializer
ret <8 x i1> %cmp_result
@@ -492,28 +645,220 @@ define i6 @no_combine_illegal_num_elements(<6 x i32> %vec) {
; Only apply the combine when casting a vector to a scalar.
define <2 x i8> @vector_to_vector_cast(<16 x i1> %arg) nounwind {
-; CHECK-LABEL: vector_to_vector_cast:
-; CHECK: ; %bb.0:
-; CHECK-NEXT: sub sp, sp, #16
-; CHECK-NEXT: shl.16b v0, v0, #7
-; CHECK-NEXT: Lloh36:
-; CHECK-NEXT: adrp x8, lCPI20_0 at PAGE
-; CHECK-NEXT: Lloh37:
-; CHECK-NEXT: ldr q1, [x8, lCPI20_0 at PAGEOFF]
-; CHECK-NEXT: add x8, sp, #14
-; CHECK-NEXT: cmlt.16b v0, v0, #0
-; CHECK-NEXT: and.16b v0, v0, v1
-; CHECK-NEXT: ext.16b v1, v0, v0, #8
-; CHECK-NEXT: zip1.16b v0, v0, v1
-; CHECK-NEXT: addv.8h h0, v0
-; CHECK-NEXT: str h0, [sp, #14]
-; CHECK-NEXT: ld1.b { v0 }[0], [x8]
-; CHECK-NEXT: orr x8, x8, #0x1
-; CHECK-NEXT: ld1.b { v0 }[4], [x8]
-; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0
-; CHECK-NEXT: add sp, sp, #16
-; CHECK-NEXT: ret
-; CHECK-NEXT: .loh AdrpLdr Lloh36, Lloh37
+; SDAG-LABEL: vector_to_vector_cast:
+; SDAG: ; %bb.0:
+; SDAG-NEXT: sub sp, sp, #16
+; SDAG-NEXT: shl.16b v0, v0, #7
+; SDAG-NEXT: adrp x8, lCPI20_0 at PAGE
+; SDAG-NEXT: ldr q1, [x8, lCPI20_0 at PAGEOFF]
+; SDAG-NEXT: add x8, sp, #14
+; SDAG-NEXT: cmlt.16b v0, v0, #0
+; SDAG-NEXT: and.16b v0, v0, v1
+; SDAG-NEXT: ext.16b v1, v0, v0, #8
+; SDAG-NEXT: zip1.16b v0, v0, v1
+; SDAG-NEXT: addv.8h h0, v0
+; SDAG-NEXT: str h0, [sp, #14]
+; SDAG-NEXT: ld1.b { v0 }[0], [x8]
+; SDAG-NEXT: orr x8, x8, #0x1
+; SDAG-NEXT: ld1.b { v0 }[4], [x8]
+; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $q0
+; SDAG-NEXT: add sp, sp, #16
+; SDAG-NEXT: ret
+;
+; GISEL-LABEL: vector_to_vector_cast:
+; GISEL: ; %bb.0:
+; GISEL-NEXT: sub sp, sp, #16
+; GISEL-NEXT: umov.b w8, v0[1]
+; GISEL-NEXT: mov d1, v0[1]
+; GISEL-NEXT: umov.b w10, v0[1]
+; GISEL-NEXT: umov.b w9, v0[0]
+; GISEL-NEXT: umov.b w13, v0[0]
+; GISEL-NEXT: umov.b w14, v0[2]
+; GISEL-NEXT: umov.b w15, v0[3]
+; GISEL-NEXT: umov.b w11, v0[2]
+; GISEL-NEXT: umov.b w16, v0[4]
+; GISEL-NEXT: umov.b w17, v0[5]
+; GISEL-NEXT: umov.b w12, v0[3]
+; GISEL-NEXT: and w8, w8, #0x1
+; GISEL-NEXT: and w10, w10, #0x1
+; GISEL-NEXT: umov.b w0, v1[1]
+; GISEL-NEXT: bfi w9, w8, #1, #31
+; GISEL-NEXT: bfi w13, w10, #1, #31
+; GISEL-NEXT: and w14, w14, #0x1
+; GISEL-NEXT: umov.b w8, v1[0]
+; GISEL-NEXT: umov.b w10, v1[2]
+; GISEL-NEXT: and w15, w15, #0x1
+; GISEL-NEXT: orr w13, w13, w14, lsl #2
+; GISEL-NEXT: umov.b w14, v1[3]
+; GISEL-NEXT: and w11, w11, #0x1
+; GISEL-NEXT: and w0, w0, #0x1
+; GISEL-NEXT: and w16, w16, #0x1
+; GISEL-NEXT: orr w9, w9, w11, lsl #2
+; GISEL-NEXT: orr w13, w13, w15, lsl #3
+; GISEL-NEXT: umov.b w15, v1[4]
+; GISEL-NEXT: umov.b w11, v0[6]
+; GISEL-NEXT: bfi w8, w0, #1, #31
+; GISEL-NEXT: and w10, w10, #0x1
+; GISEL-NEXT: and w17, w17, #0x1
+; GISEL-NEXT: orr w13, w13, w16, lsl #4
+; GISEL-NEXT: and w14, w14, #0x1
+; GISEL-NEXT: umov.b w0, v0[7]
+; GISEL-NEXT: orr w8, w8, w10, lsl #2
+; GISEL-NEXT: umov.b w10, v1[5]
+; GISEL-NEXT: umov.b w16, v1[6]
+; GISEL-NEXT: orr w13, w13, w17, lsl #5
+; GISEL-NEXT: umov.b w17, v0[4]
+; GISEL-NEXT: and w15, w15, #0x1
+; GISEL-NEXT: orr w8, w8, w14, lsl #3
+; GISEL-NEXT: and w12, w12, #0x1
+; GISEL-NEXT: and w11, w11, #0x1
+; GISEL-NEXT: umov.b w14, v1[7]
+; GISEL-NEXT: orr w9, w9, w12, lsl #3
+; GISEL-NEXT: orr w11, w13, w11, lsl #6
+; GISEL-NEXT: orr w8, w8, w15, lsl #4
+; GISEL-NEXT: umov.b w15, v0[5]
+; GISEL-NEXT: and w10, w10, #0x1
+; GISEL-NEXT: and w0, w0, #0x1
+; GISEL-NEXT: and w12, w17, #0x1
+; GISEL-NEXT: umov.b w13, v0[1]
+; GISEL-NEXT: orr w8, w8, w10, lsl #5
+; GISEL-NEXT: and w16, w16, #0x1
+; GISEL-NEXT: orr w9, w9, w12, lsl #4
+; GISEL-NEXT: umov.b w10, v0[0]
+; GISEL-NEXT: orr w11, w11, w0, lsl #7
+; GISEL-NEXT: and w14, w14, #0x1
+; GISEL-NEXT: and w12, w15, #0x1
+; GISEL-NEXT: umov.b w15, v0[2]
+; GISEL-NEXT: orr w8, w8, w16, lsl #6
+; GISEL-NEXT: orr w9, w9, w12, lsl #5
+; GISEL-NEXT: umov.b w12, v0[6]
+; GISEL-NEXT: strb w11, [sp, #8]
+; GISEL-NEXT: and w11, w13, #0x1
+; GISEL-NEXT: umov.b w13, v0[3]
+; GISEL-NEXT: orr w8, w8, w14, lsl #7
+; GISEL-NEXT: umov.b w14, v0[7]
+; GISEL-NEXT: ldr b0, [sp, #8]
+; GISEL-NEXT: bfi w10, w11, #1, #31
+; GISEL-NEXT: and w11, w15, #0x1
+; GISEL-NEXT: strb w8, [sp, #9]
+; GISEL-NEXT: umov.b w15, v0[4]
+; GISEL-NEXT: and w8, w12, #0x1
+; GISEL-NEXT: orr w10, w10, w11, lsl #2
+; GISEL-NEXT: orr w8, w9, w8, lsl #6
+; GISEL-NEXT: and w9, w13, #0x1
+; GISEL-NEXT: umov.b w11, v0[1]
+; GISEL-NEXT: orr w9, w10, w9, lsl #3
+; GISEL-NEXT: umov.b w10, v0[5]
+; GISEL-NEXT: umov.b w12, v0[0]
+; GISEL-NEXT: and w13, w14, #0x1
+; GISEL-NEXT: umov.b w16, v0[2]
+; GISEL-NEXT: umov.b w17, v0[3]
+; GISEL-NEXT: and w14, w15, #0x1
+; GISEL-NEXT: umov.b w15, v0[2]
+; GISEL-NEXT: orr w8, w8, w13, lsl #7
+; GISEL-NEXT: orr w9, w9, w14, lsl #4
+; GISEL-NEXT: umov.b w13, v0[6]
+; GISEL-NEXT: and w11, w11, #0x1
+; GISEL-NEXT: umov.b w14, v0[3]
+; GISEL-NEXT: strb w8, [sp, #10]
+; GISEL-NEXT: and w8, w10, #0x1
+; GISEL-NEXT: bfi w12, w11, #1, #31
+; GISEL-NEXT: orr w8, w9, w8, lsl #5
+; GISEL-NEXT: umov.b w10, v0[4]
+; GISEL-NEXT: and w9, w15, #0x1
+; GISEL-NEXT: umov.b w11, v0[7]
+; GISEL-NEXT: umov.b w15, v0[1]
+; GISEL-NEXT: orr w9, w12, w9, lsl #2
+; GISEL-NEXT: umov.b w12, v0[5]
+; GISEL-NEXT: and w13, w13, #0x1
+; GISEL-NEXT: and w14, w14, #0x1
+; GISEL-NEXT: orr w8, w8, w13, lsl #6
+; GISEL-NEXT: umov.b w13, v0[0]
+; GISEL-NEXT: orr w9, w9, w14, lsl #3
+; GISEL-NEXT: and w10, w10, #0x1
+; GISEL-NEXT: umov.b w14, v0[6]
+; GISEL-NEXT: and w11, w11, #0x1
+; GISEL-NEXT: and w15, w15, #0x1
+; GISEL-NEXT: umov.b w0, v0[3]
+; GISEL-NEXT: orr w9, w9, w10, lsl #4
+; GISEL-NEXT: and w10, w12, #0x1
+; GISEL-NEXT: umov.b w12, v0[7]
+; GISEL-NEXT: orr w8, w8, w11, lsl #7
+; GISEL-NEXT: bfi w13, w15, #1, #31
+; GISEL-NEXT: and w11, w16, #0x1
+; GISEL-NEXT: orr w9, w9, w10, lsl #5
+; GISEL-NEXT: and w10, w14, #0x1
+; GISEL-NEXT: umov.b w14, v0[4]
+; GISEL-NEXT: strb w8, [sp, #11]
+; GISEL-NEXT: umov.b w15, v0[1]
+; GISEL-NEXT: umov.b w16, v0[3]
+; GISEL-NEXT: orr w8, w9, w10, lsl #6
+; GISEL-NEXT: orr w9, w13, w11, lsl #2
+; GISEL-NEXT: and w10, w12, #0x1
+; GISEL-NEXT: and w11, w17, #0x1
+; GISEL-NEXT: umov.b w12, v0[5]
+; GISEL-NEXT: umov.b w17, v0[0]
+; GISEL-NEXT: orr w8, w8, w10, lsl #7
+; GISEL-NEXT: orr w9, w9, w11, lsl #3
+; GISEL-NEXT: umov.b w10, v0[1]
+; GISEL-NEXT: and w11, w14, #0x1
+; GISEL-NEXT: umov.b w14, v0[0]
+; GISEL-NEXT: and w15, w15, #0x1
+; GISEL-NEXT: orr w9, w9, w11, lsl #4
+; GISEL-NEXT: umov.b w11, v0[2]
+; GISEL-NEXT: umov.b w13, v0[6]
+; GISEL-NEXT: and w12, w12, #0x1
+; GISEL-NEXT: bfi w17, w15, #1, #31
+; GISEL-NEXT: umov.b w15, v0[5]
+; GISEL-NEXT: orr w9, w9, w12, lsl #5
+; GISEL-NEXT: and w10, w10, #0x1
+; GISEL-NEXT: umov.b w12, v0[2]
+; GISEL-NEXT: bfi w14, w10, #1, #31
+; GISEL-NEXT: umov.b w10, v0[4]
+; GISEL-NEXT: ldr b1, [sp, #9]
+; GISEL-NEXT: and w11, w11, #0x1
+; GISEL-NEXT: and w13, w13, #0x1
+; GISEL-NEXT: strb w8, [sp, #12]
+; GISEL-NEXT: orr w11, w14, w11, lsl #2
+; GISEL-NEXT: and w14, w16, #0x1
+; GISEL-NEXT: umov.b w16, v0[4]
+; GISEL-NEXT: and w12, w12, #0x1
+; GISEL-NEXT: and w15, w15, #0x1
+; GISEL-NEXT: orr w9, w9, w13, lsl #6
+; GISEL-NEXT: orr w11, w11, w14, lsl #3
+; GISEL-NEXT: orr w12, w17, w12, lsl #2
+; GISEL-NEXT: and w10, w10, #0x1
+; GISEL-NEXT: and w17, w0, #0x1
+; GISEL-NEXT: umov.b w0, v0[5]
+; GISEL-NEXT: umov.b w14, v0[6]
+; GISEL-NEXT: orr w10, w11, w10, lsl #4
+; GISEL-NEXT: orr w12, w12, w17, lsl #3
+; GISEL-NEXT: umov.b w11, v0[7]
+; GISEL-NEXT: and w16, w16, #0x1
+; GISEL-NEXT: umov.b w17, v0[6]
+; GISEL-NEXT: orr w10, w10, w15, lsl #5
+; GISEL-NEXT: umov.b w15, v0[7]
+; GISEL-NEXT: orr w12, w12, w16, lsl #4
+; GISEL-NEXT: and w16, w0, #0x1
+; GISEL-NEXT: umov.b w0, v0[7]
+; GISEL-NEXT: and w14, w14, #0x1
+; GISEL-NEXT: orr w12, w12, w16, lsl #5
+; GISEL-NEXT: orr w10, w10, w14, lsl #6
+; GISEL-NEXT: and w11, w11, #0x1
+; GISEL-NEXT: and w13, w17, #0x1
+; GISEL-NEXT: orr w9, w9, w11, lsl #7
+; GISEL-NEXT: mov.s v0[1], v1[0]
+; GISEL-NEXT: orr w11, w12, w13, lsl #6
+; GISEL-NEXT: and w12, w15, #0x1
+; GISEL-NEXT: ; kill: def $d0 killed $d0 killed $q0
+; GISEL-NEXT: orr w8, w10, w12, lsl #7
+; GISEL-NEXT: and w10, w0, #0x1
+; GISEL-NEXT: strb w9, [sp, #13]
+; GISEL-NEXT: orr w9, w11, w10, lsl #7
+; GISEL-NEXT: strb w8, [sp, #14]
+; GISEL-NEXT: strb w9, [sp, #15]
+; GISEL-NEXT: add sp, sp, #16
+; GISEL-NEXT: ret
%bc = bitcast <16 x i1> %arg to <2 x i8>
ret <2 x i8> %bc
}
>From 366d57a3b43847b25b370a6da0e439e1bf7e7435 Mon Sep 17 00:00:00 2001
From: Amara Emerson <amara at apple.com>
Date: Fri, 27 Dec 2024 20:54:02 -0800
Subject: [PATCH 4/4] Try to fix commit stack
Created using spr 1.3.5
---
.../llvm/CodeGen/GlobalISel/LegalizerHelper.h | 3 -
.../CodeGen/GlobalISel/LegalizerHelper.cpp | 47 +-
.../AArch64/GISel/AArch64LegalizerInfo.cpp | 3 +-
.../AArch64/GlobalISel/legalize-bitcast.mir | 59 +-
.../legalize-store-vector-bools.mir | 68 +-
.../AArch64/vec-combine-compare-to-bitmask.ll | 605 ++++--------------
6 files changed, 145 insertions(+), 640 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
index 4e18f5cc913a7e..fac059803b9489 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h
@@ -302,9 +302,6 @@ class LegalizerHelper {
/// same type as \p Res.
MachineInstrBuilder createStackStoreLoad(const DstOp &Res, const SrcOp &Val);
- /// Given a store of a boolean vector, scalarize it.
- LegalizeResult scalarizeVectorBooleanStore(GStore &MI);
-
/// Get a pointer to vector element \p Index located in memory for a vector of
/// type \p VecTy starting at a base address of \p VecPtr. If \p Index is out
/// of bounds the returned pointer is unspecified, but will be within the
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 0bfa897ecf4047..7dece931e8e0eb 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -4143,8 +4143,9 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerStore(GStore &StoreMI) {
}
if (MemTy.isVector()) {
+ // TODO: Handle vector trunc stores
if (MemTy != SrcTy)
- return scalarizeVectorBooleanStore(StoreMI);
+ return UnableToLegalize;
// TODO: We can do better than scalarizing the vector and at least split it
// in half.
@@ -4199,50 +4200,6 @@ LegalizerHelper::LegalizeResult LegalizerHelper::lowerStore(GStore &StoreMI) {
return Legalized;
}
-LegalizerHelper::LegalizeResult
-LegalizerHelper::scalarizeVectorBooleanStore(GStore &StoreMI) {
- Register SrcReg = StoreMI.getValueReg();
- Register PtrReg = StoreMI.getPointerReg();
- LLT SrcTy = MRI.getType(SrcReg);
- MachineMemOperand &MMO = **StoreMI.memoperands_begin();
- LLT MemTy = MMO.getMemoryType();
- LLT MemScalarTy = MemTy.getElementType();
- MachineFunction &MF = MIRBuilder.getMF();
-
- assert(SrcTy.isVector() && "Expect a vector store type");
-
- if (!MemScalarTy.isByteSized()) {
- // We need to build an integer scalar of the vector bit pattern.
- // It's not legal for us to add padding when storing a vector.
- unsigned NumBits = MemTy.getSizeInBits();
- LLT IntTy = LLT::scalar(NumBits);
- auto CurrVal = MIRBuilder.buildConstant(IntTy, 0);
- LLT IdxTy = getLLTForMVT(TLI.getVectorIdxTy(MF.getDataLayout()));
-
- for (unsigned I = 0, E = MemTy.getNumElements(); I < E; ++I) {
- auto Elt = MIRBuilder.buildExtractVectorElement(
- SrcTy.getElementType(), SrcReg, MIRBuilder.buildConstant(IdxTy, I));
- auto Trunc = MIRBuilder.buildTrunc(MemScalarTy, Elt);
- auto ZExt = MIRBuilder.buildZExt(IntTy, Trunc);
- unsigned ShiftIntoIdx = MF.getDataLayout().isBigEndian()
- ? (MemTy.getNumElements() - 1) - I
- : I;
- auto ShiftAmt = MIRBuilder.buildConstant(
- IntTy, ShiftIntoIdx * MemScalarTy.getSizeInBits());
- auto Shifted = MIRBuilder.buildShl(IntTy, ZExt, ShiftAmt);
- CurrVal = MIRBuilder.buildOr(IntTy, CurrVal, Shifted);
- }
- auto PtrInfo = MMO.getPointerInfo();
- auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, IntTy);
- MIRBuilder.buildStore(CurrVal, PtrReg, *NewMMO);
- StoreMI.eraseFromParent();
- return Legalized;
- }
-
- // TODO: implement simple scalarization.
- return UnableToLegalize;
-}
-
LegalizerHelper::LegalizeResult
LegalizerHelper::bitcast(MachineInstr &MI, unsigned TypeIdx, LLT CastTy) {
switch (MI.getOpcode()) {
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 641f06530a5c23..2fac100f81519a 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -474,8 +474,7 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
})
.customIf(IsPtrVecPred)
.scalarizeIf(typeInSet(0, {v2s16, v2s8}), 0)
- .scalarizeIf(scalarOrEltWiderThan(0, 64), 0)
- .lower();
+ .scalarizeIf(scalarOrEltWiderThan(0, 64), 0);
getActionDefinitionsBuilder(G_INDEXED_STORE)
// Idx 0 == Ptr, Idx 1 == Val
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-bitcast.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-bitcast.mir
index 4d461c971d3320..7b24bb1227fa26 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-bitcast.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-bitcast.mir
@@ -1,5 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
-# RUN: llc -mtriple=aarch64 -run-pass=legalizer -global-isel-abort=1 %s -o - | FileCheck %s
+# RUN: llc -mtriple=aarch64 -run-pass=legalizer -global-isel-abort=2 %s -o - | FileCheck %s
---
name: scalar_to_oversize_vector
tracksRegLiveness: true
@@ -48,66 +48,17 @@ body: |
G_BR %bb.2
...
+# This test is currently expected to fall back after reaching truncstore of <8 x s8> as <8 x s1>.
---
name: boolean_vector_to_scalar
tracksRegLiveness: true
body: |
bb.1:
; CHECK-LABEL: name: boolean_vector_to_scalar
- ; CHECK: [[DEF:%[0-9]+]]:_(<8 x s8>) = G_IMPLICIT_DEF
+ ; CHECK: %vec:_(<8 x s1>) = G_IMPLICIT_DEF
; CHECK-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %stack.0
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](<8 x s8>), [[C]](s64)
- ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EVEC]](s8)
- ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C1]]
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C]](s64)
- ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[C2]], [[SHL]]
- ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
- ; CHECK-NEXT: [[EVEC1:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](<8 x s8>), [[C3]](s64)
- ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[EVEC1]](s8)
- ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C1]]
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s64)
- ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
- ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
- ; CHECK-NEXT: [[EVEC2:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](<8 x s8>), [[C4]](s64)
- ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[EVEC2]](s8)
- ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C1]]
- ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C4]](s64)
- ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
- ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
- ; CHECK-NEXT: [[EVEC3:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](<8 x s8>), [[C5]](s64)
- ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[EVEC3]](s8)
- ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ANYEXT3]], [[C1]]
- ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C5]](s64)
- ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[OR2]], [[SHL3]]
- ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
- ; CHECK-NEXT: [[EVEC4:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](<8 x s8>), [[C6]](s64)
- ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[EVEC4]](s8)
- ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ANYEXT4]], [[C1]]
- ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C6]](s64)
- ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
- ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
- ; CHECK-NEXT: [[EVEC5:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](<8 x s8>), [[C7]](s64)
- ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[EVEC5]](s8)
- ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ANYEXT5]], [[C1]]
- ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s64)
- ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
- ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
- ; CHECK-NEXT: [[EVEC6:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](<8 x s8>), [[C8]](s64)
- ; CHECK-NEXT: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[EVEC6]](s8)
- ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ANYEXT6]], [[C1]]
- ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C8]](s64)
- ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[OR5]], [[SHL6]]
- ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
- ; CHECK-NEXT: [[EVEC7:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[DEF]](<8 x s8>), [[C9]](s64)
- ; CHECK-NEXT: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[EVEC7]](s8)
- ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ANYEXT7]], [[C1]]
- ; CHECK-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C9]](s64)
- ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
- ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[OR7]](s32)
- ; CHECK-NEXT: G_STORE [[TRUNC]](s8), [[FRAME_INDEX]](p0) :: (store (s8) into %stack.0)
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(<8 x s8>) = G_ANYEXT %vec(<8 x s1>)
+ ; CHECK-NEXT: G_STORE [[ANYEXT]](<8 x s8>), [[FRAME_INDEX]](p0) :: (store (<8 x s1>) into %stack.0)
; CHECK-NEXT: %bc:_(s8) = G_LOAD [[FRAME_INDEX]](p0) :: (load (s8) from %stack.0)
; CHECK-NEXT: %ext:_(s32) = G_ANYEXT %bc(s8)
; CHECK-NEXT: $w0 = COPY %ext(s32)
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-store-vector-bools.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-store-vector-bools.mir
index 1df6297e363833..de70f89461780b 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-store-vector-bools.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-store-vector-bools.mir
@@ -1,5 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
-# RUN: llc -O0 -mtriple=aarch64 -run-pass=legalizer -global-isel-abort=1 %s -o - | FileCheck %s
+# RUN: llc -O0 -mtriple=aarch64 -run-pass=legalizer -global-isel-abort=2 %s -o - | FileCheck %s
+# This test is currently expected to fall back after reaching truncstore of <8 x s8> as <8 x s1>.
---
name: store_8xs1
tracksRegLiveness: true
@@ -12,67 +13,12 @@ body: |
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
; CHECK-NEXT: %ptr:_(p0) = COPY $x0
+ ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s32>) = G_CONCAT_VECTORS [[COPY]](<4 x s32>), [[COPY1]](<4 x s32>)
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32)
- ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32)
- ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<4 x s32>) = G_ICMP intpred(slt), [[COPY]](<4 x s32>), [[BUILD_VECTOR]]
- ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(<4 x s32>) = G_ICMP intpred(slt), [[COPY1]](<4 x s32>), [[BUILD_VECTOR1]]
- ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[ICMP]](<4 x s32>)
- ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[ICMP1]](<4 x s32>)
- ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s16>), [[TRUNC1]](<4 x s16>)
- ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(<8 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<8 x s16>)
- ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
- ; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[TRUNC2]](<8 x s8>), [[C1]](s64)
- ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[EVEC]](s8)
- ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[ANYEXT]], [[C2]]
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND]], [[C1]](s64)
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32)
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY2]], [[SHL]]
- ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
- ; CHECK-NEXT: [[EVEC1:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[TRUNC2]](<8 x s8>), [[C3]](s64)
- ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[EVEC1]](s8)
- ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[ANYEXT1]], [[C2]]
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C3]](s64)
- ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[OR]], [[SHL1]]
- ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
- ; CHECK-NEXT: [[EVEC2:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[TRUNC2]](<8 x s8>), [[C4]](s64)
- ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[EVEC2]](s8)
- ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[ANYEXT2]], [[C2]]
- ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[C4]](s64)
- ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL2]]
- ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
- ; CHECK-NEXT: [[EVEC3:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[TRUNC2]](<8 x s8>), [[C5]](s64)
- ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[EVEC3]](s8)
- ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[ANYEXT3]], [[C2]]
- ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[C5]](s64)
- ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[OR2]], [[SHL3]]
- ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
- ; CHECK-NEXT: [[EVEC4:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[TRUNC2]](<8 x s8>), [[C6]](s64)
- ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(s32) = G_ANYEXT [[EVEC4]](s8)
- ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[ANYEXT4]], [[C2]]
- ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[C6]](s64)
- ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR3]], [[SHL4]]
- ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s64) = G_CONSTANT i64 5
- ; CHECK-NEXT: [[EVEC5:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[TRUNC2]](<8 x s8>), [[C7]](s64)
- ; CHECK-NEXT: [[ANYEXT5:%[0-9]+]]:_(s32) = G_ANYEXT [[EVEC5]](s8)
- ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[ANYEXT5]], [[C2]]
- ; CHECK-NEXT: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[C7]](s64)
- ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[OR4]], [[SHL5]]
- ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s64) = G_CONSTANT i64 6
- ; CHECK-NEXT: [[EVEC6:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[TRUNC2]](<8 x s8>), [[C8]](s64)
- ; CHECK-NEXT: [[ANYEXT6:%[0-9]+]]:_(s32) = G_ANYEXT [[EVEC6]](s8)
- ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[ANYEXT6]], [[C2]]
- ; CHECK-NEXT: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND6]], [[C8]](s64)
- ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[OR5]], [[SHL6]]
- ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s64) = G_CONSTANT i64 7
- ; CHECK-NEXT: [[EVEC7:%[0-9]+]]:_(s8) = G_EXTRACT_VECTOR_ELT [[TRUNC2]](<8 x s8>), [[C9]](s64)
- ; CHECK-NEXT: [[ANYEXT7:%[0-9]+]]:_(s32) = G_ANYEXT [[EVEC7]](s8)
- ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[ANYEXT7]], [[C2]]
- ; CHECK-NEXT: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[C9]](s64)
- ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[OR6]], [[SHL7]]
- ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[OR7]](s32)
- ; CHECK-NEXT: G_STORE [[TRUNC3]](s8), %ptr(p0) :: (store (s8))
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32), [[C]](s32)
+ ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<8 x s1>) = G_ICMP intpred(slt), [[CONCAT_VECTORS]](<8 x s32>), [[BUILD_VECTOR]]
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(<8 x s8>) = G_ANYEXT [[ICMP]](<8 x s1>)
+ ; CHECK-NEXT: G_STORE [[ANYEXT]](<8 x s8>), %ptr(p0) :: (store (<8 x s1>))
; CHECK-NEXT: RET_ReallyLR
%1:_(<4 x s32>) = COPY $q0
%2:_(<4 x s32>) = COPY $q1
diff --git a/llvm/test/CodeGen/AArch64/vec-combine-compare-to-bitmask.ll b/llvm/test/CodeGen/AArch64/vec-combine-compare-to-bitmask.ll
index 1fa96979f45530..7f2eefe5ed72f6 100644
--- a/llvm/test/CodeGen/AArch64/vec-combine-compare-to-bitmask.ll
+++ b/llvm/test/CodeGen/AArch64/vec-combine-compare-to-bitmask.ll
@@ -1,97 +1,26 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
-; RUN: llc -mtriple=aarch64-apple-darwin -mattr=+neon -aarch64-enable-collect-loh=false -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,SDAG
-; RUN: llc -mtriple=aarch64-apple-darwin -mattr=+neon -aarch64-enable-collect-loh=false -global-isel -global-isel-abort=2 -verify-machineinstrs < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,GISEL
+; RUN: llc -mtriple=aarch64-apple-darwin -mattr=+neon -verify-machineinstrs < %s | FileCheck %s
; Basic tests from input vector to bitmask
; IR generated from clang for:
; __builtin_convertvector + reinterpret_cast<uint16&>
-; GISEL: warning: Instruction selection used fallback path for convert_to_bitmask4
-; GISEL-NEXT: warning: Instruction selection used fallback path for convert_to_bitmask2
-; GISEL-NEXT: warning: Instruction selection used fallback path for clang_builtins_undef_concat_convert_to_bitmask4
-; GISEL-NEXT: warning: Instruction selection used fallback path for convert_to_bitmask_no_compare
-; GISEL-NEXT: warning: Instruction selection used fallback path for convert_to_bitmask_with_compare_chain
-; GISEL-NEXT: warning: Instruction selection used fallback path for convert_to_bitmask_with_trunc_in_chain
-; GISEL-NEXT: warning: Instruction selection used fallback path for convert_to_bitmask_with_unknown_type_in_long_chain
-; GISEL-NEXT: warning: Instruction selection used fallback path for convert_to_bitmask_with_different_types_in_chain
-; GISEL-NEXT: warning: Instruction selection used fallback path for convert_to_bitmask_2xi32
-; GISEL-NEXT: warning: Instruction selection used fallback path for convert_to_bitmask_4xi8
-; GISEL-NEXT: warning: Instruction selection used fallback path for convert_to_bitmask_8xi2
-; GISEL-NEXT: warning: Instruction selection used fallback path for convert_to_bitmask_float
-; GISEL-NEXT: warning: Instruction selection used fallback path for convert_legalized_illegal_element_size
-; GISEL-NEXT: warning: Instruction selection used fallback path for no_direct_convert_for_bad_concat
-; GISEL-NEXT: warning: Instruction selection used fallback path for no_combine_illegal_num_elements
-
define i16 @convert_to_bitmask16(<16 x i8> %vec) {
; Bits used in mask
-; SDAG-LABEL: convert_to_bitmask16:
-; SDAG: ; %bb.0:
-; SDAG-NEXT: adrp x8, lCPI0_0 at PAGE
-; SDAG-NEXT: cmeq.16b v0, v0, #0
-; SDAG-NEXT: ldr q1, [x8, lCPI0_0 at PAGEOFF]
-; SDAG-NEXT: bic.16b v0, v1, v0
-; SDAG-NEXT: ext.16b v1, v0, v0, #8
-; SDAG-NEXT: zip1.16b v0, v0, v1
-; SDAG-NEXT: addv.8h h0, v0
-; SDAG-NEXT: fmov w0, s0
-; SDAG-NEXT: ret
-;
-; GISEL-LABEL: convert_to_bitmask16:
-; GISEL: ; %bb.0:
-; GISEL-NEXT: sub sp, sp, #16
-; GISEL-NEXT: .cfi_def_cfa_offset 16
-; GISEL-NEXT: cmeq.16b v0, v0, #0
-; GISEL-NEXT: mvn.16b v0, v0
-; GISEL-NEXT: umov.b w8, v0[1]
-; GISEL-NEXT: umov.b w9, v0[0]
-; GISEL-NEXT: umov.b w10, v0[2]
-; GISEL-NEXT: umov.b w11, v0[3]
-; GISEL-NEXT: and w8, w8, #0x1
-; GISEL-NEXT: bfi w9, w8, #1, #31
-; GISEL-NEXT: and w8, w10, #0x1
-; GISEL-NEXT: umov.b w10, v0[4]
-; GISEL-NEXT: orr w8, w9, w8, lsl #2
-; GISEL-NEXT: and w9, w11, #0x1
-; GISEL-NEXT: umov.b w11, v0[5]
-; GISEL-NEXT: orr w8, w8, w9, lsl #3
-; GISEL-NEXT: and w9, w10, #0x1
-; GISEL-NEXT: umov.b w10, v0[6]
-; GISEL-NEXT: orr w8, w8, w9, lsl #4
-; GISEL-NEXT: and w9, w11, #0x1
-; GISEL-NEXT: umov.b w11, v0[7]
-; GISEL-NEXT: orr w8, w8, w9, lsl #5
-; GISEL-NEXT: and w9, w10, #0x1
-; GISEL-NEXT: umov.b w10, v0[8]
-; GISEL-NEXT: orr w8, w8, w9, lsl #6
-; GISEL-NEXT: and w9, w11, #0x1
-; GISEL-NEXT: umov.b w11, v0[9]
-; GISEL-NEXT: orr w8, w8, w9, lsl #7
-; GISEL-NEXT: and w9, w10, #0x1
-; GISEL-NEXT: umov.b w10, v0[10]
-; GISEL-NEXT: orr w8, w8, w9, lsl #8
-; GISEL-NEXT: and w9, w11, #0x1
-; GISEL-NEXT: umov.b w11, v0[11]
-; GISEL-NEXT: orr w8, w8, w9, lsl #9
-; GISEL-NEXT: and w9, w10, #0x1
-; GISEL-NEXT: umov.b w10, v0[12]
-; GISEL-NEXT: orr w8, w8, w9, lsl #10
-; GISEL-NEXT: and w9, w11, #0x1
-; GISEL-NEXT: umov.b w11, v0[13]
-; GISEL-NEXT: orr w8, w8, w9, lsl #11
-; GISEL-NEXT: and w9, w10, #0x1
-; GISEL-NEXT: umov.b w10, v0[14]
-; GISEL-NEXT: orr w8, w8, w9, lsl #12
-; GISEL-NEXT: and w9, w11, #0x1
-; GISEL-NEXT: umov.b w11, v0[15]
-; GISEL-NEXT: orr w8, w8, w9, lsl #13
-; GISEL-NEXT: and w9, w10, #0x1
-; GISEL-NEXT: orr w8, w8, w9, lsl #14
-; GISEL-NEXT: and w9, w11, #0x1
-; GISEL-NEXT: orr w8, w8, w9, lsl #15
-; GISEL-NEXT: strh w8, [sp, #14]
-; GISEL-NEXT: and w0, w8, #0xffff
-; GISEL-NEXT: add sp, sp, #16
-; GISEL-NEXT: ret
+; CHECK-LABEL: convert_to_bitmask16:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: Lloh0:
+; CHECK-NEXT: adrp x8, lCPI0_0 at PAGE
+; CHECK-NEXT: cmeq.16b v0, v0, #0
+; CHECK-NEXT: Lloh1:
+; CHECK-NEXT: ldr q1, [x8, lCPI0_0 at PAGEOFF]
+; CHECK-NEXT: bic.16b v0, v1, v0
+; CHECK-NEXT: ext.16b v1, v0, v0, #8
+; CHECK-NEXT: zip1.16b v0, v0, v1
+; CHECK-NEXT: addv.8h h0, v0
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ret
+; CHECK-NEXT: .loh AdrpLdr Lloh0, Lloh1
; Actual conversion
@@ -101,50 +30,19 @@ define i16 @convert_to_bitmask16(<16 x i8> %vec) {
}
define i16 @convert_to_bitmask8(<8 x i16> %vec) {
-; SDAG-LABEL: convert_to_bitmask8:
-; SDAG: ; %bb.0:
-; SDAG-NEXT: adrp x8, lCPI1_0 at PAGE
-; SDAG-NEXT: cmeq.8h v0, v0, #0
-; SDAG-NEXT: ldr q1, [x8, lCPI1_0 at PAGEOFF]
-; SDAG-NEXT: bic.16b v0, v1, v0
-; SDAG-NEXT: addv.8h h0, v0
-; SDAG-NEXT: fmov w8, s0
-; SDAG-NEXT: and w0, w8, #0xff
-; SDAG-NEXT: ret
-;
-; GISEL-LABEL: convert_to_bitmask8:
-; GISEL: ; %bb.0:
-; GISEL-NEXT: sub sp, sp, #16
-; GISEL-NEXT: .cfi_def_cfa_offset 16
-; GISEL-NEXT: cmeq.8h v0, v0, #0
-; GISEL-NEXT: mvn.16b v0, v0
-; GISEL-NEXT: xtn.8b v0, v0
-; GISEL-NEXT: umov.b w8, v0[1]
-; GISEL-NEXT: umov.b w9, v0[0]
-; GISEL-NEXT: umov.b w10, v0[2]
-; GISEL-NEXT: umov.b w11, v0[3]
-; GISEL-NEXT: and w8, w8, #0x1
-; GISEL-NEXT: bfi w9, w8, #1, #31
-; GISEL-NEXT: and w8, w10, #0x1
-; GISEL-NEXT: umov.b w10, v0[4]
-; GISEL-NEXT: orr w8, w9, w8, lsl #2
-; GISEL-NEXT: and w9, w11, #0x1
-; GISEL-NEXT: umov.b w11, v0[5]
-; GISEL-NEXT: orr w8, w8, w9, lsl #3
-; GISEL-NEXT: and w9, w10, #0x1
-; GISEL-NEXT: umov.b w10, v0[6]
-; GISEL-NEXT: orr w8, w8, w9, lsl #4
-; GISEL-NEXT: and w9, w11, #0x1
-; GISEL-NEXT: umov.b w11, v0[7]
-; GISEL-NEXT: orr w8, w8, w9, lsl #5
-; GISEL-NEXT: and w9, w10, #0x1
-; GISEL-NEXT: orr w8, w8, w9, lsl #6
-; GISEL-NEXT: and w9, w11, #0x1
-; GISEL-NEXT: orr w8, w8, w9, lsl #7
-; GISEL-NEXT: strb w8, [sp, #15]
-; GISEL-NEXT: and w0, w8, #0xff
-; GISEL-NEXT: add sp, sp, #16
-; GISEL-NEXT: ret
+; CHECK-LABEL: convert_to_bitmask8:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: Lloh2:
+; CHECK-NEXT: adrp x8, lCPI1_0 at PAGE
+; CHECK-NEXT: cmeq.8h v0, v0, #0
+; CHECK-NEXT: Lloh3:
+; CHECK-NEXT: ldr q1, [x8, lCPI1_0 at PAGEOFF]
+; CHECK-NEXT: bic.16b v0, v1, v0
+; CHECK-NEXT: addv.8h h0, v0
+; CHECK-NEXT: fmov w8, s0
+; CHECK-NEXT: and w0, w8, #0xff
+; CHECK-NEXT: ret
+; CHECK-NEXT: .loh AdrpLdr Lloh2, Lloh3
%cmp_result = icmp ne <8 x i16> %vec, zeroinitializer
@@ -156,13 +54,16 @@ define i16 @convert_to_bitmask8(<8 x i16> %vec) {
define i4 @convert_to_bitmask4(<4 x i32> %vec) {
; CHECK-LABEL: convert_to_bitmask4:
; CHECK: ; %bb.0:
+; CHECK-NEXT: Lloh4:
; CHECK-NEXT: adrp x8, lCPI2_0 at PAGE
; CHECK-NEXT: cmeq.4s v0, v0, #0
+; CHECK-NEXT: Lloh5:
; CHECK-NEXT: ldr q1, [x8, lCPI2_0 at PAGEOFF]
; CHECK-NEXT: bic.16b v0, v1, v0
; CHECK-NEXT: addv.4s s0, v0
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
+; CHECK-NEXT: .loh AdrpLdr Lloh4, Lloh5
%cmp_result = icmp ne <4 x i32> %vec, zeroinitializer
@@ -173,14 +74,17 @@ define i4 @convert_to_bitmask4(<4 x i32> %vec) {
define i8 @convert_to_bitmask2(<2 x i64> %vec) {
; CHECK-LABEL: convert_to_bitmask2:
; CHECK: ; %bb.0:
+; CHECK-NEXT: Lloh6:
; CHECK-NEXT: adrp x8, lCPI3_0 at PAGE
; CHECK-NEXT: cmeq.2d v0, v0, #0
+; CHECK-NEXT: Lloh7:
; CHECK-NEXT: ldr q1, [x8, lCPI3_0 at PAGEOFF]
; CHECK-NEXT: bic.16b v0, v1, v0
; CHECK-NEXT: addp.2d d0, v0
; CHECK-NEXT: fmov w8, s0
; CHECK-NEXT: and w0, w8, #0x3
; CHECK-NEXT: ret
+; CHECK-NEXT: .loh AdrpLdr Lloh6, Lloh7
%cmp_result = icmp ne <2 x i64> %vec, zeroinitializer
@@ -193,13 +97,16 @@ define i8 @convert_to_bitmask2(<2 x i64> %vec) {
define i8 @clang_builtins_undef_concat_convert_to_bitmask4(<4 x i32> %vec) {
; CHECK-LABEL: clang_builtins_undef_concat_convert_to_bitmask4:
; CHECK: ; %bb.0:
+; CHECK-NEXT: Lloh8:
; CHECK-NEXT: adrp x8, lCPI4_0 at PAGE
; CHECK-NEXT: cmeq.4s v0, v0, #0
+; CHECK-NEXT: Lloh9:
; CHECK-NEXT: ldr q1, [x8, lCPI4_0 at PAGEOFF]
; CHECK-NEXT: bic.16b v0, v1, v0
; CHECK-NEXT: addv.4s s0, v0
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
+; CHECK-NEXT: .loh AdrpLdr Lloh8, Lloh9
%cmp_result = icmp ne <4 x i32> %vec, zeroinitializer
@@ -213,7 +120,9 @@ define i4 @convert_to_bitmask_no_compare(<4 x i32> %vec1, <4 x i32> %vec2) {
; CHECK-LABEL: convert_to_bitmask_no_compare:
; CHECK: ; %bb.0:
; CHECK-NEXT: and.16b v0, v0, v1
+; CHECK-NEXT: Lloh10:
; CHECK-NEXT: adrp x8, lCPI5_0 at PAGE
+; CHECK-NEXT: Lloh11:
; CHECK-NEXT: ldr q1, [x8, lCPI5_0 at PAGEOFF]
; CHECK-NEXT: shl.4s v0, v0, #31
; CHECK-NEXT: cmlt.4s v0, v0, #0
@@ -221,6 +130,7 @@ define i4 @convert_to_bitmask_no_compare(<4 x i32> %vec1, <4 x i32> %vec2) {
; CHECK-NEXT: addv.4s s0, v0
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
+; CHECK-NEXT: .loh AdrpLdr Lloh10, Lloh11
%cmp = and <4 x i32> %vec1, %vec2
@@ -234,13 +144,16 @@ define i4 @convert_to_bitmask_with_compare_chain(<4 x i32> %vec1, <4 x i32> %vec
; CHECK: ; %bb.0:
; CHECK-NEXT: cmeq.4s v2, v0, #0
; CHECK-NEXT: cmeq.4s v0, v0, v1
+; CHECK-NEXT: Lloh12:
; CHECK-NEXT: adrp x8, lCPI6_0 at PAGE
+; CHECK-NEXT: Lloh13:
; CHECK-NEXT: ldr q1, [x8, lCPI6_0 at PAGEOFF]
; CHECK-NEXT: bic.16b v0, v0, v2
; CHECK-NEXT: and.16b v0, v0, v1
; CHECK-NEXT: addv.4s s0, v0
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
+; CHECK-NEXT: .loh AdrpLdr Lloh12, Lloh13
%cmp1 = icmp ne <4 x i32> %vec1, zeroinitializer
@@ -254,8 +167,10 @@ define i4 @convert_to_bitmask_with_trunc_in_chain(<4 x i32> %vec1, <4 x i32> %ve
; CHECK-LABEL: convert_to_bitmask_with_trunc_in_chain:
; CHECK: ; %bb.0:
; CHECK-NEXT: cmeq.4s v0, v0, #0
+; CHECK-NEXT: Lloh14:
; CHECK-NEXT: adrp x8, lCPI7_0 at PAGE
; CHECK-NEXT: bic.16b v0, v1, v0
+; CHECK-NEXT: Lloh15:
; CHECK-NEXT: ldr q1, [x8, lCPI7_0 at PAGEOFF]
; CHECK-NEXT: shl.4s v0, v0, #31
; CHECK-NEXT: cmlt.4s v0, v0, #0
@@ -263,6 +178,7 @@ define i4 @convert_to_bitmask_with_trunc_in_chain(<4 x i32> %vec1, <4 x i32> %ve
; CHECK-NEXT: addv.4s s0, v0
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
+; CHECK-NEXT: .loh AdrpLdr Lloh14, Lloh15
%cmp1 = icmp ne <4 x i32> %vec1, zeroinitializer
@@ -277,6 +193,7 @@ define i4 @convert_to_bitmask_with_unknown_type_in_long_chain(<4 x i32> %vec1, <
; CHECK: ; %bb.0:
; CHECK-NEXT: cmeq.4s v0, v0, #0
; CHECK-NEXT: cmeq.4s v1, v1, #0
+; CHECK-NEXT: Lloh16:
; CHECK-NEXT: adrp x8, lCPI8_0 at PAGE
; CHECK-NEXT: movi d2, #0x000000ffffffff
; CHECK-NEXT: movi d3, #0x00ffffffffffff
@@ -290,6 +207,7 @@ define i4 @convert_to_bitmask_with_unknown_type_in_long_chain(<4 x i32> %vec1, <
; CHECK-NEXT: mov.h v1[2], wzr
; CHECK-NEXT: orr.8b v0, v0, v3
; CHECK-NEXT: orr.8b v0, v1, v0
+; CHECK-NEXT: Lloh17:
; CHECK-NEXT: ldr d1, [x8, lCPI8_0 at PAGEOFF]
; CHECK-NEXT: shl.4h v0, v0, #15
; CHECK-NEXT: cmlt.4h v0, v0, #0
@@ -297,6 +215,7 @@ define i4 @convert_to_bitmask_with_unknown_type_in_long_chain(<4 x i32> %vec1, <
; CHECK-NEXT: addv.4h h0, v0
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
+; CHECK-NEXT: .loh AdrpLdr Lloh16, Lloh17
%cmp1 = icmp ne <4 x i32> %vec1, zeroinitializer
@@ -319,14 +238,17 @@ define i4 @convert_to_bitmask_with_different_types_in_chain(<4 x i16> %vec1, <4
; CHECK: ; %bb.0:
; CHECK-NEXT: cmeq.4s v1, v1, #0
; CHECK-NEXT: cmeq.4h v0, v0, #0
+; CHECK-NEXT: Lloh18:
; CHECK-NEXT: adrp x8, lCPI9_0 at PAGE
; CHECK-NEXT: xtn.4h v1, v1
; CHECK-NEXT: orn.8b v0, v1, v0
+; CHECK-NEXT: Lloh19:
; CHECK-NEXT: ldr d1, [x8, lCPI9_0 at PAGEOFF]
; CHECK-NEXT: and.8b v0, v0, v1
; CHECK-NEXT: addv.4h h0, v0
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
+; CHECK-NEXT: .loh AdrpLdr Lloh18, Lloh19
%cmp1 = icmp ne <4 x i16> %vec1, zeroinitializer
@@ -337,73 +259,21 @@ define i4 @convert_to_bitmask_with_different_types_in_chain(<4 x i16> %vec1, <4
}
define i16 @convert_to_bitmask_without_knowing_type(<16 x i1> %vec) {
-; SDAG-LABEL: convert_to_bitmask_without_knowing_type:
-; SDAG: ; %bb.0:
-; SDAG-NEXT: shl.16b v0, v0, #7
-; SDAG-NEXT: adrp x8, lCPI10_0 at PAGE
-; SDAG-NEXT: ldr q1, [x8, lCPI10_0 at PAGEOFF]
-; SDAG-NEXT: cmlt.16b v0, v0, #0
-; SDAG-NEXT: and.16b v0, v0, v1
-; SDAG-NEXT: ext.16b v1, v0, v0, #8
-; SDAG-NEXT: zip1.16b v0, v0, v1
-; SDAG-NEXT: addv.8h h0, v0
-; SDAG-NEXT: fmov w0, s0
-; SDAG-NEXT: ret
-;
-; GISEL-LABEL: convert_to_bitmask_without_knowing_type:
-; GISEL: ; %bb.0:
-; GISEL-NEXT: sub sp, sp, #16
-; GISEL-NEXT: .cfi_def_cfa_offset 16
-; GISEL-NEXT: umov.b w8, v0[1]
-; GISEL-NEXT: umov.b w9, v0[0]
-; GISEL-NEXT: umov.b w10, v0[2]
-; GISEL-NEXT: umov.b w11, v0[3]
-; GISEL-NEXT: and w8, w8, #0x1
-; GISEL-NEXT: bfi w9, w8, #1, #31
-; GISEL-NEXT: and w8, w10, #0x1
-; GISEL-NEXT: umov.b w10, v0[4]
-; GISEL-NEXT: orr w8, w9, w8, lsl #2
-; GISEL-NEXT: and w9, w11, #0x1
-; GISEL-NEXT: umov.b w11, v0[5]
-; GISEL-NEXT: orr w8, w8, w9, lsl #3
-; GISEL-NEXT: and w9, w10, #0x1
-; GISEL-NEXT: umov.b w10, v0[6]
-; GISEL-NEXT: orr w8, w8, w9, lsl #4
-; GISEL-NEXT: and w9, w11, #0x1
-; GISEL-NEXT: umov.b w11, v0[7]
-; GISEL-NEXT: orr w8, w8, w9, lsl #5
-; GISEL-NEXT: and w9, w10, #0x1
-; GISEL-NEXT: umov.b w10, v0[8]
-; GISEL-NEXT: orr w8, w8, w9, lsl #6
-; GISEL-NEXT: and w9, w11, #0x1
-; GISEL-NEXT: umov.b w11, v0[9]
-; GISEL-NEXT: orr w8, w8, w9, lsl #7
-; GISEL-NEXT: and w9, w10, #0x1
-; GISEL-NEXT: umov.b w10, v0[10]
-; GISEL-NEXT: orr w8, w8, w9, lsl #8
-; GISEL-NEXT: and w9, w11, #0x1
-; GISEL-NEXT: umov.b w11, v0[11]
-; GISEL-NEXT: orr w8, w8, w9, lsl #9
-; GISEL-NEXT: and w9, w10, #0x1
-; GISEL-NEXT: umov.b w10, v0[12]
-; GISEL-NEXT: orr w8, w8, w9, lsl #10
-; GISEL-NEXT: and w9, w11, #0x1
-; GISEL-NEXT: umov.b w11, v0[13]
-; GISEL-NEXT: orr w8, w8, w9, lsl #11
-; GISEL-NEXT: and w9, w10, #0x1
-; GISEL-NEXT: umov.b w10, v0[14]
-; GISEL-NEXT: orr w8, w8, w9, lsl #12
-; GISEL-NEXT: and w9, w11, #0x1
-; GISEL-NEXT: umov.b w11, v0[15]
-; GISEL-NEXT: orr w8, w8, w9, lsl #13
-; GISEL-NEXT: and w9, w10, #0x1
-; GISEL-NEXT: orr w8, w8, w9, lsl #14
-; GISEL-NEXT: and w9, w11, #0x1
-; GISEL-NEXT: orr w8, w8, w9, lsl #15
-; GISEL-NEXT: strh w8, [sp, #14]
-; GISEL-NEXT: and w0, w8, #0xffff
-; GISEL-NEXT: add sp, sp, #16
-; GISEL-NEXT: ret
+; CHECK-LABEL: convert_to_bitmask_without_knowing_type:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: shl.16b v0, v0, #7
+; CHECK-NEXT: Lloh20:
+; CHECK-NEXT: adrp x8, lCPI10_0 at PAGE
+; CHECK-NEXT: Lloh21:
+; CHECK-NEXT: ldr q1, [x8, lCPI10_0 at PAGEOFF]
+; CHECK-NEXT: cmlt.16b v0, v0, #0
+; CHECK-NEXT: and.16b v0, v0, v1
+; CHECK-NEXT: ext.16b v1, v0, v0, #8
+; CHECK-NEXT: zip1.16b v0, v0, v1
+; CHECK-NEXT: addv.8h h0, v0
+; CHECK-NEXT: fmov w0, s0
+; CHECK-NEXT: ret
+; CHECK-NEXT: .loh AdrpLdr Lloh20, Lloh21
%bitmask = bitcast <16 x i1> %vec to i16
ret i16 %bitmask
@@ -412,13 +282,16 @@ define i16 @convert_to_bitmask_without_knowing_type(<16 x i1> %vec) {
define i2 @convert_to_bitmask_2xi32(<2 x i32> %vec) {
; CHECK-LABEL: convert_to_bitmask_2xi32:
; CHECK: ; %bb.0:
+; CHECK-NEXT: Lloh22:
; CHECK-NEXT: adrp x8, lCPI11_0 at PAGE
; CHECK-NEXT: cmeq.2s v0, v0, #0
+; CHECK-NEXT: Lloh23:
; CHECK-NEXT: ldr d1, [x8, lCPI11_0 at PAGEOFF]
; CHECK-NEXT: bic.8b v0, v1, v0
; CHECK-NEXT: addp.2s v0, v0, v0
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
+; CHECK-NEXT: .loh AdrpLdr Lloh22, Lloh23
%cmp_result = icmp ne <2 x i32> %vec, zeroinitializer
%bitmask = bitcast <2 x i1> %cmp_result to i2
@@ -429,13 +302,16 @@ define i4 @convert_to_bitmask_4xi8(<4 x i8> %vec) {
; CHECK-LABEL: convert_to_bitmask_4xi8:
; CHECK: ; %bb.0:
; CHECK-NEXT: bic.4h v0, #255, lsl #8
+; CHECK-NEXT: Lloh24:
; CHECK-NEXT: adrp x8, lCPI12_0 at PAGE
+; CHECK-NEXT: Lloh25:
; CHECK-NEXT: ldr d1, [x8, lCPI12_0 at PAGEOFF]
; CHECK-NEXT: cmeq.4h v0, v0, #0
; CHECK-NEXT: bic.8b v0, v1, v0
; CHECK-NEXT: addv.4h h0, v0
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
+; CHECK-NEXT: .loh AdrpLdr Lloh24, Lloh25
%cmp_result = icmp ne <4 x i8> %vec, zeroinitializer
%bitmask = bitcast <4 x i1> %cmp_result to i4
@@ -446,14 +322,17 @@ define i8 @convert_to_bitmask_8xi2(<8 x i2> %vec) {
; CHECK-LABEL: convert_to_bitmask_8xi2:
; CHECK: ; %bb.0:
; CHECK-NEXT: movi.8b v1, #3
+; CHECK-NEXT: Lloh26:
; CHECK-NEXT: adrp x8, lCPI13_0 at PAGE
; CHECK-NEXT: and.8b v0, v0, v1
+; CHECK-NEXT: Lloh27:
; CHECK-NEXT: ldr d1, [x8, lCPI13_0 at PAGEOFF]
; CHECK-NEXT: cmeq.8b v0, v0, #0
; CHECK-NEXT: bic.8b v0, v1, v0
; CHECK-NEXT: addv.8b b0, v0
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
+; CHECK-NEXT: .loh AdrpLdr Lloh26, Lloh27
%cmp_result = icmp ne <8 x i2> %vec, zeroinitializer
%bitmask = bitcast <8 x i1> %cmp_result to i8
@@ -465,13 +344,16 @@ define i4 @convert_to_bitmask_float(<4 x float> %vec) {
; CHECK: ; %bb.0:
; CHECK-NEXT: fcmgt.4s v1, v0, #0.0
; CHECK-NEXT: fcmlt.4s v0, v0, #0.0
+; CHECK-NEXT: Lloh28:
; CHECK-NEXT: adrp x8, lCPI14_0 at PAGE
; CHECK-NEXT: orr.16b v0, v0, v1
+; CHECK-NEXT: Lloh29:
; CHECK-NEXT: ldr q1, [x8, lCPI14_0 at PAGEOFF]
; CHECK-NEXT: and.16b v0, v0, v1
; CHECK-NEXT: addv.4s s0, v0
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
+; CHECK-NEXT: .loh AdrpLdr Lloh28, Lloh29
%cmp_result = fcmp one <4 x float> %vec, zeroinitializer
@@ -482,58 +364,24 @@ define i4 @convert_to_bitmask_float(<4 x float> %vec) {
; Larger vector types don't map directly, but they can be split/truncated and then converted.
; After the comparison against 0, this is truncated to <8 x i16>, which is valid again.
define i8 @convert_large_vector(<8 x i32> %vec) {
-; SDAG-LABEL: convert_large_vector:
-; SDAG: ; %bb.0:
-; SDAG-NEXT: sub sp, sp, #16
-; SDAG-NEXT: .cfi_def_cfa_offset 16
-; SDAG-NEXT: cmeq.4s v1, v1, #0
-; SDAG-NEXT: cmeq.4s v0, v0, #0
-; SDAG-NEXT: adrp x8, lCPI15_0 at PAGE
-; SDAG-NEXT: uzp1.8h v0, v0, v1
-; SDAG-NEXT: ldr q1, [x8, lCPI15_0 at PAGEOFF]
-; SDAG-NEXT: bic.16b v0, v1, v0
-; SDAG-NEXT: addv.8h h0, v0
-; SDAG-NEXT: fmov w8, s0
-; SDAG-NEXT: and w0, w8, #0xff
-; SDAG-NEXT: add sp, sp, #16
-; SDAG-NEXT: ret
-;
-; GISEL-LABEL: convert_large_vector:
-; GISEL: ; %bb.0:
-; GISEL-NEXT: sub sp, sp, #16
-; GISEL-NEXT: .cfi_def_cfa_offset 16
-; GISEL-NEXT: cmeq.4s v0, v0, #0
-; GISEL-NEXT: cmeq.4s v1, v1, #0
-; GISEL-NEXT: mvn.16b v0, v0
-; GISEL-NEXT: mvn.16b v1, v1
-; GISEL-NEXT: uzp1.8h v0, v0, v1
-; GISEL-NEXT: xtn.8b v0, v0
-; GISEL-NEXT: umov.b w8, v0[1]
-; GISEL-NEXT: umov.b w9, v0[0]
-; GISEL-NEXT: umov.b w10, v0[2]
-; GISEL-NEXT: umov.b w11, v0[3]
-; GISEL-NEXT: and w8, w8, #0x1
-; GISEL-NEXT: bfi w9, w8, #1, #31
-; GISEL-NEXT: and w8, w10, #0x1
-; GISEL-NEXT: umov.b w10, v0[4]
-; GISEL-NEXT: orr w8, w9, w8, lsl #2
-; GISEL-NEXT: and w9, w11, #0x1
-; GISEL-NEXT: umov.b w11, v0[5]
-; GISEL-NEXT: orr w8, w8, w9, lsl #3
-; GISEL-NEXT: and w9, w10, #0x1
-; GISEL-NEXT: umov.b w10, v0[6]
-; GISEL-NEXT: orr w8, w8, w9, lsl #4
-; GISEL-NEXT: and w9, w11, #0x1
-; GISEL-NEXT: umov.b w11, v0[7]
-; GISEL-NEXT: orr w8, w8, w9, lsl #5
-; GISEL-NEXT: and w9, w10, #0x1
-; GISEL-NEXT: orr w8, w8, w9, lsl #6
-; GISEL-NEXT: and w9, w11, #0x1
-; GISEL-NEXT: orr w8, w8, w9, lsl #7
-; GISEL-NEXT: strb w8, [sp, #15]
-; GISEL-NEXT: and w0, w8, #0xff
-; GISEL-NEXT: add sp, sp, #16
-; GISEL-NEXT: ret
+; CHECK-LABEL: convert_large_vector:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: sub sp, sp, #16
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: cmeq.4s v1, v1, #0
+; CHECK-NEXT: cmeq.4s v0, v0, #0
+; CHECK-NEXT: Lloh30:
+; CHECK-NEXT: adrp x8, lCPI15_0 at PAGE
+; CHECK-NEXT: uzp1.8h v0, v0, v1
+; CHECK-NEXT: Lloh31:
+; CHECK-NEXT: ldr q1, [x8, lCPI15_0 at PAGEOFF]
+; CHECK-NEXT: bic.16b v0, v1, v0
+; CHECK-NEXT: addv.8h h0, v0
+; CHECK-NEXT: fmov w8, s0
+; CHECK-NEXT: and w0, w8, #0xff
+; CHECK-NEXT: add sp, sp, #16
+; CHECK-NEXT: ret
+; CHECK-NEXT: .loh AdrpLdr Lloh30, Lloh31
%cmp_result = icmp ne <8 x i32> %vec, zeroinitializer
@@ -545,14 +393,17 @@ define i4 @convert_legalized_illegal_element_size(<4 x i22> %vec) {
; CHECK-LABEL: convert_legalized_illegal_element_size:
; CHECK: ; %bb.0:
; CHECK-NEXT: movi.4s v1, #63, msl #16
+; CHECK-NEXT: Lloh32:
; CHECK-NEXT: adrp x8, lCPI16_0 at PAGE
; CHECK-NEXT: cmtst.4s v0, v0, v1
+; CHECK-NEXT: Lloh33:
; CHECK-NEXT: ldr d1, [x8, lCPI16_0 at PAGEOFF]
; CHECK-NEXT: xtn.4h v0, v0
; CHECK-NEXT: and.8b v0, v0, v1
; CHECK-NEXT: addv.4h h0, v0
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
+; CHECK-NEXT: .loh AdrpLdr Lloh32, Lloh33
%cmp_result = icmp ne <4 x i22> %vec, zeroinitializer
%bitmask = bitcast <4 x i1> %cmp_result to i4
@@ -564,6 +415,7 @@ define i8 @no_direct_convert_for_bad_concat(<4 x i32> %vec) {
; CHECK-LABEL: no_direct_convert_for_bad_concat:
; CHECK: ; %bb.0:
; CHECK-NEXT: cmtst.4s v0, v0, v0
+; CHECK-NEXT: Lloh34:
; CHECK-NEXT: adrp x8, lCPI17_0 at PAGE
; CHECK-NEXT: xtn.4h v0, v0
; CHECK-NEXT: umov.h w9, v0[0]
@@ -575,12 +427,14 @@ define i8 @no_direct_convert_for_bad_concat(<4 x i32> %vec) {
; CHECK-NEXT: umov.h w9, v0[3]
; CHECK-NEXT: mov.b v1[7], w9
; CHECK-NEXT: shl.8b v0, v1, #7
+; CHECK-NEXT: Lloh35:
; CHECK-NEXT: ldr d1, [x8, lCPI17_0 at PAGEOFF]
; CHECK-NEXT: cmlt.8b v0, v0, #0
; CHECK-NEXT: and.8b v0, v0, v1
; CHECK-NEXT: addv.8b b0, v0
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
+; CHECK-NEXT: .loh AdrpLdr Lloh34, Lloh35
%cmp_result = icmp ne <4 x i32> %vec, zeroinitializer
%vector_pad = shufflevector <4 x i1> poison, <4 x i1> %cmp_result, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 4, i32 5, i32 6, i32 7>
@@ -589,18 +443,11 @@ define i8 @no_direct_convert_for_bad_concat(<4 x i32> %vec) {
}
define <8 x i1> @no_convert_without_direct_bitcast(<8 x i16> %vec) {
-; SDAG-LABEL: no_convert_without_direct_bitcast:
-; SDAG: ; %bb.0:
-; SDAG-NEXT: cmtst.8h v0, v0, v0
-; SDAG-NEXT: xtn.8b v0, v0
-; SDAG-NEXT: ret
-;
-; GISEL-LABEL: no_convert_without_direct_bitcast:
-; GISEL: ; %bb.0:
-; GISEL-NEXT: cmeq.8h v0, v0, #0
-; GISEL-NEXT: mvn.16b v0, v0
-; GISEL-NEXT: xtn.8b v0, v0
-; GISEL-NEXT: ret
+; CHECK-LABEL: no_convert_without_direct_bitcast:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: cmtst.8h v0, v0, v0
+; CHECK-NEXT: xtn.8b v0, v0
+; CHECK-NEXT: ret
%cmp_result = icmp ne <8 x i16> %vec, zeroinitializer
ret <8 x i1> %cmp_result
@@ -645,220 +492,28 @@ define i6 @no_combine_illegal_num_elements(<6 x i32> %vec) {
; Only apply the combine when casting a vector to a scalar.
define <2 x i8> @vector_to_vector_cast(<16 x i1> %arg) nounwind {
-; SDAG-LABEL: vector_to_vector_cast:
-; SDAG: ; %bb.0:
-; SDAG-NEXT: sub sp, sp, #16
-; SDAG-NEXT: shl.16b v0, v0, #7
-; SDAG-NEXT: adrp x8, lCPI20_0@PAGE
-; SDAG-NEXT: ldr q1, [x8, lCPI20_0@PAGEOFF]
-; SDAG-NEXT: add x8, sp, #14
-; SDAG-NEXT: cmlt.16b v0, v0, #0
-; SDAG-NEXT: and.16b v0, v0, v1
-; SDAG-NEXT: ext.16b v1, v0, v0, #8
-; SDAG-NEXT: zip1.16b v0, v0, v1
-; SDAG-NEXT: addv.8h h0, v0
-; SDAG-NEXT: str h0, [sp, #14]
-; SDAG-NEXT: ld1.b { v0 }[0], [x8]
-; SDAG-NEXT: orr x8, x8, #0x1
-; SDAG-NEXT: ld1.b { v0 }[4], [x8]
-; SDAG-NEXT: ; kill: def $d0 killed $d0 killed $q0
-; SDAG-NEXT: add sp, sp, #16
-; SDAG-NEXT: ret
-;
-; GISEL-LABEL: vector_to_vector_cast:
-; GISEL: ; %bb.0:
-; GISEL-NEXT: sub sp, sp, #16
-; GISEL-NEXT: umov.b w8, v0[1]
-; GISEL-NEXT: mov d1, v0[1]
-; GISEL-NEXT: umov.b w10, v0[1]
-; GISEL-NEXT: umov.b w9, v0[0]
-; GISEL-NEXT: umov.b w13, v0[0]
-; GISEL-NEXT: umov.b w14, v0[2]
-; GISEL-NEXT: umov.b w15, v0[3]
-; GISEL-NEXT: umov.b w11, v0[2]
-; GISEL-NEXT: umov.b w16, v0[4]
-; GISEL-NEXT: umov.b w17, v0[5]
-; GISEL-NEXT: umov.b w12, v0[3]
-; GISEL-NEXT: and w8, w8, #0x1
-; GISEL-NEXT: and w10, w10, #0x1
-; GISEL-NEXT: umov.b w0, v1[1]
-; GISEL-NEXT: bfi w9, w8, #1, #31
-; GISEL-NEXT: bfi w13, w10, #1, #31
-; GISEL-NEXT: and w14, w14, #0x1
-; GISEL-NEXT: umov.b w8, v1[0]
-; GISEL-NEXT: umov.b w10, v1[2]
-; GISEL-NEXT: and w15, w15, #0x1
-; GISEL-NEXT: orr w13, w13, w14, lsl #2
-; GISEL-NEXT: umov.b w14, v1[3]
-; GISEL-NEXT: and w11, w11, #0x1
-; GISEL-NEXT: and w0, w0, #0x1
-; GISEL-NEXT: and w16, w16, #0x1
-; GISEL-NEXT: orr w9, w9, w11, lsl #2
-; GISEL-NEXT: orr w13, w13, w15, lsl #3
-; GISEL-NEXT: umov.b w15, v1[4]
-; GISEL-NEXT: umov.b w11, v0[6]
-; GISEL-NEXT: bfi w8, w0, #1, #31
-; GISEL-NEXT: and w10, w10, #0x1
-; GISEL-NEXT: and w17, w17, #0x1
-; GISEL-NEXT: orr w13, w13, w16, lsl #4
-; GISEL-NEXT: and w14, w14, #0x1
-; GISEL-NEXT: umov.b w0, v0[7]
-; GISEL-NEXT: orr w8, w8, w10, lsl #2
-; GISEL-NEXT: umov.b w10, v1[5]
-; GISEL-NEXT: umov.b w16, v1[6]
-; GISEL-NEXT: orr w13, w13, w17, lsl #5
-; GISEL-NEXT: umov.b w17, v0[4]
-; GISEL-NEXT: and w15, w15, #0x1
-; GISEL-NEXT: orr w8, w8, w14, lsl #3
-; GISEL-NEXT: and w12, w12, #0x1
-; GISEL-NEXT: and w11, w11, #0x1
-; GISEL-NEXT: umov.b w14, v1[7]
-; GISEL-NEXT: orr w9, w9, w12, lsl #3
-; GISEL-NEXT: orr w11, w13, w11, lsl #6
-; GISEL-NEXT: orr w8, w8, w15, lsl #4
-; GISEL-NEXT: umov.b w15, v0[5]
-; GISEL-NEXT: and w10, w10, #0x1
-; GISEL-NEXT: and w0, w0, #0x1
-; GISEL-NEXT: and w12, w17, #0x1
-; GISEL-NEXT: umov.b w13, v0[1]
-; GISEL-NEXT: orr w8, w8, w10, lsl #5
-; GISEL-NEXT: and w16, w16, #0x1
-; GISEL-NEXT: orr w9, w9, w12, lsl #4
-; GISEL-NEXT: umov.b w10, v0[0]
-; GISEL-NEXT: orr w11, w11, w0, lsl #7
-; GISEL-NEXT: and w14, w14, #0x1
-; GISEL-NEXT: and w12, w15, #0x1
-; GISEL-NEXT: umov.b w15, v0[2]
-; GISEL-NEXT: orr w8, w8, w16, lsl #6
-; GISEL-NEXT: orr w9, w9, w12, lsl #5
-; GISEL-NEXT: umov.b w12, v0[6]
-; GISEL-NEXT: strb w11, [sp, #8]
-; GISEL-NEXT: and w11, w13, #0x1
-; GISEL-NEXT: umov.b w13, v0[3]
-; GISEL-NEXT: orr w8, w8, w14, lsl #7
-; GISEL-NEXT: umov.b w14, v0[7]
-; GISEL-NEXT: ldr b0, [sp, #8]
-; GISEL-NEXT: bfi w10, w11, #1, #31
-; GISEL-NEXT: and w11, w15, #0x1
-; GISEL-NEXT: strb w8, [sp, #9]
-; GISEL-NEXT: umov.b w15, v0[4]
-; GISEL-NEXT: and w8, w12, #0x1
-; GISEL-NEXT: orr w10, w10, w11, lsl #2
-; GISEL-NEXT: orr w8, w9, w8, lsl #6
-; GISEL-NEXT: and w9, w13, #0x1
-; GISEL-NEXT: umov.b w11, v0[1]
-; GISEL-NEXT: orr w9, w10, w9, lsl #3
-; GISEL-NEXT: umov.b w10, v0[5]
-; GISEL-NEXT: umov.b w12, v0[0]
-; GISEL-NEXT: and w13, w14, #0x1
-; GISEL-NEXT: umov.b w16, v0[2]
-; GISEL-NEXT: umov.b w17, v0[3]
-; GISEL-NEXT: and w14, w15, #0x1
-; GISEL-NEXT: umov.b w15, v0[2]
-; GISEL-NEXT: orr w8, w8, w13, lsl #7
-; GISEL-NEXT: orr w9, w9, w14, lsl #4
-; GISEL-NEXT: umov.b w13, v0[6]
-; GISEL-NEXT: and w11, w11, #0x1
-; GISEL-NEXT: umov.b w14, v0[3]
-; GISEL-NEXT: strb w8, [sp, #10]
-; GISEL-NEXT: and w8, w10, #0x1
-; GISEL-NEXT: bfi w12, w11, #1, #31
-; GISEL-NEXT: orr w8, w9, w8, lsl #5
-; GISEL-NEXT: umov.b w10, v0[4]
-; GISEL-NEXT: and w9, w15, #0x1
-; GISEL-NEXT: umov.b w11, v0[7]
-; GISEL-NEXT: umov.b w15, v0[1]
-; GISEL-NEXT: orr w9, w12, w9, lsl #2
-; GISEL-NEXT: umov.b w12, v0[5]
-; GISEL-NEXT: and w13, w13, #0x1
-; GISEL-NEXT: and w14, w14, #0x1
-; GISEL-NEXT: orr w8, w8, w13, lsl #6
-; GISEL-NEXT: umov.b w13, v0[0]
-; GISEL-NEXT: orr w9, w9, w14, lsl #3
-; GISEL-NEXT: and w10, w10, #0x1
-; GISEL-NEXT: umov.b w14, v0[6]
-; GISEL-NEXT: and w11, w11, #0x1
-; GISEL-NEXT: and w15, w15, #0x1
-; GISEL-NEXT: umov.b w0, v0[3]
-; GISEL-NEXT: orr w9, w9, w10, lsl #4
-; GISEL-NEXT: and w10, w12, #0x1
-; GISEL-NEXT: umov.b w12, v0[7]
-; GISEL-NEXT: orr w8, w8, w11, lsl #7
-; GISEL-NEXT: bfi w13, w15, #1, #31
-; GISEL-NEXT: and w11, w16, #0x1
-; GISEL-NEXT: orr w9, w9, w10, lsl #5
-; GISEL-NEXT: and w10, w14, #0x1
-; GISEL-NEXT: umov.b w14, v0[4]
-; GISEL-NEXT: strb w8, [sp, #11]
-; GISEL-NEXT: umov.b w15, v0[1]
-; GISEL-NEXT: umov.b w16, v0[3]
-; GISEL-NEXT: orr w8, w9, w10, lsl #6
-; GISEL-NEXT: orr w9, w13, w11, lsl #2
-; GISEL-NEXT: and w10, w12, #0x1
-; GISEL-NEXT: and w11, w17, #0x1
-; GISEL-NEXT: umov.b w12, v0[5]
-; GISEL-NEXT: umov.b w17, v0[0]
-; GISEL-NEXT: orr w8, w8, w10, lsl #7
-; GISEL-NEXT: orr w9, w9, w11, lsl #3
-; GISEL-NEXT: umov.b w10, v0[1]
-; GISEL-NEXT: and w11, w14, #0x1
-; GISEL-NEXT: umov.b w14, v0[0]
-; GISEL-NEXT: and w15, w15, #0x1
-; GISEL-NEXT: orr w9, w9, w11, lsl #4
-; GISEL-NEXT: umov.b w11, v0[2]
-; GISEL-NEXT: umov.b w13, v0[6]
-; GISEL-NEXT: and w12, w12, #0x1
-; GISEL-NEXT: bfi w17, w15, #1, #31
-; GISEL-NEXT: umov.b w15, v0[5]
-; GISEL-NEXT: orr w9, w9, w12, lsl #5
-; GISEL-NEXT: and w10, w10, #0x1
-; GISEL-NEXT: umov.b w12, v0[2]
-; GISEL-NEXT: bfi w14, w10, #1, #31
-; GISEL-NEXT: umov.b w10, v0[4]
-; GISEL-NEXT: ldr b1, [sp, #9]
-; GISEL-NEXT: and w11, w11, #0x1
-; GISEL-NEXT: and w13, w13, #0x1
-; GISEL-NEXT: strb w8, [sp, #12]
-; GISEL-NEXT: orr w11, w14, w11, lsl #2
-; GISEL-NEXT: and w14, w16, #0x1
-; GISEL-NEXT: umov.b w16, v0[4]
-; GISEL-NEXT: and w12, w12, #0x1
-; GISEL-NEXT: and w15, w15, #0x1
-; GISEL-NEXT: orr w9, w9, w13, lsl #6
-; GISEL-NEXT: orr w11, w11, w14, lsl #3
-; GISEL-NEXT: orr w12, w17, w12, lsl #2
-; GISEL-NEXT: and w10, w10, #0x1
-; GISEL-NEXT: and w17, w0, #0x1
-; GISEL-NEXT: umov.b w0, v0[5]
-; GISEL-NEXT: umov.b w14, v0[6]
-; GISEL-NEXT: orr w10, w11, w10, lsl #4
-; GISEL-NEXT: orr w12, w12, w17, lsl #3
-; GISEL-NEXT: umov.b w11, v0[7]
-; GISEL-NEXT: and w16, w16, #0x1
-; GISEL-NEXT: umov.b w17, v0[6]
-; GISEL-NEXT: orr w10, w10, w15, lsl #5
-; GISEL-NEXT: umov.b w15, v0[7]
-; GISEL-NEXT: orr w12, w12, w16, lsl #4
-; GISEL-NEXT: and w16, w0, #0x1
-; GISEL-NEXT: umov.b w0, v0[7]
-; GISEL-NEXT: and w14, w14, #0x1
-; GISEL-NEXT: orr w12, w12, w16, lsl #5
-; GISEL-NEXT: orr w10, w10, w14, lsl #6
-; GISEL-NEXT: and w11, w11, #0x1
-; GISEL-NEXT: and w13, w17, #0x1
-; GISEL-NEXT: orr w9, w9, w11, lsl #7
-; GISEL-NEXT: mov.s v0[1], v1[0]
-; GISEL-NEXT: orr w11, w12, w13, lsl #6
-; GISEL-NEXT: and w12, w15, #0x1
-; GISEL-NEXT: ; kill: def $d0 killed $d0 killed $q0
-; GISEL-NEXT: orr w8, w10, w12, lsl #7
-; GISEL-NEXT: and w10, w0, #0x1
-; GISEL-NEXT: strb w9, [sp, #13]
-; GISEL-NEXT: orr w9, w11, w10, lsl #7
-; GISEL-NEXT: strb w8, [sp, #14]
-; GISEL-NEXT: strb w9, [sp, #15]
-; GISEL-NEXT: add sp, sp, #16
-; GISEL-NEXT: ret
+; CHECK-LABEL: vector_to_vector_cast:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: sub sp, sp, #16
+; CHECK-NEXT: shl.16b v0, v0, #7
+; CHECK-NEXT: Lloh36:
+; CHECK-NEXT: adrp x8, lCPI20_0@PAGE
+; CHECK-NEXT: Lloh37:
+; CHECK-NEXT: ldr q1, [x8, lCPI20_0@PAGEOFF]
+; CHECK-NEXT: add x8, sp, #14
+; CHECK-NEXT: cmlt.16b v0, v0, #0
+; CHECK-NEXT: and.16b v0, v0, v1
+; CHECK-NEXT: ext.16b v1, v0, v0, #8
+; CHECK-NEXT: zip1.16b v0, v0, v1
+; CHECK-NEXT: addv.8h h0, v0
+; CHECK-NEXT: str h0, [sp, #14]
+; CHECK-NEXT: ld1.b { v0 }[0], [x8]
+; CHECK-NEXT: orr x8, x8, #0x1
+; CHECK-NEXT: ld1.b { v0 }[4], [x8]
+; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: add sp, sp, #16
+; CHECK-NEXT: ret
+; CHECK-NEXT: .loh AdrpLdr Lloh36, Lloh37
%bc = bitcast <16 x i1> %arg to <2 x i8>
ret <2 x i8> %bc
}
More information about the llvm-commits
mailing list