[llvm] [AArch64][GlobalISel] Legalization for small anyext/sext/zext (PR #86438)
David Green via llvm-commits
llvm-commits at lists.llvm.org
Sun Mar 24 07:32:42 PDT 2024
https://github.com/davemgreen created https://github.com/llvm/llvm-project/pull/86438
Similar to #85625, this adds legalization for small anyext/sext/zext vectors. Some of the codegen is still far from optimal, but it helps fix quite a few fallback cases.
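As a rough illustration, here is a minimal reduction of the kind of small-vector extend the new clampMinNumElements rules are aimed at (a hypothetical example I wrote for this description, not one of the tests in the patch): the non-legal v4s8 source of the extend is padded out to a legal 8-element vector rather than needing the SelectionDAG fallback path.

; e.g. llc -mtriple=aarch64 -global-isel -o - small-ext.ll
define <4 x i16> @zext_v4i8(<4 x i8> %a) {
  ; zero-extend from a small, non-legal <4 x i8>; the source is widened to v8s8
  %z = zext <4 x i8> %a to <4 x i16>
  ret <4 x i16> %z
}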
From 9eccbf14ff4c082ffa7a34a6e4c85df5e37de540 Mon Sep 17 00:00:00 2001
From: David Green <david.green at arm.com>
Date: Sun, 24 Mar 2024 14:30:36 +0000
Subject: [PATCH] [AArch64][GlobalISel] Legalization for small anyext/sext/zext
Similar to #85625, some of the codegen is still far from optimal but this
helps fix quite a few fallback cases.
---
.../CodeGen/GlobalISel/LegalizerHelper.cpp | 22 +-
.../AArch64/GISel/AArch64LegalizerInfo.cpp | 5 +-
.../GlobalISel/legalize-build-vector.mir | 27 +-
.../GlobalISel/legalize-insert-vector-elt.mir | 29 +-
.../AArch64/GlobalISel/legalize-select.mir | 57 ++--
.../legalize-shuffle-vector-widen-crash.ll | 14 +-
.../AArch64/GlobalISel/legalize-xtn.mir | 18 +-
llvm/test/CodeGen/AArch64/aarch64-smull.ll | 53 +++-
.../AArch64/arm64-extract-insert-varidx.ll | 34 +--
llvm/test/CodeGen/AArch64/bitcast.ll | 145 ++++++---
llvm/test/CodeGen/AArch64/bswap.ll | 26 +-
llvm/test/CodeGen/AArch64/fptoi.ll | 45 ++-
llvm/test/CodeGen/AArch64/itofp.ll | 278 +++++++++++++-----
llvm/test/CodeGen/AArch64/load.ll | 3 +-
.../AArch64/neon-bitwise-instructions.ll | 46 ++-
llvm/test/CodeGen/AArch64/shift.ll | 225 ++++++++++----
16 files changed, 714 insertions(+), 313 deletions(-)
diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index 1b25da8833e4fb..c3a23ea0ad3738 100644
--- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -5421,14 +5421,22 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
case TargetOpcode::G_FPTOUI:
case TargetOpcode::G_SITOFP:
case TargetOpcode::G_UITOFP: {
- if (TypeIdx != 0)
- return UnableToLegalize;
Observer.changingInstr(MI);
- LLT SrcTy = LLT::fixed_vector(
- MoreTy.getNumElements(),
- MRI.getType(MI.getOperand(1).getReg()).getElementType());
- moreElementsVectorSrc(MI, SrcTy, 1);
- moreElementsVectorDst(MI, MoreTy, 0);
+ LLT SrcExtTy;
+ LLT DstExtTy;
+ if (TypeIdx == 0) {
+ DstExtTy = MoreTy;
+ SrcExtTy = LLT::fixed_vector(
+ MoreTy.getNumElements(),
+ MRI.getType(MI.getOperand(1).getReg()).getElementType());
+ } else {
+ DstExtTy = LLT::fixed_vector(
+ MoreTy.getNumElements(),
+ MRI.getType(MI.getOperand(0).getReg()).getElementType());
+ SrcExtTy = MoreTy;
+ }
+ moreElementsVectorSrc(MI, SrcExtTy, 1);
+ moreElementsVectorDst(MI, DstExtTy, 0);
Observer.changedInstr(MI);
return Legalized;
}
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
index 34e2c1d9c8e2f9..33dba6a5c61eaf 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp
@@ -611,7 +611,9 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
Query.Types[0].isVector() &&
(Query.Types[1].getScalarSizeInBits() == 8 ||
Query.Types[1].getScalarSizeInBits() == 16);
- });
+ })
+ .clampMinNumElements(1, s8, 8)
+ .clampMinNumElements(1, s16, 4);
getActionDefinitionsBuilder(G_TRUNC)
.legalFor({{v2s32, v2s64}, {v4s16, v4s32}, {v8s8, v8s16}})
@@ -630,7 +632,6 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST)
})
.clampMinNumElements(0, s8, 8)
.clampMinNumElements(0, s16, 4)
- .clampMinNumElements(0, s32, 2)
.alwaysLegal();
getActionDefinitionsBuilder(G_SEXT_INREG)
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-build-vector.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-build-vector.mir
index c9556e27c6349a..a63d8b9c137725 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-build-vector.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-build-vector.mir
@@ -121,10 +121,11 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s16) = COPY $h0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s16) = COPY $h1
- ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[COPY]](s16)
- ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[COPY1]](s16)
- ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32)
- ; CHECK-NEXT: $d0 = COPY [[BUILD_VECTOR]](<2 x s32>)
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[COPY]](s16), [[COPY1]](s16), [[DEF]](s16), [[DEF]](s16)
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(<4 x s32>) = G_ANYEXT [[BUILD_VECTOR]](<4 x s16>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<2 x s32>), [[UV1:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[ANYEXT]](<4 x s32>)
+ ; CHECK-NEXT: $d0 = COPY [[UV]](<2 x s32>)
; CHECK-NEXT: RET_ReallyLR
%0:_(s16) = COPY $h0
%1:_(s16) = COPY $h1
@@ -141,8 +142,8 @@ body: |
; CHECK-LABEL: name: widen_v2s8
; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY [[DEF]](s32)
- ; CHECK-NEXT: %3:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[DEF]](s32)
- ; CHECK-NEXT: $d0 = COPY %3(<2 x s32>)
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[DEF]](s32)
+ ; CHECK-NEXT: $d0 = COPY [[BUILD_VECTOR]](<2 x s32>)
; CHECK-NEXT: RET_ReallyLR
%0:_(s8) = G_IMPLICIT_DEF
%1:_(s8) = G_IMPLICIT_DEF
@@ -157,12 +158,14 @@ name: widen_v4s8
body: |
bb.0:
; CHECK-LABEL: name: widen_v4s8
- ; CHECK: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s16) = COPY [[DEF]](s16)
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s16) = COPY [[DEF]](s16)
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s16) = COPY [[DEF]](s16)
- ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[COPY]](s16), [[COPY1]](s16), [[COPY2]](s16), [[DEF]](s16)
- ; CHECK-NEXT: $d0 = COPY [[BUILD_VECTOR]](<4 x s16>)
+ ; CHECK: [[DEF:%[0-9]+]]:_(s8) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s8) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s8) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[DEF3:%[0-9]+]]:_(s8) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[DEF]](s8), [[DEF1]](s8), [[DEF2]](s8), [[DEF3]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8)
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(<8 x s16>) = G_ANYEXT [[BUILD_VECTOR]](<8 x s8>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<4 x s16>), [[UV1:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[ANYEXT]](<8 x s16>)
+ ; CHECK-NEXT: $d0 = COPY [[UV]](<4 x s16>)
; CHECK-NEXT: RET_ReallyLR
%0:_(s8) = G_IMPLICIT_DEF
%1:_(s8) = G_IMPLICIT_DEF
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-insert-vector-elt.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-insert-vector-elt.mir
index e12353c7ef5bec..d3db2432e84cbc 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-insert-vector-elt.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-insert-vector-elt.mir
@@ -235,31 +235,32 @@ body: |
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32)
+ ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[COPY2]](s32)
+ ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[COPY3]](s32)
; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s8) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
- ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
- ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32)
- ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[TRUNC]](s16), [[TRUNC1]](s16), [[TRUNC2]](s16), [[DEF2]](s16)
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[TRUNC]](s8), [[TRUNC1]](s8), [[TRUNC2]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8)
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(<8 x s16>) = G_ANYEXT [[BUILD_VECTOR]](<8 x s8>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<4 x s16>), [[UV1:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[ANYEXT]](<8 x s16>)
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 0
- ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<4 x s16>) = G_INSERT_VECTOR_ELT [[BUILD_VECTOR]], [[C2]](s16), [[C1]](s64)
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[IVEC]](<4 x s16>)
- ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[UV]](s16)
- ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s8) = G_TRUNC [[UV1]](s16)
- ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[UV2]](s16)
+ ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<4 x s16>) = G_INSERT_VECTOR_ELT [[UV]], [[C2]](s16), [[C1]](s64)
+ ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[IVEC]](<4 x s16>)
+ ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[UV2]](s16)
+ ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s8) = G_TRUNC [[UV3]](s16)
+ ; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(s8) = G_TRUNC [[UV4]](s16)
; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<16 x s8>) = G_BUILD_VECTOR [[TRUNC3]](s8), [[TRUNC4]](s8), [[TRUNC5]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8)
; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<16 x s8>) = G_BUILD_VECTOR [[C]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8)
; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<16 x s8>) = G_SHUFFLE_VECTOR [[BUILD_VECTOR1]](<16 x s8>), [[BUILD_VECTOR2]], shufflemask(0, 16, 16, 16, 1, 16, 16, 16, 2, 16, 16, 16, undef, undef, undef, undef)
; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<4 x s32>) = G_BITCAST [[SHUF]](<16 x s8>)
; CHECK-NEXT: [[UITOFP:%[0-9]+]]:_(<4 x s32>) = G_UITOFP [[BITCAST]](<4 x s32>)
- ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UITOFP]](<4 x s32>)
- ; CHECK-NEXT: G_STORE [[UV4]](s32), [[COPY]](p0) :: (store (s32), align 16)
+ ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UITOFP]](<4 x s32>)
+ ; CHECK-NEXT: G_STORE [[UV6]](s32), [[COPY]](p0) :: (store (s32), align 16)
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; CHECK-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C3]](s64)
- ; CHECK-NEXT: G_STORE [[UV5]](s32), [[PTR_ADD]](p0) :: (store (s32) into unknown-address + 4)
+ ; CHECK-NEXT: G_STORE [[UV7]](s32), [[PTR_ADD]](p0) :: (store (s32) into unknown-address + 4)
; CHECK-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 8
; CHECK-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C4]](s64)
- ; CHECK-NEXT: G_STORE [[UV6]](s32), [[PTR_ADD1]](p0) :: (store (s32) into unknown-address + 8, align 8)
+ ; CHECK-NEXT: G_STORE [[UV8]](s32), [[PTR_ADD1]](p0) :: (store (s32) into unknown-address + 8, align 8)
; CHECK-NEXT: G_BR %bb.1
bb.1:
liveins: $w1, $w2, $w3, $x0
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-select.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-select.mir
index 63a26dcfea4762..e49a94c12ed468 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-select.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-select.mir
@@ -293,41 +293,44 @@ body: |
; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s32) = G_ICMP intpred(eq), %w0(s32), [[C]]
; CHECK-NEXT: [[SEXT_INREG:%[0-9]+]]:_(s32) = G_SEXT_INREG [[ICMP2]], 1
; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s8) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s16) = COPY [[DEF1]](s16)
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s16) = COPY [[DEF1]](s16)
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s16) = COPY [[DEF1]](s16)
- ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[COPY]](s16), [[COPY1]](s16), [[COPY2]](s16), [[DEF1]](s16)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
+ ; CHECK-NEXT: [[DEF1:%[0-9]+]]:_(s8) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8)
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(<8 x s16>) = G_ANYEXT [[BUILD_VECTOR]](<8 x s8>)
+ ; CHECK-NEXT: [[UV:%[0-9]+]]:_(<4 x s16>), [[UV1:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[ANYEXT]](<8 x s16>)
; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[SEXT_INREG]](s32)
- ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<4 x s16>) = G_INSERT_VECTOR_ELT [[BUILD_VECTOR]], [[TRUNC]](s16), [[C1]](s64)
- ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[IVEC]](<4 x s16>)
- ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[UV]](s16)
- ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[UV1]](s16)
- ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[UV2]](s16)
- ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s8) = G_TRUNC [[UV3]](s16)
- ; CHECK-NEXT: [[DEF2:%[0-9]+]]:_(s8) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[TRUNC1]](s8), [[TRUNC2]](s8), [[TRUNC3]](s8), [[TRUNC4]](s8), [[DEF2]](s8), [[DEF2]](s8), [[DEF2]](s8), [[DEF2]](s8)
- ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF2]](s8), [[DEF2]](s8), [[DEF2]](s8), [[DEF2]](s8)
+ ; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<4 x s16>) = G_INSERT_VECTOR_ELT [[UV]], [[TRUNC]](s16), [[C1]](s64)
+ ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[IVEC]](<4 x s16>)
+ ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[UV2]](s16)
+ ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[UV3]](s16)
+ ; CHECK-NEXT: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[UV4]](s16)
+ ; CHECK-NEXT: [[TRUNC4:%[0-9]+]]:_(s8) = G_TRUNC [[UV5]](s16)
+ ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[TRUNC1]](s8), [[TRUNC2]](s8), [[TRUNC3]](s8), [[TRUNC4]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8)
+ ; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8)
; CHECK-NEXT: [[SHUF:%[0-9]+]]:_(<8 x s8>) = G_SHUFFLE_VECTOR [[BUILD_VECTOR1]](<8 x s8>), [[BUILD_VECTOR2]], shufflemask(0, 0, 0, 0, undef, undef, undef, undef)
- ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(<4 x s8>), [[UV5:%[0-9]+]]:_(<4 x s8>) = G_UNMERGE_VALUES [[SHUF]](<8 x s8>)
- ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s16) = G_CONSTANT i16 1
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(s16) = COPY [[C2]](s16)
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s16) = COPY [[C2]](s16)
- ; CHECK-NEXT: [[COPY5:%[0-9]+]]:_(s16) = COPY [[C2]](s16)
- ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[COPY3]](s16), [[COPY4]](s16), [[COPY5]](s16), [[C2]](s16)
- ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT [[UV4]](<4 x s8>)
- ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<4 x s16>) = G_XOR [[ANYEXT]], [[BUILD_VECTOR3]]
+ ; CHECK-NEXT: [[UV6:%[0-9]+]]:_(<4 x s8>), [[UV7:%[0-9]+]]:_(<4 x s8>) = G_UNMERGE_VALUES [[SHUF]](<8 x s8>)
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s8) = G_CONSTANT i8 1
+ ; CHECK-NEXT: [[UV8:%[0-9]+]]:_(s8), [[UV9:%[0-9]+]]:_(s8), [[UV10:%[0-9]+]]:_(s8), [[UV11:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[UV6]](<4 x s8>)
+ ; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[UV8]](s8), [[UV9]](s8), [[UV10]](s8), [[UV11]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8)
+ ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(<8 x s16>) = G_ANYEXT [[BUILD_VECTOR3]](<8 x s8>)
+ ; CHECK-NEXT: [[UV12:%[0-9]+]]:_(<4 x s16>), [[UV13:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[ANYEXT1]](<8 x s16>)
+ ; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[C2]](s8), [[C2]](s8), [[C2]](s8), [[C2]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8)
+ ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(<8 x s16>) = G_ANYEXT [[BUILD_VECTOR4]](<8 x s8>)
+ ; CHECK-NEXT: [[UV14:%[0-9]+]]:_(<4 x s16>), [[UV15:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[ANYEXT2]](<8 x s16>)
+ ; CHECK-NEXT: [[XOR:%[0-9]+]]:_(<4 x s16>) = G_XOR [[UV12]], [[UV14]]
; CHECK-NEXT: [[TRUNC5:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[ICMP]](<4 x s32>)
- ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT [[UV4]](<4 x s8>)
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<4 x s16>) = G_AND [[TRUNC5]], [[ANYEXT1]]
+ ; CHECK-NEXT: [[UV16:%[0-9]+]]:_(s8), [[UV17:%[0-9]+]]:_(s8), [[UV18:%[0-9]+]]:_(s8), [[UV19:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[UV6]](<4 x s8>)
+ ; CHECK-NEXT: [[BUILD_VECTOR5:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[UV16]](s8), [[UV17]](s8), [[UV18]](s8), [[UV19]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8), [[DEF1]](s8)
+ ; CHECK-NEXT: [[ANYEXT3:%[0-9]+]]:_(<8 x s16>) = G_ANYEXT [[BUILD_VECTOR5]](<8 x s8>)
+ ; CHECK-NEXT: [[UV20:%[0-9]+]]:_(<4 x s16>), [[UV21:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[ANYEXT3]](<8 x s16>)
+ ; CHECK-NEXT: [[AND:%[0-9]+]]:_(<4 x s16>) = G_AND [[TRUNC5]], [[UV20]]
; CHECK-NEXT: [[TRUNC6:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[ICMP1]](<4 x s32>)
; CHECK-NEXT: [[AND1:%[0-9]+]]:_(<4 x s16>) = G_AND [[TRUNC6]], [[XOR]]
; CHECK-NEXT: [[OR:%[0-9]+]]:_(<4 x s16>) = G_OR [[AND]], [[AND1]]
- ; CHECK-NEXT: [[ANYEXT2:%[0-9]+]]:_(<4 x s32>) = G_ANYEXT [[OR]](<4 x s16>)
+ ; CHECK-NEXT: [[ANYEXT4:%[0-9]+]]:_(<4 x s32>) = G_ANYEXT [[OR]](<4 x s16>)
; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C3]](s32), [[C3]](s32), [[C3]](s32), [[C3]](s32)
- ; CHECK-NEXT: %zext_select:_(<4 x s32>) = G_AND [[ANYEXT2]], [[BUILD_VECTOR4]]
+ ; CHECK-NEXT: [[BUILD_VECTOR6:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[C3]](s32), [[C3]](s32), [[C3]](s32), [[C3]](s32)
+ ; CHECK-NEXT: %zext_select:_(<4 x s32>) = G_AND [[ANYEXT4]], [[BUILD_VECTOR6]]
; CHECK-NEXT: $q0 = COPY %zext_select(<4 x s32>)
; CHECK-NEXT: RET_ReallyLR implicit $q0
%w0:_(s32) = COPY $w0
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector-widen-crash.ll b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector-widen-crash.ll
index 42a8f51002f200..f7efaeaa507053 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector-widen-crash.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-shuffle-vector-widen-crash.ll
@@ -1,16 +1,24 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
-; RUN: llc -global-isel-abort=2 -global-isel -o - %s | FileCheck %s
+; RUN: llc -global-isel -o - %s | FileCheck %s
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
target triple = "arm64-apple-macosx11.0.0"
declare i32 @llvm.aarch64.neon.uaddv.i32.v4i32(<4 x i32>) #0
-; This test currently falls back but ensures we don't crash.
-
define i32 @bar() {
; CHECK-LABEL: bar:
; CHECK: ; %bb.0: ; %bb
; CHECK-NEXT: movi.2d v0, #0000000000000000
+; CHECK-NEXT: mov b1, v0[1]
+; CHECK-NEXT: mov b2, v0[2]
+; CHECK-NEXT: mov b3, v0[3]
+; CHECK-NEXT: mov.h v0[1], v1[0]
+; CHECK-NEXT: mov.h v2[1], v3[0]
+; CHECK-NEXT: ushll.4s v0, v0, #0
+; CHECK-NEXT: ushll.4s v1, v2, #0
+; CHECK-NEXT: mov.d v0[1], v1[0]
+; CHECK-NEXT: movi.4s v1, #1
+; CHECK-NEXT: and.16b v0, v0, v1
; CHECK-NEXT: addv.4s s0, v0
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-xtn.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-xtn.mir
index ed40a2ff7ea70f..e729f027baa715 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-xtn.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-xtn.mir
@@ -541,17 +541,13 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
- ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
- ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[DEF]](s32), [[DEF]](s32)
- ; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32)
- ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[BUILD_VECTOR]](<4 x s32>)
- ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(<4 x s16>) = G_TRUNC [[BUILD_VECTOR1]](<4 x s32>)
- ; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[TRUNC]](<4 x s16>), [[TRUNC1]](<4 x s16>)
- ; CHECK-NEXT: [[TRUNC2:%[0-9]+]]:_(<8 x s8>) = G_TRUNC [[CONCAT_VECTORS]](<8 x s16>)
- ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<2 x s8>), [[UV3:%[0-9]+]]:_(<2 x s8>), [[UV4:%[0-9]+]]:_(<2 x s8>), [[UV5:%[0-9]+]]:_(<2 x s8>) = G_UNMERGE_VALUES [[TRUNC2]](<8 x s8>)
- ; CHECK-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s8>) = G_CONCAT_VECTORS [[UV2]](<2 x s8>), [[UV2]](<2 x s8>)
- ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT [[CONCAT_VECTORS1]](<4 x s8>)
- ; CHECK-NEXT: $d0 = COPY [[ANYEXT]](<4 x s16>)
+ ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[UV]](s32)
+ ; CHECK-NEXT: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[UV1]](s32)
+ ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s8) = G_IMPLICIT_DEF
+ ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s8>) = G_BUILD_VECTOR [[TRUNC]](s8), [[TRUNC1]](s8), [[TRUNC]](s8), [[TRUNC1]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8), [[DEF]](s8)
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(<8 x s16>) = G_ANYEXT [[BUILD_VECTOR]](<8 x s8>)
+ ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(<4 x s16>), [[UV3:%[0-9]+]]:_(<4 x s16>) = G_UNMERGE_VALUES [[ANYEXT]](<8 x s16>)
+ ; CHECK-NEXT: $d0 = COPY [[UV2]](<4 x s16>)
; CHECK-NEXT: RET_ReallyLR implicit $d0
%0:_(<2 x s32>) = COPY $d0
%1:_(<2 x s8>) = G_TRUNC %0(<2 x s32>)
diff --git a/llvm/test/CodeGen/AArch64/aarch64-smull.ll b/llvm/test/CodeGen/AArch64/aarch64-smull.ll
index dbc5417e23133d..61a4f64ac2bfcb 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-smull.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-smull.ll
@@ -3,8 +3,7 @@
; RUN: llc -mtriple=aarch64-none-linux-gnu -mattr=+sve < %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SVE
; RUN: llc -mtriple=aarch64 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
-; CHECK-GI: warning: Instruction selection used fallback path for smull_zext_v4i16_v4i32
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for pmlsl2_v8i16_uzp1
+; CHECK-GI: warning: Instruction selection used fallback path for pmlsl2_v8i16_uzp1
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for smlsl2_v8i16_uzp1
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for umlsl2_v8i16_uzp1
; CHECK-GI-NEXT: warning: Instruction selection used fallback path for smlsl2_v4i32_uzp1
@@ -189,13 +188,49 @@ define <8 x i32> @smull_zext_v8i8_v8i32_top_bit_is_1(ptr %A, ptr %B) nounwind {
}
define <4 x i32> @smull_zext_v4i16_v4i32(ptr %A, ptr %B) nounwind {
-; CHECK-LABEL: smull_zext_v4i16_v4i32:
-; CHECK: // %bb.0:
-; CHECK-NEXT: ldr s0, [x0]
-; CHECK-NEXT: ldr d1, [x1]
-; CHECK-NEXT: ushll v0.8h, v0.8b, #0
-; CHECK-NEXT: smull v0.4s, v0.4h, v1.4h
-; CHECK-NEXT: ret
+; CHECK-NEON-LABEL: smull_zext_v4i16_v4i32:
+; CHECK-NEON: // %bb.0:
+; CHECK-NEON-NEXT: ldr s0, [x0]
+; CHECK-NEON-NEXT: ldr d1, [x1]
+; CHECK-NEON-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-NEON-NEXT: smull v0.4s, v0.4h, v1.4h
+; CHECK-NEON-NEXT: ret
+;
+; CHECK-SVE-LABEL: smull_zext_v4i16_v4i32:
+; CHECK-SVE: // %bb.0:
+; CHECK-SVE-NEXT: ldr s0, [x0]
+; CHECK-SVE-NEXT: ldr d1, [x1]
+; CHECK-SVE-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-SVE-NEXT: smull v0.4s, v0.4h, v1.4h
+; CHECK-SVE-NEXT: ret
+;
+; CHECK-GI-LABEL: smull_zext_v4i16_v4i32:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: ldr w8, [x0]
+; CHECK-GI-NEXT: fmov s0, w8
+; CHECK-GI-NEXT: uxtb w8, w8
+; CHECK-GI-NEXT: mov b1, v0.b[1]
+; CHECK-GI-NEXT: mov b2, v0.b[2]
+; CHECK-GI-NEXT: mov b3, v0.b[3]
+; CHECK-GI-NEXT: fmov s0, w8
+; CHECK-GI-NEXT: fmov w9, s1
+; CHECK-GI-NEXT: fmov w10, s2
+; CHECK-GI-NEXT: fmov w11, s3
+; CHECK-GI-NEXT: uxtb w9, w9
+; CHECK-GI-NEXT: uxtb w10, w10
+; CHECK-GI-NEXT: uxtb w11, w11
+; CHECK-GI-NEXT: fmov s1, w9
+; CHECK-GI-NEXT: fmov s2, w10
+; CHECK-GI-NEXT: fmov s3, w11
+; CHECK-GI-NEXT: mov v0.h[1], v1.h[0]
+; CHECK-GI-NEXT: mov v2.h[1], v3.h[0]
+; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-GI-NEXT: ushll v1.4s, v2.4h, #0
+; CHECK-GI-NEXT: ldr d2, [x1]
+; CHECK-GI-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-GI-NEXT: sshll v1.4s, v2.4h, #0
+; CHECK-GI-NEXT: mul v0.4s, v0.4s, v1.4s
+; CHECK-GI-NEXT: ret
%load.A = load <4 x i8>, ptr %A
%load.B = load <4 x i16>, ptr %B
%zext.A = zext <4 x i8> %load.A to <4 x i32>
diff --git a/llvm/test/CodeGen/AArch64/arm64-extract-insert-varidx.ll b/llvm/test/CodeGen/AArch64/arm64-extract-insert-varidx.ll
index a1e06936852726..bc399c8d4ff071 100644
--- a/llvm/test/CodeGen/AArch64/arm64-extract-insert-varidx.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-extract-insert-varidx.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast -aarch64-enable-sink-fold=true | FileCheck %s --check-prefix=CHECK-SDAG
-; RUN: llc < %s -global-isel -global-isel-abort=2 -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast -aarch64-enable-sink-fold=true | FileCheck %s --check-prefix=CHECK-GISEL
+; RUN: llc < %s -global-isel -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast -aarch64-enable-sink-fold=true | FileCheck %s --check-prefix=CHECK-GISEL
define <4 x i8> @test_varidx_extract_v8s8(<8 x i8> %x, i32 %idx) {
; CHECK-SDAG-LABEL: test_varidx_extract_v8s8:
@@ -29,20 +29,20 @@ define <4 x i8> @test_varidx_extract_v8s8(<8 x i8> %x, i32 %idx) {
; CHECK-GISEL-NEXT: .cfi_def_cfa_offset 16
; CHECK-GISEL-NEXT: mov w9, w0
; CHECK-GISEL-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GISEL-NEXT: mov b1, v0.b[1]
; CHECK-GISEL-NEXT: add x8, sp, #8
-; CHECK-GISEL-NEXT: str d0, [sp, #8]
; CHECK-GISEL-NEXT: and x9, x9, #0x7
-; CHECK-GISEL-NEXT: mov b2, v0.b[1]
+; CHECK-GISEL-NEXT: str d0, [sp, #8]
; CHECK-GISEL-NEXT: mov b3, v0.b[2]
; CHECK-GISEL-NEXT: lsl x10, x9, #1
; CHECK-GISEL-NEXT: mov b0, v0.b[3]
; CHECK-GISEL-NEXT: sub x9, x10, x9
-; CHECK-GISEL-NEXT: ldrb w8, [x8, x9]
-; CHECK-GISEL-NEXT: fmov s1, w8
-; CHECK-GISEL-NEXT: mov v1.h[1], v2.h[0]
-; CHECK-GISEL-NEXT: mov v1.h[2], v3.h[0]
-; CHECK-GISEL-NEXT: mov v1.h[3], v0.h[0]
-; CHECK-GISEL-NEXT: fmov d0, d1
+; CHECK-GISEL-NEXT: ldr b2, [x8, x9]
+; CHECK-GISEL-NEXT: mov v2.b[1], v1.b[0]
+; CHECK-GISEL-NEXT: mov v2.b[2], v3.b[0]
+; CHECK-GISEL-NEXT: mov v2.b[3], v0.b[0]
+; CHECK-GISEL-NEXT: ushll v0.8h, v2.8b, #0
+; CHECK-GISEL-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-GISEL-NEXT: add sp, sp, #16
; CHECK-GISEL-NEXT: ret
%tmp = extractelement <8 x i8> %x, i32 %idx
@@ -176,17 +176,15 @@ define <2 x i16> @test_varidx_extract_v4s16(<4 x i16> %x, i32 %idx) {
; CHECK-GISEL: // %bb.0:
; CHECK-GISEL-NEXT: sub sp, sp, #16
; CHECK-GISEL-NEXT: .cfi_def_cfa_offset 16
-; CHECK-GISEL-NEXT: mov w9, w0
-; CHECK-GISEL-NEXT: mov w8, #2 // =0x2
-; CHECK-GISEL-NEXT: add x10, sp, #8
-; CHECK-GISEL-NEXT: and x9, x9, #0x3
; CHECK-GISEL-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GISEL-NEXT: mov w9, w0
+; CHECK-GISEL-NEXT: mov h1, v0.h[1]
+; CHECK-GISEL-NEXT: add x8, sp, #8
; CHECK-GISEL-NEXT: str d0, [sp, #8]
-; CHECK-GISEL-NEXT: madd x8, x9, x8, x10
-; CHECK-GISEL-NEXT: umov w9, v0.h[1]
-; CHECK-GISEL-NEXT: fmov s1, w9
-; CHECK-GISEL-NEXT: ldr h0, [x8]
-; CHECK-GISEL-NEXT: mov v0.s[1], v1.s[0]
+; CHECK-GISEL-NEXT: and x9, x9, #0x3
+; CHECK-GISEL-NEXT: ldr h0, [x8, x9, lsl #1]
+; CHECK-GISEL-NEXT: mov v0.h[1], v1.h[0]
+; CHECK-GISEL-NEXT: ushll v0.4s, v0.4h, #0
; CHECK-GISEL-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-GISEL-NEXT: add sp, sp, #16
; CHECK-GISEL-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/bitcast.ll b/llvm/test/CodeGen/AArch64/bitcast.ll
index b7c02d6855a9e9..e0851fd8739ec9 100644
--- a/llvm/test/CodeGen/AArch64/bitcast.ll
+++ b/llvm/test/CodeGen/AArch64/bitcast.ll
@@ -1,14 +1,9 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -mtriple=aarch64 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-SD
-; RUN: llc -mtriple=aarch64 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
+; RUN: llc -mtriple=aarch64 -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI
; PR23065: SCALAR_TO_VECTOR implies the top elements 1 to N-1 of the N-element vector are undefined.
-; CHECK-GI: warning: Instruction selection used fallback path for bitcast_i32_v4i8
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for bitcast_i32_v2i16
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for bitcast_v2i16_v4i8
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for bitcast_v4i8_v2i16
-
define <4 x i16> @foo1(<2 x i32> %a) {
; CHECK-SD-LABEL: foo1:
; CHECK-SD: // %bb.0:
@@ -80,12 +75,26 @@ define i32 @bitcast_v4i8_i32(<4 x i8> %a, <4 x i8> %b){
}
define <4 x i8> @bitcast_i32_v4i8(i32 %a, i32 %b){
-; CHECK-LABEL: bitcast_i32_v4i8:
-; CHECK: // %bb.0:
-; CHECK-NEXT: add w8, w0, w1
-; CHECK-NEXT: fmov s0, w8
-; CHECK-NEXT: zip1 v0.8b, v0.8b, v0.8b
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: bitcast_i32_v4i8:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: add w8, w0, w1
+; CHECK-SD-NEXT: fmov s0, w8
+; CHECK-SD-NEXT: zip1 v0.8b, v0.8b, v0.8b
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: bitcast_i32_v4i8:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: add w8, w0, w1
+; CHECK-GI-NEXT: fmov s0, w8
+; CHECK-GI-NEXT: mov b1, v0.b[1]
+; CHECK-GI-NEXT: mov b2, v0.b[2]
+; CHECK-GI-NEXT: mov b3, v0.b[3]
+; CHECK-GI-NEXT: mov v0.b[1], v1.b[0]
+; CHECK-GI-NEXT: mov v0.b[2], v2.b[0]
+; CHECK-GI-NEXT: mov v0.b[3], v3.b[0]
+; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT: ret
%c = add i32 %a, %b
%d = bitcast i32 %c to <4 x i8>
ret <4 x i8> %d
@@ -119,13 +128,23 @@ define i32 @bitcast_v2i16_i32(<2 x i16> %a, <2 x i16> %b){
}
define <2 x i16> @bitcast_i32_v2i16(i32 %a, i32 %b){
-; CHECK-LABEL: bitcast_i32_v2i16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: add w8, w0, w1
-; CHECK-NEXT: fmov s0, w8
-; CHECK-NEXT: ushll v0.4s, v0.4h, #0
-; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: bitcast_i32_v2i16:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: add w8, w0, w1
+; CHECK-SD-NEXT: fmov s0, w8
+; CHECK-SD-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: bitcast_i32_v2i16:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: add w8, w0, w1
+; CHECK-GI-NEXT: fmov s0, w8
+; CHECK-GI-NEXT: mov h1, v0.h[1]
+; CHECK-GI-NEXT: mov v0.h[1], v1.h[0]
+; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT: ret
%c = add i32 %a, %b
%d = bitcast i32 %c to <2 x i16>
ret <2 x i16> %d
@@ -382,40 +401,72 @@ define <8 x i16> @bitcast_v16i8_v8i16(<16 x i8> %a, <16 x i8> %b){
; ===== Smaller/Larger Width Vectors with Legal Element Sizes =====
define <4 x i8> @bitcast_v2i16_v4i8(<2 x i16> %a, <2 x i16> %b){
-; CHECK-LABEL: bitcast_v2i16_v4i8:
-; CHECK: // %bb.0:
-; CHECK-NEXT: sub sp, sp, #16
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: add v0.2s, v0.2s, v1.2s
-; CHECK-NEXT: mov w8, v0.s[1]
-; CHECK-NEXT: fmov w9, s0
-; CHECK-NEXT: strh w9, [sp, #12]
-; CHECK-NEXT: strh w8, [sp, #14]
-; CHECK-NEXT: ldr s0, [sp, #12]
-; CHECK-NEXT: ushll v0.8h, v0.8b, #0
-; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-NEXT: add sp, sp, #16
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: bitcast_v2i16_v4i8:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: sub sp, sp, #16
+; CHECK-SD-NEXT: .cfi_def_cfa_offset 16
+; CHECK-SD-NEXT: add v0.2s, v0.2s, v1.2s
+; CHECK-SD-NEXT: mov w8, v0.s[1]
+; CHECK-SD-NEXT: fmov w9, s0
+; CHECK-SD-NEXT: strh w9, [sp, #12]
+; CHECK-SD-NEXT: strh w8, [sp, #14]
+; CHECK-SD-NEXT: ldr s0, [sp, #12]
+; CHECK-SD-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT: add sp, sp, #16
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: bitcast_v2i16_v4i8:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: add v0.2s, v0.2s, v1.2s
+; CHECK-GI-NEXT: mov s1, v0.s[1]
+; CHECK-GI-NEXT: mov v0.s[1], v1.s[0]
+; CHECK-GI-NEXT: xtn v0.4h, v0.4s
+; CHECK-GI-NEXT: mov b1, v0.b[1]
+; CHECK-GI-NEXT: mov b2, v0.b[2]
+; CHECK-GI-NEXT: mov b3, v0.b[3]
+; CHECK-GI-NEXT: mov v0.b[1], v1.b[0]
+; CHECK-GI-NEXT: mov v0.b[2], v2.b[0]
+; CHECK-GI-NEXT: mov v0.b[3], v3.b[0]
+; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT: ret
%c = add <2 x i16> %a, %b
%d = bitcast <2 x i16> %c to <4 x i8>
ret <4 x i8> %d
}
define <2 x i16> @bitcast_v4i8_v2i16(<4 x i8> %a, <4 x i8> %b){
-; CHECK-LABEL: bitcast_v4i8_v2i16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: sub sp, sp, #16
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: add v0.4h, v0.4h, v1.4h
-; CHECK-NEXT: add x8, sp, #12
-; CHECK-NEXT: uzp1 v0.8b, v0.8b, v0.8b
-; CHECK-NEXT: str s0, [sp, #12]
-; CHECK-NEXT: ld1 { v0.h }[0], [x8]
-; CHECK-NEXT: orr x8, x8, #0x2
-; CHECK-NEXT: ld1 { v0.h }[2], [x8]
-; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-NEXT: add sp, sp, #16
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: bitcast_v4i8_v2i16:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: sub sp, sp, #16
+; CHECK-SD-NEXT: .cfi_def_cfa_offset 16
+; CHECK-SD-NEXT: add v0.4h, v0.4h, v1.4h
+; CHECK-SD-NEXT: add x8, sp, #12
+; CHECK-SD-NEXT: uzp1 v0.8b, v0.8b, v0.8b
+; CHECK-SD-NEXT: str s0, [sp, #12]
+; CHECK-SD-NEXT: ld1 { v0.h }[0], [x8]
+; CHECK-SD-NEXT: orr x8, x8, #0x2
+; CHECK-SD-NEXT: ld1 { v0.h }[2], [x8]
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT: add sp, sp, #16
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: bitcast_v4i8_v2i16:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: add v0.4h, v0.4h, v1.4h
+; CHECK-GI-NEXT: mov h1, v0.h[1]
+; CHECK-GI-NEXT: mov h2, v0.h[2]
+; CHECK-GI-NEXT: mov h3, v0.h[3]
+; CHECK-GI-NEXT: mov v0.h[1], v1.h[0]
+; CHECK-GI-NEXT: mov v0.h[2], v2.h[0]
+; CHECK-GI-NEXT: mov v0.h[3], v3.h[0]
+; CHECK-GI-NEXT: xtn v0.8b, v0.8h
+; CHECK-GI-NEXT: mov h1, v0.h[1]
+; CHECK-GI-NEXT: mov v0.h[1], v1.h[0]
+; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT: ret
%c = add <4 x i8> %a, %b
%d = bitcast <4 x i8> %c to <2 x i16>
ret <2 x i16> %d
diff --git a/llvm/test/CodeGen/AArch64/bswap.ll b/llvm/test/CodeGen/AArch64/bswap.ll
index f4221accfcbc52..071613b9cc011e 100644
--- a/llvm/test/CodeGen/AArch64/bswap.ll
+++ b/llvm/test/CodeGen/AArch64/bswap.ll
@@ -1,8 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -mtriple=aarch64 %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
-; RUN: llc -mtriple=aarch64 -global-isel -global-isel-abort=2 %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
-
-; CHECK-GI: warning: Instruction selection used fallback path for bswap_v2i16
+; RUN: llc -mtriple=aarch64 -global-isel %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI
; ====== Scalar Tests =====
define i16 @bswap_i16(i16 %a){
@@ -103,11 +101,23 @@ declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>)
; ===== Smaller/Larger Width Vectors with Legal Element Sizes =====
define <2 x i16> @bswap_v2i16(<2 x i16> %a){
-; CHECK-LABEL: bswap_v2i16:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: rev32 v0.8b, v0.8b
-; CHECK-NEXT: ushr v0.2s, v0.2s, #16
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: bswap_v2i16:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: rev32 v0.8b, v0.8b
+; CHECK-SD-NEXT: ushr v0.2s, v0.2s, #16
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: bswap_v2i16:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT: mov s1, v0.s[1]
+; CHECK-GI-NEXT: mov v0.h[1], v1.h[0]
+; CHECK-GI-NEXT: rev16 v0.8b, v0.8b
+; CHECK-GI-NEXT: mov h1, v0.h[1]
+; CHECK-GI-NEXT: mov v0.h[1], v1.h[0]
+; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT: ret
entry:
%res = call <2 x i16> @llvm.bswap.v2i16(<2 x i16> %a)
ret <2 x i16> %res
diff --git a/llvm/test/CodeGen/AArch64/fptoi.ll b/llvm/test/CodeGen/AArch64/fptoi.ll
index 7af01b53dae7e8..01585d02adcb13 100644
--- a/llvm/test/CodeGen/AArch64/fptoi.ll
+++ b/llvm/test/CodeGen/AArch64/fptoi.ll
@@ -1,13 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -mtriple=aarch64 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD,CHECK-SD-NOFP16
; RUN: llc -mtriple=aarch64 -mattr=+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD,CHECK-SD-FP16
-; RUN: llc -mtriple=aarch64 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-NOFP16
-; RUN: llc -mtriple=aarch64 -mattr=+fullfp16 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-FP16
-
-; CHECK-GI-FP16: warning: Instruction selection used fallback path for fptos_v2f16_v2i16
-; CHECK-GI-FP16-NEXT: warning: Instruction selection used fallback path for fptou_v2f16_v2i16
-; CHECK-GI-FP16-NEXT: warning: Instruction selection used fallback path for fptos_v2f16_v2i8
-; CHECK-GI-FP16-NEXT: warning: Instruction selection used fallback path for fptou_v2f16_v2i8
+; RUN: llc -mtriple=aarch64 -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-NOFP16
+; RUN: llc -mtriple=aarch64 -mattr=+fullfp16 -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-FP16
define i64 @fptos_f64_i64(double %a) {
; CHECK-LABEL: fptos_f64_i64:
@@ -5184,8 +5179,13 @@ define <2 x i16> @fptos_v2f16_v2i16(<2 x half> %a) {
;
; CHECK-GI-FP16-LABEL: fptos_v2f16_v2i16:
; CHECK-GI-FP16: // %bb.0: // %entry
-; CHECK-GI-FP16-NEXT: fcvtl v0.4s, v0.4h
-; CHECK-GI-FP16-NEXT: fcvtzs v0.4s, v0.4s
+; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-FP16-NEXT: mov h1, v0.h[1]
+; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0]
+; CHECK-GI-FP16-NEXT: fcvtzs v0.4h, v0.4h
+; CHECK-GI-FP16-NEXT: mov h1, v0.h[1]
+; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0]
+; CHECK-GI-FP16-NEXT: ushll v0.4s, v0.4h, #0
; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-FP16-NEXT: ret
entry:
@@ -5212,8 +5212,13 @@ define <2 x i16> @fptou_v2f16_v2i16(<2 x half> %a) {
;
; CHECK-GI-FP16-LABEL: fptou_v2f16_v2i16:
; CHECK-GI-FP16: // %bb.0: // %entry
-; CHECK-GI-FP16-NEXT: fcvtl v0.4s, v0.4h
-; CHECK-GI-FP16-NEXT: fcvtzs v0.4s, v0.4s
+; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-FP16-NEXT: mov h1, v0.h[1]
+; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0]
+; CHECK-GI-FP16-NEXT: fcvtzu v0.4h, v0.4h
+; CHECK-GI-FP16-NEXT: mov h1, v0.h[1]
+; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0]
+; CHECK-GI-FP16-NEXT: ushll v0.4s, v0.4h, #0
; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-FP16-NEXT: ret
entry:
@@ -5658,8 +5663,13 @@ define <2 x i8> @fptos_v2f16_v2i8(<2 x half> %a) {
;
; CHECK-GI-FP16-LABEL: fptos_v2f16_v2i8:
; CHECK-GI-FP16: // %bb.0: // %entry
-; CHECK-GI-FP16-NEXT: fcvtl v0.4s, v0.4h
-; CHECK-GI-FP16-NEXT: fcvtzs v0.4s, v0.4s
+; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-FP16-NEXT: mov h1, v0.h[1]
+; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0]
+; CHECK-GI-FP16-NEXT: fcvtzs v0.4h, v0.4h
+; CHECK-GI-FP16-NEXT: mov h1, v0.h[1]
+; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0]
+; CHECK-GI-FP16-NEXT: ushll v0.4s, v0.4h, #0
; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-FP16-NEXT: ret
entry:
@@ -5686,8 +5696,13 @@ define <2 x i8> @fptou_v2f16_v2i8(<2 x half> %a) {
;
; CHECK-GI-FP16-LABEL: fptou_v2f16_v2i8:
; CHECK-GI-FP16: // %bb.0: // %entry
-; CHECK-GI-FP16-NEXT: fcvtl v0.4s, v0.4h
-; CHECK-GI-FP16-NEXT: fcvtzs v0.4s, v0.4s
+; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-FP16-NEXT: mov h1, v0.h[1]
+; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0]
+; CHECK-GI-FP16-NEXT: fcvtzu v0.4h, v0.4h
+; CHECK-GI-FP16-NEXT: mov h1, v0.h[1]
+; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0]
+; CHECK-GI-FP16-NEXT: ushll v0.4s, v0.4h, #0
; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-FP16-NEXT: ret
entry:
diff --git a/llvm/test/CodeGen/AArch64/itofp.ll b/llvm/test/CodeGen/AArch64/itofp.ll
index ceac37f91238a8..f5a7b5dc9f4922 100644
--- a/llvm/test/CodeGen/AArch64/itofp.ll
+++ b/llvm/test/CodeGen/AArch64/itofp.ll
@@ -4,13 +4,6 @@
; RUN: llc -mtriple=aarch64 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-NOFP16
; RUN: llc -mtriple=aarch64 -mattr=+fullfp16 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-FP16
-; CHECK-GI: warning: Instruction selection used fallback path for stofp_v3i8_v3f64
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for utofp_v3i8_v3f64
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for stofp_v3i8_v3f32
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for utofp_v3i8_v3f32
-; CHECK-GI-NOFP16-NEXT: warning: Instruction selection used fallback path for stofp_v3i8_v3f16
-; CHECK-GI-NOFP16-NEXT: warning: Instruction selection used fallback path for utofp_v3i8_v3f16
-
define double @stofp_i64_f64(i64 %a) {
; CHECK-LABEL: stofp_i64_f64:
; CHECK: // %bb.0: // %entry
@@ -1754,47 +1747,109 @@ entry:
}
define <3 x double> @stofp_v3i8_v3f64(<3 x i8> %a) {
-; CHECK-LABEL: stofp_v3i8_v3f64:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fmov s0, w0
-; CHECK-NEXT: fmov s1, w2
-; CHECK-NEXT: mov v0.s[1], w1
-; CHECK-NEXT: shl v1.2s, v1.2s, #24
-; CHECK-NEXT: sshr v1.2s, v1.2s, #24
-; CHECK-NEXT: shl v0.2s, v0.2s, #24
-; CHECK-NEXT: sshll v1.2d, v1.2s, #0
-; CHECK-NEXT: sshr v0.2s, v0.2s, #24
-; CHECK-NEXT: scvtf v2.2d, v1.2d
-; CHECK-NEXT: sshll v0.2d, v0.2s, #0
-; CHECK-NEXT: // kill: def $d2 killed $d2 killed $q2
-; CHECK-NEXT: scvtf v0.2d, v0.2d
-; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q1
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: stofp_v3i8_v3f64:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: fmov s0, w0
+; CHECK-SD-NEXT: fmov s1, w2
+; CHECK-SD-NEXT: mov v0.s[1], w1
+; CHECK-SD-NEXT: shl v1.2s, v1.2s, #24
+; CHECK-SD-NEXT: sshr v1.2s, v1.2s, #24
+; CHECK-SD-NEXT: shl v0.2s, v0.2s, #24
+; CHECK-SD-NEXT: sshll v1.2d, v1.2s, #0
+; CHECK-SD-NEXT: sshr v0.2s, v0.2s, #24
+; CHECK-SD-NEXT: scvtf v2.2d, v1.2d
+; CHECK-SD-NEXT: sshll v0.2d, v0.2s, #0
+; CHECK-SD-NEXT: // kill: def $d2 killed $d2 killed $q2
+; CHECK-SD-NEXT: scvtf v0.2d, v0.2d
+; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT: // kill: def $d1 killed $d1 killed $q1
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: stofp_v3i8_v3f64:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: fmov s0, w0
+; CHECK-GI-NEXT: fmov s1, w1
+; CHECK-GI-NEXT: mov v0.h[1], v1.h[0]
+; CHECK-GI-NEXT: fmov s1, w2
+; CHECK-GI-NEXT: mov v0.h[2], v1.h[0]
+; CHECK-GI-NEXT: shl v0.4h, v0.4h, #8
+; CHECK-GI-NEXT: sshr v0.4h, v0.4h, #8
+; CHECK-GI-NEXT: mov h2, v0.h[1]
+; CHECK-GI-NEXT: mov s1, v0.s[1]
+; CHECK-GI-NEXT: mov v0.h[1], v2.h[0]
+; CHECK-GI-NEXT: mov h2, v1.h[1]
+; CHECK-GI-NEXT: sshll v0.4s, v0.4h, #0
+; CHECK-GI-NEXT: mov v1.h[1], v2.h[0]
+; CHECK-GI-NEXT: smov x8, v0.s[0]
+; CHECK-GI-NEXT: smov x9, v0.s[1]
+; CHECK-GI-NEXT: sshll v0.4s, v1.4h, #0
+; CHECK-GI-NEXT: fmov d1, x8
+; CHECK-GI-NEXT: smov x8, v0.s[0]
+; CHECK-GI-NEXT: mov v1.d[1], x9
+; CHECK-GI-NEXT: smov x9, v0.s[1]
+; CHECK-GI-NEXT: fmov d2, x8
+; CHECK-GI-NEXT: scvtf v0.2d, v1.2d
+; CHECK-GI-NEXT: mov v2.d[1], x9
+; CHECK-GI-NEXT: mov d1, v0.d[1]
+; CHECK-GI-NEXT: scvtf v2.2d, v2.2d
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT: // kill: def $d2 killed $d2 killed $q2
+; CHECK-GI-NEXT: ret
entry:
%c = sitofp <3 x i8> %a to <3 x double>
ret <3 x double> %c
}
define <3 x double> @utofp_v3i8_v3f64(<3 x i8> %a) {
-; CHECK-LABEL: utofp_v3i8_v3f64:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fmov s0, w0
-; CHECK-NEXT: movi d1, #0x0000ff000000ff
-; CHECK-NEXT: fmov s2, w2
-; CHECK-NEXT: mov v0.s[1], w1
-; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
-; CHECK-NEXT: and v1.8b, v2.8b, v1.8b
-; CHECK-NEXT: ushll v0.2d, v0.2s, #0
-; CHECK-NEXT: ushll v1.2d, v1.2s, #0
-; CHECK-NEXT: ucvtf v0.2d, v0.2d
-; CHECK-NEXT: ucvtf v2.2d, v1.2d
-; CHECK-NEXT: // kill: def $d2 killed $d2 killed $q2
-; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q1
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: utofp_v3i8_v3f64:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: fmov s0, w0
+; CHECK-SD-NEXT: movi d1, #0x0000ff000000ff
+; CHECK-SD-NEXT: fmov s2, w2
+; CHECK-SD-NEXT: mov v0.s[1], w1
+; CHECK-SD-NEXT: and v0.8b, v0.8b, v1.8b
+; CHECK-SD-NEXT: and v1.8b, v2.8b, v1.8b
+; CHECK-SD-NEXT: ushll v0.2d, v0.2s, #0
+; CHECK-SD-NEXT: ushll v1.2d, v1.2s, #0
+; CHECK-SD-NEXT: ucvtf v0.2d, v0.2d
+; CHECK-SD-NEXT: ucvtf v2.2d, v1.2d
+; CHECK-SD-NEXT: // kill: def $d2 killed $d2 killed $q2
+; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-SD-NEXT: // kill: def $d1 killed $d1 killed $q1
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: utofp_v3i8_v3f64:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: fmov s0, w0
+; CHECK-GI-NEXT: fmov s1, w1
+; CHECK-GI-NEXT: mov v0.h[1], v1.h[0]
+; CHECK-GI-NEXT: fmov s1, w2
+; CHECK-GI-NEXT: mov v0.h[2], v1.h[0]
+; CHECK-GI-NEXT: movi d1, #0xff00ff00ff00ff
+; CHECK-GI-NEXT: and v0.8b, v0.8b, v1.8b
+; CHECK-GI-NEXT: mov h2, v0.h[1]
+; CHECK-GI-NEXT: mov s1, v0.s[1]
+; CHECK-GI-NEXT: mov v0.h[1], v2.h[0]
+; CHECK-GI-NEXT: mov h2, v1.h[1]
+; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-GI-NEXT: mov v1.h[1], v2.h[0]
+; CHECK-GI-NEXT: mov w8, v0.s[0]
+; CHECK-GI-NEXT: mov w9, v0.s[1]
+; CHECK-GI-NEXT: ushll v0.4s, v1.4h, #0
+; CHECK-GI-NEXT: fmov d1, x8
+; CHECK-GI-NEXT: mov w8, v0.s[0]
+; CHECK-GI-NEXT: mov v1.d[1], x9
+; CHECK-GI-NEXT: mov w9, v0.s[1]
+; CHECK-GI-NEXT: fmov d2, x8
+; CHECK-GI-NEXT: ucvtf v0.2d, v1.2d
+; CHECK-GI-NEXT: mov v2.d[1], x9
+; CHECK-GI-NEXT: mov d1, v0.d[1]
+; CHECK-GI-NEXT: ucvtf v2.2d, v2.2d
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT: // kill: def $d2 killed $d2 killed $q2
+; CHECK-GI-NEXT: ret
entry:
%c = uitofp <3 x i8> %a to <3 x double>
ret <3 x double> %c
@@ -3372,31 +3427,71 @@ entry:
}
define <3 x float> @stofp_v3i8_v3f32(<3 x i8> %a) {
-; CHECK-LABEL: stofp_v3i8_v3f32:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fmov s0, w0
-; CHECK-NEXT: mov v0.h[1], w1
-; CHECK-NEXT: mov v0.h[2], w2
-; CHECK-NEXT: shl v0.4h, v0.4h, #8
-; CHECK-NEXT: sshr v0.4h, v0.4h, #8
-; CHECK-NEXT: sshll v0.4s, v0.4h, #0
-; CHECK-NEXT: scvtf v0.4s, v0.4s
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: stofp_v3i8_v3f32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: fmov s0, w0
+; CHECK-SD-NEXT: mov v0.h[1], w1
+; CHECK-SD-NEXT: mov v0.h[2], w2
+; CHECK-SD-NEXT: shl v0.4h, v0.4h, #8
+; CHECK-SD-NEXT: sshr v0.4h, v0.4h, #8
+; CHECK-SD-NEXT: sshll v0.4s, v0.4h, #0
+; CHECK-SD-NEXT: scvtf v0.4s, v0.4s
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: stofp_v3i8_v3f32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: fmov s0, w0
+; CHECK-GI-NEXT: fmov s1, w1
+; CHECK-GI-NEXT: mov v0.h[1], v1.h[0]
+; CHECK-GI-NEXT: fmov s1, w2
+; CHECK-GI-NEXT: mov v0.h[2], v1.h[0]
+; CHECK-GI-NEXT: shl v0.4h, v0.4h, #8
+; CHECK-GI-NEXT: sshr v0.4h, v0.4h, #8
+; CHECK-GI-NEXT: mov s1, v0.s[1]
+; CHECK-GI-NEXT: mov h2, v0.h[1]
+; CHECK-GI-NEXT: mov h3, v1.h[1]
+; CHECK-GI-NEXT: mov v0.h[1], v2.h[0]
+; CHECK-GI-NEXT: mov v1.h[1], v3.h[0]
+; CHECK-GI-NEXT: sshll v0.4s, v0.4h, #0
+; CHECK-GI-NEXT: sshll v1.4s, v1.4h, #0
+; CHECK-GI-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-GI-NEXT: scvtf v0.4s, v0.4s
+; CHECK-GI-NEXT: ret
entry:
%c = sitofp <3 x i8> %a to <3 x float>
ret <3 x float> %c
}
define <3 x float> @utofp_v3i8_v3f32(<3 x i8> %a) {
-; CHECK-LABEL: utofp_v3i8_v3f32:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fmov s0, w0
-; CHECK-NEXT: mov v0.h[1], w1
-; CHECK-NEXT: mov v0.h[2], w2
-; CHECK-NEXT: bic v0.4h, #255, lsl #8
-; CHECK-NEXT: ushll v0.4s, v0.4h, #0
-; CHECK-NEXT: ucvtf v0.4s, v0.4s
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: utofp_v3i8_v3f32:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: fmov s0, w0
+; CHECK-SD-NEXT: mov v0.h[1], w1
+; CHECK-SD-NEXT: mov v0.h[2], w2
+; CHECK-SD-NEXT: bic v0.4h, #255, lsl #8
+; CHECK-SD-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-SD-NEXT: ucvtf v0.4s, v0.4s
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: utofp_v3i8_v3f32:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: fmov s0, w0
+; CHECK-GI-NEXT: fmov s1, w1
+; CHECK-GI-NEXT: mov v0.h[1], v1.h[0]
+; CHECK-GI-NEXT: fmov s1, w2
+; CHECK-GI-NEXT: mov v0.h[2], v1.h[0]
+; CHECK-GI-NEXT: movi d1, #0xff00ff00ff00ff
+; CHECK-GI-NEXT: and v0.8b, v0.8b, v1.8b
+; CHECK-GI-NEXT: mov s1, v0.s[1]
+; CHECK-GI-NEXT: mov h2, v0.h[1]
+; CHECK-GI-NEXT: mov h3, v1.h[1]
+; CHECK-GI-NEXT: mov v0.h[1], v2.h[0]
+; CHECK-GI-NEXT: mov v1.h[1], v3.h[0]
+; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-GI-NEXT: ushll v1.4s, v1.4h, #0
+; CHECK-GI-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-GI-NEXT: ucvtf v0.4s, v0.4s
+; CHECK-GI-NEXT: ret
entry:
%c = uitofp <3 x i8> %a to <3 x float>
ret <3 x float> %c
@@ -5582,12 +5677,18 @@ define <2 x half> @utofp_v2i8_v2f16(<2 x i8> %a) {
; CHECK-GI-FP16-LABEL: utofp_v2i8_v2f16:
; CHECK-GI-FP16: // %bb.0: // %entry
; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-GI-FP16-NEXT: mov w8, v0.s[1]
-; CHECK-GI-FP16-NEXT: fmov w9, s0
-; CHECK-GI-FP16-NEXT: and w9, w9, #0xff
-; CHECK-GI-FP16-NEXT: and w8, w8, #0xff
-; CHECK-GI-FP16-NEXT: ucvtf h0, w9
-; CHECK-GI-FP16-NEXT: ucvtf h1, w8
+; CHECK-GI-FP16-NEXT: mov s1, v0.s[1]
+; CHECK-GI-FP16-NEXT: mov v0.s[1], v1.s[0]
+; CHECK-GI-FP16-NEXT: xtn v0.4h, v0.4s
+; CHECK-GI-FP16-NEXT: mov h1, v0.h[1]
+; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0]
+; CHECK-GI-FP16-NEXT: movi d1, #0x0000ff000000ff
+; CHECK-GI-FP16-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-GI-FP16-NEXT: and v0.8b, v0.8b, v1.8b
+; CHECK-GI-FP16-NEXT: mov s1, v0.s[1]
+; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0]
+; CHECK-GI-FP16-NEXT: ucvtf v0.4h, v0.4h
+; CHECK-GI-FP16-NEXT: mov h1, v0.h[1]
; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0]
; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-FP16-NEXT: ret
@@ -5622,11 +5723,20 @@ define <3 x half> @stofp_v3i8_v3f16(<3 x i8> %a) {
; CHECK-GI-NOFP16-LABEL: stofp_v3i8_v3f16:
; CHECK-GI-NOFP16: // %bb.0: // %entry
; CHECK-GI-NOFP16-NEXT: fmov s0, w0
-; CHECK-GI-NOFP16-NEXT: mov v0.h[1], w1
-; CHECK-GI-NOFP16-NEXT: mov v0.h[2], w2
+; CHECK-GI-NOFP16-NEXT: fmov s1, w1
+; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v1.h[0]
+; CHECK-GI-NOFP16-NEXT: fmov s1, w2
+; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v1.h[0]
; CHECK-GI-NOFP16-NEXT: shl v0.4h, v0.4h, #8
; CHECK-GI-NOFP16-NEXT: sshr v0.4h, v0.4h, #8
+; CHECK-GI-NOFP16-NEXT: mov s1, v0.s[1]
+; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[1]
+; CHECK-GI-NOFP16-NEXT: mov h3, v1.h[1]
+; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v2.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v1.h[1], v3.h[0]
; CHECK-GI-NOFP16-NEXT: sshll v0.4s, v0.4h, #0
+; CHECK-GI-NOFP16-NEXT: sshll v1.4s, v1.4h, #0
+; CHECK-GI-NOFP16-NEXT: mov v0.d[1], v1.d[0]
; CHECK-GI-NOFP16-NEXT: scvtf v0.4s, v0.4s
; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v0.4s
; CHECK-GI-NOFP16-NEXT: ret
@@ -5635,11 +5745,10 @@ define <3 x half> @stofp_v3i8_v3f16(<3 x i8> %a) {
; CHECK-GI-FP16: // %bb.0: // %entry
; CHECK-GI-FP16-NEXT: fmov s0, w0
; CHECK-GI-FP16-NEXT: fmov s1, w1
-; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0]
+; CHECK-GI-FP16-NEXT: mov v0.b[1], v1.b[0]
; CHECK-GI-FP16-NEXT: fmov s1, w2
-; CHECK-GI-FP16-NEXT: mov v0.h[2], v1.h[0]
-; CHECK-GI-FP16-NEXT: shl v0.4h, v0.4h, #8
-; CHECK-GI-FP16-NEXT: sshr v0.4h, v0.4h, #8
+; CHECK-GI-FP16-NEXT: mov v0.b[2], v1.b[0]
+; CHECK-GI-FP16-NEXT: sshll v0.8h, v0.8b, #0
; CHECK-GI-FP16-NEXT: scvtf v0.4h, v0.4h
; CHECK-GI-FP16-NEXT: ret
entry:
@@ -5671,10 +5780,20 @@ define <3 x half> @utofp_v3i8_v3f16(<3 x i8> %a) {
; CHECK-GI-NOFP16-LABEL: utofp_v3i8_v3f16:
; CHECK-GI-NOFP16: // %bb.0: // %entry
; CHECK-GI-NOFP16-NEXT: fmov s0, w0
-; CHECK-GI-NOFP16-NEXT: mov v0.h[1], w1
-; CHECK-GI-NOFP16-NEXT: mov v0.h[2], w2
-; CHECK-GI-NOFP16-NEXT: bic v0.4h, #255, lsl #8
+; CHECK-GI-NOFP16-NEXT: fmov s1, w1
+; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v1.h[0]
+; CHECK-GI-NOFP16-NEXT: fmov s1, w2
+; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v1.h[0]
+; CHECK-GI-NOFP16-NEXT: movi d1, #0xff00ff00ff00ff
+; CHECK-GI-NOFP16-NEXT: and v0.8b, v0.8b, v1.8b
+; CHECK-GI-NOFP16-NEXT: mov s1, v0.s[1]
+; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[1]
+; CHECK-GI-NOFP16-NEXT: mov h3, v1.h[1]
+; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v2.h[0]
+; CHECK-GI-NOFP16-NEXT: mov v1.h[1], v3.h[0]
; CHECK-GI-NOFP16-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-GI-NOFP16-NEXT: ushll v1.4s, v1.4h, #0
+; CHECK-GI-NOFP16-NEXT: mov v0.d[1], v1.d[0]
; CHECK-GI-NOFP16-NEXT: ucvtf v0.4s, v0.4s
; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v0.4s
; CHECK-GI-NOFP16-NEXT: ret
@@ -5683,11 +5802,10 @@ define <3 x half> @utofp_v3i8_v3f16(<3 x i8> %a) {
; CHECK-GI-FP16: // %bb.0: // %entry
; CHECK-GI-FP16-NEXT: fmov s0, w0
; CHECK-GI-FP16-NEXT: fmov s1, w1
-; CHECK-GI-FP16-NEXT: mov v0.h[1], v1.h[0]
+; CHECK-GI-FP16-NEXT: mov v0.b[1], v1.b[0]
; CHECK-GI-FP16-NEXT: fmov s1, w2
-; CHECK-GI-FP16-NEXT: mov v0.h[2], v1.h[0]
-; CHECK-GI-FP16-NEXT: movi d1, #0xff00ff00ff00ff
-; CHECK-GI-FP16-NEXT: and v0.8b, v0.8b, v1.8b
+; CHECK-GI-FP16-NEXT: mov v0.b[2], v1.b[0]
+; CHECK-GI-FP16-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-GI-FP16-NEXT: ucvtf v0.4h, v0.4h
; CHECK-GI-FP16-NEXT: ret
entry:
diff --git a/llvm/test/CodeGen/AArch64/load.ll b/llvm/test/CodeGen/AArch64/load.ll
index 39143e5c53ffd1..c3c0ec5e3d9d8d 100644
--- a/llvm/test/CodeGen/AArch64/load.ll
+++ b/llvm/test/CodeGen/AArch64/load.ll
@@ -159,7 +159,8 @@ define <2 x i16> @load_v2i16(ptr %ptr){
; CHECK-GI: // %bb.0:
; CHECK-GI-NEXT: ldr h0, [x0]
; CHECK-GI-NEXT: ldr h1, [x0, #2]
-; CHECK-GI-NEXT: mov v0.s[1], v1.s[0]
+; CHECK-GI-NEXT: mov v0.h[1], v1.h[0]
+; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0
; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT: ret
%a = load <2 x i16>, ptr %ptr
diff --git a/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll b/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll
index 01620652301ed4..57f220f621cf8b 100644
--- a/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll
+++ b/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll
@@ -1117,8 +1117,15 @@ define <4 x i16> @vselect_constant_cond_zero_v4i16(<4 x i16> %a) {
;
; CHECK-GI-LABEL: vselect_constant_cond_zero_v4i16:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: adrp x8, .LCPI84_0
-; CHECK-GI-NEXT: ldr d1, [x8, :lo12:.LCPI84_0]
+; CHECK-GI-NEXT: mov w8, #1 // =0x1
+; CHECK-GI-NEXT: mov w9, #0 // =0x0
+; CHECK-GI-NEXT: fmov s1, w8
+; CHECK-GI-NEXT: fmov s2, w9
+; CHECK-GI-NEXT: mov v3.16b, v1.16b
+; CHECK-GI-NEXT: mov v3.b[1], v2.b[0]
+; CHECK-GI-NEXT: mov v3.b[2], v2.b[0]
+; CHECK-GI-NEXT: mov v3.b[3], v1.b[0]
+; CHECK-GI-NEXT: ushll v1.8h, v3.8b, #0
; CHECK-GI-NEXT: shl v1.4h, v1.4h, #15
; CHECK-GI-NEXT: sshr v1.4h, v1.4h, #15
; CHECK-GI-NEXT: and v0.8b, v0.8b, v1.8b
@@ -1137,8 +1144,16 @@ define <4 x i32> @vselect_constant_cond_zero_v4i32(<4 x i32> %a) {
;
; CHECK-GI-LABEL: vselect_constant_cond_zero_v4i32:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: adrp x8, .LCPI85_0
-; CHECK-GI-NEXT: ldr q1, [x8, :lo12:.LCPI85_0]
+; CHECK-GI-NEXT: mov w8, #1 // =0x1
+; CHECK-GI-NEXT: mov w9, #0 // =0x0
+; CHECK-GI-NEXT: fmov s1, w8
+; CHECK-GI-NEXT: fmov s2, w9
+; CHECK-GI-NEXT: mov v3.16b, v1.16b
+; CHECK-GI-NEXT: mov v3.h[1], v2.h[0]
+; CHECK-GI-NEXT: mov v2.h[1], v1.h[0]
+; CHECK-GI-NEXT: ushll v1.4s, v3.4h, #0
+; CHECK-GI-NEXT: ushll v2.4s, v2.4h, #0
+; CHECK-GI-NEXT: mov v1.d[1], v2.d[0]
; CHECK-GI-NEXT: shl v1.4s, v1.4s, #31
; CHECK-GI-NEXT: sshr v1.4s, v1.4s, #31
; CHECK-GI-NEXT: and v0.16b, v0.16b, v1.16b
@@ -1181,8 +1196,15 @@ define <4 x i16> @vselect_constant_cond_v4i16(<4 x i16> %a, <4 x i16> %b) {
;
; CHECK-GI-LABEL: vselect_constant_cond_v4i16:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: adrp x8, .LCPI87_0
-; CHECK-GI-NEXT: ldr d2, [x8, :lo12:.LCPI87_0]
+; CHECK-GI-NEXT: mov w8, #1 // =0x1
+; CHECK-GI-NEXT: mov w9, #0 // =0x0
+; CHECK-GI-NEXT: fmov s2, w8
+; CHECK-GI-NEXT: fmov s3, w9
+; CHECK-GI-NEXT: mov v4.16b, v2.16b
+; CHECK-GI-NEXT: mov v4.b[1], v3.b[0]
+; CHECK-GI-NEXT: mov v4.b[2], v3.b[0]
+; CHECK-GI-NEXT: mov v4.b[3], v2.b[0]
+; CHECK-GI-NEXT: ushll v2.8h, v4.8b, #0
; CHECK-GI-NEXT: shl v2.4h, v2.4h, #15
; CHECK-GI-NEXT: sshr v2.4h, v2.4h, #15
; CHECK-GI-NEXT: bif v0.8b, v1.8b, v2.8b
@@ -1201,8 +1223,16 @@ define <4 x i32> @vselect_constant_cond_v4i32(<4 x i32> %a, <4 x i32> %b) {
;
; CHECK-GI-LABEL: vselect_constant_cond_v4i32:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: adrp x8, .LCPI88_0
-; CHECK-GI-NEXT: ldr q2, [x8, :lo12:.LCPI88_0]
+; CHECK-GI-NEXT: mov w8, #1 // =0x1
+; CHECK-GI-NEXT: mov w9, #0 // =0x0
+; CHECK-GI-NEXT: fmov s2, w8
+; CHECK-GI-NEXT: fmov s3, w9
+; CHECK-GI-NEXT: mov v4.16b, v2.16b
+; CHECK-GI-NEXT: mov v4.h[1], v3.h[0]
+; CHECK-GI-NEXT: mov v3.h[1], v2.h[0]
+; CHECK-GI-NEXT: ushll v2.4s, v4.4h, #0
+; CHECK-GI-NEXT: ushll v3.4s, v3.4h, #0
+; CHECK-GI-NEXT: mov v2.d[1], v3.d[0]
; CHECK-GI-NEXT: shl v2.4s, v2.4s, #31
; CHECK-GI-NEXT: sshr v2.4s, v2.4s, #31
; CHECK-GI-NEXT: bif v0.16b, v1.16b, v2.16b
diff --git a/llvm/test/CodeGen/AArch64/shift.ll b/llvm/test/CodeGen/AArch64/shift.ll
index 5287839ee7b705..9c8d3e0f07de87 100644
--- a/llvm/test/CodeGen/AArch64/shift.ll
+++ b/llvm/test/CodeGen/AArch64/shift.ll
@@ -1,13 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -mtriple=aarch64 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-SD
-; RUN: llc -mtriple=aarch64 -global-isel -global-isel-abort=2 -verify-machineinstrs %s -o - 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
-
-; CHECK-GI: warning: Instruction selection used fallback path for shl_v4i8
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for shl_v2i16
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for ashr_v4i8
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for ashr_v2i16
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for lshr_v4i8
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for lshr_v2i16
+; RUN: llc -mtriple=aarch64 -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI
define i1 @shl_i1(i1 %0, i1 %1){
; CHECK-SD-LABEL: shl_i1:
@@ -530,11 +523,38 @@ define <2 x i64> @lshr_v2i64(<2 x i64> %0, <2 x i64> %1){
; ===== Vector Larger/Smaller than Legal =====
define <4 x i8> @shl_v4i8(<4 x i8> %0, <4 x i8> %1){
-; CHECK-LABEL: shl_v4i8:
-; CHECK: // %bb.0:
-; CHECK-NEXT: bic v1.4h, #255, lsl #8
-; CHECK-NEXT: ushl v0.4h, v0.4h, v1.4h
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: shl_v4i8:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: bic v1.4h, #255, lsl #8
+; CHECK-SD-NEXT: ushl v0.4h, v0.4h, v1.4h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: shl_v4i8:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT: mov h2, v0.h[1]
+; CHECK-GI-NEXT: mov h3, v1.h[1]
+; CHECK-GI-NEXT: mov h4, v0.h[2]
+; CHECK-GI-NEXT: mov h5, v0.h[3]
+; CHECK-GI-NEXT: mov h6, v1.h[3]
+; CHECK-GI-NEXT: mov v0.b[1], v2.b[0]
+; CHECK-GI-NEXT: mov h2, v1.h[2]
+; CHECK-GI-NEXT: mov v1.b[1], v3.b[0]
+; CHECK-GI-NEXT: mov v0.b[2], v4.b[0]
+; CHECK-GI-NEXT: mov v1.b[2], v2.b[0]
+; CHECK-GI-NEXT: mov v0.b[3], v5.b[0]
+; CHECK-GI-NEXT: mov v1.b[3], v6.b[0]
+; CHECK-GI-NEXT: ushl v0.8b, v0.8b, v1.8b
+; CHECK-GI-NEXT: mov b1, v0.b[1]
+; CHECK-GI-NEXT: mov b2, v0.b[2]
+; CHECK-GI-NEXT: mov b3, v0.b[3]
+; CHECK-GI-NEXT: mov v0.b[1], v1.b[0]
+; CHECK-GI-NEXT: mov v0.b[2], v2.b[0]
+; CHECK-GI-NEXT: mov v0.b[3], v3.b[0]
+; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT: ret
%3 = shl <4 x i8> %0, %1
ret <4 x i8> %3
}
@@ -556,12 +576,27 @@ define <32 x i8> @shl_v32i8(<32 x i8> %0, <32 x i8> %1){
}
define <2 x i16> @shl_v2i16(<2 x i16> %0, <2 x i16> %1){
-; CHECK-LABEL: shl_v2i16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: movi d2, #0x00ffff0000ffff
-; CHECK-NEXT: and v1.8b, v1.8b, v2.8b
-; CHECK-NEXT: ushl v0.2s, v0.2s, v1.2s
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: shl_v2i16:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: movi d2, #0x00ffff0000ffff
+; CHECK-SD-NEXT: and v1.8b, v1.8b, v2.8b
+; CHECK-SD-NEXT: ushl v0.2s, v0.2s, v1.2s
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: shl_v2i16:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT: mov s2, v0.s[1]
+; CHECK-GI-NEXT: mov s3, v1.s[1]
+; CHECK-GI-NEXT: mov v0.h[1], v2.h[0]
+; CHECK-GI-NEXT: mov v1.h[1], v3.h[0]
+; CHECK-GI-NEXT: ushl v0.4h, v0.4h, v1.4h
+; CHECK-GI-NEXT: mov h1, v0.h[1]
+; CHECK-GI-NEXT: mov v0.h[1], v1.h[0]
+; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT: ret
%3 = shl <2 x i16> %0, %1
ret <2 x i16> %3
}
@@ -633,14 +668,42 @@ define <4 x i64> @shl_v4i64(<4 x i64> %0, <4 x i64> %1){
}
define <4 x i8> @ashr_v4i8(<4 x i8> %0, <4 x i8> %1){
-; CHECK-LABEL: ashr_v4i8:
-; CHECK: // %bb.0:
-; CHECK-NEXT: shl v0.4h, v0.4h, #8
-; CHECK-NEXT: bic v1.4h, #255, lsl #8
-; CHECK-NEXT: sshr v0.4h, v0.4h, #8
-; CHECK-NEXT: neg v1.4h, v1.4h
-; CHECK-NEXT: sshl v0.4h, v0.4h, v1.4h
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: ashr_v4i8:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: shl v0.4h, v0.4h, #8
+; CHECK-SD-NEXT: bic v1.4h, #255, lsl #8
+; CHECK-SD-NEXT: sshr v0.4h, v0.4h, #8
+; CHECK-SD-NEXT: neg v1.4h, v1.4h
+; CHECK-SD-NEXT: sshl v0.4h, v0.4h, v1.4h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: ashr_v4i8:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT: mov h2, v1.h[1]
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT: mov h3, v0.h[1]
+; CHECK-GI-NEXT: mov h4, v1.h[2]
+; CHECK-GI-NEXT: mov h5, v1.h[3]
+; CHECK-GI-NEXT: mov h6, v0.h[3]
+; CHECK-GI-NEXT: mov v1.b[1], v2.b[0]
+; CHECK-GI-NEXT: mov h2, v0.h[2]
+; CHECK-GI-NEXT: mov v0.b[1], v3.b[0]
+; CHECK-GI-NEXT: mov v1.b[2], v4.b[0]
+; CHECK-GI-NEXT: mov v0.b[2], v2.b[0]
+; CHECK-GI-NEXT: mov v1.b[3], v5.b[0]
+; CHECK-GI-NEXT: mov v0.b[3], v6.b[0]
+; CHECK-GI-NEXT: neg v1.8b, v1.8b
+; CHECK-GI-NEXT: sshl v0.8b, v0.8b, v1.8b
+; CHECK-GI-NEXT: mov b1, v0.b[1]
+; CHECK-GI-NEXT: mov b2, v0.b[2]
+; CHECK-GI-NEXT: mov b3, v0.b[3]
+; CHECK-GI-NEXT: mov v0.b[1], v1.b[0]
+; CHECK-GI-NEXT: mov v0.b[2], v2.b[0]
+; CHECK-GI-NEXT: mov v0.b[3], v3.b[0]
+; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT: ret
%3 = ashr <4 x i8> %0, %1
ret <4 x i8> %3
}
@@ -658,15 +721,31 @@ define <32 x i8> @ashr_v32i8(<32 x i8> %0, <32 x i8> %1){
}
define <2 x i16> @ashr_v2i16(<2 x i16> %0, <2 x i16> %1){
-; CHECK-LABEL: ashr_v2i16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: movi d2, #0x00ffff0000ffff
-; CHECK-NEXT: shl v0.2s, v0.2s, #16
-; CHECK-NEXT: sshr v0.2s, v0.2s, #16
-; CHECK-NEXT: and v1.8b, v1.8b, v2.8b
-; CHECK-NEXT: neg v1.2s, v1.2s
-; CHECK-NEXT: sshl v0.2s, v0.2s, v1.2s
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: ashr_v2i16:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: movi d2, #0x00ffff0000ffff
+; CHECK-SD-NEXT: shl v0.2s, v0.2s, #16
+; CHECK-SD-NEXT: sshr v0.2s, v0.2s, #16
+; CHECK-SD-NEXT: and v1.8b, v1.8b, v2.8b
+; CHECK-SD-NEXT: neg v1.2s, v1.2s
+; CHECK-SD-NEXT: sshl v0.2s, v0.2s, v1.2s
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: ashr_v2i16:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT: mov s2, v1.s[1]
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT: mov s3, v0.s[1]
+; CHECK-GI-NEXT: mov v1.h[1], v2.h[0]
+; CHECK-GI-NEXT: mov v0.h[1], v3.h[0]
+; CHECK-GI-NEXT: neg v1.4h, v1.4h
+; CHECK-GI-NEXT: sshl v0.4h, v0.4h, v1.4h
+; CHECK-GI-NEXT: mov h1, v0.h[1]
+; CHECK-GI-NEXT: mov v0.h[1], v1.h[0]
+; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT: ret
%3 = ashr <2 x i16> %0, %1
ret <2 x i16> %3
}
@@ -727,13 +806,41 @@ define <4 x i64> @ashr_v4i64(<4 x i64> %0, <4 x i64> %1){
}
define <4 x i8> @lshr_v4i8(<4 x i8> %0, <4 x i8> %1){
-; CHECK-LABEL: lshr_v4i8:
-; CHECK: // %bb.0:
-; CHECK-NEXT: bic v1.4h, #255, lsl #8
-; CHECK-NEXT: bic v0.4h, #255, lsl #8
-; CHECK-NEXT: neg v1.4h, v1.4h
-; CHECK-NEXT: ushl v0.4h, v0.4h, v1.4h
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: lshr_v4i8:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: bic v1.4h, #255, lsl #8
+; CHECK-SD-NEXT: bic v0.4h, #255, lsl #8
+; CHECK-SD-NEXT: neg v1.4h, v1.4h
+; CHECK-SD-NEXT: ushl v0.4h, v0.4h, v1.4h
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: lshr_v4i8:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT: mov h2, v1.h[1]
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT: mov h3, v0.h[1]
+; CHECK-GI-NEXT: mov h4, v1.h[2]
+; CHECK-GI-NEXT: mov h5, v1.h[3]
+; CHECK-GI-NEXT: mov h6, v0.h[3]
+; CHECK-GI-NEXT: mov v1.b[1], v2.b[0]
+; CHECK-GI-NEXT: mov h2, v0.h[2]
+; CHECK-GI-NEXT: mov v0.b[1], v3.b[0]
+; CHECK-GI-NEXT: mov v1.b[2], v4.b[0]
+; CHECK-GI-NEXT: mov v0.b[2], v2.b[0]
+; CHECK-GI-NEXT: mov v1.b[3], v5.b[0]
+; CHECK-GI-NEXT: mov v0.b[3], v6.b[0]
+; CHECK-GI-NEXT: neg v1.8b, v1.8b
+; CHECK-GI-NEXT: ushl v0.8b, v0.8b, v1.8b
+; CHECK-GI-NEXT: mov b1, v0.b[1]
+; CHECK-GI-NEXT: mov b2, v0.b[2]
+; CHECK-GI-NEXT: mov b3, v0.b[3]
+; CHECK-GI-NEXT: mov v0.b[1], v1.b[0]
+; CHECK-GI-NEXT: mov v0.b[2], v2.b[0]
+; CHECK-GI-NEXT: mov v0.b[3], v3.b[0]
+; CHECK-GI-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT: ret
%3 = lshr <4 x i8> %0, %1
ret <4 x i8> %3
}
@@ -751,14 +858,30 @@ define <32 x i8> @lshr_v32i8(<32 x i8> %0, <32 x i8> %1){
}
define <2 x i16> @lshr_v2i16(<2 x i16> %0, <2 x i16> %1){
-; CHECK-LABEL: lshr_v2i16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: movi d2, #0x00ffff0000ffff
-; CHECK-NEXT: and v1.8b, v1.8b, v2.8b
-; CHECK-NEXT: and v0.8b, v0.8b, v2.8b
-; CHECK-NEXT: neg v1.2s, v1.2s
-; CHECK-NEXT: ushl v0.2s, v0.2s, v1.2s
-; CHECK-NEXT: ret
+; CHECK-SD-LABEL: lshr_v2i16:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: movi d2, #0x00ffff0000ffff
+; CHECK-SD-NEXT: and v1.8b, v1.8b, v2.8b
+; CHECK-SD-NEXT: and v0.8b, v0.8b, v2.8b
+; CHECK-SD-NEXT: neg v1.2s, v1.2s
+; CHECK-SD-NEXT: ushl v0.2s, v0.2s, v1.2s
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: lshr_v2i16:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT: mov s2, v1.s[1]
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-GI-NEXT: mov s3, v0.s[1]
+; CHECK-GI-NEXT: mov v1.h[1], v2.h[0]
+; CHECK-GI-NEXT: mov v0.h[1], v3.h[0]
+; CHECK-GI-NEXT: neg v1.4h, v1.4h
+; CHECK-GI-NEXT: ushl v0.4h, v0.4h, v1.4h
+; CHECK-GI-NEXT: mov h1, v0.h[1]
+; CHECK-GI-NEXT: mov v0.h[1], v1.h[0]
+; CHECK-GI-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-GI-NEXT: ret
%3 = lshr <2 x i16> %0, %1
ret <2 x i16> %3
}
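With the fallback warnings removed from shift.ll above, these small-vector cases now go through GlobalISel without -global-isel-abort=2. As a quick standalone check, the shl_v4i8 case (copied verbatim from the diff; the file name below is just a placeholder) can be run with the same RUN line the test now uses:

  ; llc -mtriple=aarch64 -global-isel -verify-machineinstrs shl_v4i8.ll -o -
  define <4 x i8> @shl_v4i8(<4 x i8> %0, <4 x i8> %1) {
    %3 = shl <4 x i8> %0, %1
    ret <4 x i8> %3
  }

This should produce the CHECK-GI sequence shown in the shift.ll hunk rather than falling back to SelectionDAG.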