[llvm] [AArch64] Unify lowering logic for fixed-length vectors. (PR #89393)
Sander de Smalen via llvm-commits
llvm-commits at lists.llvm.org
Fri Apr 19 07:49:58 PDT 2024
https://github.com/sdesmalen-arm created https://github.com/llvm/llvm-project/pull/89393
In preparation for decoupling SME codegen from SVE, this patch does a bit of cleanup to unify the logic around calling
'addTypeForFixedLengthSVE'.
We only want to call this function when:
* We have access to both SVE and NEON, but we prefer to use SVE.
* We have access to SVE, but there is no access to NEON.
Inside 'addTypeForFixedLengthSVE', we normally use Custom lowering for all operations so they can be converted to/from scalable vector operations.
However, there are some exceptions:
* For 64/128-bit vector loads/stores we prefer the AdvSIMD LDR/STR D/Q-reg instructions, since these are available in Streaming-SVE mode.
* For some operations like gather/scatter, we can only use SVE if the full set of SVE instructions is available (as opposed to the streaming[-compatible] subset); otherwise, these operations need to be expanded (scalarised), as sketched below.
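To make the resulting behaviour easier to see, here is a rough sketch of the selection logic this patch converges on (paraphrased from the diff below; the names match the patch, but the snippet is illustrative rather than a verbatim excerpt):

    // Sketch of AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT):

    // v1f64 has no useful scalable equivalent, so it expands by default;
    // every other fixed-length type is custom-lowered to scalable ops.
    TargetLoweringBase::LegalizeAction Default =
        VT == MVT::v1f64 ? Expand : Custom;

    // 64/128-bit vectors keep using the AdvSIMD LDR/STR D/Q-reg forms for
    // plain loads/stores, since those remain available in streaming mode.
    bool PreferNEON = VT.is64BitVector() || VT.is128BitVector();

    // Gather/scatter (and strictly-ordered FP reductions) need the full,
    // non-streaming SVE instruction set; otherwise they are expanded.
    bool PreferSVE = !PreferNEON && Subtarget->isSVEAvailable();

    setOperationAction(ISD::LOAD, VT, PreferNEON ? Legal : Default);
    setOperationAction(ISD::MGATHER, VT, PreferSVE ? Default : Expand);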
From 9f35048c933b95bbfb598bc5cb9b953e98aa43c6 Mon Sep 17 00:00:00 2001
From: Sander de Smalen <sander.desmalen at arm.com>
Date: Fri, 19 Apr 2024 11:49:07 +0100
Subject: [PATCH] [AArch64] Unify lowering logic for fixed-length vectors.
In preparation for decoupling SME codegen from SVE, this patch does a
bit of cleanup to unify the logic around calling
'addTypeForFixedLengthSVE'.
We only want to call this function when:
* We have access to both SVE and NEON, but we prefer to use SVE.
* We have access to SVE, but there is no access to NEON.
Inside 'addTypeForFixedLengthSVE', we normally use Custom lowering for all
operations so they can be converted to/from scalable vector operations.
However, there are some exceptions:
* For 64/128-bit vector loads/stores we prefer the AdvSIMD LDR/STR D/Q-reg
  instructions, since these are available in Streaming-SVE mode.
* For some operations like gather/scatter, we can only use SVE if the full
  set of SVE instructions is available (as opposed to the
  streaming[-compatible] subset); otherwise, these operations need to be
  expanded (scalarised).
---
.../Target/AArch64/AArch64ISelLowering.cpp | 234 ++++++++----------
llvm/lib/Target/AArch64/AArch64ISelLowering.h | 2 +-
...sve-streaming-mode-fixed-length-bitcast.ll | 7 +-
...ng-mode-fixed-length-extract-vector-elt.ll | 2 -
...streaming-mode-fixed-length-fp-compares.ll | 8 +-
.../sve-streaming-mode-fixed-length-fp-fma.ll | 3 -
...e-streaming-mode-fixed-length-fp-minmax.ll | 8 -
...e-streaming-mode-fixed-length-fp-reduce.ll | 10 -
...streaming-mode-fixed-length-fp-rounding.ll | 7 -
...e-streaming-mode-fixed-length-fp-select.ll | 11 +-
...e-streaming-mode-fixed-length-fp-to-int.ll | 2 -
...-streaming-mode-fixed-length-fp-vselect.ll | 11 +-
...-streaming-mode-fixed-length-ld2-alloca.ll | 4 +-
.../sve-streaming-mode-fixed-length-loads.ll | 11 +-
...eaming-mode-fixed-length-optimize-ptrue.ll | 14 +-
...e-streaming-mode-fixed-length-subvector.ll | 9 +-
16 files changed, 127 insertions(+), 216 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index eee67a0f823c19..f3275d4d2b55fc 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1603,39 +1603,19 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
}
- if (!Subtarget->isNeonAvailable()) {
- setTruncStoreAction(MVT::v2f32, MVT::v2bf16, Custom);
- setTruncStoreAction(MVT::v4f32, MVT::v4bf16, Custom);
- setTruncStoreAction(MVT::v8f32, MVT::v8bf16, Custom);
- setTruncStoreAction(MVT::v2f64, MVT::v2bf16, Custom);
- setTruncStoreAction(MVT::v4f64, MVT::v4bf16, Custom);
- setTruncStoreAction(MVT::v2f32, MVT::v2f16, Custom);
- setTruncStoreAction(MVT::v4f32, MVT::v4f16, Custom);
- setTruncStoreAction(MVT::v8f32, MVT::v8f16, Custom);
- setTruncStoreAction(MVT::v1f64, MVT::v1f16, Custom);
- setTruncStoreAction(MVT::v2f64, MVT::v2f16, Custom);
- setTruncStoreAction(MVT::v4f64, MVT::v4f16, Custom);
- setTruncStoreAction(MVT::v1f64, MVT::v1f32, Custom);
- setTruncStoreAction(MVT::v2f64, MVT::v2f32, Custom);
- setTruncStoreAction(MVT::v4f64, MVT::v4f32, Custom);
- for (MVT VT : {MVT::v8i8, MVT::v16i8, MVT::v4i16, MVT::v8i16, MVT::v2i32,
- MVT::v4i32, MVT::v1i64, MVT::v2i64})
- addTypeForFixedLengthSVE(VT, /*StreamingSVE=*/ true);
-
- for (MVT VT :
- {MVT::v4f16, MVT::v8f16, MVT::v2f32, MVT::v4f32, MVT::v2f64})
- addTypeForFixedLengthSVE(VT, /*StreamingSVE=*/ true);
- }
-
// NOTE: Currently this has to happen after computeRegisterProperties rather
// than the preferred option of combining it with the addRegisterClass call.
if (Subtarget->useSVEForFixedLengthVectors()) {
- for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
- if (useSVEForFixedLengthVectorVT(VT))
- addTypeForFixedLengthSVE(VT, /*StreamingSVE=*/ false);
- for (MVT VT : MVT::fp_fixedlen_vector_valuetypes())
- if (useSVEForFixedLengthVectorVT(VT))
- addTypeForFixedLengthSVE(VT, /*StreamingSVE=*/ false);
+ for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) {
+ if (useSVEForFixedLengthVectorVT(
+ VT, /*OverrideNEON=*/!Subtarget->isNeonAvailable()))
+ addTypeForFixedLengthSVE(VT);
+ }
+ for (MVT VT : MVT::fp_fixedlen_vector_valuetypes()) {
+ if (useSVEForFixedLengthVectorVT(
+ VT, /*OverrideNEON=*/!Subtarget->isNeonAvailable()))
+ addTypeForFixedLengthSVE(VT);
+ }
// 64bit results can mean a bigger than NEON input.
for (auto VT : {MVT::v8i8, MVT::v4i16})
@@ -1869,8 +1849,7 @@ bool AArch64TargetLowering::shouldExpandCttzElements(EVT VT) const {
return !Subtarget->hasSVEorSME() || VT != MVT::nxv16i1;
}
-void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT,
- bool StreamingSVE) {
+void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT) {
assert(VT.isFixedLengthVector() && "Expected fixed length vector type!");
// By default everything must be expanded.
@@ -1889,13 +1868,17 @@ void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT,
setCondCodeAction(ISD::SETONE, VT, Expand);
}
+ TargetLoweringBase::LegalizeAction Default =
+ VT == MVT::v1f64 ? Expand : Custom;
+
// Mark integer truncating stores/extending loads as having custom lowering
if (VT.isInteger()) {
MVT InnerVT = VT.changeVectorElementType(MVT::i8);
while (InnerVT != VT) {
- setTruncStoreAction(VT, InnerVT, Custom);
- setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Custom);
- setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Custom);
+ setTruncStoreAction(VT, InnerVT, Default);
+ setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Default);
+ setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Default);
+ setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Default);
InnerVT = InnerVT.changeVectorElementType(
MVT::getIntegerVT(2 * InnerVT.getScalarSizeInBits()));
}
@@ -1907,101 +1890,104 @@ void AArch64TargetLowering::addTypeForFixedLengthSVE(MVT VT,
MVT InnerVT = VT.changeVectorElementType(MVT::f16);
while (InnerVT != VT) {
setTruncStoreAction(VT, InnerVT, Custom);
- setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Custom);
+ setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Default);
InnerVT = InnerVT.changeVectorElementType(
MVT::getFloatingPointVT(2 * InnerVT.getScalarSizeInBits()));
}
}
+ bool PreferNEON = VT.is64BitVector() || VT.is128BitVector();
+ bool PreferSVE = !PreferNEON && Subtarget->isSVEAvailable();
+
// Lower fixed length vector operations to scalable equivalents.
- setOperationAction(ISD::ABS, VT, Custom);
- setOperationAction(ISD::ADD, VT, Custom);
- setOperationAction(ISD::AND, VT, Custom);
- setOperationAction(ISD::ANY_EXTEND, VT, Custom);
- setOperationAction(ISD::BITCAST, VT, StreamingSVE ? Legal : Custom);
- setOperationAction(ISD::BITREVERSE, VT, Custom);
- setOperationAction(ISD::BSWAP, VT, Custom);
- setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
- setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
- setOperationAction(ISD::CTLZ, VT, Custom);
- setOperationAction(ISD::CTPOP, VT, Custom);
- setOperationAction(ISD::CTTZ, VT, Custom);
- setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
- setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
- setOperationAction(ISD::FABS, VT, Custom);
- setOperationAction(ISD::FADD, VT, Custom);
- setOperationAction(ISD::FCEIL, VT, Custom);
- setOperationAction(ISD::FCOPYSIGN, VT, Custom);
- setOperationAction(ISD::FDIV, VT, Custom);
- setOperationAction(ISD::FFLOOR, VT, Custom);
- setOperationAction(ISD::FMA, VT, Custom);
- setOperationAction(ISD::FMAXIMUM, VT, Custom);
- setOperationAction(ISD::FMAXNUM, VT, Custom);
- setOperationAction(ISD::FMINIMUM, VT, Custom);
- setOperationAction(ISD::FMINNUM, VT, Custom);
- setOperationAction(ISD::FMUL, VT, Custom);
- setOperationAction(ISD::FNEARBYINT, VT, Custom);
- setOperationAction(ISD::FNEG, VT, Custom);
- setOperationAction(ISD::FP_EXTEND, VT, Custom);
- setOperationAction(ISD::FP_ROUND, VT, Custom);
- setOperationAction(ISD::FP_TO_SINT, VT, Custom);
- setOperationAction(ISD::FP_TO_UINT, VT, Custom);
- setOperationAction(ISD::FRINT, VT, Custom);
- setOperationAction(ISD::FROUND, VT, Custom);
- setOperationAction(ISD::FROUNDEVEN, VT, Custom);
- setOperationAction(ISD::FSQRT, VT, Custom);
- setOperationAction(ISD::FSUB, VT, Custom);
- setOperationAction(ISD::FTRUNC, VT, Custom);
- setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
- setOperationAction(ISD::LOAD, VT, StreamingSVE ? Legal : Custom);
- setOperationAction(ISD::MGATHER, VT, StreamingSVE ? Expand : Custom);
- setOperationAction(ISD::MLOAD, VT, Custom);
- setOperationAction(ISD::MSCATTER, VT, StreamingSVE ? Expand : Custom);
- setOperationAction(ISD::MSTORE, VT, Custom);
- setOperationAction(ISD::MUL, VT, Custom);
- setOperationAction(ISD::MULHS, VT, Custom);
- setOperationAction(ISD::MULHU, VT, Custom);
- setOperationAction(ISD::OR, VT, Custom);
- setOperationAction(ISD::SCALAR_TO_VECTOR, VT, StreamingSVE ? Legal : Expand);
- setOperationAction(ISD::SDIV, VT, Custom);
- setOperationAction(ISD::SELECT, VT, Custom);
- setOperationAction(ISD::SETCC, VT, Custom);
- setOperationAction(ISD::SHL, VT, Custom);
- setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
- setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Custom);
- setOperationAction(ISD::SINT_TO_FP, VT, Custom);
- setOperationAction(ISD::SMAX, VT, Custom);
- setOperationAction(ISD::SMIN, VT, Custom);
- setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
- setOperationAction(ISD::SRA, VT, Custom);
- setOperationAction(ISD::SRL, VT, Custom);
- setOperationAction(ISD::STORE, VT, StreamingSVE ? Legal : Custom);
- setOperationAction(ISD::SUB, VT, Custom);
- setOperationAction(ISD::TRUNCATE, VT, Custom);
- setOperationAction(ISD::UDIV, VT, Custom);
- setOperationAction(ISD::UINT_TO_FP, VT, Custom);
- setOperationAction(ISD::UMAX, VT, Custom);
- setOperationAction(ISD::UMIN, VT, Custom);
- setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
- setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
- setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
- setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
- setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
- setOperationAction(ISD::VECREDUCE_FMAXIMUM, VT, Custom);
- setOperationAction(ISD::VECREDUCE_FMINIMUM, VT, Custom);
- setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
- setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT,
- StreamingSVE ? Expand : Custom);
- setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
- setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
- setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
- setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);
- setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
- setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
- setOperationAction(ISD::VECTOR_SPLICE, VT, Custom);
- setOperationAction(ISD::VSELECT, VT, Custom);
- setOperationAction(ISD::XOR, VT, Custom);
- setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
+ setOperationAction(ISD::ABS, VT, Default);
+ setOperationAction(ISD::ADD, VT, Default);
+ setOperationAction(ISD::AND, VT, Default);
+ setOperationAction(ISD::ANY_EXTEND, VT, Default);
+ setOperationAction(ISD::BITCAST, VT, PreferNEON ? Legal : Default);
+ setOperationAction(ISD::BITREVERSE, VT, Default);
+ setOperationAction(ISD::BSWAP, VT, Default);
+ setOperationAction(ISD::BUILD_VECTOR, VT, Default);
+ setOperationAction(ISD::CONCAT_VECTORS, VT, Default);
+ setOperationAction(ISD::CTLZ, VT, Default);
+ setOperationAction(ISD::CTPOP, VT, Default);
+ setOperationAction(ISD::CTTZ, VT, Default);
+ setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Default);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Default);
+ setOperationAction(ISD::FABS, VT, Default);
+ setOperationAction(ISD::FADD, VT, Default);
+ setOperationAction(ISD::FCEIL, VT, Default);
+ setOperationAction(ISD::FCOPYSIGN, VT, Default);
+ setOperationAction(ISD::FDIV, VT, Default);
+ setOperationAction(ISD::FFLOOR, VT, Default);
+ setOperationAction(ISD::FMA, VT, Default);
+ setOperationAction(ISD::FMAXIMUM, VT, Default);
+ setOperationAction(ISD::FMAXNUM, VT, Default);
+ setOperationAction(ISD::FMINIMUM, VT, Default);
+ setOperationAction(ISD::FMINNUM, VT, Default);
+ setOperationAction(ISD::FMUL, VT, Default);
+ setOperationAction(ISD::FNEARBYINT, VT, Default);
+ setOperationAction(ISD::FNEG, VT, Default);
+ setOperationAction(ISD::FP_EXTEND, VT, Default);
+ setOperationAction(ISD::FP_ROUND, VT, Default);
+ setOperationAction(ISD::FP_TO_SINT, VT, Default);
+ setOperationAction(ISD::FP_TO_UINT, VT, Default);
+ setOperationAction(ISD::FRINT, VT, Default);
+ setOperationAction(ISD::FROUND, VT, Default);
+ setOperationAction(ISD::FROUNDEVEN, VT, Default);
+ setOperationAction(ISD::FSQRT, VT, Default);
+ setOperationAction(ISD::FSUB, VT, Default);
+ setOperationAction(ISD::FTRUNC, VT, Default);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Default);
+ setOperationAction(ISD::LOAD, VT, PreferNEON ? Legal : Default);
+ setOperationAction(ISD::MGATHER, VT, PreferSVE ? Default : Expand);
+ setOperationAction(ISD::MLOAD, VT, Default);
+ setOperationAction(ISD::MSCATTER, VT, PreferSVE ? Default : Expand);
+ setOperationAction(ISD::MSTORE, VT, Default);
+ setOperationAction(ISD::MUL, VT, Default);
+ setOperationAction(ISD::MULHS, VT, Default);
+ setOperationAction(ISD::MULHU, VT, Default);
+ setOperationAction(ISD::OR, VT, Default);
+ setOperationAction(ISD::SCALAR_TO_VECTOR, VT,
+ isTypeLegal(VT) ? Legal : Expand);
+ setOperationAction(ISD::SDIV, VT, Default);
+ setOperationAction(ISD::SELECT, VT, Default);
+ setOperationAction(ISD::SETCC, VT, Default);
+ setOperationAction(ISD::SHL, VT, Default);
+ setOperationAction(ISD::SIGN_EXTEND, VT, Default);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, VT, Default);
+ setOperationAction(ISD::SINT_TO_FP, VT, Default);
+ setOperationAction(ISD::SMAX, VT, Default);
+ setOperationAction(ISD::SMIN, VT, Default);
+ setOperationAction(ISD::SPLAT_VECTOR, VT, Default);
+ setOperationAction(ISD::SRA, VT, Default);
+ setOperationAction(ISD::SRL, VT, Default);
+ setOperationAction(ISD::STORE, VT, PreferNEON ? Legal : Default);
+ setOperationAction(ISD::SUB, VT, Default);
+ setOperationAction(ISD::TRUNCATE, VT, Default);
+ setOperationAction(ISD::UDIV, VT, Default);
+ setOperationAction(ISD::UINT_TO_FP, VT, Default);
+ setOperationAction(ISD::UMAX, VT, Default);
+ setOperationAction(ISD::UMIN, VT, Default);
+ setOperationAction(ISD::VECREDUCE_ADD, VT, Default);
+ setOperationAction(ISD::VECREDUCE_AND, VT, Default);
+ setOperationAction(ISD::VECREDUCE_FADD, VT, Default);
+ setOperationAction(ISD::VECREDUCE_FMAX, VT, Default);
+ setOperationAction(ISD::VECREDUCE_FMIN, VT, Default);
+ setOperationAction(ISD::VECREDUCE_FMAXIMUM, VT, Default);
+ setOperationAction(ISD::VECREDUCE_FMINIMUM, VT, Default);
+ setOperationAction(ISD::VECREDUCE_OR, VT, Default);
+ setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, PreferSVE ? Default : Expand);
+ setOperationAction(ISD::VECREDUCE_SMAX, VT, Default);
+ setOperationAction(ISD::VECREDUCE_SMIN, VT, Default);
+ setOperationAction(ISD::VECREDUCE_UMAX, VT, Default);
+ setOperationAction(ISD::VECREDUCE_UMIN, VT, Default);
+ setOperationAction(ISD::VECREDUCE_XOR, VT, Default);
+ setOperationAction(ISD::VECTOR_SHUFFLE, VT, Default);
+ setOperationAction(ISD::VECTOR_SPLICE, VT, Default);
+ setOperationAction(ISD::VSELECT, VT, Default);
+ setOperationAction(ISD::XOR, VT, Default);
+ setOperationAction(ISD::ZERO_EXTEND, VT, Default);
}
void AArch64TargetLowering::addDRTypeForNEON(MVT VT) {
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index db6e8a00d2fb5e..400368a5e1303d 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -1013,7 +1013,7 @@ class AArch64TargetLowering : public TargetLowering {
bool isExtFreeImpl(const Instruction *Ext) const override;
void addTypeForNEON(MVT VT);
- void addTypeForFixedLengthSVE(MVT VT, bool StreamingSVE);
+ void addTypeForFixedLengthSVE(MVT VT);
void addDRTypeForNEON(MVT VT);
void addQRTypeForNEON(MVT VT);
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitcast.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitcast.ll
index dd72c2b2bd0109..e3cc74f766ee0e 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitcast.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitcast.ll
@@ -60,11 +60,8 @@ define void @bitcast_v2i16(ptr %a, ptr %b) {
; CHECK: // %bb.0:
; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: ldrh w8, [x0, #2]
-; CHECK-NEXT: str w8, [sp, #4]
-; CHECK-NEXT: ldrh w8, [x0]
-; CHECK-NEXT: str w8, [sp]
-; CHECK-NEXT: ldr d0, [sp]
+; CHECK-NEXT: ptrue p0.s, vl2
+; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0]
; CHECK-NEXT: mov z1.s, z0.s[1]
; CHECK-NEXT: fmov w8, s0
; CHECK-NEXT: strh w8, [sp, #8]
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-vector-elt.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-vector-elt.ll
index 1b9bb42c8582e1..a752e119b2fb2a 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-vector-elt.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-vector-elt.ll
@@ -90,8 +90,6 @@ define float @extractelement_v8f32(ptr %a) {
define double @extractelement_v1f64(<1 x double> %op1) {
; CHECK-LABEL: extractelement_v1f64:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
%r = extractelement <1 x double> %op1, i64 0
ret double %r
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-compares.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-compares.ll
index aad078f035f7d6..624d1823abd187 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-compares.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-compares.ll
@@ -127,11 +127,9 @@ define void @fcmp_oeq_v8f32(ptr %a, ptr %b, ptr %c) {
define <1 x i64> @fcmp_oeq_v1f64(<1 x double> %op1, <1 x double> %op2) {
; CHECK-LABEL: fcmp_oeq_v1f64:
; CHECK: // %bb.0:
-; CHECK-NEXT: ptrue p0.d, vl1
-; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT: fcmeq p0.d, p0/z, z0.d, z1.d
-; CHECK-NEXT: mov z0.d, p0/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: fcmp d0, d1
+; CHECK-NEXT: csetm x8, eq
+; CHECK-NEXT: mov z0.d, x8
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
%cmp = fcmp oeq <1 x double> %op1, %op2
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-fma.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-fma.ll
index b5df97f767c13b..c51cedb493fa27 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-fma.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-fma.ll
@@ -112,9 +112,6 @@ define void @fma_v8f32(ptr %a, ptr %b, ptr %c) {
define <1 x double> @fma_v1f64(<1 x double> %op1, <1 x double> %op2, <1 x double> %op3) {
; CHECK-LABEL: fma_v1f64:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT: // kill: def $d2 killed $d2 def $z2
-; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT: fmadd d0, d0, d1, d2
; CHECK-NEXT: ret
%mul = fmul contract <1 x double> %op1, %op2
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-minmax.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-minmax.ll
index 07a67e26502909..4d583c4e06ccaf 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-minmax.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-minmax.ll
@@ -99,8 +99,6 @@ define void @fmaxnm_v8f32(ptr %a, ptr %b) {
define <1 x double> @fmaxnm_v1f64(<1 x double> %op1, <1 x double> %op2) {
; CHECK-LABEL: fmaxnm_v1f64:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT: fmaxnm d0, d0, d1
; CHECK-NEXT: ret
%res = call <1 x double> @llvm.maxnum.v1f64(<1 x double> %op1, <1 x double> %op2)
@@ -233,8 +231,6 @@ define void @fminnm_v8f32(ptr %a, ptr %b) {
define <1 x double> @fminnm_v1f64(<1 x double> %op1, <1 x double> %op2) {
; CHECK-LABEL: fminnm_v1f64:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT: fminnm d0, d0, d1
; CHECK-NEXT: ret
%res = call <1 x double> @llvm.minnum.v1f64(<1 x double> %op1, <1 x double> %op2)
@@ -367,8 +363,6 @@ define void @fmax_v8f32(ptr %a, ptr %b) {
define <1 x double> @fmax_v1f64(<1 x double> %op1, <1 x double> %op2) {
; CHECK-LABEL: fmax_v1f64:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT: fmax d0, d0, d1
; CHECK-NEXT: ret
%res = call <1 x double> @llvm.maximum.v1f64(<1 x double> %op1, <1 x double> %op2)
@@ -501,8 +495,6 @@ define void @fmin_v8f32(ptr %a, ptr %b) {
define <1 x double> @fmin_v1f64(<1 x double> %op1, <1 x double> %op2) {
; CHECK-LABEL: fmin_v1f64:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT: fmin d0, d0, d1
; CHECK-NEXT: ret
%res = call <1 x double> @llvm.minimum.v1f64(<1 x double> %op1, <1 x double> %op2)
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-reduce.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-reduce.ll
index d2d771c48c2044..fbf0f433d0c033 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-reduce.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-reduce.ll
@@ -144,7 +144,6 @@ define float @fadda_v8f32(float %start, ptr %a) {
define double @fadda_v1f64(double %start, <1 x double> %a) {
; CHECK-LABEL: fadda_v1f64:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT: fadd d0, d0, d1
; CHECK-NEXT: ret
%res = call double @llvm.vector.reduce.fadd.v1f64(double %start, <1 x double> %a)
@@ -263,7 +262,6 @@ define float @faddv_v8f32(float %start, ptr %a) {
define double @faddv_v1f64(double %start, <1 x double> %a) {
; CHECK-LABEL: faddv_v1f64:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
; CHECK-NEXT: fadd d0, d0, d1
; CHECK-NEXT: ret
%res = call fast double @llvm.vector.reduce.fadd.v1f64(double %start, <1 x double> %a)
@@ -379,8 +377,6 @@ define float @fmaxv_v8f32(ptr %a) {
define double @fmaxv_v1f64(<1 x double> %a) {
; CHECK-LABEL: fmaxv_v1f64:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
%res = call double @llvm.vector.reduce.fmax.v1f64(<1 x double> %a)
ret double %res
@@ -495,8 +491,6 @@ define float @fminv_v8f32(ptr %a) {
define double @fminv_v1f64(<1 x double> %a) {
; CHECK-LABEL: fminv_v1f64:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
%res = call double @llvm.vector.reduce.fmin.v1f64(<1 x double> %a)
ret double %res
@@ -611,8 +605,6 @@ define float @fmaximumv_v8f32(ptr %a) {
define double @fmaximumv_v1f64(<1 x double> %a) {
; CHECK-LABEL: fmaximumv_v1f64:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
%res = call double @llvm.vector.reduce.fmaximum.v1f64(<1 x double> %a)
ret double %res
@@ -727,8 +719,6 @@ define float @fminimumv_v8f32(ptr %a) {
define double @fminimumv_v1f64(<1 x double> %a) {
; CHECK-LABEL: fminimumv_v1f64:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
%res = call double @llvm.vector.reduce.fminimum.v1f64(<1 x double> %a)
ret double %res
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-rounding.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-rounding.ll
index 580b43531070fc..498c1c1b7f944f 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-rounding.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-rounding.ll
@@ -101,7 +101,6 @@ define void @frintp_v8f32(ptr %a) {
define <1 x double> @frintp_v1f64(<1 x double> %op) {
; CHECK-LABEL: frintp_v1f64:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: frintp d0, d0
; CHECK-NEXT: ret
%res = call <1 x double> @llvm.ceil.v1f64(<1 x double> %op)
@@ -232,7 +231,6 @@ define void @frintm_v8f32(ptr %a) {
define <1 x double> @frintm_v1f64(<1 x double> %op) {
; CHECK-LABEL: frintm_v1f64:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: frintm d0, d0
; CHECK-NEXT: ret
%res = call <1 x double> @llvm.floor.v1f64(<1 x double> %op)
@@ -363,7 +361,6 @@ define void @frinti_v8f32(ptr %a) {
define <1 x double> @frinti_v1f64(<1 x double> %op) {
; CHECK-LABEL: frinti_v1f64:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: frinti d0, d0
; CHECK-NEXT: ret
%res = call <1 x double> @llvm.nearbyint.v1f64(<1 x double> %op)
@@ -494,7 +491,6 @@ define void @frintx_v8f32(ptr %a) {
define <1 x double> @frintx_v1f64(<1 x double> %op) {
; CHECK-LABEL: frintx_v1f64:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: frintx d0, d0
; CHECK-NEXT: ret
%res = call <1 x double> @llvm.rint.v1f64(<1 x double> %op)
@@ -625,7 +621,6 @@ define void @frinta_v8f32(ptr %a) {
define <1 x double> @frinta_v1f64(<1 x double> %op) {
; CHECK-LABEL: frinta_v1f64:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: frinta d0, d0
; CHECK-NEXT: ret
%res = call <1 x double> @llvm.round.v1f64(<1 x double> %op)
@@ -756,7 +751,6 @@ define void @frintn_v8f32(ptr %a) {
define <1 x double> @frintn_v1f64(<1 x double> %op) {
; CHECK-LABEL: frintn_v1f64:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: frintn d0, d0
; CHECK-NEXT: ret
%res = call <1 x double> @llvm.roundeven.v1f64(<1 x double> %op)
@@ -887,7 +881,6 @@ define void @frintz_v8f32(ptr %a) {
define <1 x double> @frintz_v1f64(<1 x double> %op) {
; CHECK-LABEL: frintz_v1f64:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: frintz d0, d0
; CHECK-NEXT: ret
%res = call <1 x double> @llvm.trunc.v1f64(<1 x double> %op)
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-select.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-select.ll
index 73fd7e14653433..c41857ba9aa5bb 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-select.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-select.ll
@@ -132,16 +132,7 @@ define <1 x double> @select_v1f64(<1 x double> %op1, <1 x double> %op2, i1 %mask
; CHECK-LABEL: select_v1f64:
; CHECK: // %bb.0:
; CHECK-NEXT: tst w0, #0x1
-; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT: csetm x8, ne
-; CHECK-NEXT: mvn x9, x8
-; CHECK-NEXT: mov z2.d, x8
-; CHECK-NEXT: mov z3.d, x9
-; CHECK-NEXT: and z0.d, z0.d, z2.d
-; CHECK-NEXT: and z1.d, z1.d, z3.d
-; CHECK-NEXT: orr z0.d, z0.d, z1.d
-; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
+; CHECK-NEXT: fcsel d0, d0, d1, ne
; CHECK-NEXT: ret
%sel = select i1 %mask, <1 x double> %op1, <1 x double> %op2
ret <1 x double> %sel
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-to-int.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-to-int.ll
index d6adf9cf0ad672..6fb32c282f5b88 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-to-int.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-to-int.ll
@@ -464,7 +464,6 @@ define void @fcvtzu_v8f32_v8i64(ptr %a, ptr %b) {
define <1 x i16> @fcvtzu_v1f64_v1i16(<1 x double> %op1) {
; CHECK-LABEL: fcvtzu_v1f64_v1i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: fcvtzs w8, d0
; CHECK-NEXT: mov z0.h, w8
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
@@ -1216,7 +1215,6 @@ define void @fcvtzs_v8f32_v8i64(ptr %a, ptr %b) {
define <1 x i16> @fcvtzs_v1f64_v1i16(<1 x double> %op1) {
; CHECK-LABEL: fcvtzs_v1f64_v1i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: fcvtzs w8, d0
; CHECK-NEXT: mov z0.h, w8
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-vselect.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-vselect.ll
index ee8704284def5f..a419d7b3896503 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-vselect.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-vselect.ll
@@ -149,16 +149,7 @@ define <1 x double> @select_v1f64(<1 x double> %op1, <1 x double> %op2, <1 x i1>
; CHECK-LABEL: select_v1f64:
; CHECK: // %bb.0:
; CHECK-NEXT: tst w0, #0x1
-; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT: csetm x8, ne
-; CHECK-NEXT: mvn x9, x8
-; CHECK-NEXT: mov z2.d, x8
-; CHECK-NEXT: mov z3.d, x9
-; CHECK-NEXT: and z0.d, z0.d, z2.d
-; CHECK-NEXT: and z1.d, z1.d, z3.d
-; CHECK-NEXT: orr z0.d, z0.d, z1.d
-; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
+; CHECK-NEXT: fcsel d0, d0, d1, ne
; CHECK-NEXT: ret
%sel = select <1 x i1> %mask, <1 x double> %op1, <1 x double> %op2
ret <1 x double> %sel
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ld2-alloca.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ld2-alloca.ll
index 1fc51d50b50ae0..efe9066f2c835f 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ld2-alloca.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ld2-alloca.ll
@@ -47,6 +47,7 @@ define void @alloc_v6i8(ptr %st_ptr) nounwind {
; CHECK-NEXT: add x20, sp, #24
; CHECK-NEXT: bl def
; CHECK-NEXT: ptrue p0.b, vl3
+; CHECK-NEXT: ptrue p1.s, vl2
; CHECK-NEXT: ld2b { z0.b, z1.b }, p0/z, [x20]
; CHECK-NEXT: ptrue p0.h, vl4
; CHECK-NEXT: mov z2.b, z1.b[3]
@@ -63,9 +64,10 @@ define void @alloc_v6i8(ptr %st_ptr) nounwind {
; CHECK-NEXT: add x8, sp, #12
; CHECK-NEXT: ldr d0, [sp]
; CHECK-NEXT: st1b { z0.h }, p0, [x8]
-; CHECK-NEXT: ldrh w8, [sp, #12]
+; CHECK-NEXT: ld1h { z0.s }, p1/z, [x8]
; CHECK-NEXT: strb w9, [x19, #2]
; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-NEXT: fmov w8, s0
; CHECK-NEXT: strh w8, [x19]
; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: add sp, sp, #48
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-loads.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-loads.ll
index 688c39b89c0df0..8ca8e698091359 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-loads.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-loads.ll
@@ -45,14 +45,9 @@ define <32 x i8> @load_v32i8(ptr %a) {
define <2 x i16> @load_v2i16(ptr %a) {
; CHECK-LABEL: load_v2i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub sp, sp, #16
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: ldrh w8, [x0, #2]
-; CHECK-NEXT: str w8, [sp, #12]
-; CHECK-NEXT: ldrh w8, [x0]
-; CHECK-NEXT: str w8, [sp, #8]
-; CHECK-NEXT: ldr d0, [sp, #8]
-; CHECK-NEXT: add sp, sp, #16
+; CHECK-NEXT: ptrue p0.s, vl2
+; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0]
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
%load = load <2 x i16>, ptr %a
ret <2 x i16> %load
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-optimize-ptrue.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-optimize-ptrue.ll
index f0b0b3269e98ff..2a942d32688dce 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-optimize-ptrue.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-optimize-ptrue.ll
@@ -70,21 +70,11 @@ define void @add_v32i8(ptr %a, ptr %b) {
define void @add_v2i16(ptr %a, ptr %b, ptr %c) {
; CHECK-LABEL: add_v2i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub sp, sp, #16
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: ldrh w8, [x0, #2]
; CHECK-NEXT: ptrue p0.s, vl2
-; CHECK-NEXT: str w8, [sp, #4]
-; CHECK-NEXT: ldrh w8, [x0]
-; CHECK-NEXT: str w8, [sp]
-; CHECK-NEXT: ldrh w8, [x1, #2]
-; CHECK-NEXT: str w8, [sp, #12]
-; CHECK-NEXT: ldrh w8, [x1]
-; CHECK-NEXT: str w8, [sp, #8]
-; CHECK-NEXT: ldp d0, d1, [sp]
+; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0]
+; CHECK-NEXT: ld1h { z1.s }, p0/z, [x1]
; CHECK-NEXT: add z0.s, z0.s, z1.s
; CHECK-NEXT: st1h { z0.s }, p0, [x0]
-; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: ret
%op1 = load <2 x i16>, ptr %a
%op2 = load <2 x i16>, ptr %b
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-subvector.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-subvector.ll
index 75bae88fc4798e..838db0ce8185cf 100644
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-subvector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-subvector.ll
@@ -77,16 +77,9 @@ bb1:
define void @subvector_v2i16(ptr %in, ptr %out) {
; CHECK-LABEL: subvector_v2i16:
; CHECK: // %bb.0: // %bb1
-; CHECK-NEXT: sub sp, sp, #16
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: ldrh w8, [x0, #2]
; CHECK-NEXT: ptrue p0.s, vl2
-; CHECK-NEXT: str w8, [sp, #12]
-; CHECK-NEXT: ldrh w8, [x0]
-; CHECK-NEXT: str w8, [sp, #8]
-; CHECK-NEXT: ldr d0, [sp, #8]
+; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0]
; CHECK-NEXT: st1h { z0.s }, p0, [x1]
-; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: ret
%a = load <2 x i16>, ptr %in
br label %bb1