[llvm-branch-commits] [llvm] 9f4d9a0 - [AArch64][DAGCombine] Add performBuildVectorCombine 'extract_elt ~> anyext'
Tobias Hieta via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Tue Aug 9 04:32:43 PDT 2022
Author: David Sherwood
Date: 2022-08-09T13:30:00+02:00
New Revision: 9f4d9a0b7b75210b284816e88362f48e7f64aff8
URL: https://github.com/llvm/llvm-project/commit/9f4d9a0b7b75210b284816e88362f48e7f64aff8
DIFF: https://github.com/llvm/llvm-project/commit/9f4d9a0b7b75210b284816e88362f48e7f64aff8.diff
LOG: [AArch64][DAGCombine] Add performBuildVectorCombine 'extract_elt ~> anyext'
A build vector of two extracted elements is equivalent to an extract
subvector where the inner vector is any-extended to the
extract_vector_elt VT, because extract_vector_elt has the effect of an
any-extend.
(build_vector (extract_elt_i16_to_i32 vec Idx+0) (extract_elt_i16_to_i32 vec Idx+1))
=> (extract_subvector (anyext_i16_to_i32 vec) Idx)
Depends on D130697
Differential Revision: https://reviews.llvm.org/D130698
(cherry picked from commit 487fa6f8c3af87232f7ff9484568be7782f7f8b2)
Added:
Modified:
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/test/CodeGen/AArch64/bitcast-promote-widen.ll
llvm/test/CodeGen/AArch64/sve-fixed-length-extract-subvector.ll
llvm/test/CodeGen/AArch64/sve-fixed-length-masked-gather.ll
llvm/test/CodeGen/AArch64/sve-fixed-length-masked-loads.ll
llvm/test/CodeGen/AArch64/sve-fixed-length-masked-scatter.ll
llvm/test/CodeGen/AArch64/sve-fixed-length-masked-stores.ll
llvm/test/CodeGen/AArch64/vector-fcvt.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 72f0fc94940ca..c28216048d7cb 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -886,7 +886,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setTargetDAGCombine({ISD::ANY_EXTEND, ISD::ZERO_EXTEND, ISD::SIGN_EXTEND,
ISD::VECTOR_SPLICE, ISD::SIGN_EXTEND_INREG,
ISD::CONCAT_VECTORS, ISD::EXTRACT_SUBVECTOR,
- ISD::INSERT_SUBVECTOR, ISD::STORE});
+ ISD::INSERT_SUBVECTOR, ISD::STORE, ISD::BUILD_VECTOR});
if (Subtarget->supportsAddressTopByteIgnored())
setTargetDAGCombine(ISD::LOAD);
@@ -15988,6 +15988,49 @@ static SDValue performVectorAddSubExtCombine(SDNode *N, SelectionDAG &DAG) {
return SDValue();
}
+/// Try to fold a v2i32 BUILD_VECTOR whose two operands are adjacent lanes
+/// extracted from the same vector into one vector extend followed by a
+/// subvector extract:
+///
+///    (build_vector (extract_elt_iXX_to_i32 vec Idx+0)
+///                  (extract_elt_iXX_to_i32 vec Idx+1))
+/// => (extract_subvector (anyext_iXX_to_i32 vec) Idx)
+///
+/// This relies on EXTRACT_VECTOR_ELT having the effect of any-extending the
+/// extracted element to its result type.
+///
+/// \param N   The BUILD_VECTOR node to combine.
+/// \param DCI Combiner state; not used by this combine.
+/// \param DAG The DAG in which the replacement nodes are created.
+/// \returns the replacement EXTRACT_SUBVECTOR value, or an empty SDValue when
+///          the pattern does not match.
+static SDValue performBuildVectorCombine(SDNode *N,
+                                         TargetLowering::DAGCombinerInfo &DCI,
+                                         SelectionDAG &DAG) {
+  SDLoc DL(N);
+  EVT VT = N->getValueType(0);
+
+  // For now, only consider the v2i32 case, which arises as a result of
+  // legalization.
+  if (VT != MVT::v2i32)
+    return SDValue();
+
+  SDValue Elt0 = N->getOperand(0), Elt1 = N->getOperand(1);
+  // Reminder, EXTRACT_VECTOR_ELT has the effect of any-extending to its VT.
+  if (Elt0->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+      Elt1->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+      // Constant index.
+      isa<ConstantSDNode>(Elt0->getOperand(1)) &&
+      isa<ConstantSDNode>(Elt1->getOperand(1)) &&
+      // Both EXTRACT_VECTOR_ELT from same vector...
+      Elt0->getOperand(0) == Elt1->getOperand(0) &&
+      // ... and contiguous. First element's index +1 == second element's index.
+      Elt0->getConstantOperandVal(1) + 1 == Elt1->getConstantOperandVal(1) &&
+      // EXTRACT_SUBVECTOR requires that Idx be a constant multiple of the
+      // result type's known minimum vector length, so a pair such as lanes
+      // (1, 2) must not be combined.
+      Elt0->getConstantOperandVal(1) % VT.getVectorMinNumElements() == 0) {
+    SDValue VecToExtend = Elt0->getOperand(0);
+    EVT ExtVT = VecToExtend.getValueType().changeVectorElementType(MVT::i32);
+    // Give up if the widened vector type is not legal for the target.
+    if (!DAG.getTargetLoweringInfo().isTypeLegal(ExtVT))
+      return SDValue();
+
+    SDValue SubvectorIdx =
+        DAG.getVectorIdxConstant(Elt0->getConstantOperandVal(1), DL);
+
+    SDValue Ext = DAG.getNode(ISD::ANY_EXTEND, DL, ExtVT, VecToExtend);
+    return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Ext, SubvectorIdx);
+  }
+
+  return SDValue();
+}
+
static SDValue performAddSubCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI,
SelectionDAG &DAG) {
@@ -19457,6 +19500,8 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N,
case ISD::ADD:
case ISD::SUB:
return performAddSubCombine(N, DCI, DAG);
+ case ISD::BUILD_VECTOR:
+ return performBuildVectorCombine(N, DCI, DAG);
case AArch64ISD::ANDS:
return performFlagSettingCombine(N, DCI, ISD::AND);
case AArch64ISD::ADC:
diff --git a/llvm/test/CodeGen/AArch64/bitcast-promote-widen.ll b/llvm/test/CodeGen/AArch64/bitcast-promote-widen.ll
index 78b71f41d7a62..864ddc2967c18 100644
--- a/llvm/test/CodeGen/AArch64/bitcast-promote-widen.ll
+++ b/llvm/test/CodeGen/AArch64/bitcast-promote-widen.ll
@@ -6,11 +6,7 @@
define <2 x i16> @bitcast_v2i16_v2f16(<2 x half> %x) {
; CHECK-LABEL: bitcast_v2i16_v2f16:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: umov w8, v0.h[0]
-; CHECK-NEXT: umov w9, v0.h[1]
-; CHECK-NEXT: fmov s0, w8
-; CHECK-NEXT: mov v0.s[1], w9
+; CHECK-NEXT: ushll v0.4s, v0.4h, #0
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
%y = bitcast <2 x half> %x to <2 x i16>
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-extract-subvector.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-extract-subvector.ll
index 540627b013641..f1057fc78e6d6 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-extract-subvector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-extract-subvector.ll
@@ -101,11 +101,8 @@ define void @extract_subvector_v256i8(<256 x i8>* %a, <128 x i8>* %b) vscale_ran
define <2 x i16> @extract_subvector_v4i16(<4 x i16> %op) vscale_range(2,0) #0 {
; CHECK-LABEL: extract_subvector_v4i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: umov w8, v0.h[2]
-; CHECK-NEXT: umov w9, v0.h[3]
-; CHECK-NEXT: fmov s0, w8
-; CHECK-NEXT: mov v0.s[1], w9
+; CHECK-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
%ret = call <2 x i16> @llvm.vector.extract.v2i16.v4i16(<4 x i16> %op, i64 2)
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-gather.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-gather.ll
index bf53482bef763..11323ac24131f 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-gather.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-gather.ll
@@ -575,10 +575,7 @@ define void @masked_gather_v2f16(<2 x half>* %a, <2 x half*>* %b) vscale_range(2
; CHECK-NEXT: movi d0, #0000000000000000
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: fcmeq v1.4h, v1.4h, #0.0
-; CHECK-NEXT: umov w8, v1.h[0]
-; CHECK-NEXT: umov w9, v1.h[1]
-; CHECK-NEXT: fmov s1, w8
-; CHECK-NEXT: mov v1.s[1], w9
+; CHECK-NEXT: ushll v1.4s, v1.4h, #0
; CHECK-NEXT: shl v1.2s, v1.2s, #16
; CHECK-NEXT: sshr v1.2s, v1.2s, #16
; CHECK-NEXT: fmov w8, s1
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-loads.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-loads.ll
index 28e442e4cfe2b..eef6d6034f89d 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-loads.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-loads.ll
@@ -17,10 +17,7 @@ define <2 x half> @masked_load_v2f16(<2 x half>* %ap, <2 x half>* %bp) vscale_ra
; CHECK-NEXT: ldr s2, [x1]
; CHECK-NEXT: ptrue p0.h, vl4
; CHECK-NEXT: fcmeq v1.4h, v1.4h, v2.4h
-; CHECK-NEXT: umov w8, v1.h[0]
-; CHECK-NEXT: umov w9, v1.h[1]
-; CHECK-NEXT: fmov s1, w8
-; CHECK-NEXT: mov v1.s[1], w9
+; CHECK-NEXT: ushll v1.4s, v1.4h, #0
; CHECK-NEXT: shl v1.2s, v1.2s, #16
; CHECK-NEXT: sshr v1.2s, v1.2s, #16
; CHECK-NEXT: fmov w8, s1
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-scatter.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-scatter.ll
index 58834bf39eb84..54b16032c3cd2 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-scatter.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-scatter.ll
@@ -538,10 +538,7 @@ define void @masked_scatter_v2f16(<2 x half>* %a, <2 x half*>* %b) vscale_range(
; CHECK-NEXT: ptrue p0.d, vl4
; CHECK-NEXT: fcmeq v2.4h, v1.4h, #0.0
; CHECK-NEXT: uunpklo z1.s, z1.h
-; CHECK-NEXT: umov w8, v2.h[0]
-; CHECK-NEXT: umov w9, v2.h[1]
-; CHECK-NEXT: fmov s2, w8
-; CHECK-NEXT: mov v2.s[1], w9
+; CHECK-NEXT: ushll v2.4s, v2.4h, #0
; CHECK-NEXT: shl v2.2s, v2.2s, #16
; CHECK-NEXT: sshr v2.2s, v2.2s, #16
; CHECK-NEXT: fmov w8, s2
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-stores.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-stores.ll
index 3d6099e9a7920..e57523efdace3 100644
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-stores.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-stores.ll
@@ -17,10 +17,7 @@ define void @masked_store_v2f16(<2 x half>* %ap, <2 x half>* %bp) vscale_range(2
; CHECK-NEXT: ldr s2, [x1]
; CHECK-NEXT: ptrue p0.h, vl4
; CHECK-NEXT: fcmeq v2.4h, v1.4h, v2.4h
-; CHECK-NEXT: umov w8, v2.h[0]
-; CHECK-NEXT: umov w9, v2.h[1]
-; CHECK-NEXT: fmov s2, w8
-; CHECK-NEXT: mov v2.s[1], w9
+; CHECK-NEXT: ushll v2.4s, v2.4h, #0
; CHECK-NEXT: shl v2.2s, v2.2s, #16
; CHECK-NEXT: sshr v2.2s, v2.2s, #16
; CHECK-NEXT: fmov w8, s2
diff --git a/llvm/test/CodeGen/AArch64/vector-fcvt.ll b/llvm/test/CodeGen/AArch64/vector-fcvt.ll
index 4b7736eb6026d..7a47a3e832b9a 100644
--- a/llvm/test/CodeGen/AArch64/vector-fcvt.ll
+++ b/llvm/test/CodeGen/AArch64/vector-fcvt.ll
@@ -194,23 +194,16 @@ define <8 x float> @uitofp_i64_float(<8 x i64> %a) {
define <4 x double> @sitofp_v4i8_double(<4 x i8> %a) {
; CHECK-LABEL: sitofp_v4i8_double:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: umov w8, v0.h[0]
-; CHECK-NEXT: umov w9, v0.h[2]
-; CHECK-NEXT: umov w10, v0.h[1]
-; CHECK-NEXT: fmov s1, w8
-; CHECK-NEXT: umov w8, v0.h[3]
-; CHECK-NEXT: fmov s0, w9
-; CHECK-NEXT: mov v1.s[1], w10
-; CHECK-NEXT: mov v0.s[1], w8
-; CHECK-NEXT: shl v1.2s, v1.2s, #24
+; CHECK-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
; CHECK-NEXT: shl v0.2s, v0.2s, #24
-; CHECK-NEXT: sshr v1.2s, v1.2s, #24
; CHECK-NEXT: sshr v0.2s, v0.2s, #24
+; CHECK-NEXT: shl v1.2s, v1.2s, #24
+; CHECK-NEXT: sshll v0.2d, v0.2s, #0
+; CHECK-NEXT: sshr v1.2s, v1.2s, #24
+; CHECK-NEXT: scvtf v0.2d, v0.2d
; CHECK-NEXT: sshll v1.2d, v1.2s, #0
-; CHECK-NEXT: sshll v2.2d, v0.2s, #0
-; CHECK-NEXT: scvtf v0.2d, v1.2d
-; CHECK-NEXT: scvtf v1.2d, v2.2d
+; CHECK-NEXT: scvtf v1.2d, v1.2d
; CHECK-NEXT: ret
%1 = sitofp <4 x i8> %a to <4 x double>
ret <4 x double> %1
@@ -333,39 +326,26 @@ define <16 x double> @sitofp_v16i8_double(<16 x i8> %a) {
define <8 x double> @sitofp_i16_double(<8 x i16> %a) {
; CHECK-LABEL: sitofp_i16_double:
; CHECK: // %bb.0:
-; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT: umov w8, v0.h[0]
-; CHECK-NEXT: umov w9, v0.h[2]
-; CHECK-NEXT: umov w11, v0.h[1]
-; CHECK-NEXT: umov w10, v1.h[0]
-; CHECK-NEXT: umov w12, v1.h[2]
-; CHECK-NEXT: fmov s2, w8
-; CHECK-NEXT: umov w8, v0.h[3]
-; CHECK-NEXT: fmov s0, w9
-; CHECK-NEXT: umov w9, v1.h[1]
-; CHECK-NEXT: fmov s3, w10
-; CHECK-NEXT: umov w10, v1.h[3]
-; CHECK-NEXT: fmov s1, w12
-; CHECK-NEXT: mov v0.s[1], w8
-; CHECK-NEXT: mov v2.s[1], w11
-; CHECK-NEXT: mov v3.s[1], w9
-; CHECK-NEXT: mov v1.s[1], w10
-; CHECK-NEXT: shl v0.2s, v0.2s, #16
-; CHECK-NEXT: shl v2.2s, v2.2s, #16
-; CHECK-NEXT: sshr v0.2s, v0.2s, #16
-; CHECK-NEXT: shl v3.2s, v3.2s, #16
-; CHECK-NEXT: shl v1.2s, v1.2s, #16
+; CHECK-NEXT: ushll2 v1.4s, v0.8h, #0
+; CHECK-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-NEXT: shl v2.2s, v1.2s, #16
+; CHECK-NEXT: shl v3.2s, v0.2s, #16
+; CHECK-NEXT: ext v1.16b, v1.16b, v1.16b, #8
; CHECK-NEXT: sshr v2.2s, v2.2s, #16
-; CHECK-NEXT: sshll v4.2d, v0.2s, #0
-; CHECK-NEXT: sshr v0.2s, v3.2s, #16
-; CHECK-NEXT: sshr v1.2s, v1.2s, #16
+; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT: sshr v3.2s, v3.2s, #16
; CHECK-NEXT: sshll v2.2d, v2.2s, #0
-; CHECK-NEXT: sshll v3.2d, v0.2s, #0
-; CHECK-NEXT: sshll v5.2d, v1.2s, #0
-; CHECK-NEXT: scvtf v0.2d, v2.2d
-; CHECK-NEXT: scvtf v1.2d, v4.2d
-; CHECK-NEXT: scvtf v2.2d, v3.2d
-; CHECK-NEXT: scvtf v3.2d, v5.2d
+; CHECK-NEXT: shl v1.2s, v1.2s, #16
+; CHECK-NEXT: shl v0.2s, v0.2s, #16
+; CHECK-NEXT: scvtf v2.2d, v2.2d
+; CHECK-NEXT: sshr v1.2s, v1.2s, #16
+; CHECK-NEXT: sshr v0.2s, v0.2s, #16
+; CHECK-NEXT: sshll v3.2d, v3.2s, #0
+; CHECK-NEXT: sshll v4.2d, v1.2s, #0
+; CHECK-NEXT: sshll v1.2d, v0.2s, #0
+; CHECK-NEXT: scvtf v0.2d, v3.2d
+; CHECK-NEXT: scvtf v1.2d, v1.2d
+; CHECK-NEXT: scvtf v3.2d, v4.2d
; CHECK-NEXT: ret
%1 = sitofp <8 x i16> %a to <8 x double>
ret <8 x double> %1
@@ -402,22 +382,15 @@ define <8 x double> @sitofp_i64_double(<8 x i64> %a) {
define <4 x double> @uitofp_v4i8_double(<4 x i8> %a) {
; CHECK-LABEL: uitofp_v4i8_double:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: umov w8, v0.h[0]
-; CHECK-NEXT: umov w9, v0.h[2]
-; CHECK-NEXT: umov w10, v0.h[1]
+; CHECK-NEXT: ushll v0.4s, v0.4h, #0
; CHECK-NEXT: movi d1, #0x0000ff000000ff
-; CHECK-NEXT: fmov s2, w8
-; CHECK-NEXT: umov w8, v0.h[3]
-; CHECK-NEXT: fmov s0, w9
-; CHECK-NEXT: mov v2.s[1], w10
-; CHECK-NEXT: mov v0.s[1], w8
-; CHECK-NEXT: and v2.8b, v2.8b, v1.8b
+; CHECK-NEXT: ext v2.16b, v0.16b, v0.16b, #8
; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
-; CHECK-NEXT: ushll v1.2d, v2.2s, #0
-; CHECK-NEXT: ushll v2.2d, v0.2s, #0
-; CHECK-NEXT: ucvtf v0.2d, v1.2d
-; CHECK-NEXT: ucvtf v1.2d, v2.2d
+; CHECK-NEXT: ushll v0.2d, v0.2s, #0
+; CHECK-NEXT: and v1.8b, v2.8b, v1.8b
+; CHECK-NEXT: ushll v1.2d, v1.2s, #0
+; CHECK-NEXT: ucvtf v0.2d, v0.2d
+; CHECK-NEXT: ucvtf v1.2d, v1.2d
; CHECK-NEXT: ret
%1 = uitofp <4 x i8> %a to <4 x double>
ret <4 x double> %1
@@ -530,36 +503,23 @@ define <16 x double> @uitofp_v16i8_double(<16 x i8> %a) {
define <8 x double> @uitofp_i16_double(<8 x i16> %a) {
; CHECK-LABEL: uitofp_i16_double:
; CHECK: // %bb.0:
-; CHECK-NEXT: ext v2.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT: umov w8, v0.h[0]
-; CHECK-NEXT: umov w9, v0.h[2]
-; CHECK-NEXT: umov w11, v0.h[1]
; CHECK-NEXT: movi d1, #0x00ffff0000ffff
-; CHECK-NEXT: umov w10, v2.h[0]
-; CHECK-NEXT: umov w12, v2.h[2]
-; CHECK-NEXT: fmov s3, w8
-; CHECK-NEXT: umov w8, v0.h[3]
-; CHECK-NEXT: fmov s0, w9
-; CHECK-NEXT: umov w9, v2.h[1]
-; CHECK-NEXT: fmov s4, w10
-; CHECK-NEXT: umov w10, v2.h[3]
-; CHECK-NEXT: fmov s2, w12
-; CHECK-NEXT: mov v0.s[1], w8
-; CHECK-NEXT: mov v3.s[1], w11
-; CHECK-NEXT: mov v4.s[1], w9
-; CHECK-NEXT: mov v2.s[1], w10
-; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
-; CHECK-NEXT: and v3.8b, v3.8b, v1.8b
-; CHECK-NEXT: ushll v5.2d, v0.2s, #0
-; CHECK-NEXT: and v0.8b, v4.8b, v1.8b
-; CHECK-NEXT: and v1.8b, v2.8b, v1.8b
+; CHECK-NEXT: ushll2 v2.4s, v0.8h, #0
+; CHECK-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-NEXT: and v3.8b, v2.8b, v1.8b
+; CHECK-NEXT: and v4.8b, v0.8b, v1.8b
+; CHECK-NEXT: ext v2.16b, v2.16b, v2.16b, #8
+; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
; CHECK-NEXT: ushll v3.2d, v3.2s, #0
-; CHECK-NEXT: ushll v2.2d, v0.2s, #0
-; CHECK-NEXT: ushll v4.2d, v1.2s, #0
-; CHECK-NEXT: ucvtf v0.2d, v3.2d
-; CHECK-NEXT: ucvtf v1.2d, v5.2d
-; CHECK-NEXT: ucvtf v2.2d, v2.2d
-; CHECK-NEXT: ucvtf v3.2d, v4.2d
+; CHECK-NEXT: ushll v4.2d, v4.2s, #0
+; CHECK-NEXT: and v2.8b, v2.8b, v1.8b
+; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
+; CHECK-NEXT: ushll v5.2d, v2.2s, #0
+; CHECK-NEXT: ucvtf v2.2d, v3.2d
+; CHECK-NEXT: ushll v1.2d, v0.2s, #0
+; CHECK-NEXT: ucvtf v0.2d, v4.2d
+; CHECK-NEXT: ucvtf v1.2d, v1.2d
+; CHECK-NEXT: ucvtf v3.2d, v5.2d
; CHECK-NEXT: ret
%1 = uitofp <8 x i16> %a to <8 x double>
ret <8 x double> %1
More information about the llvm-branch-commits
mailing list