[llvm] b062fff - Recommit "[AArch64] Custom lower <4 x i8> loads"
Sjoerd Meijer via llvm-commits
llvm-commits at lists.llvm.org
Wed Jun 30 01:18:40 PDT 2021
Author: Sjoerd Meijer
Date: 2021-06-30T09:18:06+01:00
New Revision: b062fff87adcfa2e252cbce43d92b61b76614bd5
URL: https://github.com/llvm/llvm-project/commit/b062fff87adcfa2e252cbce43d92b61b76614bd5
DIFF: https://github.com/llvm/llvm-project/commit/b062fff87adcfa2e252cbce43d92b61b76614bd5.diff
LOG: Recommit "[AArch64] Custom lower <4 x i8> loads"
This recommits D104782, including a fix for a wrong operand that was added to
the new load node.
Differential Revision: https://reviews.llvm.org/D105110
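For illustration, here is a minimal IR example of the kind of extending
<4 x i8> load this patch now custom lowers; it mirrors the fzext_v4i16 test
added to aarch64-load-ext.ll below:

  define <4 x i16> @fzext_v4i16(<4 x i8>* %a) {
    %x = load <4 x i8>, <4 x i8>* %a
    %y = zext <4 x i8> %x to <4 x i16>
    ret <4 x i16> %y
  }

On little-endian targets this now selects a single 32-bit scalar load followed
by a vector extend (ldr s0, [x0]; ushll v0.8h, v0.8b, #0) instead of four
byte loads and lane inserts; see the updated CHECK lines in the diff below.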
Added:
Modified:
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/lib/Target/AArch64/AArch64ISelLowering.h
llvm/test/CodeGen/AArch64/aarch64-load-ext.ll
llvm/test/CodeGen/AArch64/arm64-vshift.ll
llvm/test/CodeGen/AArch64/sadd_sat_vec.ll
llvm/test/CodeGen/AArch64/ssub_sat_vec.ll
llvm/test/CodeGen/AArch64/uadd_sat_vec.ll
llvm/test/CodeGen/AArch64/usub_sat_vec.ll
Removed:
llvm/test/CodeGen/AArch64/neon-extload.ll
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index b3edefe550f8..5b0e7d743b08 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1131,6 +1131,13 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::VSCALE, MVT::i32, Custom);
setTruncStoreAction(MVT::v4i16, MVT::v4i8, Custom);
+
+ setLoadExtAction(ISD::EXTLOAD, MVT::v4i16, MVT::v4i8, Custom);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::v4i16, MVT::v4i8, Custom);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i16, MVT::v4i8, Custom);
+ setLoadExtAction(ISD::EXTLOAD, MVT::v4i32, MVT::v4i8, Custom);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::v4i32, MVT::v4i8, Custom);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i32, MVT::v4i8, Custom);
}
if (Subtarget->hasSVE()) {
@@ -4477,6 +4484,40 @@ SDValue AArch64TargetLowering::LowerSTORE(SDValue Op,
return SDValue();
}
+// Custom lowering for extending v4i8 vector loads.
+SDValue AArch64TargetLowering::LowerLOAD(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
+ assert(LoadNode && "Expected custom lowering of a load node");
+ EVT VT = Op->getValueType(0);
+ assert((VT == MVT::v4i16 || VT == MVT::v4i32) && "Expected v4i16 or v4i32");
+
+ if (LoadNode->getMemoryVT() != MVT::v4i8)
+ return SDValue();
+
+ unsigned ExtType;
+ if (LoadNode->getExtensionType() == ISD::SEXTLOAD)
+ ExtType = ISD::SIGN_EXTEND;
+ else if (LoadNode->getExtensionType() == ISD::ZEXTLOAD ||
+ LoadNode->getExtensionType() == ISD::EXTLOAD)
+ ExtType = ISD::ZERO_EXTEND;
+ else
+ return SDValue();
+
+ SDValue Load = DAG.getLoad(MVT::f32, DL, LoadNode->getChain(),
+ LoadNode->getBasePtr(), MachinePointerInfo());
+ SDValue Chain = Load.getValue(1);
+ SDValue Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v2f32, Load);
+ SDValue BC = DAG.getNode(ISD::BITCAST, DL, MVT::v8i8, Vec);
+ SDValue Ext = DAG.getNode(ExtType, DL, MVT::v8i16, BC);
+ Ext = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i16, Ext,
+ DAG.getConstant(0, DL, MVT::i64));
+ if (VT == MVT::v4i32)
+ Ext = DAG.getNode(ExtType, DL, MVT::v4i32, Ext);
+ return DAG.getMergeValues({Ext, Chain}, DL);
+}
+
// Generate SUBS and CSEL for integer abs.
SDValue AArch64TargetLowering::LowerABS(SDValue Op, SelectionDAG &DAG) const {
MVT VT = Op.getSimpleValueType();
@@ -4720,7 +4761,7 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
case ISD::LOAD:
if (useSVEForFixedLengthVectorVT(Op.getValueType()))
return LowerFixedLengthVectorLoadToSVE(Op, DAG);
- llvm_unreachable("Unexpected request to lower ISD::LOAD");
+ return LowerLOAD(Op, DAG);
case ISD::ADD:
return LowerToPredicatedOp(Op, DAG, AArch64ISD::ADD_PRED);
case ISD::AND:
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index f3b2da830430..7daa61996739 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -851,6 +851,7 @@ class AArch64TargetLowering : public TargetLowering {
SmallVectorImpl<SDValue> &InVals, bool isThisReturn,
SDValue ThisVal) const;
+ SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerABS(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/test/CodeGen/AArch64/aarch64-load-ext.ll b/llvm/test/CodeGen/AArch64/aarch64-load-ext.ll
index 308352e3e227..1bbab3879dc3 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-load-ext.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-load-ext.ll
@@ -86,27 +86,222 @@ define <2 x i8> @test3(<2 x i8>* %v2i8_ptr) {
define <4 x i8> @test4(<4 x i8>* %v4i8_ptr) {
; CHECK-LE-LABEL: test4:
; CHECK-LE: // %bb.0:
-; CHECK-LE-NEXT: ld1 { v0.b }[0], [x0]
-; CHECK-LE-NEXT: add x8, x0, #1 // =1
-; CHECK-LE-NEXT: ld1 { v0.b }[2], [x8]
-; CHECK-LE-NEXT: add x8, x0, #2 // =2
-; CHECK-LE-NEXT: ld1 { v0.b }[4], [x8]
-; CHECK-LE-NEXT: add x8, x0, #3 // =3
-; CHECK-LE-NEXT: ld1 { v0.b }[6], [x8]
+; CHECK-LE-NEXT: ldr s0, [x0]
+; CHECK-LE-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-LE-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-LE-NEXT: ret
;
; CHECK-BE-LABEL: test4:
; CHECK-BE: // %bb.0:
-; CHECK-BE-NEXT: ld1 { v0.b }[0], [x0]
-; CHECK-BE-NEXT: add x8, x0, #1 // =1
-; CHECK-BE-NEXT: ld1 { v0.b }[2], [x8]
-; CHECK-BE-NEXT: add x8, x0, #2 // =2
-; CHECK-BE-NEXT: ld1 { v0.b }[4], [x8]
-; CHECK-BE-NEXT: add x8, x0, #3 // =3
-; CHECK-BE-NEXT: ld1 { v0.b }[6], [x8]
+; CHECK-BE-NEXT: ldr s0, [x0]
+; CHECK-BE-NEXT: rev32 v0.8b, v0.8b
+; CHECK-BE-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-BE-NEXT: rev64 v0.4h, v0.4h
; CHECK-BE-NEXT: ret
%v4i8 = load <4 x i8>, <4 x i8>* %v4i8_ptr
ret <4 x i8> %v4i8
}
+
+define <4 x i32> @fsext_v4i32(<4 x i8>* %a) {
+; CHECK-LE-LABEL: fsext_v4i32:
+; CHECK-LE: // %bb.0:
+; CHECK-LE-NEXT: ldr s0, [x0]
+; CHECK-LE-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-LE-NEXT: sshll v0.4s, v0.4h, #0
+; CHECK-LE-NEXT: ret
+;
+; CHECK-BE-LABEL: fsext_v4i32:
+; CHECK-BE: // %bb.0:
+; CHECK-BE-NEXT: ldr s0, [x0]
+; CHECK-BE-NEXT: rev32 v0.8b, v0.8b
+; CHECK-BE-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-BE-NEXT: sshll v0.4s, v0.4h, #0
+; CHECK-BE-NEXT: rev64 v0.4s, v0.4s
+; CHECK-BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-BE-NEXT: ret
+ %x = load <4 x i8>, <4 x i8>* %a
+ %y = sext <4 x i8> %x to <4 x i32>
+ ret <4 x i32> %y
+}
+
+define <4 x i32> @fzext_v4i32(<4 x i8>* %a) {
+; CHECK-LE-LABEL: fzext_v4i32:
+; CHECK-LE: // %bb.0:
+; CHECK-LE-NEXT: ldr s0, [x0]
+; CHECK-LE-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-LE-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-LE-NEXT: ret
+;
+; CHECK-BE-LABEL: fzext_v4i32:
+; CHECK-BE: // %bb.0:
+; CHECK-BE-NEXT: ldr s0, [x0]
+; CHECK-BE-NEXT: rev32 v0.8b, v0.8b
+; CHECK-BE-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-BE-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-BE-NEXT: rev64 v0.4s, v0.4s
+; CHECK-BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-BE-NEXT: ret
+ %x = load <4 x i8>, <4 x i8>* %a
+ %y = zext <4 x i8> %x to <4 x i32>
+ ret <4 x i32> %y
+}
+
+; TODO: This codegen could just be:
+; ldrb w0, [x0]
+;
+define i32 @loadExti32(<4 x i8>* %ref) {
+; CHECK-LE-LABEL: loadExti32:
+; CHECK-LE: // %bb.0:
+; CHECK-LE-NEXT: ldr s0, [x0]
+; CHECK-LE-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-LE-NEXT: umov w8, v0.h[0]
+; CHECK-LE-NEXT: and w0, w8, #0xff
+; CHECK-LE-NEXT: ret
+;
+; CHECK-BE-LABEL: loadExti32:
+; CHECK-BE: // %bb.0:
+; CHECK-BE-NEXT: ldr s0, [x0]
+; CHECK-BE-NEXT: rev32 v0.8b, v0.8b
+; CHECK-BE-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-BE-NEXT: umov w8, v0.h[0]
+; CHECK-BE-NEXT: and w0, w8, #0xff
+; CHECK-BE-NEXT: ret
+ %a = load <4 x i8>, <4 x i8>* %ref
+ %vecext = extractelement <4 x i8> %a, i32 0
+ %conv = zext i8 %vecext to i32
+ ret i32 %conv
+}
+
+define <4 x i16> @fsext_v4i16(<4 x i8>* %a) {
+; CHECK-LE-LABEL: fsext_v4i16:
+; CHECK-LE: // %bb.0:
+; CHECK-LE-NEXT: ldr s0, [x0]
+; CHECK-LE-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-LE-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-LE-NEXT: ret
+;
+; CHECK-BE-LABEL: fsext_v4i16:
+; CHECK-BE: // %bb.0:
+; CHECK-BE-NEXT: ldr s0, [x0]
+; CHECK-BE-NEXT: rev32 v0.8b, v0.8b
+; CHECK-BE-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-BE-NEXT: rev64 v0.4h, v0.4h
+; CHECK-BE-NEXT: ret
+ %x = load <4 x i8>, <4 x i8>* %a
+ %y = sext <4 x i8> %x to <4 x i16>
+ ret <4 x i16> %y
+}
+
+define <4 x i16> @fzext_v4i16(<4 x i8>* %a) {
+; CHECK-LE-LABEL: fzext_v4i16:
+; CHECK-LE: // %bb.0:
+; CHECK-LE-NEXT: ldr s0, [x0]
+; CHECK-LE-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-LE-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-LE-NEXT: ret
+;
+; CHECK-BE-LABEL: fzext_v4i16:
+; CHECK-BE: // %bb.0:
+; CHECK-BE-NEXT: ldr s0, [x0]
+; CHECK-BE-NEXT: rev32 v0.8b, v0.8b
+; CHECK-BE-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-BE-NEXT: rev64 v0.4h, v0.4h
+; CHECK-BE-NEXT: ret
+ %x = load <4 x i8>, <4 x i8>* %a
+ %y = zext <4 x i8> %x to <4 x i16>
+ ret <4 x i16> %y
+}
+
+define <4 x i16> @anyext_v4i16(<4 x i8> *%a, <4 x i8> *%b) {
+; CHECK-LE-LABEL: anyext_v4i16:
+; CHECK-LE: // %bb.0:
+; CHECK-LE-NEXT: ldr s0, [x0]
+; CHECK-LE-NEXT: ldr s1, [x1]
+; CHECK-LE-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-LE-NEXT: ushll v1.8h, v1.8b, #0
+; CHECK-LE-NEXT: add v0.4h, v0.4h, v1.4h
+; CHECK-LE-NEXT: shl v0.4h, v0.4h, #8
+; CHECK-LE-NEXT: sshr v0.4h, v0.4h, #8
+; CHECK-LE-NEXT: ret
+;
+; CHECK-BE-LABEL: anyext_v4i16:
+; CHECK-BE: // %bb.0:
+; CHECK-BE-NEXT: ldr s0, [x0]
+; CHECK-BE-NEXT: ldr s1, [x1]
+; CHECK-BE-NEXT: rev32 v0.8b, v0.8b
+; CHECK-BE-NEXT: rev32 v1.8b, v1.8b
+; CHECK-BE-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-BE-NEXT: ushll v1.8h, v1.8b, #0
+; CHECK-BE-NEXT: add v0.4h, v0.4h, v1.4h
+; CHECK-BE-NEXT: shl v0.4h, v0.4h, #8
+; CHECK-BE-NEXT: sshr v0.4h, v0.4h, #8
+; CHECK-BE-NEXT: rev64 v0.4h, v0.4h
+; CHECK-BE-NEXT: ret
+ %x = load <4 x i8>, <4 x i8>* %a, align 4
+ %y = load <4 x i8>, <4 x i8>* %b, align 4
+ %z = add <4 x i8> %x, %y
+ %s = sext <4 x i8> %z to <4 x i16>
+ ret <4 x i16> %s
+}
+
+define <4 x i32> @anyext_v4i32(<4 x i8> *%a, <4 x i8> *%b) {
+; CHECK-LE-LABEL: anyext_v4i32:
+; CHECK-LE: // %bb.0:
+; CHECK-LE-NEXT: ldr s0, [x0]
+; CHECK-LE-NEXT: ldr s1, [x1]
+; CHECK-LE-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-LE-NEXT: ushll v1.8h, v1.8b, #0
+; CHECK-LE-NEXT: add v0.4h, v0.4h, v1.4h
+; CHECK-LE-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-LE-NEXT: shl v0.4s, v0.4s, #24
+; CHECK-LE-NEXT: sshr v0.4s, v0.4s, #24
+; CHECK-LE-NEXT: ret
+;
+; CHECK-BE-LABEL: anyext_v4i32:
+; CHECK-BE: // %bb.0:
+; CHECK-BE-NEXT: ldr s0, [x0]
+; CHECK-BE-NEXT: ldr s1, [x1]
+; CHECK-BE-NEXT: rev32 v0.8b, v0.8b
+; CHECK-BE-NEXT: rev32 v1.8b, v1.8b
+; CHECK-BE-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-BE-NEXT: ushll v1.8h, v1.8b, #0
+; CHECK-BE-NEXT: add v0.4h, v0.4h, v1.4h
+; CHECK-BE-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-BE-NEXT: shl v0.4s, v0.4s, #24
+; CHECK-BE-NEXT: sshr v0.4s, v0.4s, #24
+; CHECK-BE-NEXT: rev64 v0.4s, v0.4s
+; CHECK-BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-BE-NEXT: ret
+ %x = load <4 x i8>, <4 x i8>* %a, align 4
+ %y = load <4 x i8>, <4 x i8>* %b, align 4
+ %z = add <4 x i8> %x, %y
+ %s = sext <4 x i8> %z to <4 x i32>
+ ret <4 x i32> %s
+}
+
+define <4 x i8> @bitcast(i32 %0) {
+; CHECK-LE-LABEL: bitcast:
+; CHECK-LE: // %bb.0:
+; CHECK-LE-NEXT: sub sp, sp, #16 // =16
+; CHECK-LE-NEXT: .cfi_def_cfa_offset 16
+; CHECK-LE-NEXT: str w0, [sp, #12]
+; CHECK-LE-NEXT: ldr s0, [sp, #12]
+; CHECK-LE-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-LE-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-LE-NEXT: add sp, sp, #16 // =16
+; CHECK-LE-NEXT: ret
+;
+; CHECK-BE-LABEL: bitcast:
+; CHECK-BE: // %bb.0:
+; CHECK-BE-NEXT: sub sp, sp, #16 // =16
+; CHECK-BE-NEXT: .cfi_def_cfa_offset 16
+; CHECK-BE-NEXT: str w0, [sp, #12]
+; CHECK-BE-NEXT: ldr s0, [sp, #12]
+; CHECK-BE-NEXT: rev32 v0.8b, v0.8b
+; CHECK-BE-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-BE-NEXT: rev64 v0.4h, v0.4h
+; CHECK-BE-NEXT: add sp, sp, #16 // =16
+; CHECK-BE-NEXT: ret
+ %2 = bitcast i32 %0 to <4 x i8>
+ ret <4 x i8> %2
+}
diff --git a/llvm/test/CodeGen/AArch64/arm64-vshift.ll b/llvm/test/CodeGen/AArch64/arm64-vshift.ll
index c63f3399e636..07b257043426 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vshift.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vshift.ll
@@ -1494,17 +1494,12 @@ define <8 x i16> @neon.ushl8h_no_constant_shift(<8 x i8>* %A) nounwind {
}
define <4 x i32> @neon.ushl8h_constant_shift_extend_not_2x(<4 x i8>* %A) nounwind {
-;CHECK-LABEL: @neon.ushl8h_constant_shift_extend_not_2x
-;CHECK-NOT: ushll.8h v0,
-;CHECK: ldrb w8, [x0]
-;CHECK: fmov s0, w8
-;CHECK: ldrb w8, [x0, #1]
-;CHECK: mov.s v0[1], w8
-;CHECK: ldrb w8, [x0, #2]
-;CHECK: mov.s v0[2], w8
-;CHECK: ldrb w8, [x0, #3]
-;CHECK: mov.s v0[3], w8
-;CHECK: shl.4s v0, v0, #1
+; CHECK-LABEL: neon.ushl8h_constant_shift_extend_not_2x:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr s0, [x0]
+; CHECK-NEXT: ushll.8h v0, v0, #0
+; CHECK-NEXT: ushll.4s v0, v0, #1
+; CHECK-NEXT: ret
%tmp1 = load <4 x i8>, <4 x i8>* %A
%tmp2 = zext <4 x i8> %tmp1 to <4 x i32>
%tmp3 = call <4 x i32> @llvm.aarch64.neon.ushl.v4i32(<4 x i32> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
@@ -1637,16 +1632,12 @@ define <8 x i16> @neon.sshll8h_constant_shift(<8 x i8>* %A) nounwind {
}
define <4 x i32> @neon.sshl4s_wrong_ext_constant_shift(<4 x i8>* %A) nounwind {
-;CHECK-LABEL: neon.sshl4s_wrong_ext_constant_shift
-;CHECK: ldrsb w8, [x0]
-;CHECK-NEXT: fmov s0, w8
-;CHECK-NEXT: ldrsb w8, [x0, #1]
-;CHECK-NEXT: mov.s v0[1], w8
-;CHECK-NEXT: ldrsb w8, [x0, #2]
-;CHECK-NEXT: mov.s v0[2], w8
-;CHECK-NEXT: ldrsb w8, [x0, #3]
-;CHECK-NEXT: mov.s v0[3], w8
-;CHECK-NEXT: shl.4s v0, v0, #1
+; CHECK-LABEL: neon.sshl4s_wrong_ext_constant_shift:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr s0, [x0]
+; CHECK-NEXT: sshll.8h v0, v0, #0
+; CHECK-NEXT: sshll.4s v0, v0, #1
+; CHECK-NEXT: ret
%tmp1 = load <4 x i8>, <4 x i8>* %A
%tmp2 = sext <4 x i8> %tmp1 to <4 x i32>
%tmp3 = call <4 x i32> @llvm.aarch64.neon.sshl.v4i32(<4 x i32> %tmp2, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
diff --git a/llvm/test/CodeGen/AArch64/neon-extload.ll b/llvm/test/CodeGen/AArch64/neon-extload.ll
deleted file mode 100644
index 321a1babb411..000000000000
--- a/llvm/test/CodeGen/AArch64/neon-extload.ll
+++ /dev/null
@@ -1,145 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefix=LE
-; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64_be-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefix=BE
-
-define <4 x i32> @fsext_v4i32(<4 x i8>* %a) {
-; LE-LABEL: fsext_v4i32:
-; LE: // %bb.0:
-; LE-NEXT: ldrsb w8, [x0]
-; LE-NEXT: ldrsb w9, [x0, #1]
-; LE-NEXT: ldrsb w10, [x0, #2]
-; LE-NEXT: ldrsb w11, [x0, #3]
-; LE-NEXT: fmov s0, w8
-; LE-NEXT: mov v0.s[1], w9
-; LE-NEXT: mov v0.s[2], w10
-; LE-NEXT: mov v0.s[3], w11
-; LE-NEXT: ret
-;
-; BE-LABEL: fsext_v4i32:
-; BE: // %bb.0:
-; BE-NEXT: ldrsb w8, [x0]
-; BE-NEXT: ldrsb w9, [x0, #1]
-; BE-NEXT: ldrsb w10, [x0, #2]
-; BE-NEXT: ldrsb w11, [x0, #3]
-; BE-NEXT: fmov s0, w8
-; BE-NEXT: mov v0.s[1], w9
-; BE-NEXT: mov v0.s[2], w10
-; BE-NEXT: mov v0.s[3], w11
-; BE-NEXT: rev64 v0.4s, v0.4s
-; BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
-; BE-NEXT: ret
- %x = load <4 x i8>, <4 x i8>* %a
- %y = sext <4 x i8> %x to <4 x i32>
- ret <4 x i32> %y
-}
-
-define <4 x i32> @fzext_v4i32(<4 x i8>* %a) {
-; LE-LABEL: fzext_v4i32:
-; LE: // %bb.0:
-; LE-NEXT: ldrb w8, [x0]
-; LE-NEXT: ldrb w9, [x0, #1]
-; LE-NEXT: ldrb w10, [x0, #2]
-; LE-NEXT: ldrb w11, [x0, #3]
-; LE-NEXT: fmov s0, w8
-; LE-NEXT: mov v0.s[1], w9
-; LE-NEXT: mov v0.s[2], w10
-; LE-NEXT: mov v0.s[3], w11
-; LE-NEXT: ret
-;
-; BE-LABEL: fzext_v4i32:
-; BE: // %bb.0:
-; BE-NEXT: ldrb w8, [x0]
-; BE-NEXT: ldrb w9, [x0, #1]
-; BE-NEXT: ldrb w10, [x0, #2]
-; BE-NEXT: ldrb w11, [x0, #3]
-; BE-NEXT: fmov s0, w8
-; BE-NEXT: mov v0.s[1], w9
-; BE-NEXT: mov v0.s[2], w10
-; BE-NEXT: mov v0.s[3], w11
-; BE-NEXT: rev64 v0.4s, v0.4s
-; BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
-; BE-NEXT: ret
- %x = load <4 x i8>, <4 x i8>* %a
- %y = zext <4 x i8> %x to <4 x i32>
- ret <4 x i32> %y
-}
-
-define i32 @loadExt.i32(<4 x i8>* %ref) {
-; CHECK-LABEL: loadExt.i32:
-; CHECK: ldrb
-; LE-LABEL: loadExt.i32:
-; LE: // %bb.0:
-; LE-NEXT: ldrb w0, [x0]
-; LE-NEXT: ret
-;
-; BE-LABEL: loadExt.i32:
-; BE: // %bb.0:
-; BE-NEXT: ldrb w0, [x0]
-; BE-NEXT: ret
- %a = load <4 x i8>, <4 x i8>* %ref
- %vecext = extractelement <4 x i8> %a, i32 0
- %conv = zext i8 %vecext to i32
- ret i32 %conv
-}
-
-define <4 x i16> @fsext_v4i16(<4 x i8>* %a) {
-; LE-LABEL: fsext_v4i16:
-; LE: // %bb.0:
-; LE-NEXT: ldrsb w8, [x0]
-; LE-NEXT: ldrsb w9, [x0, #1]
-; LE-NEXT: ldrsb w10, [x0, #2]
-; LE-NEXT: ldrsb w11, [x0, #3]
-; LE-NEXT: fmov s0, w8
-; LE-NEXT: mov v0.h[1], w9
-; LE-NEXT: mov v0.h[2], w10
-; LE-NEXT: mov v0.h[3], w11
-; LE-NEXT: // kill: def $d0 killed $d0 killed $q0
-; LE-NEXT: ret
-;
-; BE-LABEL: fsext_v4i16:
-; BE: // %bb.0:
-; BE-NEXT: ldrsb w8, [x0]
-; BE-NEXT: ldrsb w9, [x0, #1]
-; BE-NEXT: ldrsb w10, [x0, #2]
-; BE-NEXT: ldrsb w11, [x0, #3]
-; BE-NEXT: fmov s0, w8
-; BE-NEXT: mov v0.h[1], w9
-; BE-NEXT: mov v0.h[2], w10
-; BE-NEXT: mov v0.h[3], w11
-; BE-NEXT: rev64 v0.4h, v0.4h
-; BE-NEXT: ret
- %x = load <4 x i8>, <4 x i8>* %a
- %y = sext <4 x i8> %x to <4 x i16>
- ret <4 x i16> %y
-}
-
-define <4 x i16> @fzext_v4i16(<4 x i8>* %a) {
-; LE-LABEL: fzext_v4i16:
-; LE: // %bb.0:
-; LE-NEXT: ldrb w8, [x0]
-; LE-NEXT: ldrb w9, [x0, #1]
-; LE-NEXT: ldrb w10, [x0, #2]
-; LE-NEXT: ldrb w11, [x0, #3]
-; LE-NEXT: fmov s0, w8
-; LE-NEXT: mov v0.h[1], w9
-; LE-NEXT: mov v0.h[2], w10
-; LE-NEXT: mov v0.h[3], w11
-; LE-NEXT: // kill: def $d0 killed $d0 killed $q0
-; LE-NEXT: ret
-;
-; BE-LABEL: fzext_v4i16:
-; BE: // %bb.0:
-; BE-NEXT: ldrb w8, [x0]
-; BE-NEXT: ldrb w9, [x0, #1]
-; BE-NEXT: ldrb w10, [x0, #2]
-; BE-NEXT: ldrb w11, [x0, #3]
-; BE-NEXT: fmov s0, w8
-; BE-NEXT: mov v0.h[1], w9
-; BE-NEXT: mov v0.h[2], w10
-; BE-NEXT: mov v0.h[3], w11
-; BE-NEXT: rev64 v0.4h, v0.4h
-; BE-NEXT: ret
- %x = load <4 x i8>, <4 x i8>* %a
- %y = zext <4 x i8> %x to <4 x i16>
- ret <4 x i16> %y
-}
diff --git a/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll b/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll
index cefd4758b374..9c654f6719b1 100644
--- a/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll
+++ b/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll
@@ -112,22 +112,10 @@ define void @v8i8(<8 x i8>* %px, <8 x i8>* %py, <8 x i8>* %pz) nounwind {
define void @v4i8(<4 x i8>* %px, <4 x i8>* %py, <4 x i8>* %pz) nounwind {
; CHECK-LABEL: v4i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldrsb w8, [x0]
-; CHECK-NEXT: ldrsb w9, [x1]
-; CHECK-NEXT: ldrsb w10, [x0, #1]
-; CHECK-NEXT: ldrsb w11, [x1, #1]
-; CHECK-NEXT: fmov s0, w8
-; CHECK-NEXT: fmov s1, w9
-; CHECK-NEXT: ldrsb w8, [x0, #2]
-; CHECK-NEXT: ldrsb w9, [x1, #2]
-; CHECK-NEXT: mov v0.h[1], w10
-; CHECK-NEXT: mov v1.h[1], w11
-; CHECK-NEXT: ldrsb w10, [x0, #3]
-; CHECK-NEXT: ldrsb w11, [x1, #3]
-; CHECK-NEXT: mov v0.h[2], w8
-; CHECK-NEXT: mov v1.h[2], w9
-; CHECK-NEXT: mov v0.h[3], w10
-; CHECK-NEXT: mov v1.h[3], w11
+; CHECK-NEXT: ldr s0, [x0]
+; CHECK-NEXT: ldr s1, [x1]
+; CHECK-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-NEXT: sshll v1.8h, v1.8b, #0
; CHECK-NEXT: shl v1.4h, v1.4h, #8
; CHECK-NEXT: shl v0.4h, v0.4h, #8
; CHECK-NEXT: sqadd v0.4h, v0.4h, v1.4h
diff --git a/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll b/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll
index 17af8a11aeee..7c2e2330608e 100644
--- a/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll
+++ b/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll
@@ -113,22 +113,10 @@ define void @v8i8(<8 x i8>* %px, <8 x i8>* %py, <8 x i8>* %pz) nounwind {
define void @v4i8(<4 x i8>* %px, <4 x i8>* %py, <4 x i8>* %pz) nounwind {
; CHECK-LABEL: v4i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldrsb w8, [x0]
-; CHECK-NEXT: ldrsb w9, [x1]
-; CHECK-NEXT: ldrsb w10, [x0, #1]
-; CHECK-NEXT: ldrsb w11, [x1, #1]
-; CHECK-NEXT: fmov s0, w8
-; CHECK-NEXT: fmov s1, w9
-; CHECK-NEXT: ldrsb w8, [x0, #2]
-; CHECK-NEXT: ldrsb w9, [x1, #2]
-; CHECK-NEXT: mov v0.h[1], w10
-; CHECK-NEXT: mov v1.h[1], w11
-; CHECK-NEXT: ldrsb w10, [x0, #3]
-; CHECK-NEXT: ldrsb w11, [x1, #3]
-; CHECK-NEXT: mov v0.h[2], w8
-; CHECK-NEXT: mov v1.h[2], w9
-; CHECK-NEXT: mov v0.h[3], w10
-; CHECK-NEXT: mov v1.h[3], w11
+; CHECK-NEXT: ldr s0, [x0]
+; CHECK-NEXT: ldr s1, [x1]
+; CHECK-NEXT: sshll v0.8h, v0.8b, #0
+; CHECK-NEXT: sshll v1.8h, v1.8b, #0
; CHECK-NEXT: shl v1.4h, v1.4h, #8
; CHECK-NEXT: shl v0.4h, v0.4h, #8
; CHECK-NEXT: sqsub v0.4h, v0.4h, v1.4h
diff --git a/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll b/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll
index 21427a6a92d7..2b52e4c934c9 100644
--- a/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll
+++ b/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll
@@ -112,23 +112,11 @@ define void @v8i8(<8 x i8>* %px, <8 x i8>* %py, <8 x i8>* %pz) nounwind {
define void @v4i8(<4 x i8>* %px, <4 x i8>* %py, <4 x i8>* %pz) nounwind {
; CHECK-LABEL: v4i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldrb w8, [x0]
-; CHECK-NEXT: ldrb w9, [x1]
-; CHECK-NEXT: ldrb w10, [x0, #1]
-; CHECK-NEXT: ldrb w11, [x1, #1]
-; CHECK-NEXT: ldrb w12, [x0, #2]
-; CHECK-NEXT: fmov s0, w8
-; CHECK-NEXT: ldrb w8, [x1, #2]
-; CHECK-NEXT: fmov s1, w9
-; CHECK-NEXT: mov v0.h[1], w10
-; CHECK-NEXT: ldrb w9, [x0, #3]
-; CHECK-NEXT: ldrb w10, [x1, #3]
-; CHECK-NEXT: mov v1.h[1], w11
-; CHECK-NEXT: mov v0.h[2], w12
-; CHECK-NEXT: mov v1.h[2], w8
-; CHECK-NEXT: mov v0.h[3], w9
-; CHECK-NEXT: mov v1.h[3], w10
+; CHECK-NEXT: ldr s0, [x0]
+; CHECK-NEXT: ldr s1, [x1]
; CHECK-NEXT: movi d2, #0xff00ff00ff00ff
+; CHECK-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-NEXT: ushll v1.8h, v1.8b, #0
; CHECK-NEXT: add v0.4h, v0.4h, v1.4h
; CHECK-NEXT: umin v0.4h, v0.4h, v2.4h
; CHECK-NEXT: xtn v0.8b, v0.8h
diff --git a/llvm/test/CodeGen/AArch64/usub_sat_vec.ll b/llvm/test/CodeGen/AArch64/usub_sat_vec.ll
index a0ab8040e8fc..63bbac3be3fb 100644
--- a/llvm/test/CodeGen/AArch64/usub_sat_vec.ll
+++ b/llvm/test/CodeGen/AArch64/usub_sat_vec.ll
@@ -113,22 +113,10 @@ define void @v8i8(<8 x i8>* %px, <8 x i8>* %py, <8 x i8>* %pz) nounwind {
define void @v4i8(<4 x i8>* %px, <4 x i8>* %py, <4 x i8>* %pz) nounwind {
; CHECK-LABEL: v4i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldrb w8, [x0]
-; CHECK-NEXT: ldrb w9, [x1]
-; CHECK-NEXT: ldrb w10, [x0, #1]
-; CHECK-NEXT: ldrb w11, [x1, #1]
-; CHECK-NEXT: fmov s0, w8
-; CHECK-NEXT: fmov s1, w9
-; CHECK-NEXT: ldrb w8, [x0, #2]
-; CHECK-NEXT: ldrb w9, [x1, #2]
-; CHECK-NEXT: mov v0.h[1], w10
-; CHECK-NEXT: mov v1.h[1], w11
-; CHECK-NEXT: ldrb w10, [x0, #3]
-; CHECK-NEXT: ldrb w11, [x1, #3]
-; CHECK-NEXT: mov v0.h[2], w8
-; CHECK-NEXT: mov v1.h[2], w9
-; CHECK-NEXT: mov v0.h[3], w10
-; CHECK-NEXT: mov v1.h[3], w11
+; CHECK-NEXT: ldr s0, [x0]
+; CHECK-NEXT: ldr s1, [x1]
+; CHECK-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-NEXT: ushll v1.8h, v1.8b, #0
; CHECK-NEXT: uqsub v0.4h, v0.4h, v1.4h
; CHECK-NEXT: xtn v0.8b, v0.8h
; CHECK-NEXT: str s0, [x2]