[llvm] r358081 - [ARM] [FIX] Add missing f16 vector operations lowering
Diogo N. Sampaio via llvm-commits
llvm-commits at lists.llvm.org
Wed Apr 10 06:28:06 PDT 2019
Author: dnsampaio
Date: Wed Apr 10 06:28:06 2019
New Revision: 358081
URL: http://llvm.org/viewvc/llvm-project?rev=358081&view=rev
Log:
[ARM] [FIX] Add missing f16 vector operations lowering
Summary:
Add the missing <8xhalf> shufflevector pattern for the concat_vectors DAG node.
Also allow <8xhalf> and <4xhalf> vldup1 operations.
These instructions are required for v8.2a fp16 lowering of vmul_n_f16, vmulq_n_f16 and vmulq_lane_f16 intrinsics.
Reviewers: olista01, pbarrio, LukeGeeson, efriedma
Reviewed By: efriedma
Subscribers: efriedma, javed.absar, kristof.beyls, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D60319
Modified:
llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp
llvm/trunk/lib/Target/ARM/ARMInstrNEON.td
llvm/trunk/test/CodeGen/ARM/armv8.2a-fp16-vector-intrinsics.ll
Modified: llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp?rev=358081&r1=358080&r2=358081&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp (original)
+++ llvm/trunk/lib/Target/ARM/ARMISelDAGToDAG.cpp Wed Apr 10 06:28:06 2019
@@ -2212,7 +2212,10 @@ void ARMDAGToDAGISel::SelectVLDDup(SDNod
case MVT::v8i8:
case MVT::v16i8: OpcodeIndex = 0; break;
case MVT::v4i16:
- case MVT::v8i16: OpcodeIndex = 1; break;
+ case MVT::v8i16:
+ case MVT::v4f16:
+ case MVT::v8f16:
+ OpcodeIndex = 1; break;
case MVT::v2f32:
case MVT::v2i32:
case MVT::v4f32:
Modified: llvm/trunk/lib/Target/ARM/ARMInstrNEON.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrNEON.td?rev=358081&r1=358080&r2=358081&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMInstrNEON.td (original)
+++ llvm/trunk/lib/Target/ARM/ARMInstrNEON.td Wed Apr 10 06:28:06 2019
@@ -7576,6 +7576,8 @@ def : Pat<(v16i8 (concat_vectors DPR:$Dn
(REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
def : Pat<(v4f32 (concat_vectors DPR:$Dn, DPR:$Dm)),
(REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
+def : Pat<(v8f16 (concat_vectors DPR:$Dn, DPR:$Dm)),
+ (REG_SEQUENCE QPR, DPR:$Dn, dsub_0, DPR:$Dm, dsub_1)>;
//===----------------------------------------------------------------------===//
// Assembler aliases
Modified: llvm/trunk/test/CodeGen/ARM/armv8.2a-fp16-vector-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/armv8.2a-fp16-vector-intrinsics.ll?rev=358081&r1=358080&r2=358081&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/armv8.2a-fp16-vector-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/armv8.2a-fp16-vector-intrinsics.ll Wed Apr 10 06:28:06 2019
@@ -1225,6 +1225,42 @@ entry:
ret <8 x half> %shuffle.i
}
+define <4 x half> @test_vld_dup1_4xhalf(half* %b) {
+; CHECK-LABEL: test_vld_dup1_4xhalf:
+; CHECK: vld1.16 {d0[]}, [r0:16]
+; CHECK-NEXT: bx lr
+
+entry:
+ %b1 = load half, half* %b, align 2
+ %vecinit = insertelement <4 x half> undef, half %b1, i32 0
+ %vecinit2 = insertelement <4 x half> %vecinit, half %b1, i32 1
+ %vecinit3 = insertelement <4 x half> %vecinit2, half %b1, i32 2
+ %vecinit4 = insertelement <4 x half> %vecinit3, half %b1, i32 3
+ ret <4 x half> %vecinit4
+}
+
+define <8 x half> @test_vld_dup1_8xhalf(half* %b) local_unnamed_addr {
+; CHECK-LABEL: test_vld_dup1_8xhalf:
+; CHECK: vld1.16 {d0[], d1[]}, [r0:16]
+; CHECK-NEXT: bx lr
+
+entry:
+ %b1 = load half, half* %b, align 2
+ %vecinit = insertelement <8 x half> undef, half %b1, i32 0
+ %vecinit8 = shufflevector <8 x half> %vecinit, <8 x half> undef, <8 x i32> zeroinitializer
+ ret <8 x half> %vecinit8
+}
+
+define <8 x half> @test_shufflevector8xhalf(<4 x half> %a) {
+; CHECK-LABEL: test_shufflevector8xhalf:
+; CHECK: vmov.f64 d1, d0
+; CHECK-NEXT: bx lr
+
+entry:
+ %r = shufflevector <4 x half> %a, <4 x half> %a, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ ret <8 x half> %r
+}
+
declare <4 x half> @llvm.fabs.v4f16(<4 x half>)
declare <8 x half> @llvm.fabs.v8f16(<8 x half>)
declare <4 x i16> @llvm.arm.neon.vcvtas.v4i16.v4f16(<4 x half>)
More information about the llvm-commits
mailing list