[PATCH] D50191: [ARM] FP16: support VFMA
Sjoerd Meijer via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Thu Aug 2 08:44:06 PDT 2018
SjoerdMeijer created this revision.
SjoerdMeijer added reviewers: olista01, samparker, john.brawn, efriedma.
Herald added a reviewer: javed.absar.
Herald added subscribers: chrib, kristof.beyls.
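This addresses PR38404: it adds instruction selection patterns that match the @llvm.fma.v4f16 and @llvm.fma.v8f16 intrinsics to VFMA (f16), and enables the previously commented-out vfma/vfms tests.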
https://reviews.llvm.org/D50191
Files:
lib/Target/ARM/ARMInstrNEON.td
test/CodeGen/ARM/armv8.2a-fp16-vector-intrinsics.ll
Index: test/CodeGen/ARM/armv8.2a-fp16-vector-intrinsics.ll
===================================================================
--- test/CodeGen/ARM/armv8.2a-fp16-vector-intrinsics.ll
+++ test/CodeGen/ARM/armv8.2a-fp16-vector-intrinsics.ll
@@ -911,34 +911,48 @@
ret <8 x half> %sub.i
}
-; FIXME (PR38404)
-;
-;define dso_local <4 x half> @test_vfma_f16(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
-;entry:
-; %0 = tail call <4 x half> @llvm.fma.v4f16(<4 x half> %b, <4 x half> %c, <4 x half> %a)
-; ret <4 x half> %0
-;}
+define dso_local <4 x half> @test_vfma_f16(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
+; CHECK-LABEL: test_vfma_f16:
+; CHECK: vfma.f16 d0, d1, d2
+; CHECK-NEXT: bx lr
+entry:
+ %0 = tail call <4 x half> @llvm.fma.v4f16(<4 x half> %b, <4 x half> %c, <4 x half> %a) ; llvm.fma(x, y, z) = x*y + z, so %a is the addend
+ ret <4 x half> %0
+}
-;define dso_local <8 x half> @test_vfmaq_f16(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
-;entry:
-; %0 = tail call <8 x half> @llvm.fma.v8f16(<8 x half> %b, <8 x half> %c, <8 x half> %a)
-; ret <8 x half> %0
-;}
+define dso_local <8 x half> @test_vfmaq_f16(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
+; CHECK-LABEL: test_vfmaq_f16:
+; CHECK: vfma.f16 q0, q1, q2
+; CHECK-NEXT: bx lr
+entry:
+ %0 = tail call <8 x half> @llvm.fma.v8f16(<8 x half> %b, <8 x half> %c, <8 x half> %a) ; 128-bit form: %b * %c + %a, with %a as the addend
+ ret <8 x half> %0
+}
-;define dso_local <4 x half> @test_vfms_f16(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
-;entry:
-; %sub.i = fsub <4 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, %b
-; %0 = tail call <4 x half> @llvm.fma.v4f16(<4 x half> %sub.i, <4 x half> %c, <4 x half> %a)
-; ret <4 x half> %0
-;}
+define dso_local <4 x half> @test_vfms_f16(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
+; CHECK-LABEL: test_vfms_f16:
+; CHECK: vneg.f16 [[D16:d[0-9]+]], d1
+; CHECK-NEXT: vfma.f16 d0, [[D16]], d2
+; CHECK-NEXT: bx lr
+entry:
+ %sub.i = fsub <4 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, %b ; 0xH8000 is -0.0, so this computes -%b
+ %0 = tail call <4 x half> @llvm.fma.v4f16(<4 x half> %sub.i, <4 x half> %c, <4 x half> %a) ; (-%b) * %c + %a; the checks expect a separate vneg.f16 followed by vfma.f16
+ ret <4 x half> %0
+}
-;define dso_local <8 x half> @test_vfmsq_f16(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
-;entry:
-; %sub.i = fsub <8 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, %b
-; %0 = tail call <8 x half> @llvm.fma.v8f16(<8 x half> %sub.i, <8 x half> %c, <8 x half> %a)
-; ret <8 x half> %0
-;}
+define dso_local <8 x half> @test_vfmsq_f16(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
+; CHECK-LABEL: test_vfmsq_f16:
+; CHECK: vneg.f16 [[Q8:q[0-9]+]], q1
+; CHECK-NEXT: vfma.f16 q0, [[Q8]], q2
+; CHECK-NEXT: bx lr
+entry:
+ %sub.i = fsub <8 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, %b ; 0xH8000 is -0.0, so this computes -%b
+ %0 = tail call <8 x half> @llvm.fma.v8f16(<8 x half> %sub.i, <8 x half> %c, <8 x half> %a) ; (-%b) * %c + %a; the checks expect a separate vneg.f16 followed by vfma.f16
+ ret <8 x half> %0
+}
+; FIXME (PR38404)
+;
;define dso_local <4 x half> @test_vmul_lane_f16(<4 x half> %a, <4 x half> %b) {
;entry:
; %shuffle = shufflevector <4 x half> %b, <4 x half> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
Index: lib/Target/ARM/ARMInstrNEON.td
===================================================================
--- lib/Target/ARM/ARMInstrNEON.td
+++ lib/Target/ARM/ARMInstrNEON.td
@@ -4734,6 +4734,12 @@
Requires<[HasNEON,HasFullFP16,UseFusedMAC]>;
// Match @llvm.fma.* intrinsics
+def : Pat<(v4f16 (fma DPR:$Vn, DPR:$Vm, DPR:$src1)), // fma(n, m, acc) -> VFMA acc, n, m (64-bit, 4 x f16)
+ (VFMAhd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
+ Requires<[HasNEON,HasFullFP16]>; // selects a NEON instruction, so require NEON as well as FP16
+def : Pat<(v8f16 (fma QPR:$Vn, QPR:$Vm, QPR:$src1)), // same mapping for the 128-bit, 8 x f16 form
+ (VFMAhq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
+ Requires<[HasNEON,HasFullFP16]>; // selects a NEON instruction, so require NEON as well as FP16
def : Pat<(v2f32 (fma DPR:$Vn, DPR:$Vm, DPR:$src1)),
(VFMAfd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
Requires<[HasVFP4]>;
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D50191.158767.patch
Type: text/x-patch
Size: 3937 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20180802/6e66ef8e/attachment.bin>
More information about the llvm-commits
mailing list