[llvm] r338830 - [ARM] FP16: support VFMA

Fri Aug 3 02:12:57 PDT 2018

Author: sjoerdmeijer
Date: Fri Aug  3 02:12:56 2018
New Revision: 338830

URL: http://llvm.org/viewvc/llvm-project?rev=338830&view=rev
Log:
[ARM] FP16: support VFMA

This is addressing PR38404.

Modified:
    llvm/trunk/lib/Target/ARM/ARMInstrNEON.td
    llvm/trunk/test/CodeGen/ARM/armv8.2a-fp16-vector-intrinsics.ll

Modified: llvm/trunk/lib/Target/ARM/ARMInstrNEON.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrNEON.td?rev=338830&r1=338829&r2=338830&view=diff
==============================================================================

--- llvm/trunk/lib/Target/ARM/ARMInstrNEON.td (original)
+++ llvm/trunk/lib/Target/ARM/ARMInstrNEON.td Fri Aug  3 02:12:56 2018
@@ -4734,6 +4734,12 @@ def  VFMShq   : N3VQMulOp<0, 0, 0b11, 0b
                 Requires<[HasNEON,HasFullFP16,UseFusedMAC]>;
 
 // Match @llvm.fma.* intrinsics
+def : Pat<(v4f16 (fma DPR:$Vn, DPR:$Vm, DPR:$src1)),
+          (VFMAhd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
+          Requires<[HasNEON,HasFullFP16]>;
+def : Pat<(v8f16 (fma QPR:$Vn, QPR:$Vm, QPR:$src1)),
+          (VFMAhq QPR:$src1, QPR:$Vn, QPR:$Vm)>,
+          Requires<[HasNEON,HasFullFP16]>;
 def : Pat<(v2f32 (fma DPR:$Vn, DPR:$Vm, DPR:$src1)),
           (VFMAfd DPR:$src1, DPR:$Vn, DPR:$Vm)>,
           Requires<[HasVFP4]>;

Modified: llvm/trunk/test/CodeGen/ARM/armv8.2a-fp16-vector-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/armv8.2a-fp16-vector-intrinsics.ll?rev=338830&r1=338829&r2=338830&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/armv8.2a-fp16-vector-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/armv8.2a-fp16-vector-intrinsics.ll Fri Aug  3 02:12:56 2018
@@ -911,34 +911,48 @@ entry:
   ret <8 x half> %sub.i
 }
 
-; FIXME (PR38404)
-;
-;define dso_local <4 x half> @test_vfma_f16(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
-;entry:
-;  %0 = tail call <4 x half> @llvm.fma.v4f16(<4 x half> %b, <4 x half> %c, <4 x half> %a)
-;  ret <4 x half> %0
-;}
+define dso_local <4 x half> @test_vfma_f16(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
+; CHECK-LABEL: test_vfma_f16:
+; CHECK:         vfma.f16 d0, d1, d2
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = tail call <4 x half> @llvm.fma.v4f16(<4 x half> %b, <4 x half> %c, <4 x half> %a)
+  ret <4 x half> %0
+}
 
-;define dso_local <8 x half> @test_vfmaq_f16(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
-;entry:
-;  %0 = tail call <8 x half> @llvm.fma.v8f16(<8 x half> %b, <8 x half> %c, <8 x half> %a)
-;  ret <8 x half> %0
-;}
+define dso_local <8 x half> @test_vfmaq_f16(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
+; CHECK-LABEL: test_vfmaq_f16:
+; CHECK:         vfma.f16 q0, q1, q2
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = tail call <8 x half> @llvm.fma.v8f16(<8 x half> %b, <8 x half> %c, <8 x half> %a)
+  ret <8 x half> %0
+}
 
-;define dso_local <4 x half> @test_vfms_f16(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
-;entry:
-;  %sub.i = fsub <4 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, %b
-;  %0 = tail call <4 x half> @llvm.fma.v4f16(<4 x half> %sub.i, <4 x half> %c, <4 x half> %a)
-;  ret <4 x half> %0
-;}
+define dso_local <4 x half> @test_vfms_f16(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
+; CHECK-LABEL: test_vfms_f16:
+; CHECK:         vneg.f16 [[D16:d[0-9]+]], d1
+; CHECK-NEXT:    vfma.f16 d0, [[D16]], d2
+; CHECK-NEXT:    bx lr
+entry:
+  %sub.i = fsub <4 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, %b
+  %0 = tail call <4 x half> @llvm.fma.v4f16(<4 x half> %sub.i, <4 x half> %c, <4 x half> %a)
+  ret <4 x half> %0
+}
 
-;define dso_local <8 x half> @test_vfmsq_f16(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
-;entry:
-;  %sub.i = fsub <8 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, %b
-;  %0 = tail call <8 x half> @llvm.fma.v8f16(<8 x half> %sub.i, <8 x half> %c, <8 x half> %a)
-;  ret <8 x half> %0
-;}
+define dso_local <8 x half> @test_vfmsq_f16(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
+; CHECK-LABEL: test_vfmsq_f16:
+; CHECK:         vneg.f16 [[Q8:q[0-9]+]], q1
+; CHECK-NEXT:    vfma.f16 q0, [[Q8]], q2
+; CHECK-NEXT:    bx lr
+entry:
+  %sub.i = fsub <8 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, %b
+  %0 = tail call <8 x half> @llvm.fma.v8f16(<8 x half> %sub.i, <8 x half> %c, <8 x half> %a)
+  ret <8 x half> %0
+}
 
+; FIXME (PR38404)
+;
 ;define dso_local <4 x half> @test_vmul_lane_f16(<4 x half> %a, <4 x half> %b) {
 ;entry:
 ;  %shuffle = shufflevector <4 x half> %b, <4 x half> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>