[llvm] c15a56f - [ARM] Fill in FP16 FMA patterns
David Green via llvm-commits
llvm-commits at lists.llvm.org
Sun Jan 5 03:55:47 PST 2020
Author: David Green
Date: 2020-01-05T11:24:04Z
New Revision: c15a56f61a56e862c9613a334d1427638899942b
URL: https://github.com/llvm/llvm-project/commit/c15a56f61a56e862c9613a334d1427638899942b
DIFF: https://github.com/llvm/llvm-project/commit/c15a56f61a56e862c9613a334d1427638899942b.diff
LOG: [ARM] Fill in FP16 FMA patterns
This adds fp16 variants of all the fma patterns in the ARM backend.
Differential Revision: https://reviews.llvm.org/D72138
Added:
Modified:
llvm/lib/Target/ARM/ARMInstrVFP.td
llvm/test/CodeGen/ARM/fp16-fusedMAC.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/ARM/ARMInstrVFP.td b/llvm/lib/Target/ARM/ARMInstrVFP.td
index 550011bc8ce4..a41a483d1a4c 100644
--- a/llvm/lib/Target/ARM/ARMInstrVFP.td
+++ b/llvm/lib/Target/ARM/ARMInstrVFP.td
@@ -2143,6 +2143,9 @@ def : Pat<(f64 (fma (fneg DPR:$Dn), DPR:$Dm, DPR:$Ddin)),
def : Pat<(f32 (fma (fneg SPR:$Sn), SPR:$Sm, SPR:$Sdin)),
(VFMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
Requires<[HasVFP4]>;
+def : Pat<(f16 (fma (fneg HPR:$Sn), HPR:$Sm, HPR:$Sdin)),
+ (VFMSH HPR:$Sdin, HPR:$Sn, HPR:$Sm)>,
+ Requires<[HasFullFP16]>;
// (fma x, (fneg y), z) -> (vfms z, x, y)
def : Pat<(f64 (fma DPR:$Dn, (fneg DPR:$Dm), DPR:$Ddin)),
(VFMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
@@ -2150,6 +2153,9 @@ def : Pat<(f64 (fma DPR:$Dn, (fneg DPR:$Dm), DPR:$Ddin)),
def : Pat<(f32 (fma SPR:$Sn, (fneg SPR:$Sm), SPR:$Sdin)),
(VFMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
Requires<[HasVFP4]>;
+def : Pat<(f16 (fma HPR:$Sn, (fneg HPR:$Sm), HPR:$Sdin)),
+ (VFMSH HPR:$Sdin, HPR:$Sn, HPR:$Sm)>,
+ Requires<[HasFullFP16]>;
def VFNMAD : ADbI<0b11101, 0b01, 1, 0,
(outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
@@ -2196,6 +2202,9 @@ def : Pat<(fneg (fma (f64 DPR:$Dn), (f64 DPR:$Dm), (f64 DPR:$Ddin))),
def : Pat<(fneg (fma (f32 SPR:$Sn), (f32 SPR:$Sm), (f32 SPR:$Sdin))),
(VFNMAS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
Requires<[HasVFP4]>;
+def : Pat<(fneg (fma (f16 HPR:$Sn), (f16 HPR:$Sm), (f16 HPR:$Sdin))),
+ (VFNMAH HPR:$Sdin, HPR:$Sn, HPR:$Sm)>,
+ Requires<[HasFullFP16]>;
// (fma (fneg x), y, (fneg z)) -> (vfnma z, x, y)
def : Pat<(f64 (fma (fneg DPR:$Dn), DPR:$Dm, (fneg DPR:$Ddin))),
(VFNMAD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
@@ -2203,6 +2212,9 @@ def : Pat<(f64 (fma (fneg DPR:$Dn), DPR:$Dm, (fneg DPR:$Ddin))),
def : Pat<(f32 (fma (fneg SPR:$Sn), SPR:$Sm, (fneg SPR:$Sdin))),
(VFNMAS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
Requires<[HasVFP4]>;
+def : Pat<(f16 (fma (fneg HPR:$Sn), HPR:$Sm, (fneg HPR:$Sdin))),
+ (VFNMAH HPR:$Sdin, HPR:$Sn, HPR:$Sm)>,
+ Requires<[HasFullFP16]>;
def VFNMSD : ADbI<0b11101, 0b01, 0, 0,
(outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
@@ -2248,6 +2260,9 @@ def : Pat<(f64 (fma DPR:$Dn, DPR:$Dm, (fneg DPR:$Ddin))),
def : Pat<(f32 (fma SPR:$Sn, SPR:$Sm, (fneg SPR:$Sdin))),
(VFNMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
Requires<[HasVFP4]>;
+def : Pat<(f16 (fma HPR:$Sn, HPR:$Sm, (fneg HPR:$Sdin))),
+ (VFNMSH HPR:$Sdin, HPR:$Sn, HPR:$Sm)>,
+ Requires<[HasFullFP16]>;
// (fneg (fma (fneg x), y, z)) -> (vfnms z, x, y)
def : Pat<(fneg (f64 (fma (fneg DPR:$Dn), DPR:$Dm, DPR:$Ddin))),
(VFNMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
@@ -2255,6 +2270,9 @@ def : Pat<(fneg (f64 (fma (fneg DPR:$Dn), DPR:$Dm, DPR:$Ddin))),
def : Pat<(fneg (f32 (fma (fneg SPR:$Sn), SPR:$Sm, SPR:$Sdin))),
(VFNMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
Requires<[HasVFP4]>;
+def : Pat<(fneg (f16 (fma (fneg HPR:$Sn), HPR:$Sm, HPR:$Sdin))),
+ (VFNMSH HPR:$Sdin, HPR:$Sn, HPR:$Sm)>,
+ Requires<[HasFullFP16]>;
// (fneg (fma x, (fneg y), z)) -> (vfnms z, x, y)
def : Pat<(fneg (f64 (fma DPR:$Dn, (fneg DPR:$Dm), DPR:$Ddin))),
(VFNMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>,
@@ -2262,6 +2280,9 @@ def : Pat<(fneg (f64 (fma DPR:$Dn, (fneg DPR:$Dm), DPR:$Ddin))),
def : Pat<(fneg (f32 (fma SPR:$Sn, (fneg SPR:$Sm), SPR:$Sdin))),
(VFNMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>,
Requires<[HasVFP4]>;
+def : Pat<(fneg (f16 (fma HPR:$Sn, (fneg HPR:$Sm), HPR:$Sdin))),
+ (VFNMSH HPR:$Sdin, HPR:$Sn, HPR:$Sm)>,
+ Requires<[HasFullFP16]>;
//===----------------------------------------------------------------------===//
// FP Conditional moves.
diff --git a/llvm/test/CodeGen/ARM/fp16-fusedMAC.ll b/llvm/test/CodeGen/ARM/fp16-fusedMAC.ll
index fea408a01a42..b6387b872622 100644
--- a/llvm/test/CodeGen/ARM/fp16-fusedMAC.ll
+++ b/llvm/test/CodeGen/ARM/fp16-fusedMAC.ll
@@ -230,21 +230,19 @@ define arm_aapcs_vfpcc void @fms1(half *%a1, half *%a2, half *%a3) {
; CHECK-LABEL: fms1:
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr.16 s0, [r1]
-; CHECK-NEXT: vldr.16 s2, [r2]
-; CHECK-NEXT: vldr.16 s4, [r0]
-; CHECK-NEXT: vneg.f16 s4, s4
-; CHECK-NEXT: vfma.f16 s2, s4, s0
-; CHECK-NEXT: vstr.16 s2, [r0]
+; CHECK-NEXT: vldr.16 s2, [r0]
+; CHECK-NEXT: vldr.16 s4, [r2]
+; CHECK-NEXT: vfms.f16 s4, s2, s0
+; CHECK-NEXT: vstr.16 s4, [r0]
; CHECK-NEXT: bx lr
;
; DONT-FUSE-LABEL: fms1:
; DONT-FUSE: @ %bb.0:
; DONT-FUSE-NEXT: vldr.16 s0, [r1]
-; DONT-FUSE-NEXT: vldr.16 s2, [r2]
-; DONT-FUSE-NEXT: vldr.16 s4, [r0]
-; DONT-FUSE-NEXT: vneg.f16 s4, s4
-; DONT-FUSE-NEXT: vfma.f16 s2, s4, s0
-; DONT-FUSE-NEXT: vstr.16 s2, [r0]
+; DONT-FUSE-NEXT: vldr.16 s2, [r0]
+; DONT-FUSE-NEXT: vldr.16 s4, [r2]
+; DONT-FUSE-NEXT: vfms.f16 s4, s2, s0
+; DONT-FUSE-NEXT: vstr.16 s4, [r0]
; DONT-FUSE-NEXT: bx lr
%f1 = load half, half *%a1, align 2
@@ -259,22 +257,20 @@ define arm_aapcs_vfpcc void @fms1(half *%a1, half *%a2, half *%a3) {
define arm_aapcs_vfpcc void @fms2(half *%a1, half *%a2, half *%a3) {
; CHECK-LABEL: fms2:
; CHECK: @ %bb.0:
-; CHECK-NEXT: vldr.16 s0, [r1]
-; CHECK-NEXT: vldr.16 s2, [r2]
-; CHECK-NEXT: vldr.16 s4, [r0]
-; CHECK-NEXT: vneg.f16 s4, s4
-; CHECK-NEXT: vfma.f16 s2, s0, s4
-; CHECK-NEXT: vstr.16 s2, [r0]
+; CHECK-NEXT: vldr.16 s0, [r0]
+; CHECK-NEXT: vldr.16 s2, [r1]
+; CHECK-NEXT: vldr.16 s4, [r2]
+; CHECK-NEXT: vfms.f16 s4, s2, s0
+; CHECK-NEXT: vstr.16 s4, [r0]
; CHECK-NEXT: bx lr
;
; DONT-FUSE-LABEL: fms2:
; DONT-FUSE: @ %bb.0:
-; DONT-FUSE-NEXT: vldr.16 s0, [r1]
-; DONT-FUSE-NEXT: vldr.16 s2, [r2]
-; DONT-FUSE-NEXT: vldr.16 s4, [r0]
-; DONT-FUSE-NEXT: vneg.f16 s4, s4
-; DONT-FUSE-NEXT: vfma.f16 s2, s0, s4
-; DONT-FUSE-NEXT: vstr.16 s2, [r0]
+; DONT-FUSE-NEXT: vldr.16 s0, [r0]
+; DONT-FUSE-NEXT: vldr.16 s2, [r1]
+; DONT-FUSE-NEXT: vldr.16 s4, [r2]
+; DONT-FUSE-NEXT: vfms.f16 s4, s2, s0
+; DONT-FUSE-NEXT: vstr.16 s4, [r0]
; DONT-FUSE-NEXT: bx lr
%f1 = load half, half *%a1, align 2
@@ -292,9 +288,8 @@ define arm_aapcs_vfpcc void @fnma1(half *%a1, half *%a2, half *%a3) {
; CHECK-NEXT: vldr.16 s0, [r1]
; CHECK-NEXT: vldr.16 s2, [r0]
; CHECK-NEXT: vldr.16 s4, [r2]
-; CHECK-NEXT: vfma.f16 s4, s2, s0
-; CHECK-NEXT: vneg.f16 s0, s4
-; CHECK-NEXT: vstr.16 s0, [r0]
+; CHECK-NEXT: vfnma.f16 s4, s2, s0
+; CHECK-NEXT: vstr.16 s4, [r0]
; CHECK-NEXT: bx lr
;
; DONT-FUSE-LABEL: fnma1:
@@ -302,9 +297,8 @@ define arm_aapcs_vfpcc void @fnma1(half *%a1, half *%a2, half *%a3) {
; DONT-FUSE-NEXT: vldr.16 s0, [r1]
; DONT-FUSE-NEXT: vldr.16 s2, [r0]
; DONT-FUSE-NEXT: vldr.16 s4, [r2]
-; DONT-FUSE-NEXT: vfma.f16 s4, s2, s0
-; DONT-FUSE-NEXT: vneg.f16 s0, s4
-; DONT-FUSE-NEXT: vstr.16 s0, [r0]
+; DONT-FUSE-NEXT: vfnma.f16 s4, s2, s0
+; DONT-FUSE-NEXT: vstr.16 s4, [r0]
; DONT-FUSE-NEXT: bx lr
%f1 = load half, half *%a1, align 2
@@ -321,10 +315,8 @@ define arm_aapcs_vfpcc void @fnma2(half *%a1, half *%a2, half *%a3) {
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr.16 s0, [r1]
; CHECK-NEXT: vldr.16 s2, [r0]
-; CHECK-NEXT: vneg.f16 s2, s2
; CHECK-NEXT: vldr.16 s4, [r2]
-; CHECK-NEXT: vneg.f16 s4, s4
-; CHECK-NEXT: vfma.f16 s4, s2, s0
+; CHECK-NEXT: vfnma.f16 s4, s2, s0
; CHECK-NEXT: vstr.16 s4, [r0]
; CHECK-NEXT: bx lr
;
@@ -332,10 +324,8 @@ define arm_aapcs_vfpcc void @fnma2(half *%a1, half *%a2, half *%a3) {
; DONT-FUSE: @ %bb.0:
; DONT-FUSE-NEXT: vldr.16 s0, [r1]
; DONT-FUSE-NEXT: vldr.16 s2, [r0]
-; DONT-FUSE-NEXT: vneg.f16 s2, s2
; DONT-FUSE-NEXT: vldr.16 s4, [r2]
-; DONT-FUSE-NEXT: vneg.f16 s4, s4
-; DONT-FUSE-NEXT: vfma.f16 s4, s2, s0
+; DONT-FUSE-NEXT: vfnma.f16 s4, s2, s0
; DONT-FUSE-NEXT: vstr.16 s4, [r0]
; DONT-FUSE-NEXT: bx lr
@@ -355,8 +345,7 @@ define arm_aapcs_vfpcc void @fnms1(half *%a1, half *%a2, half *%a3) {
; CHECK-NEXT: vldr.16 s0, [r1]
; CHECK-NEXT: vldr.16 s2, [r0]
; CHECK-NEXT: vldr.16 s4, [r2]
-; CHECK-NEXT: vneg.f16 s4, s4
-; CHECK-NEXT: vfma.f16 s4, s2, s0
+; CHECK-NEXT: vfnms.f16 s4, s2, s0
; CHECK-NEXT: vstr.16 s4, [r0]
; CHECK-NEXT: bx lr
;
@@ -365,8 +354,7 @@ define arm_aapcs_vfpcc void @fnms1(half *%a1, half *%a2, half *%a3) {
; DONT-FUSE-NEXT: vldr.16 s0, [r1]
; DONT-FUSE-NEXT: vldr.16 s2, [r0]
; DONT-FUSE-NEXT: vldr.16 s4, [r2]
-; DONT-FUSE-NEXT: vneg.f16 s4, s4
-; DONT-FUSE-NEXT: vfma.f16 s4, s2, s0
+; DONT-FUSE-NEXT: vfnms.f16 s4, s2, s0
; DONT-FUSE-NEXT: vstr.16 s4, [r0]
; DONT-FUSE-NEXT: bx lr
@@ -383,23 +371,19 @@ define arm_aapcs_vfpcc void @fnms2(half *%a1, half *%a2, half *%a3) {
; CHECK-LABEL: fnms2:
; CHECK: @ %bb.0:
; CHECK-NEXT: vldr.16 s0, [r1]
-; CHECK-NEXT: vldr.16 s2, [r2]
-; CHECK-NEXT: vldr.16 s4, [r0]
-; CHECK-NEXT: vneg.f16 s4, s4
-; CHECK-NEXT: vfma.f16 s2, s4, s0
-; CHECK-NEXT: vneg.f16 s0, s2
-; CHECK-NEXT: vstr.16 s0, [r0]
+; CHECK-NEXT: vldr.16 s2, [r0]
+; CHECK-NEXT: vldr.16 s4, [r2]
+; CHECK-NEXT: vfnms.f16 s4, s2, s0
+; CHECK-NEXT: vstr.16 s4, [r0]
; CHECK-NEXT: bx lr
;
; DONT-FUSE-LABEL: fnms2:
; DONT-FUSE: @ %bb.0:
; DONT-FUSE-NEXT: vldr.16 s0, [r1]
-; DONT-FUSE-NEXT: vldr.16 s2, [r2]
-; DONT-FUSE-NEXT: vldr.16 s4, [r0]
-; DONT-FUSE-NEXT: vneg.f16 s4, s4
-; DONT-FUSE-NEXT: vfma.f16 s2, s4, s0
-; DONT-FUSE-NEXT: vneg.f16 s0, s2
-; DONT-FUSE-NEXT: vstr.16 s0, [r0]
+; DONT-FUSE-NEXT: vldr.16 s2, [r0]
+; DONT-FUSE-NEXT: vldr.16 s4, [r2]
+; DONT-FUSE-NEXT: vfnms.f16 s4, s2, s0
+; DONT-FUSE-NEXT: vstr.16 s4, [r0]
; DONT-FUSE-NEXT: bx lr
%f1 = load half, half *%a1, align 2
@@ -415,24 +399,20 @@ define arm_aapcs_vfpcc void @fnms2(half *%a1, half *%a2, half *%a3) {
define arm_aapcs_vfpcc void @fnms3(half *%a1, half *%a2, half *%a3) {
; CHECK-LABEL: fnms3:
; CHECK: @ %bb.0:
-; CHECK-NEXT: vldr.16 s0, [r0]
-; CHECK-NEXT: vldr.16 s2, [r2]
-; CHECK-NEXT: vldr.16 s4, [r1]
-; CHECK-NEXT: vneg.f16 s4, s4
-; CHECK-NEXT: vfma.f16 s2, s0, s4
-; CHECK-NEXT: vneg.f16 s0, s2
-; CHECK-NEXT: vstr.16 s0, [r0]
+; CHECK-NEXT: vldr.16 s0, [r1]
+; CHECK-NEXT: vldr.16 s2, [r0]
+; CHECK-NEXT: vldr.16 s4, [r2]
+; CHECK-NEXT: vfnms.f16 s4, s2, s0
+; CHECK-NEXT: vstr.16 s4, [r0]
; CHECK-NEXT: bx lr
;
; DONT-FUSE-LABEL: fnms3:
; DONT-FUSE: @ %bb.0:
-; DONT-FUSE-NEXT: vldr.16 s0, [r0]
-; DONT-FUSE-NEXT: vldr.16 s2, [r2]
-; DONT-FUSE-NEXT: vldr.16 s4, [r1]
-; DONT-FUSE-NEXT: vneg.f16 s4, s4
-; DONT-FUSE-NEXT: vfma.f16 s2, s0, s4
-; DONT-FUSE-NEXT: vneg.f16 s0, s2
-; DONT-FUSE-NEXT: vstr.16 s0, [r0]
+; DONT-FUSE-NEXT: vldr.16 s0, [r1]
+; DONT-FUSE-NEXT: vldr.16 s2, [r0]
+; DONT-FUSE-NEXT: vldr.16 s4, [r2]
+; DONT-FUSE-NEXT: vfnms.f16 s4, s2, s0
+; DONT-FUSE-NEXT: vstr.16 s4, [r0]
; DONT-FUSE-NEXT: bx lr
%f1 = load half, half *%a1, align 2
More information about the llvm-commits mailing list