[llvm] r313972 - [ARM] Add missing selection patterns for vnmla
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Fri Sep 22 02:50:52 PDT 2017
Author: rksimon
Date: Fri Sep 22 02:50:52 2017
New Revision: 313972
URL: http://llvm.org/viewvc/llvm-project?rev=313972&view=rev
Log:
[ARM] Add missing selection patterns for vnmla
For the following function:
double fn1(double d0, double d1, double d2) {
double a = -d0 - d1 * d2;
return a;
}
on ARM, LLVM generates code along the lines of
vneg.f64 d0, d0
vmls.f64 d0, d1, d2
i.e., a negate and a multiply-subtract.
The attached patch adds instruction selection patterns that allow LLVM to generate the single instruction
vnmla.f64 d0, d1, d2
(a negated multiply-add, i.e. -(d0 + (d1 * d2))) instead, as GCC does.
Committed on behalf of @gergo- (Gergö Barany)
Differential Revision: https://reviews.llvm.org/D35911
Modified:
llvm/trunk/lib/Target/ARM/ARMInstrVFP.td
llvm/trunk/test/CodeGen/ARM/fnmscs.ll
Modified: llvm/trunk/lib/Target/ARM/ARMInstrVFP.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrVFP.td?rev=313972&r1=313971&r2=313972&view=diff
==============================================================================
--- llvm/trunk/lib/Target/ARM/ARMInstrVFP.td (original)
+++ llvm/trunk/lib/Target/ARM/ARMInstrVFP.td Fri Sep 22 02:50:52 2017
@@ -1857,6 +1857,7 @@ def VNMLAH : AHbI<0b11100, 0b01, 1, 0,
RegConstraint<"$Sdin = $Sd">,
Requires<[HasFullFP16,UseFPVMLx,DontUseFusedMAC]>;
+// (-(a * b) - dst) -> -(dst + (a * b))
def : Pat<(fsub_mlx (fneg (fmul_su DPR:$a, (f64 DPR:$b))), DPR:$dstin),
(VNMLAD DPR:$dstin, DPR:$a, DPR:$b)>,
Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>;
@@ -1864,6 +1865,14 @@ def : Pat<(fsub_mlx (fneg (fmul_su SPR:$
(VNMLAS SPR:$dstin, SPR:$a, SPR:$b)>,
Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>;
+// (-dst - (a * b)) -> -(dst + (a * b))
+def : Pat<(fsub_mlx (fneg DPR:$dstin), (fmul_su DPR:$a, (f64 DPR:$b))),
+ (VNMLAD DPR:$dstin, DPR:$a, DPR:$b)>,
+ Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>;
+def : Pat<(fsub_mlx (fneg SPR:$dstin), (fmul_su SPR:$a, SPR:$b)),
+ (VNMLAS SPR:$dstin, SPR:$a, SPR:$b)>,
+ Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>;
+
def VNMLSD : ADbI<0b11100, 0b01, 0, 0,
(outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
IIC_fpMAC64, "vnmls", ".f64\t$Dd, $Dn, $Dm",
Modified: llvm/trunk/test/CodeGen/ARM/fnmscs.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/fnmscs.ll?rev=313972&r1=313971&r2=313972&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/fnmscs.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/fnmscs.ll Fri Sep 22 02:50:52 2017
@@ -1,7 +1,10 @@
-; RUN: llc -mtriple=arm-eabi -mattr=+vfp2 %s -o - \
+; RUN: llc -mtriple=arm-eabihf -mattr=+vfp2 %s -o - \
; RUN: | FileCheck %s -check-prefix=VFP2
-; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - \
+; RUN: llc -mtriple=arm-eabihf -mattr=+vfp3 %s -o - \
+; RUN: | FileCheck %s -check-prefix=VFP3
+
+; RUN: llc -mtriple=arm-eabihf -mattr=+neon %s -o - \
; RUN: | FileCheck %s -check-prefix=NEON
; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 %s -o - \
@@ -21,6 +24,9 @@ entry:
; VFP2-LABEL: t1:
; VFP2: vnmla.f32
+; VFP3-LABEL: t1:
+; VFP3: vnmla.f32
+
; NEON-LABEL: t1:
; NEON: vnmla.f32
@@ -42,6 +48,9 @@ entry:
; VFP2-LABEL: t2:
; VFP2: vnmla.f32
+; VFP3-LABEL: t2:
+; VFP3: vnmla.f32
+
; NEON-LABEL: t2:
; NEON: vnmla.f32
@@ -63,6 +72,9 @@ entry:
; VFP2-LABEL: t3:
; VFP2: vnmla.f64
+; VFP3-LABEL: t3:
+; VFP3: vnmla.f64
+
; NEON-LABEL: t3:
; NEON: vnmla.f64
@@ -84,6 +96,9 @@ entry:
; VFP2-LABEL: t4:
; VFP2: vnmla.f64
+; VFP3-LABEL: t4:
+; VFP3: vnmla.f64
+
; NEON-LABEL: t4:
; NEON: vnmla.f64
@@ -99,3 +114,53 @@ entry:
%2 = fsub double %1, %acc
ret double %2
}
+
+define double @t5(double %acc, double %a, double %b) nounwind {
+entry:
+; VFP2-LABEL: t5:
+; VFP2: vnmla.f64
+
+; VFP3-LABEL: t5:
+; VFP3: vnmla.f64
+
+; NEON-LABEL: t5:
+; NEON: vnmla.f64
+
+; A8U-LABEL: t5:
+; A8U: vmul.f64 d
+; A8U: vsub.f64 d
+
+; A8-LABEL: t5:
+; A8: vmul.f64 d
+; A8: vsub.f64 d
+
+ %0 = fsub double -0.0, %acc
+ %1 = fmul double %a, %b
+ %2 = fsub double %0, %1
+ ret double %2
+}
+
+define float @t6(float %acc, float %a, float %b) nounwind {
+entry:
+; VFP2-LABEL: t6:
+; VFP2: vnmla.f32
+
+; VFP3-LABEL: t6:
+; VFP3: vnmla.f32
+
+; NEON-LABEL: t6:
+; NEON: vnmla.f32
+
+; A8U-LABEL: t6:
+; A8U: vmul.f32 d
+; A8U: vsub.f32 d
+
+; A8-LABEL: t6:
+; A8: vmul.f32 s
+; A8: vsub.f32 s
+
+ %0 = fsub float -0.0, %acc
+ %1 = fmul float %a, %b
+ %2 = fsub float %0, %1
+ ret float %2
+}
More information about the llvm-commits mailing list