[llvm] r313972 - [ARM] Add missing selection patterns for vnmla

Fri Sep 22 02:50:52 PDT 2017

Author: rksimon
Date: Fri Sep 22 02:50:52 2017
New Revision: 313972

URL: http://llvm.org/viewvc/llvm-project?rev=313972&view=rev
Log:
[ARM] Add missing selection patterns for vnmla

For the following function:

  double fn1(double d0, double d1, double d2) {
    double a = -d0 - d1 * d2;
    return a;
  }

on ARM, LLVM generates code along the lines of

  vneg.f64  d0, d0
  vmls.f64  d0, d1, d2

i.e., a negate and a multiply-subtract.

This patch adds instruction selection patterns that allow LLVM to generate the single instruction

  vnmla.f64  d0, d1, d2

(multiply-accumulate with negation, i.e. d0 = -(d0 + d1 * d2)) instead, like GCC does.

Committed on behalf of @gergo- (Gergö Barany)

Differential Revision: https://reviews.llvm.org/D35911

Modified:
    llvm/trunk/lib/Target/ARM/ARMInstrVFP.td
    llvm/trunk/test/CodeGen/ARM/fnmscs.ll

Modified: llvm/trunk/lib/Target/ARM/ARMInstrVFP.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/ARM/ARMInstrVFP.td?rev=313972&r1=313971&r2=313972&view=diff
==============================================================================

--- llvm/trunk/lib/Target/ARM/ARMInstrVFP.td (original)
+++ llvm/trunk/lib/Target/ARM/ARMInstrVFP.td Fri Sep 22 02:50:52 2017
@@ -1857,6 +1857,7 @@ def VNMLAH : AHbI<0b11100, 0b01, 1, 0,
                 RegConstraint<"$Sdin = $Sd">,
                 Requires<[HasFullFP16,UseFPVMLx,DontUseFusedMAC]>;
 
+// (-(a * b) - dst) -> -(dst + (a * b))
 def : Pat<(fsub_mlx (fneg (fmul_su DPR:$a, (f64 DPR:$b))), DPR:$dstin),
           (VNMLAD DPR:$dstin, DPR:$a, DPR:$b)>,
           Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>;
@@ -1864,6 +1865,14 @@ def : Pat<(fsub_mlx (fneg (fmul_su SPR:$
           (VNMLAS SPR:$dstin, SPR:$a, SPR:$b)>,
           Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>;
 
+// (-dst - (a * b)) -> -(dst + (a * b))
+def : Pat<(fsub_mlx (fneg DPR:$dstin), (fmul_su DPR:$a, (f64 DPR:$b))),
+          (VNMLAD DPR:$dstin, DPR:$a, DPR:$b)>,
+          Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>;
+def : Pat<(fsub_mlx (fneg SPR:$dstin), (fmul_su SPR:$a, SPR:$b)),
+          (VNMLAS SPR:$dstin, SPR:$a, SPR:$b)>,
+          Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>;
+
 def VNMLSD : ADbI<0b11100, 0b01, 0, 0,
                   (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
                   IIC_fpMAC64, "vnmls", ".f64\t$Dd, $Dn, $Dm",

Modified: llvm/trunk/test/CodeGen/ARM/fnmscs.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/fnmscs.ll?rev=313972&r1=313971&r2=313972&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/ARM/fnmscs.ll (original)
+++ llvm/trunk/test/CodeGen/ARM/fnmscs.ll Fri Sep 22 02:50:52 2017
@@ -1,7 +1,10 @@
-; RUN: llc -mtriple=arm-eabi -mattr=+vfp2 %s -o - \
+; RUN: llc -mtriple=arm-eabihf -mattr=+vfp2 %s -o - \
 ; RUN:  | FileCheck %s -check-prefix=VFP2
 
-; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - \
+; RUN: llc -mtriple=arm-eabihf -mattr=+vfp3 %s -o - \
+; RUN:  | FileCheck %s -check-prefix=VFP3
+
+; RUN: llc -mtriple=arm-eabihf -mattr=+neon %s -o - \
 ; RUN:  | FileCheck %s -check-prefix=NEON
 
 ; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 %s -o - \
@@ -21,6 +24,9 @@ entry:
 ; VFP2-LABEL: t1:
 ; VFP2: vnmla.f32
 
+; VFP3-LABEL: t1:
+; VFP3: vnmla.f32
+
 ; NEON-LABEL: t1:
 ; NEON: vnmla.f32
 
@@ -42,6 +48,9 @@ entry:
 ; VFP2-LABEL: t2:
 ; VFP2: vnmla.f32
 
+; VFP3-LABEL: t2:
+; VFP3: vnmla.f32
+
 ; NEON-LABEL: t2:
 ; NEON: vnmla.f32
 
@@ -63,6 +72,9 @@ entry:
 ; VFP2-LABEL: t3:
 ; VFP2: vnmla.f64
 
+; VFP3-LABEL: t3:
+; VFP3: vnmla.f64
+
 ; NEON-LABEL: t3:
 ; NEON: vnmla.f64
 
@@ -84,6 +96,9 @@ entry:
 ; VFP2-LABEL: t4:
 ; VFP2: vnmla.f64
 
+; VFP3-LABEL: t4:
+; VFP3: vnmla.f64
+
 ; NEON-LABEL: t4:
 ; NEON: vnmla.f64
 
@@ -99,3 +114,53 @@ entry:
         %2 = fsub double %1, %acc
 	ret double %2
 }
+
+define double @t5(double %acc, double %a, double %b) nounwind {
+entry:
+; VFP2-LABEL: t5:
+; VFP2: vnmla.f64
+
+; VFP3-LABEL: t5:
+; VFP3: vnmla.f64
+
+; NEON-LABEL: t5:
+; NEON: vnmla.f64
+
+; A8U-LABEL: t5:
+; A8U: vmul.f64 d
+; A8U: vsub.f64 d
+
+; A8-LABEL: t5:
+; A8: vmul.f64 d
+; A8: vsub.f64 d
+
+  %0 = fsub double -0.0, %acc
+  %1 = fmul double %a, %b
+  %2 = fsub double %0, %1
+  ret double %2
+}
+
+define float @t6(float %acc, float %a, float %b) nounwind {
+entry:
+; VFP2-LABEL: t6:
+; VFP2: vnmla.f32
+
+; VFP3-LABEL: t6:
+; VFP3: vnmla.f32
+
+; NEON-LABEL: t6:
+; NEON: vnmla.f32
+
+; A8U-LABEL: t6:
+; A8U: vmul.f32 d
+; A8U: vsub.f32 d
+
+; A8-LABEL: t6:
+; A8: vmul.f32 s
+; A8: vsub.f32 s
+
+  %0 = fsub float -0.0, %acc
+  %1 = fmul float %a, %b
+  %2 = fsub float %0, %1
+  ret float %2
+}