[PATCH] D23583: [AArch64] Add feature has-fast-fma

Tue Aug 16 15:09:17 PDT 2016

evandro created this revision.
evandro added a reviewer: t.p.northover.
evandro added a subscriber: llvm-commits.
evandro set the repository for this revision to rL LLVM.
Herald added subscribers: rengolin, aemerson.

Use FMA combining aggressively if, for a given target, FMA is much faster than FMUL and FADD combined.


Repository:
  rL LLVM

https://reviews.llvm.org/D23583

Files:
  llvm/lib/Target/AArch64/AArch64.td
  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
  llvm/lib/Target/AArch64/AArch64ISelLowering.h
  llvm/lib/Target/AArch64/AArch64Subtarget.h

Index: llvm/lib/Target/AArch64/AArch64Subtarget.h
===================================================================

--- llvm/lib/Target/AArch64/AArch64Subtarget.h
+++ llvm/lib/Target/AArch64/AArch64Subtarget.h
@@ -83,6 +83,7 @@
   bool HasMacroOpFusion = false;
   bool DisableLatencySchedHeuristic = false;
   bool UseRSqrt = false;
+  bool HasFastFMA = false;
   uint8_t MaxInterleaveFactor = 2;
   uint8_t VectorInsertExtractBaseCost = 3;
   uint16_t CacheLineSize = 0;
@@ -190,6 +191,7 @@
   }
   bool hasMacroOpFusion() const { return HasMacroOpFusion; }
   bool useRSqrt() const { return UseRSqrt; }
+  bool hasFastFMA() const { return HasFastFMA; }
   unsigned getMaxInterleaveFactor() const { return MaxInterleaveFactor; }
   unsigned getVectorInsertExtractBaseCost() const {
     return VectorInsertExtractBaseCost;
Index: llvm/lib/Target/AArch64/AArch64ISelLowering.h
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -333,6 +333,9 @@
   /// returns true, otherwise fmuladd is expanded to fmul + fadd.
   bool isFMAFasterThanFMulAndFAdd(EVT VT) const override;
 
+  /// Return true if FMA operations should be used aggressively.
+  bool enableAggressiveFMAFusion(EVT VT) const override;
+
   const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
 
   /// \brief Returns false if N is a bit extraction pattern of (X >> C) & Mask.
Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -7374,6 +7374,10 @@
   return false;
 }
 
+bool AArch64TargetLowering::enableAggressiveFMAFusion(EVT VT) const {
+  return Subtarget->hasFastFMA() && isFMAFasterThanFMulAndFAdd(VT);
+}
+
 const MCPhysReg *
 AArch64TargetLowering::getScratchRegisters(CallingConv::ID) const {
   // LR is a callee-save register, but we must treat it as clobbered by any call
Index: llvm/lib/Target/AArch64/AArch64.td
===================================================================
--- llvm/lib/Target/AArch64/AArch64.td
+++ llvm/lib/Target/AArch64/AArch64.td
@@ -105,6 +105,9 @@
 def FeatureUseRSqrt : SubtargetFeature<
     "use-reverse-square-root", "UseRSqrt", "true", "Use reverse square root">;
 
+def FeatureHasFastFMA : SubtargetFeature<
+    "has-fast-fma", "HasFastFMA", "true", "Use FMA aggressively">;
+
 //===----------------------------------------------------------------------===//
 // Architectures.
 //
@@ -222,6 +225,7 @@
                                     FeatureCrypto,
                                     FeatureCustomCheapAsMoveHandling,
                                     FeatureFPARMv8,
+                                    FeatureHasFastFMA,
                                     FeatureNEON,
                                     FeaturePerfMon,
                                     FeaturePostRAScheduler,


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D23583.68265.patch
Type: text/x-patch
Size: 3034 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20160816/07af66af/attachment.bin>