[PATCH] D23583: [AArch64] Add feature has-fast-fma
Evandro Menezes via llvm-commits
llvm-commits at lists.llvm.org
Tue Aug 16 15:09:17 PDT 2016
evandro created this revision.
evandro added a reviewer: t.p.northover.
evandro added a subscriber: llvm-commits.
evandro set the repository for this revision to rL LLVM.
Herald added subscribers: rengolin, aemerson.
Use FMA combining aggressively if, for a given target, FMA is much faster than FMUL and FADD combined.
Repository:
rL LLVM
https://reviews.llvm.org/D23583
Files:
llvm/lib/Target/AArch64/AArch64.td
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/lib/Target/AArch64/AArch64ISelLowering.h
llvm/lib/Target/AArch64/AArch64Subtarget.h
Index: llvm/lib/Target/AArch64/AArch64Subtarget.h
===================================================================
--- llvm/lib/Target/AArch64/AArch64Subtarget.h
+++ llvm/lib/Target/AArch64/AArch64Subtarget.h
@@ -83,6 +83,7 @@
bool HasMacroOpFusion = false;
bool DisableLatencySchedHeuristic = false;
bool UseRSqrt = false;
+ bool HasFastFMA = false;
uint8_t MaxInterleaveFactor = 2;
uint8_t VectorInsertExtractBaseCost = 3;
uint16_t CacheLineSize = 0;
@@ -190,6 +191,7 @@
}
bool hasMacroOpFusion() const { return HasMacroOpFusion; }
bool useRSqrt() const { return UseRSqrt; }
+ bool hasFastFMA() const { return HasFastFMA; }
unsigned getMaxInterleaveFactor() const { return MaxInterleaveFactor; }
unsigned getVectorInsertExtractBaseCost() const {
return VectorInsertExtractBaseCost;
Index: llvm/lib/Target/AArch64/AArch64ISelLowering.h
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -333,6 +333,9 @@
/// returns true, otherwise fmuladd is expanded to fmul + fadd.
bool isFMAFasterThanFMulAndFAdd(EVT VT) const override;
+ /// Return true if FMA operations should be used aggressively.
+ bool enableAggressiveFMAFusion(EVT VT) const override;
+
const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
/// \brief Returns false if N is a bit extraction pattern of (X >> C) & Mask.
Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -7374,6 +7374,10 @@
return false;
}
+bool AArch64TargetLowering::enableAggressiveFMAFusion(EVT VT) const {
+ return Subtarget->hasFastFMA() && isFMAFasterThanFMulAndFAdd(VT);
+}
+
const MCPhysReg *
AArch64TargetLowering::getScratchRegisters(CallingConv::ID) const {
// LR is a callee-save register, but we must treat it as clobbered by any call
Index: llvm/lib/Target/AArch64/AArch64.td
===================================================================
--- llvm/lib/Target/AArch64/AArch64.td
+++ llvm/lib/Target/AArch64/AArch64.td
@@ -105,6 +105,9 @@
def FeatureUseRSqrt : SubtargetFeature<
"use-reverse-square-root", "UseRSqrt", "true", "Use reverse square root">;
+def FeatureHasFastFMA : SubtargetFeature<
+ "has-fast-fma", "HasFastFMA", "true", "Use FMA aggressively">;
+
//===----------------------------------------------------------------------===//
// Architectures.
//
@@ -222,6 +225,7 @@
FeatureCrypto,
FeatureCustomCheapAsMoveHandling,
FeatureFPARMv8,
+ FeatureHasFastFMA,
FeatureNEON,
FeaturePerfMon,
FeaturePostRAScheduler,
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D23583.68265.patch
Type: text/x-patch
Size: 3034 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20160816/07af66af/attachment.bin>
More information about the llvm-commits
mailing list