[PATCH] D40177: performance improvements for ThunderX2 T99
Stefan Teleman via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Fri Nov 17 07:14:56 PST 2017
steleman created this revision.
Herald added a subscriber: javed.absar.
This changeset improves performance on the Cavium ThunderX2 T99 micro-architecture. It is specific to T99 and does not affect any other micro-architecture.
Tested with SPEC CPU2017 and libquantum.
As an example, the performance gains measured on libquantum are available here:
https://docs.google.com/spreadsheets/d/1Lo1o2E1NjrpkwS7DvYYWsiVvPdd93h7KBaqeptMrZPY/edit?usp=sharing
Repository:
rL LLVM
https://reviews.llvm.org/D40177
Files:
lib/Target/AArch64/AArch64ISelLowering.cpp
lib/Target/AArch64/AArch64ISelLowering.h
lib/Target/AArch64/AArch64SchedThunderX2T99.td
Index: lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- lib/Target/AArch64/AArch64ISelLowering.cpp
+++ lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -10978,6 +10978,11 @@
return OptSize && !VT.isVector();
}
+bool AArch64TargetLowering::enableAggressiveFMAFusion(EVT VT) const {
+ return Subtarget->getProcFamily() == AArch64Subtarget::ThunderX2T99 &&
+ VT.isFloatingPoint();
+}
+
unsigned
AArch64TargetLowering::getVaListSizeInBits(const DataLayout &DL) const {
if (Subtarget->isTargetDarwin() || Subtarget->isTargetWindows())
Index: lib/Target/AArch64/AArch64ISelLowering.h
===================================================================
--- lib/Target/AArch64/AArch64ISelLowering.h
+++ lib/Target/AArch64/AArch64ISelLowering.h
@@ -455,6 +455,9 @@
return true;
}
+ /// Enable aggressive FMA fusion on targets that want it.
+ bool enableAggressiveFMAFusion(EVT VT) const override;
+
/// Returns the size of the platform's va_list object.
unsigned getVaListSizeInBits(const DataLayout &DL) const override;
Index: lib/Target/AArch64/AArch64SchedThunderX2T99.td
===================================================================
--- lib/Target/AArch64/AArch64SchedThunderX2T99.td
+++ lib/Target/AArch64/AArch64SchedThunderX2T99.td
@@ -22,7 +22,7 @@
let LoadLatency = 4; // Optimistic load latency.
let MispredictPenalty = 12; // Extra cycles for mispredicted branch.
// Determined via a mix of micro-arch details and experimentation.
- let LoopMicroOpBufferSize = 32;
+ let LoopMicroOpBufferSize = 128;
let PostRAScheduler = 1; // Using PostRA sched.
let CompleteModel = 1;
@@ -391,7 +391,8 @@
def : WriteRes<WriteHint, []> { let Latency = 1; }
def : WriteRes<WriteAtomic, []> {
- let Unsupported = 1;
+ let Unsupported = 0;
+ let Latency = 4;
let NumMicroOps = 2;
}
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D40177.123336.patch
Type: text/x-patch
Size: 1943 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20171117/c994b331/attachment.bin>
More information about the llvm-commits mailing list