[PATCH] D16837: Disable the vzeroupper insertion pass on PS4
Yunzhong Gao via llvm-commits
llvm-commits at lists.llvm.org
Tue Feb 2 19:32:12 PST 2016
ygao created this revision.
ygao added a reviewer: hfinkel.
ygao added subscribers: silvas, llvm-commits.
Hi,
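This patch re-implements the work to disable the vzeroupper insertion pass
on PS4, based on review feedback from Hal and Sean. It does so by adding a
"fast-partial-ymm-write" subtarget feature, which is enabled for btver2
(Jaguar, the PS4 CPU) and which makes the vzeroupper insertion pass a no-op.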
I am not sure whether there are other processors that behave like Jaguar
when it comes to writing YMM registers.
http://reviews.llvm.org/D16837
Files:
lib/Target/X86/X86.td
lib/Target/X86/X86Subtarget.cpp
lib/Target/X86/X86Subtarget.h
lib/Target/X86/X86VZeroUpper.cpp
test/CodeGen/X86/avx-vzeroupper.ll
Index: test/CodeGen/X86/avx-vzeroupper.ll
===================================================================
--- test/CodeGen/X86/avx-vzeroupper.ll
+++ test/CodeGen/X86/avx-vzeroupper.ll
@@ -1,4 +1,9 @@
; RUN: llc < %s -x86-use-vzeroupper -mtriple=x86_64-apple-darwin -mattr=+avx | FileCheck %s
+; RUN: llc < %s -x86-use-vzeroupper -mtriple=x86_64-apple-darwin -mattr=+avx,+fast-partial-ymm-write | FileCheck --check-prefix=FASTYMM %s
+; RUN: llc < %s -x86-use-vzeroupper -mtriple=x86_64-apple-darwin -mcpu=btver2 | FileCheck --check-prefix=BTVER2 %s
+
+; FASTYMM-NOT: vzeroupper
+; BTVER2-NOT: vzeroupper
declare i32 @foo()
declare <4 x float> @do_sse(<4 x float>)
Index: lib/Target/X86/X86VZeroUpper.cpp
===================================================================
--- lib/Target/X86/X86VZeroUpper.cpp
+++ lib/Target/X86/X86VZeroUpper.cpp
@@ -248,7 +248,7 @@
/// vzeroupper instructions before function calls.
bool VZeroUpperInserter::runOnMachineFunction(MachineFunction &MF) {
const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
- if (!ST.hasAVX() || ST.hasAVX512())
+ if (!ST.hasAVX() || ST.hasAVX512() || ST.hasFastPartialYMMWrite())
return false;
TII = ST.getInstrInfo();
MachineRegisterInfo &MRI = MF.getRegInfo();
Index: lib/Target/X86/X86Subtarget.h
===================================================================
--- lib/Target/X86/X86Subtarget.h
+++ lib/Target/X86/X86Subtarget.h
@@ -189,6 +189,10 @@
/// the stack pointer. This is an optimization for Intel Atom processors.
bool UseLeaForSP;
+ /// True if there is no performance penalty to writing only the lower parts
+ /// of a YMM register without clearing the upper part.
+ bool HasFastPartialYMMWrite;
+
/// True if 8-bit divisions are significantly faster than
/// 32-bit divisions and should be used when possible.
bool HasSlowDivide32;
@@ -421,6 +425,7 @@
bool hasSSEUnalignedMem() const { return HasSSEUnalignedMem; }
bool hasCmpxchg16b() const { return HasCmpxchg16b; }
bool useLeaForSP() const { return UseLeaForSP; }
+ bool hasFastPartialYMMWrite() const { return HasFastPartialYMMWrite; }
bool hasSlowDivide32() const { return HasSlowDivide32; }
bool hasSlowDivide64() const { return HasSlowDivide64; }
bool padShortFunctions() const { return PadShortFunctions; }
Index: lib/Target/X86/X86Subtarget.cpp
===================================================================
--- lib/Target/X86/X86Subtarget.cpp
+++ lib/Target/X86/X86Subtarget.cpp
@@ -285,6 +285,7 @@
HasSSEUnalignedMem = false;
HasCmpxchg16b = false;
UseLeaForSP = false;
+ HasFastPartialYMMWrite = false;
HasSlowDivide32 = false;
HasSlowDivide64 = false;
PadShortFunctions = false;
Index: lib/Target/X86/X86.td
===================================================================
--- lib/Target/X86/X86.td
+++ lib/Target/X86/X86.td
@@ -239,6 +239,11 @@
def FeatureSoftFloat
: SubtargetFeature<"soft-float", "UseSoftFloat", "true",
"Use software floating point features.">;
+// On at least some AMD processors, there is no performance hazard to writing
+// only the lower parts of a YMM register without clearing the upper part.
+def FeatureFastPartialYMMWrite
+ : SubtargetFeature<"fast-partial-ymm-write", "HasFastPartialYMMWrite",
+ "true", "Partial writes to YMM registers are fast">;
//===----------------------------------------------------------------------===//
// X86 processors supported.
@@ -596,7 +601,8 @@
FeatureXSAVE,
FeatureXSAVEOPT,
FeatureSlowSHLD,
- FeatureLAHFSAHF
+ FeatureLAHFSAHF,
+ FeatureFastPartialYMMWrite
]>;
// Bulldozer
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D16837.46733.patch
Type: text/x-patch
Size: 3663 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20160203/9d0c213a/attachment.bin>
More information about the llvm-commits mailing list