[llvm-branch-commits] [llvm-branch] r91674 - in /llvm/branches/Apple/Zoidberg/lib/Target/X86: X86.td X86InstrInfo.cpp X86InstrInfo.td X86InstrSSE.td X86Subtarget.cpp X86Subtarget.h
Evan Cheng
evan.cheng at apple.com
Thu Dec 17 23:57:45 PST 2009
Author: evancheng
Date: Fri Dec 18 01:57:45 2009
New Revision: 91674
URL: http://llvm.org/viewvc/llvm-project?rev=91674&view=rev
Log:
Merge 91672.
Modified:
llvm/branches/Apple/Zoidberg/lib/Target/X86/X86.td
llvm/branches/Apple/Zoidberg/lib/Target/X86/X86InstrInfo.cpp
llvm/branches/Apple/Zoidberg/lib/Target/X86/X86InstrInfo.td
llvm/branches/Apple/Zoidberg/lib/Target/X86/X86InstrSSE.td
llvm/branches/Apple/Zoidberg/lib/Target/X86/X86Subtarget.cpp
llvm/branches/Apple/Zoidberg/lib/Target/X86/X86Subtarget.h
Modified: llvm/branches/Apple/Zoidberg/lib/Target/X86/X86.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Zoidberg/lib/Target/X86/X86.td?rev=91674&r1=91673&r2=91674&view=diff
==============================================================================
--- llvm/branches/Apple/Zoidberg/lib/Target/X86/X86.td (original)
+++ llvm/branches/Apple/Zoidberg/lib/Target/X86/X86.td Fri Dec 18 01:57:45 2009
@@ -57,6 +57,8 @@
"Support 64-bit instructions">;
def FeatureSlowBTMem : SubtargetFeature<"slow-bt-mem", "IsBTMemSlow", "true",
"Bit testing of memory is slow">;
+def FeatureBreakSSEDep : SubtargetFeature<"break-sse-dep", "BreakSSEDep","true",
+ "Should break SSE partial update dep with load / xorps">;
def FeatureSSE4A : SubtargetFeature<"sse4a", "HasSSE4A", "true",
"Support SSE 4a instructions">;
@@ -86,17 +88,27 @@
def : Proc<"pentium3", [FeatureSSE1]>;
def : Proc<"pentium-m", [FeatureSSE2, FeatureSlowBTMem]>;
def : Proc<"pentium4", [FeatureSSE2]>;
-def : Proc<"x86-64", [FeatureSSE2, Feature64Bit, FeatureSlowBTMem]>;
-def : Proc<"yonah", [FeatureSSE3, FeatureSlowBTMem]>;
-def : Proc<"prescott", [FeatureSSE3, FeatureSlowBTMem]>;
-def : Proc<"nocona", [FeatureSSE3, Feature64Bit, FeatureSlowBTMem]>;
-def : Proc<"core2", [FeatureSSSE3, Feature64Bit, FeatureSlowBTMem]>;
-def : Proc<"penryn", [FeatureSSE41, Feature64Bit, FeatureSlowBTMem]>;
-def : Proc<"atom", [FeatureSSE3, Feature64Bit, FeatureSlowBTMem]>;
-def : Proc<"corei7", [FeatureSSE42, Feature64Bit, FeatureSlowBTMem]>;
-def : Proc<"nehalem", [FeatureSSE42, Feature64Bit, FeatureSlowBTMem]>;
+def : Proc<"x86-64", [FeatureSSE2, Feature64Bit, FeatureSlowBTMem,
+ FeatureBreakSSEDep]>;
+def : Proc<"yonah", [FeatureSSE3, FeatureSlowBTMem,
+ FeatureBreakSSEDep]>;
+def : Proc<"prescott", [FeatureSSE3, FeatureSlowBTMem,
+ FeatureBreakSSEDep]>;
+def : Proc<"nocona", [FeatureSSE3, Feature64Bit, FeatureSlowBTMem,
+ FeatureBreakSSEDep]>;
+def : Proc<"core2", [FeatureSSSE3, Feature64Bit, FeatureSlowBTMem,
+ FeatureBreakSSEDep]>;
+def : Proc<"penryn", [FeatureSSE41, Feature64Bit, FeatureSlowBTMem,
+ FeatureBreakSSEDep]>;
+def : Proc<"atom", [FeatureSSE3, Feature64Bit, FeatureSlowBTMem,
+ FeatureBreakSSEDep]>;
+def : Proc<"corei7", [FeatureSSE42, Feature64Bit, FeatureSlowBTMem,
+ FeatureBreakSSEDep]>;
+def : Proc<"nehalem", [FeatureSSE42, Feature64Bit, FeatureSlowBTMem,
+ FeatureBreakSSEDep]>;
// Sandy Bridge does not have FMA
-def : Proc<"sandybridge", [FeatureSSE42, FeatureAVX, Feature64Bit]>;
+def : Proc<"sandybridge", [FeatureSSE42, FeatureAVX, Feature64Bit,
+ FeatureBreakSSEDep]>;
def : Proc<"k6", [FeatureMMX]>;
def : Proc<"k6-2", [FeatureMMX, Feature3DNow]>;
Modified: llvm/branches/Apple/Zoidberg/lib/Target/X86/X86InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Zoidberg/lib/Target/X86/X86InstrInfo.cpp?rev=91674&r1=91673&r2=91674&view=diff
==============================================================================
--- llvm/branches/Apple/Zoidberg/lib/Target/X86/X86InstrInfo.cpp (original)
+++ llvm/branches/Apple/Zoidberg/lib/Target/X86/X86InstrInfo.cpp Fri Dec 18 01:57:45 2009
@@ -2353,6 +2353,23 @@
// Check switch flag
if (NoFusing) return NULL;
+ if (TM.getSubtarget<X86Subtarget>().shouldBreakSSEDep())
+ switch (MI->getOpcode()) {
+ case X86::CVTSD2SSrr:
+ case X86::Int_CVTSD2SSrr:
+ case X86::CVTSS2SDrr:
+ case X86::Int_CVTSS2SDrr:
+ case X86::RCPSSr:
+ case X86::RCPSSr_Int:
+ case X86::ROUNDSDr_Int:
+ case X86::ROUNDSSr_Int:
+ case X86::RSQRTSSr:
+ case X86::RSQRTSSr_Int:
+ case X86::SQRTSSr:
+ case X86::SQRTSSr_Int:
+ return 0;
+ }
+
const MachineFrameInfo *MFI = MF.getFrameInfo();
unsigned Size = MFI->getObjectSize(FrameIndex);
unsigned Alignment = MFI->getObjectAlignment(FrameIndex);
@@ -2388,6 +2405,23 @@
// Check switch flag
if (NoFusing) return NULL;
+ if (TM.getSubtarget<X86Subtarget>().shouldBreakSSEDep())
+ switch (MI->getOpcode()) {
+ case X86::CVTSD2SSrr:
+ case X86::Int_CVTSD2SSrr:
+ case X86::CVTSS2SDrr:
+ case X86::Int_CVTSS2SDrr:
+ case X86::RCPSSr:
+ case X86::RCPSSr_Int:
+ case X86::ROUNDSDr_Int:
+ case X86::ROUNDSSr_Int:
+ case X86::RSQRTSSr:
+ case X86::RSQRTSSr_Int:
+ case X86::SQRTSSr:
+ case X86::SQRTSSr_Int:
+ return 0;
+ }
+
// Determine the alignment of the load.
unsigned Alignment = 0;
if (LoadMI->hasOneMemOperand())
Modified: llvm/branches/Apple/Zoidberg/lib/Target/X86/X86InstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Zoidberg/lib/Target/X86/X86InstrInfo.td?rev=91674&r1=91673&r2=91674&view=diff
==============================================================================
--- llvm/branches/Apple/Zoidberg/lib/Target/X86/X86InstrInfo.td (original)
+++ llvm/branches/Apple/Zoidberg/lib/Target/X86/X86InstrInfo.td Fri Dec 18 01:57:45 2009
@@ -295,6 +295,8 @@
def OptForSpeed : Predicate<"!OptForSize">;
def FastBTMem : Predicate<"!Subtarget->isBTMemSlow()">;
def CallImmAddr : Predicate<"Subtarget->IsLegalToCallImmediateAddr(TM)">;
+def SSEBreakDep : Predicate<"Subtarget->shouldBreakSSEDep() && !OptForSize">;
+def NoSSEBreakDep: Predicate<"!Subtarget->shouldBreakSSEDep() || OptForSize">;
//===----------------------------------------------------------------------===//
// X86 Instruction Format Definitions.
Modified: llvm/branches/Apple/Zoidberg/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Zoidberg/lib/Target/X86/X86InstrSSE.td?rev=91674&r1=91673&r2=91674&view=diff
==============================================================================
--- llvm/branches/Apple/Zoidberg/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/branches/Apple/Zoidberg/lib/Target/X86/X86InstrSSE.td Fri Dec 18 01:57:45 2009
@@ -806,9 +806,10 @@
}
// Scalar operation, mem.
- def SSm : SSI<opc, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src),
+ def SSm : I<opc, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src),
!strconcat(OpcodeStr, "ss\t{$src, $dst|$dst, $src}"),
- [(set FR32:$dst, (OpNode (load addr:$src)))]>;
+ [(set FR32:$dst, (OpNode (load addr:$src)))]>, XS,
+ Requires<[HasSSE1, NoSSEBreakDep]>;
// Vector operation, reg.
def PSr : PSI<opc, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
@@ -1098,9 +1099,10 @@
def CVTSD2SSrr : SDI<0x5A, MRMSrcReg, (outs FR32:$dst), (ins FR64:$src),
"cvtsd2ss\t{$src, $dst|$dst, $src}",
[(set FR32:$dst, (fround FR64:$src))]>;
-def CVTSD2SSrm : SDI<0x5A, MRMSrcMem, (outs FR32:$dst), (ins f64mem:$src),
+def CVTSD2SSrm : I<0x5A, MRMSrcMem, (outs FR32:$dst), (ins f64mem:$src),
"cvtsd2ss\t{$src, $dst|$dst, $src}",
- [(set FR32:$dst, (fround (loadf64 addr:$src)))]>;
+ [(set FR32:$dst, (fround (loadf64 addr:$src)))]>, XD,
+ Requires<[HasSSE2, NoSSEBreakDep]>;
def CVTSI2SDrr : SDI<0x2A, MRMSrcReg, (outs FR64:$dst), (ins GR32:$src),
"cvtsi2sd\t{$src, $dst|$dst, $src}",
[(set FR64:$dst, (sint_to_fp GR32:$src))]>;
@@ -1137,7 +1139,10 @@
def CVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins f32mem:$src),
"cvtss2sd\t{$src, $dst|$dst, $src}",
[(set FR64:$dst, (extloadf32 addr:$src))]>, XS,
- Requires<[HasSSE2]>;
+ Requires<[HasSSE2, NoSSEBreakDep]>;
+
+def : Pat<(extloadf32 addr:$src),
+ (CVTSS2SDrr (MOVSSrm addr:$src))>, Requires<[SSEBreakDep]>;
// Match intrinsics which expect XMM operand(s).
def Int_CVTSD2SIrr : SDI<0x2D, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
@@ -3175,13 +3180,14 @@
OpSize;
// Vector intrinsic operation, mem
- def PSm_Int : SS4AIi8<opcps, MRMSrcMem,
+ def PSm_Int : Ii8<opcps, MRMSrcMem,
(outs VR128:$dst), (ins f128mem:$src1, i32i8imm:$src2),
!strconcat(OpcodeStr,
"ps\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
[(set VR128:$dst,
(V4F32Int (memopv4f32 addr:$src1),imm:$src2))]>,
- OpSize;
+ TA, OpSize,
+ Requires<[HasSSE41, NoSSEBreakDep]>;
// Vector intrinsic operation, reg
def PDr_Int : SS4AIi8<opcpd, MRMSrcReg,
Modified: llvm/branches/Apple/Zoidberg/lib/Target/X86/X86Subtarget.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Zoidberg/lib/Target/X86/X86Subtarget.cpp?rev=91674&r1=91673&r2=91674&view=diff
==============================================================================
--- llvm/branches/Apple/Zoidberg/lib/Target/X86/X86Subtarget.cpp (original)
+++ llvm/branches/Apple/Zoidberg/lib/Target/X86/X86Subtarget.cpp Fri Dec 18 01:57:45 2009
@@ -266,6 +266,7 @@
unsigned Model = 0;
DetectFamilyModel(EAX, Family, Model);
IsBTMemSlow = IsAMD || (Family == 6 && Model >= 13);
+ BreakSSEDep = IsIntel;
GetCpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
HasX86_64 = (EDX >> 29) & 0x1;
@@ -286,6 +287,7 @@
, HasFMA3(false)
, HasFMA4(false)
, IsBTMemSlow(false)
+ , BreakSSEDep(false)
, DarwinVers(0)
, stackAlignment(8)
// FIXME: this is a known good value for Yonah. How about others?
Modified: llvm/branches/Apple/Zoidberg/lib/Target/X86/X86Subtarget.h
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Zoidberg/lib/Target/X86/X86Subtarget.h?rev=91674&r1=91673&r2=91674&view=diff
==============================================================================
--- llvm/branches/Apple/Zoidberg/lib/Target/X86/X86Subtarget.h (original)
+++ llvm/branches/Apple/Zoidberg/lib/Target/X86/X86Subtarget.h Fri Dec 18 01:57:45 2009
@@ -77,6 +77,14 @@
/// IsBTMemSlow - True if BT (bit test) of memory instructions are slow.
bool IsBTMemSlow;
+
+ /// BreakSSEDep - True if codegen should unfold load or insert xorps / pxor
+ /// to break register dependency for a partial register update SSE
+ /// instruction. This is needed for instructions such as CVTSS2SD which
+ /// only update the lower part of the register, and the result of the updated
+ /// part does not depend on the contents of the destination before the
+ /// instruction, and the non-updated portion of the register is not used.
+ bool BreakSSEDep;
/// DarwinVers - Nonzero if this is a darwin platform: the numeric
/// version of the platform, e.g. 8 = 10.4 (Tiger), 9 = 10.5 (Leopard), etc.
@@ -142,6 +150,7 @@
bool hasFMA3() const { return HasFMA3; }
bool hasFMA4() const { return HasFMA4; }
bool isBTMemSlow() const { return IsBTMemSlow; }
+ bool shouldBreakSSEDep() const { return BreakSSEDep; }
bool isTargetDarwin() const { return TargetType == isDarwin; }
bool isTargetELF() const { return TargetType == isELF; }
More information about the llvm-branch-commits
mailing list