[llvm] r246585 - rename "slow-unaligned-mem-under-32" to "slow-unaligned-mem-16" (NFCI)
Sanjay Patel via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 1 13:51:51 PDT 2015
Author: spatel
Date: Tue Sep 1 15:51:51 2015
New Revision: 246585
URL: http://llvm.org/viewvc/llvm-project?rev=246585&view=rev
Log:
rename "slow-unaligned-mem-under-32" to slow-unaligned-mem-16" (NFCI)
This is a follow-on suggested by:
http://reviews.llvm.org/D12154 ( http://reviews.llvm.org/rL245729 )
http://reviews.llvm.org/D10662 ( http://reviews.llvm.org/rL245075 )
This makes the attribute name match most of the existing lowering logic
and regression test expectations.
But the current use of this attribute is inconsistent; see the FIXME
comment for "allowsMisalignedMemoryAccesses()". Fixing that inconsistency
will result in functional changes and should be coming soon.
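
To make the renamed flag concrete, here is a minimal, self-contained C++ sketch
(not LLVM code; MockSubtarget and chooseCopyUnit are hypothetical names) of the
pattern the X86ISelLowering.cpp hunk below follows: a CPU that reports slow
unaligned 16-byte accesses only gets 128-bit copy units when the destination
alignment is unspecified or at least 16 bytes.

#include <cstdint>
#include <cstdio>

// Hypothetical stand-in for the subtarget query renamed in this commit.
struct MockSubtarget {
  bool IsUAMem16Slow;
  bool isUnalignedMem16Slow() const { return IsUAMem16Slow; }
};

// Pick the widest unit for a memcpy/memset of Size bytes whose destination
// alignment is DstAlign (0 = freely adjustable), mirroring the shape of
// getOptimalMemOpType() in the diff below.
static unsigned chooseCopyUnit(const MockSubtarget &ST, uint64_t Size,
                               unsigned DstAlign) {
  if (Size >= 16 &&
      (!ST.isUnalignedMem16Slow() || DstAlign == 0 || DstAlign >= 16))
    return 16; // 128-bit vector ops
  return 8;    // scalar / 8-byte ops
}

int main() {
  MockSubtarget SlowCPU{true}, FastCPU{false};
  // Prints "8 16": the slow-unaligned-mem-16 CPU avoids 16-byte units when
  // the destination alignment is only 4 bytes.
  std::printf("%u %u\n", chooseCopyUnit(SlowCPU, 64, 4),
              chooseCopyUnit(FastCPU, 64, 4));
  return 0;
}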
Modified:
llvm/trunk/lib/Target/X86/X86.td
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
llvm/trunk/lib/Target/X86/X86Subtarget.cpp
llvm/trunk/lib/Target/X86/X86Subtarget.h
Modified: llvm/trunk/lib/Target/X86/X86.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86.td?rev=246585&r1=246584&r2=246585&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86.td (original)
+++ llvm/trunk/lib/Target/X86/X86.td Tue Sep 1 15:51:51 2015
@@ -79,9 +79,10 @@ def FeatureSlowBTMem : SubtargetFeature<
"Bit testing of memory is slow">;
def FeatureSlowSHLD : SubtargetFeature<"slow-shld", "IsSHLDSlow", "true",
"SHLD instruction is slow">;
-def FeatureSlowUAMem : SubtargetFeature<"slow-unaligned-mem-under-32",
- "IsUAMemUnder32Slow", "true",
- "Slow unaligned 16-byte-or-less memory access">;
+// FIXME: This should not apply to CPUs that do not have SSE.
+def FeatureSlowUAMem16 : SubtargetFeature<"slow-unaligned-mem-16",
+ "IsUAMem16Slow", "true",
+ "Slow unaligned 16-byte memory access">;
def FeatureSlowUAMem32 : SubtargetFeature<"slow-unaligned-mem-32",
"IsUAMem32Slow", "true",
"Slow unaligned 32-byte memory access">;
@@ -209,42 +210,45 @@ def ProcIntelSLM : SubtargetFeature<"sl
class Proc<string Name, list<SubtargetFeature> Features>
: ProcessorModel<Name, GenericModel, Features>;
-def : Proc<"generic", [FeatureSlowUAMem]>;
-def : Proc<"i386", [FeatureSlowUAMem]>;
-def : Proc<"i486", [FeatureSlowUAMem]>;
-def : Proc<"i586", [FeatureSlowUAMem]>;
-def : Proc<"pentium", [FeatureSlowUAMem]>;
-def : Proc<"pentium-mmx", [FeatureSlowUAMem, FeatureMMX]>;
-def : Proc<"i686", [FeatureSlowUAMem]>;
-def : Proc<"pentiumpro", [FeatureSlowUAMem, FeatureCMOV]>;
-def : Proc<"pentium2", [FeatureSlowUAMem, FeatureMMX, FeatureCMOV]>;
-def : Proc<"pentium3", [FeatureSlowUAMem, FeatureSSE1]>;
-def : Proc<"pentium3m", [FeatureSlowUAMem, FeatureSSE1, FeatureSlowBTMem]>;
-def : Proc<"pentium-m", [FeatureSlowUAMem, FeatureSSE2, FeatureSlowBTMem]>;
-def : Proc<"pentium4", [FeatureSlowUAMem, FeatureSSE2]>;
-def : Proc<"pentium4m", [FeatureSlowUAMem, FeatureSSE2, FeatureSlowBTMem]>;
+def : Proc<"generic", [FeatureSlowUAMem16]>;
+def : Proc<"i386", [FeatureSlowUAMem16]>;
+def : Proc<"i486", [FeatureSlowUAMem16]>;
+def : Proc<"i586", [FeatureSlowUAMem16]>;
+def : Proc<"pentium", [FeatureSlowUAMem16]>;
+def : Proc<"pentium-mmx", [FeatureSlowUAMem16, FeatureMMX]>;
+def : Proc<"i686", [FeatureSlowUAMem16]>;
+def : Proc<"pentiumpro", [FeatureSlowUAMem16, FeatureCMOV]>;
+def : Proc<"pentium2", [FeatureSlowUAMem16, FeatureMMX, FeatureCMOV]>;
+def : Proc<"pentium3", [FeatureSlowUAMem16, FeatureSSE1]>;
+def : Proc<"pentium3m", [FeatureSlowUAMem16, FeatureSSE1,
+ FeatureSlowBTMem]>;
+def : Proc<"pentium-m", [FeatureSlowUAMem16, FeatureSSE2,
+ FeatureSlowBTMem]>;
+def : Proc<"pentium4", [FeatureSlowUAMem16, FeatureSSE2]>;
+def : Proc<"pentium4m", [FeatureSlowUAMem16, FeatureSSE2,
+ FeatureSlowBTMem]>;
// Intel Core Duo.
def : ProcessorModel<"yonah", SandyBridgeModel,
- [FeatureSlowUAMem, FeatureSSE3, FeatureSlowBTMem]>;
+ [FeatureSlowUAMem16, FeatureSSE3, FeatureSlowBTMem]>;
// NetBurst.
-def : Proc<"prescott", [FeatureSlowUAMem, FeatureSSE3, FeatureSlowBTMem]>;
-def : Proc<"nocona", [FeatureSlowUAMem, FeatureSSE3, FeatureCMPXCHG16B,
+def : Proc<"prescott", [FeatureSlowUAMem16, FeatureSSE3, FeatureSlowBTMem]>;
+def : Proc<"nocona", [FeatureSlowUAMem16, FeatureSSE3, FeatureCMPXCHG16B,
FeatureSlowBTMem]>;
// Intel Core 2 Solo/Duo.
def : ProcessorModel<"core2", SandyBridgeModel,
- [FeatureSlowUAMem, FeatureSSSE3, FeatureCMPXCHG16B,
+ [FeatureSlowUAMem16, FeatureSSSE3, FeatureCMPXCHG16B,
FeatureSlowBTMem]>;
def : ProcessorModel<"penryn", SandyBridgeModel,
- [FeatureSlowUAMem, FeatureSSE41, FeatureCMPXCHG16B,
+ [FeatureSlowUAMem16, FeatureSSE41, FeatureCMPXCHG16B,
FeatureSlowBTMem]>;
// Atom CPUs.
class BonnellProc<string Name> : ProcessorModel<Name, AtomModel, [
ProcIntelAtom,
- FeatureSlowUAMem,
+ FeatureSlowUAMem16,
FeatureSSSE3,
FeatureCMPXCHG16B,
FeatureMOVBE,
@@ -399,38 +403,38 @@ def : SkylakeProc<"skx">; // Legacy alia
// AMD CPUs.
-def : Proc<"k6", [FeatureSlowUAMem, FeatureMMX]>;
-def : Proc<"k6-2", [FeatureSlowUAMem, Feature3DNow]>;
-def : Proc<"k6-3", [FeatureSlowUAMem, Feature3DNow]>;
-def : Proc<"athlon", [FeatureSlowUAMem, Feature3DNowA,
+def : Proc<"k6", [FeatureSlowUAMem16, FeatureMMX]>;
+def : Proc<"k6-2", [FeatureSlowUAMem16, Feature3DNow]>;
+def : Proc<"k6-3", [FeatureSlowUAMem16, Feature3DNow]>;
+def : Proc<"athlon", [FeatureSlowUAMem16, Feature3DNowA,
FeatureSlowBTMem, FeatureSlowSHLD]>;
-def : Proc<"athlon-tbird", [FeatureSlowUAMem, Feature3DNowA,
+def : Proc<"athlon-tbird", [FeatureSlowUAMem16, Feature3DNowA,
FeatureSlowBTMem, FeatureSlowSHLD]>;
-def : Proc<"athlon-4", [FeatureSlowUAMem, FeatureSSE1, Feature3DNowA,
+def : Proc<"athlon-4", [FeatureSlowUAMem16, FeatureSSE1, Feature3DNowA,
FeatureSlowBTMem, FeatureSlowSHLD]>;
-def : Proc<"athlon-xp", [FeatureSlowUAMem, FeatureSSE1, Feature3DNowA,
+def : Proc<"athlon-xp", [FeatureSlowUAMem16, FeatureSSE1, Feature3DNowA,
FeatureSlowBTMem, FeatureSlowSHLD]>;
-def : Proc<"athlon-mp", [FeatureSlowUAMem, FeatureSSE1, Feature3DNowA,
+def : Proc<"athlon-mp", [FeatureSlowUAMem16, FeatureSSE1, Feature3DNowA,
FeatureSlowBTMem, FeatureSlowSHLD]>;
-def : Proc<"k8", [FeatureSlowUAMem, FeatureSSE2, Feature3DNowA,
+def : Proc<"k8", [FeatureSlowUAMem16, FeatureSSE2, Feature3DNowA,
Feature64Bit, FeatureSlowBTMem,
FeatureSlowSHLD]>;
-def : Proc<"opteron", [FeatureSlowUAMem, FeatureSSE2, Feature3DNowA,
+def : Proc<"opteron", [FeatureSlowUAMem16, FeatureSSE2, Feature3DNowA,
Feature64Bit, FeatureSlowBTMem,
FeatureSlowSHLD]>;
-def : Proc<"athlon64", [FeatureSlowUAMem, FeatureSSE2, Feature3DNowA,
+def : Proc<"athlon64", [FeatureSlowUAMem16, FeatureSSE2, Feature3DNowA,
Feature64Bit, FeatureSlowBTMem,
FeatureSlowSHLD]>;
-def : Proc<"athlon-fx", [FeatureSlowUAMem, FeatureSSE2, Feature3DNowA,
+def : Proc<"athlon-fx", [FeatureSlowUAMem16, FeatureSSE2, Feature3DNowA,
Feature64Bit, FeatureSlowBTMem,
FeatureSlowSHLD]>;
-def : Proc<"k8-sse3", [FeatureSlowUAMem, FeatureSSE3, Feature3DNowA,
+def : Proc<"k8-sse3", [FeatureSlowUAMem16, FeatureSSE3, Feature3DNowA,
FeatureCMPXCHG16B, FeatureSlowBTMem,
FeatureSlowSHLD]>;
-def : Proc<"opteron-sse3", [FeatureSlowUAMem, FeatureSSE3, Feature3DNowA,
+def : Proc<"opteron-sse3", [FeatureSlowUAMem16, FeatureSSE3, Feature3DNowA,
FeatureCMPXCHG16B, FeatureSlowBTMem,
FeatureSlowSHLD]>;
-def : Proc<"athlon64-sse3", [FeatureSlowUAMem, FeatureSSE3, Feature3DNowA,
+def : Proc<"athlon64-sse3", [FeatureSlowUAMem16, FeatureSSE3, Feature3DNowA,
FeatureCMPXCHG16B, FeatureSlowBTMem,
FeatureSlowSHLD]>;
def : Proc<"amdfam10", [FeatureSSE4A,
@@ -483,12 +487,12 @@ def : Proc<"bdver4", [FeatureAV
FeatureTBM, FeatureFMA, FeatureSSE4A,
FeatureFSGSBase]>;
-def : Proc<"geode", [FeatureSlowUAMem, Feature3DNowA]>;
+def : Proc<"geode", [FeatureSlowUAMem16, Feature3DNowA]>;
-def : Proc<"winchip-c6", [FeatureSlowUAMem, FeatureMMX]>;
-def : Proc<"winchip2", [FeatureSlowUAMem, Feature3DNow]>;
-def : Proc<"c3", [FeatureSlowUAMem, Feature3DNow]>;
-def : Proc<"c3-2", [FeatureSlowUAMem, FeatureSSE1]>;
+def : Proc<"winchip-c6", [FeatureSlowUAMem16, FeatureMMX]>;
+def : Proc<"winchip2", [FeatureSlowUAMem16, Feature3DNow]>;
+def : Proc<"c3", [FeatureSlowUAMem16, Feature3DNow]>;
+def : Proc<"c3-2", [FeatureSlowUAMem16, FeatureSSE1]>;
// We also provide a generic 64-bit specific x86 processor model which tries to
// be good for modern chips without enabling instruction set encodings past the
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=246585&r1=246584&r2=246585&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Tue Sep 1 15:51:51 2015
@@ -1869,7 +1869,7 @@ X86TargetLowering::getOptimalMemOpType(u
if ((!IsMemset || ZeroMemset) &&
!F->hasFnAttribute(Attribute::NoImplicitFloat)) {
if (Size >= 16 &&
- (!Subtarget->isUnalignedMemUnder32Slow() ||
+ (!Subtarget->isUnalignedMem16Slow() ||
((DstAlign == 0 || DstAlign >= 16) &&
(SrcAlign == 0 || SrcAlign >= 16)))) {
if (Size >= 32) {
@@ -1916,7 +1916,9 @@ X86TargetLowering::allowsMisalignedMemor
if (VT.getSizeInBits() == 256)
*Fast = !Subtarget->isUnalignedMem32Slow();
else
- *Fast = !Subtarget->isUnalignedMemUnder32Slow();
+ // FIXME: We should always return that 8-byte and under accesses are fast.
+ // That is what other x86 lowering code assumes.
+ *Fast = !Subtarget->isUnalignedMem16Slow();
}
return true;
}
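
The FIXME in this hunk is easier to see with a small standalone model (a hedged
sketch, not the actual TargetLowering API; isMisalignedAccessFast and its
parameters are made up for illustration): today every non-256-bit misaligned
access is reported slow on slow-unaligned-mem-16 CPUs, while the intended fix
would report accesses of 64 bits or less as always fast.

#include <cstdio>

// Models how *Fast is computed in allowsMisalignedMemoryAccesses() after
// this rename. The commented-out line is the change the FIXME asks for.
static bool isMisalignedAccessFast(unsigned SizeInBits, bool UAMem16Slow,
                                   bool UAMem32Slow) {
  if (SizeInBits == 256)
    return !UAMem32Slow;
  // FIXME direction: if (SizeInBits <= 64) return true;
  return !UAMem16Slow;
}

int main() {
  // On a slow-unaligned-mem-16 CPU, even a 32-bit misaligned access is
  // reported slow today; prints "0".
  std::printf("%d\n", isMisalignedAccessFast(32, /*UAMem16Slow=*/true,
                                             /*UAMem32Slow=*/false));
  return 0;
}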
Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.cpp?rev=246585&r1=246584&r2=246585&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.cpp Tue Sep 1 15:51:51 2015
@@ -5511,7 +5511,7 @@ bool X86InstrInfo::unfoldMemoryOperand(M
// TODO: Check if 32-byte or greater accesses are slow too?
if (!MI->hasOneMemOperand() &&
RC == &X86::VR128RegClass &&
- Subtarget.isUnalignedMemUnder32Slow())
+ Subtarget.isUnalignedMem16Slow())
// Without memoperands, loadRegFromAddr and storeRegToStackSlot will
// conservatively assume the address is unaligned. That's bad for
// performance.
@@ -5659,7 +5659,7 @@ X86InstrInfo::unfoldMemoryOperand(Select
cast<MachineSDNode>(N)->memoperands_end());
if (!(*MMOs.first) &&
RC == &X86::VR128RegClass &&
- Subtarget.isUnalignedMemUnder32Slow())
+ Subtarget.isUnalignedMem16Slow())
// Do not introduce a slow unaligned load.
return false;
// FIXME: If a VR128 can have size 32, we should be checking if a 32-byte
@@ -5704,7 +5704,7 @@ X86InstrInfo::unfoldMemoryOperand(Select
cast<MachineSDNode>(N)->memoperands_end());
if (!(*MMOs.first) &&
RC == &X86::VR128RegClass &&
- Subtarget.isUnalignedMemUnder32Slow())
+ Subtarget.isUnalignedMem16Slow())
// Do not introduce a slow unaligned store.
return false;
// FIXME: If a VR128 can have size 32, we should be checking if a 32-byte
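
In plain terms, the unfoldMemoryOperand() guards above all make the same
decision; a hedged, self-contained restatement (hypothetical names, not the
X86InstrInfo API) looks like this: when no memoperand is available the address
must be assumed unaligned, so a standalone 128-bit load or store is only
emitted if unaligned 16-byte accesses are fast on the subtarget.

#include <cstdio>

// Decide whether to unfold a folded memory operand into a separate
// 128-bit (VR128) load or store.
static bool shouldUnfoldVR128MemOp(bool HasMemOperand, bool UAMem16Slow) {
  // Without a memoperand the alignment is unknown, so unfolding would
  // conservatively emit an unaligned 16-byte access; skip it if that is
  // slow on this subtarget.
  if (!HasMemOperand && UAMem16Slow)
    return false;
  return true;
}

int main() {
  // Prints "0 1": do not unfold on a slow-unaligned-mem-16 CPU when the
  // alignment is unknown, but do unfold when unaligned access is fast.
  std::printf("%d %d\n", shouldUnfoldVR128MemOp(false, true),
              shouldUnfoldVR128MemOp(false, false));
  return 0;
}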
Modified: llvm/trunk/lib/Target/X86/X86Subtarget.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86Subtarget.cpp?rev=246585&r1=246584&r2=246585&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86Subtarget.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86Subtarget.cpp Tue Sep 1 15:51:51 2015
@@ -197,7 +197,7 @@ void X86Subtarget::initSubtargetFeatures
// introduced with Intel's Nehalem/Silvermont and AMD's Family10h
// micro-architectures respectively.
if (hasSSE42() || hasSSE4A())
- IsUAMemUnder32Slow = false;
+ IsUAMem16Slow = false;
InstrItins = getInstrItineraryForCPU(CPUName);
@@ -262,7 +262,7 @@ void X86Subtarget::initializeEnvironment
HasMPX = false;
IsBTMemSlow = false;
IsSHLDSlow = false;
- IsUAMemUnder32Slow = false;
+ IsUAMem16Slow = false;
IsUAMem32Slow = false;
HasSSEUnalignedMem = false;
HasCmpxchg16b = false;
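
The X86Subtarget.cpp hunk above keeps the existing override: CPUs whose ISA
level implies fast unaligned 16-byte accesses clear the flag regardless of what
the CPU model requested. A minimal sketch of that rule (computeIsUAMem16Slow is
an illustrative name, not LLVM's):

#include <cstdio>

// Unaligned 16-byte accesses became fast with Intel's Nehalem/Silvermont
// (SSE4.2) and AMD's Family10h (SSE4A), so those ISA levels override the
// per-CPU-model feature flag.
static bool computeIsUAMem16Slow(bool FlagFromCPUModel, bool HasSSE42,
                                 bool HasSSE4A) {
  if (HasSSE42 || HasSSE4A)
    return false;
  return FlagFromCPUModel;
}

int main() {
  // Prints "0": even if a CPU model sets slow-unaligned-mem-16, SSE4.2
  // clears it.
  std::printf("%d\n", computeIsUAMem16Slow(true, /*HasSSE42=*/true,
                                           /*HasSSE4A=*/false));
  return 0;
}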
Modified: llvm/trunk/lib/Target/X86/X86Subtarget.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86Subtarget.h?rev=246585&r1=246584&r2=246585&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86Subtarget.h (original)
+++ llvm/trunk/lib/Target/X86/X86Subtarget.h Tue Sep 1 15:51:51 2015
@@ -146,8 +146,8 @@ protected:
/// True if SHLD instructions are slow.
bool IsSHLDSlow;
- /// True if unaligned memory accesses of 16-bytes or smaller are slow.
- bool IsUAMemUnder32Slow;
+ /// True if unaligned memory accesses of 16-bytes are slow.
+ bool IsUAMem16Slow;
/// True if unaligned memory accesses of 32-bytes are slow.
bool IsUAMem32Slow;
@@ -357,7 +357,7 @@ public:
bool hasRDSEED() const { return HasRDSEED; }
bool isBTMemSlow() const { return IsBTMemSlow; }
bool isSHLDSlow() const { return IsSHLDSlow; }
- bool isUnalignedMemUnder32Slow() const { return IsUAMemUnder32Slow; }
+ bool isUnalignedMem16Slow() const { return IsUAMem16Slow; }
bool isUnalignedMem32Slow() const { return IsUAMem32Slow; }
bool hasSSEUnalignedMem() const { return HasSSEUnalignedMem; }
bool hasCmpxchg16b() const { return HasCmpxchg16b; }