[llvm] r315862 - [X86] Remove the SlowBTMem feature flag entirely
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Sun Oct 15 09:57:33 PDT 2017
Author: ctopper
Date: Sun Oct 15 09:57:33 2017
New Revision: 315862
URL: http://llvm.org/viewvc/llvm-project?rev=315862&view=rev
Log:
[X86] Remove the SlowBTMem feature flag entirely
Turns out we have no patterns on the instructions that were using this feature flag for other reasons. These instructions are slow on all modern CPUs, so it seems unlikely that we will spend any effort supporting these instructions going forward. So we might as well just kill off the feature flag and just fix up the comments.
Modified:
llvm/trunk/lib/Target/X86/X86.td
llvm/trunk/lib/Target/X86/X86InstrInfo.td
llvm/trunk/lib/Target/X86/X86Subtarget.cpp
llvm/trunk/lib/Target/X86/X86Subtarget.h
Modified: llvm/trunk/lib/Target/X86/X86.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86.td?rev=315862&r1=315861&r2=315862&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86.td (original)
+++ llvm/trunk/lib/Target/X86/X86.td Sun Oct 15 09:57:33 2017
@@ -95,8 +95,6 @@ def Feature64Bit : SubtargetFeature<"6
def FeatureCMPXCHG16B : SubtargetFeature<"cx16", "HasCmpxchg16b", "true",
"64-bit with cmpxchg16b",
[Feature64Bit]>;
-def FeatureSlowBTMem : SubtargetFeature<"slow-bt-mem", "IsBTMemSlow", "true",
- "Bit testing of memory is slow">;
def FeatureSlowSHLD : SubtargetFeature<"slow-shld", "IsSHLDSlow", "true",
"SHLD instruction is slow">;
def FeatureSlowPMULLD : SubtargetFeature<"slow-pmulld", "IsPMULLDSlow", "true",
@@ -336,7 +334,7 @@ def : Proc<"pentium2", [FeatureX8
def : Proc<"pentium3", [FeatureX87, FeatureSlowUAMem16, FeatureMMX,
FeatureSSE1, FeatureFXSR]>;
def : Proc<"pentium3m", [FeatureX87, FeatureSlowUAMem16, FeatureMMX,
- FeatureSSE1, FeatureFXSR, FeatureSlowBTMem]>;
+ FeatureSSE1, FeatureFXSR]>;
// Enable the PostRAScheduler for SSE2 and SSE3 class cpus.
// The intent is to enable it for pentium4 which is the current default
@@ -350,7 +348,7 @@ def : Proc<"pentium3m", [FeatureX8
def : ProcessorModel<"pentium-m", GenericPostRAModel,
[FeatureX87, FeatureSlowUAMem16, FeatureMMX,
- FeatureSSE2, FeatureFXSR, FeatureSlowBTMem]>;
+ FeatureSSE2, FeatureFXSR]>;
def : ProcessorModel<"pentium4", GenericPostRAModel,
[FeatureX87, FeatureSlowUAMem16, FeatureMMX,
@@ -358,7 +356,7 @@ def : ProcessorModel<"pentium4", Generic
def : ProcessorModel<"pentium4m", GenericPostRAModel,
[FeatureX87, FeatureSlowUAMem16, FeatureMMX,
- FeatureSSE2, FeatureFXSR, FeatureSlowBTMem]>;
+ FeatureSSE2, FeatureFXSR]>;
// Intel Quark.
def : Proc<"lakemont", []>;
@@ -366,20 +364,19 @@ def : Proc<"lakemont", []>;
// Intel Core Duo.
def : ProcessorModel<"yonah", SandyBridgeModel,
[FeatureX87, FeatureSlowUAMem16, FeatureMMX, FeatureSSE3,
- FeatureFXSR, FeatureSlowBTMem]>;
+ FeatureFXSR]>;
// NetBurst.
def : ProcessorModel<"prescott", GenericPostRAModel,
[FeatureX87, FeatureSlowUAMem16, FeatureMMX, FeatureSSE3,
- FeatureFXSR, FeatureSlowBTMem]>;
+ FeatureFXSR]>;
def : ProcessorModel<"nocona", GenericPostRAModel, [
FeatureX87,
FeatureSlowUAMem16,
FeatureMMX,
FeatureSSE3,
FeatureFXSR,
- FeatureCMPXCHG16B,
- FeatureSlowBTMem
+ FeatureCMPXCHG16B
]>;
// Intel Core 2 Solo/Duo.
@@ -390,7 +387,6 @@ def : ProcessorModel<"core2", SandyBridg
FeatureSSSE3,
FeatureFXSR,
FeatureCMPXCHG16B,
- FeatureSlowBTMem,
FeatureLAHFSAHF,
FeatureMacroFusion
]>;
@@ -401,7 +397,6 @@ def : ProcessorModel<"penryn", SandyBrid
FeatureSSE41,
FeatureFXSR,
FeatureCMPXCHG16B,
- FeatureSlowBTMem,
FeatureLAHFSAHF,
FeatureMacroFusion
]>;
@@ -416,7 +411,6 @@ class BonnellProc<string Name> : Process
FeatureFXSR,
FeatureCMPXCHG16B,
FeatureMOVBE,
- FeatureSlowBTMem,
FeatureLEAForSP,
FeatureSlowDivide32,
FeatureSlowDivide64,
@@ -444,7 +438,6 @@ class SilvermontProc<string Name> : Proc
FeaturePRFCHW,
FeatureSlowLEA,
FeatureSlowIncDec,
- FeatureSlowBTMem,
FeatureSlowPMULLD,
FeatureLAHFSAHF
]>;
@@ -466,7 +459,6 @@ class GoldmontProc<string Name> : Proces
FeatureSlowTwoMemOps,
FeatureSlowLEA,
FeatureSlowIncDec,
- FeatureSlowBTMem,
FeatureLAHFSAHF,
FeatureMPX,
FeatureSHA,
@@ -488,7 +480,6 @@ class NehalemProc<string Name> : Process
FeatureSSE42,
FeatureFXSR,
FeatureCMPXCHG16B,
- FeatureSlowBTMem,
FeaturePOPCNT,
FeatureLAHFSAHF,
FeatureMacroFusion
@@ -504,7 +495,6 @@ class WestmereProc<string Name> : Proces
FeatureSSE42,
FeatureFXSR,
FeatureCMPXCHG16B,
- FeatureSlowBTMem,
FeaturePOPCNT,
FeatureAES,
FeaturePCLMUL,
@@ -547,7 +537,6 @@ def SNBFeatures : ProcessorFeatures<[],
class SandyBridgeProc<string Name> : ProcModel<Name, SandyBridgeModel,
SNBFeatures.Value, [
- FeatureSlowBTMem,
FeatureSlowUAMem32
]>;
def : SandyBridgeProc<"sandybridge">;
@@ -561,7 +550,6 @@ def IVBFeatures : ProcessorFeatures<SNBF
class IvyBridgeProc<string Name> : ProcModel<Name, SandyBridgeModel,
IVBFeatures.Value, [
- FeatureSlowBTMem,
FeatureSlowUAMem32
]>;
def : IvyBridgeProc<"ivybridge">;
@@ -579,8 +567,7 @@ def HSWFeatures : ProcessorFeatures<IVBF
class HaswellProc<string Name> : ProcModel<Name, HaswellModel,
HSWFeatures.Value, [
- ProcIntelHSW,
- FeatureSlowBTMem
+ ProcIntelHSW
]>;
def : HaswellProc<"haswell">;
def : HaswellProc<"core-avx2">; // Legacy alias.
@@ -591,8 +578,7 @@ def BDWFeatures : ProcessorFeatures<HSWF
]>;
class BroadwellProc<string Name> : ProcModel<Name, HaswellModel,
BDWFeatures.Value, [
- ProcIntelBDW,
- FeatureSlowBTMem
+ ProcIntelBDW
]>;
def : BroadwellProc<"broadwell">;
@@ -608,8 +594,7 @@ def SKLFeatures : ProcessorFeatures<BDWF
class SkylakeClientProc<string Name> : ProcModel<Name, SkylakeClientModel,
SKLFeatures.Value, [
- ProcIntelSKL,
- FeatureSlowBTMem
+ ProcIntelSKL
]>;
def : SkylakeClientProc<"skylake">;
@@ -632,7 +617,6 @@ def KNLFeatures : ProcessorFeatures<IVBF
class KnightsLandingProc<string Name> : ProcModel<Name, HaswellModel,
KNLFeatures.Value, [
ProcIntelKNL,
- FeatureSlowBTMem,
FeatureSlowTwoMemOps,
FeatureFastPartialYMMorZMMWrite
]>;
@@ -641,7 +625,6 @@ def : KnightsLandingProc<"knl">;
class KnightsMillProc<string Name> : ProcModel<Name, HaswellModel,
KNLFeatures.Value, [
ProcIntelKNL,
- FeatureSlowBTMem,
FeatureSlowTwoMemOps,
FeatureFastPartialYMMorZMMWrite
]>;
@@ -659,8 +642,7 @@ def SKXFeatures : ProcessorFeatures<SKLF
class SkylakeServerProc<string Name> : ProcModel<Name, SkylakeServerModel,
SKXFeatures.Value, [
- ProcIntelSKX,
- FeatureSlowBTMem
+ ProcIntelSKX
]>;
def : SkylakeServerProc<"skylake-avx512">;
def : SkylakeServerProc<"skx">; // Legacy alias.
@@ -673,8 +655,7 @@ def CNLFeatures : ProcessorFeatures<SKXF
class CannonlakeProc<string Name> : ProcModel<Name, HaswellModel,
CNLFeatures.Value, [
- ProcIntelCNL,
- FeatureSlowBTMem
+ ProcIntelCNL
]>;
def : CannonlakeProc<"cannonlake">;
@@ -684,46 +665,43 @@ def : Proc<"k6", [FeatureX8
def : Proc<"k6-2", [FeatureX87, FeatureSlowUAMem16, Feature3DNow]>;
def : Proc<"k6-3", [FeatureX87, FeatureSlowUAMem16, Feature3DNow]>;
def : Proc<"athlon", [FeatureX87, FeatureSlowUAMem16, Feature3DNowA,
- FeatureSlowBTMem, FeatureSlowSHLD]>;
+ FeatureSlowSHLD]>;
def : Proc<"athlon-tbird", [FeatureX87, FeatureSlowUAMem16, Feature3DNowA,
- FeatureSlowBTMem, FeatureSlowSHLD]>;
-def : Proc<"athlon-4", [FeatureX87, FeatureSlowUAMem16, FeatureSSE1,
- Feature3DNowA, FeatureFXSR, FeatureSlowBTMem,
FeatureSlowSHLD]>;
+def : Proc<"athlon-4", [FeatureX87, FeatureSlowUAMem16, FeatureSSE1,
+ Feature3DNowA, FeatureFXSR, FeatureSlowSHLD]>;
def : Proc<"athlon-xp", [FeatureX87, FeatureSlowUAMem16, FeatureSSE1,
- Feature3DNowA, FeatureFXSR, FeatureSlowBTMem,
- FeatureSlowSHLD]>;
+ Feature3DNowA, FeatureFXSR, FeatureSlowSHLD]>;
def : Proc<"athlon-mp", [FeatureX87, FeatureSlowUAMem16, FeatureSSE1,
- Feature3DNowA, FeatureFXSR, FeatureSlowBTMem,
- FeatureSlowSHLD]>;
+ Feature3DNowA, FeatureFXSR, FeatureSlowSHLD]>;
def : Proc<"k8", [FeatureX87, FeatureSlowUAMem16, FeatureSSE2,
Feature3DNowA, FeatureFXSR, Feature64Bit,
- FeatureSlowBTMem, FeatureSlowSHLD]>;
+ FeatureSlowSHLD]>;
def : Proc<"opteron", [FeatureX87, FeatureSlowUAMem16, FeatureSSE2,
Feature3DNowA, FeatureFXSR, Feature64Bit,
- FeatureSlowBTMem, FeatureSlowSHLD]>;
+ FeatureSlowSHLD]>;
def : Proc<"athlon64", [FeatureX87, FeatureSlowUAMem16, FeatureSSE2,
Feature3DNowA, FeatureFXSR, Feature64Bit,
- FeatureSlowBTMem, FeatureSlowSHLD]>;
+ FeatureSlowSHLD]>;
def : Proc<"athlon-fx", [FeatureX87, FeatureSlowUAMem16, FeatureSSE2,
Feature3DNowA, FeatureFXSR, Feature64Bit,
- FeatureSlowBTMem, FeatureSlowSHLD]>;
+ FeatureSlowSHLD]>;
def : Proc<"k8-sse3", [FeatureX87, FeatureSlowUAMem16, FeatureSSE3,
Feature3DNowA, FeatureFXSR, FeatureCMPXCHG16B,
- FeatureSlowBTMem, FeatureSlowSHLD]>;
+ FeatureSlowSHLD]>;
def : Proc<"opteron-sse3", [FeatureX87, FeatureSlowUAMem16, FeatureSSE3,
Feature3DNowA, FeatureFXSR, FeatureCMPXCHG16B,
- FeatureSlowBTMem, FeatureSlowSHLD]>;
+ FeatureSlowSHLD]>;
def : Proc<"athlon64-sse3", [FeatureX87, FeatureSlowUAMem16, FeatureSSE3,
Feature3DNowA, FeatureFXSR, FeatureCMPXCHG16B,
- FeatureSlowBTMem, FeatureSlowSHLD]>;
+ FeatureSlowSHLD]>;
def : Proc<"amdfam10", [FeatureX87, FeatureSSE4A, Feature3DNowA,
FeatureFXSR, FeatureCMPXCHG16B, FeatureLZCNT,
- FeaturePOPCNT, FeatureSlowBTMem, FeatureSlowSHLD,
+ FeaturePOPCNT, FeatureSlowSHLD,
FeatureLAHFSAHF]>;
def : Proc<"barcelona", [FeatureX87, FeatureSSE4A, Feature3DNowA,
FeatureFXSR, FeatureCMPXCHG16B, FeatureLZCNT,
- FeaturePOPCNT, FeatureSlowBTMem, FeatureSlowSHLD,
+ FeaturePOPCNT, FeatureSlowSHLD,
FeatureLAHFSAHF]>;
// Bobcat
@@ -929,7 +907,6 @@ def : ProcessorModel<"x86-64", SandyBrid
FeatureFXSR,
Feature64Bit,
FeatureSlow3OpsLEA,
- FeatureSlowBTMem,
FeatureSlowIncDec,
FeatureMacroFusion
]>;
Modified: llvm/trunk/lib/Target/X86/X86InstrInfo.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrInfo.td?rev=315862&r1=315861&r2=315862&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrInfo.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrInfo.td Sun Oct 15 09:57:33 2017
@@ -904,7 +904,6 @@ let RecomputePerFunction = 1 in {
"MF->getFunction()->optForSize()">;
}
-def FastBTMem : Predicate<"!Subtarget->isBTMemSlow()">;
def CallImmAddr : Predicate<"Subtarget->isLegalToCallImmediateAddr()">;
def FavorMemIndirectCall : Predicate<"!Subtarget->slowTwoMemOps()">;
def HasFastMem32 : Predicate<"!Subtarget->isUnalignedMem32Slow()">;
@@ -1672,25 +1671,20 @@ def BT64rr : RI<0xA3, MRMDestReg, (outs)
// Unlike with the register+register form, the memory+register form of the
// bt instruction does not ignore the high bits of the index. From ISel's
// perspective, this is pretty bizarre. Make these instructions disassembly
-// only for now.
+// only for now. These instructions are also slow on modern CPUs so that's
+// another reason to avoid generating them.
let mayLoad = 1, hasSideEffects = 0, SchedRW = [WriteALULd] in {
def BT16mr : I<0xA3, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2),
"bt{w}\t{$src2, $src1|$src1, $src2}",
- // [(X86bt (loadi16 addr:$src1), GR16:$src2),
- // (implicit EFLAGS)]
[], IIC_BT_MR
- >, OpSize16, TB, Requires<[FastBTMem]>, NotMemoryFoldable;
+ >, OpSize16, TB, NotMemoryFoldable;
def BT32mr : I<0xA3, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2),
"bt{l}\t{$src2, $src1|$src1, $src2}",
- // [(X86bt (loadi32 addr:$src1), GR32:$src2),
- // (implicit EFLAGS)]
[], IIC_BT_MR
- >, OpSize32, TB, Requires<[FastBTMem]>, NotMemoryFoldable;
+ >, OpSize32, TB, NotMemoryFoldable;
def BT64mr : RI<0xA3, MRMDestMem, (outs), (ins i64mem:$src1, GR64:$src2),
"bt{q}\t{$src2, $src1|$src1, $src2}",
- // [(X86bt (loadi64 addr:$src1), GR64:$src2),
- // (implicit EFLAGS)]
[], IIC_BT_MR
>, TB, NotMemoryFoldable;
}
@@ -1710,9 +1704,8 @@ def BT64ri8 : RIi8<0xBA, MRM4r, (outs),
IIC_BT_RI>, TB;
} // SchedRW
-// Note that these instructions don't need FastBTMem because that
-// only applies when the other operand is in a register. When it's
-// an immediate, bt is still fast.
+// Note that these instructions aren't slow because that only applies when the
+// other operand is in a register. When it's an immediate, bt is still fast.
let SchedRW = [WriteALU] in {
def BT16mi8 : Ii8<0xBA, MRM4m, (outs), (ins i16mem:$src1, i16i8imm:$src2),
"bt{w}\t{$src2, $src1|$src1, $src2}",
Modified: llvm/trunk/lib/Target/X86/X86Subtarget.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86Subtarget.cpp?rev=315862&r1=315861&r2=315862&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86Subtarget.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86Subtarget.cpp Sun Oct 15 09:57:33 2017
@@ -345,7 +345,6 @@ void X86Subtarget::initializeEnvironment
HasSGX = false;
HasCLFLUSHOPT = false;
HasCLWB = false;
- IsBTMemSlow = false;
IsPMULLDSlow = false;
IsSHLDSlow = false;
IsUAMem16Slow = false;
Modified: llvm/trunk/lib/Target/X86/X86Subtarget.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86Subtarget.h?rev=315862&r1=315861&r2=315862&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86Subtarget.h (original)
+++ llvm/trunk/lib/Target/X86/X86Subtarget.h Sun Oct 15 09:57:33 2017
@@ -193,9 +193,6 @@ protected:
/// Processor has Prefetch with intent to Write instruction
bool HasPFPREFETCHWT1;
- /// True if BT (bit test) of memory instructions are slow.
- bool IsBTMemSlow;
-
/// True if SHLD instructions are slow.
bool IsSHLDSlow;
@@ -489,7 +486,6 @@ public:
bool hasLAHFSAHF() const { return HasLAHFSAHF; }
bool hasMWAITX() const { return HasMWAITX; }
bool hasCLZERO() const { return HasCLZERO; }
- bool isBTMemSlow() const { return IsBTMemSlow; }
bool isSHLDSlow() const { return IsSHLDSlow; }
bool isPMULLDSlow() const { return IsPMULLDSlow; }
bool isUnalignedMem16Slow() const { return IsUAMem16Slow; }
More information about the llvm-commits
mailing list