[llvm] r340707 - [X86] Add FeatureCMOV explicitly to all CPUs that support it. Remove FeatureCMOV implication from Feature64Bit and FeatureSSE1
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Sun Aug 26 11:29:34 PDT 2018
Author: ctopper
Date: Sun Aug 26 11:29:33 2018
New Revision: 340707
URL: http://llvm.org/viewvc/llvm-project?rev=340707&view=rev
Log:
[X86] Add FeatureCMOV explicitly to all CPUs that support it. Remove FeatureCMOV implication from Feature64Bit and FeatureSSE1
Summary:
Previously most CPUs inherited cmov support through Feature64Bit (or FeatureCMPXCHG16B implying Feature64Bit) or FeatureSSE1.
This has the surprising side effect that -mattr=-cmov causes an assert to fire in 64-bit mode because it clears Feature64Bit. In 32-bit mode, -mattr=-cmov disables all SSE/AVX features, which is also surprising.
This patch removes the implication and instead updates hasCMov in X86Subtarget to check SSE1 or is64Bit in addition to the regular cmov flag. This should keep most things working the way they did before. I don't believe there is a way to specify "-cmov" directly from clang, so this should only affect our lower-level tools.
This does stop -mattr=cx16 (cmpxchg16b) from implying cmov is enabled via the 64bit flag, as you can see from one of the changed tests. But that was a 32-bit test, so I don't know why it enabled cx16 anyway.
For the other test I had to add -sse to override the new SSE check in hasCMov.
Reviewers: RKSimon, DavidKreitzer, spatel
Reviewed By: RKSimon
Subscribers: llvm-commits, jfb
Differential Revision: https://reviews.llvm.org/D51228
Modified:
llvm/trunk/lib/Target/X86/X86.td
llvm/trunk/lib/Target/X86/X86Subtarget.h
llvm/trunk/test/CodeGen/X86/atomic-minmax-i6432.ll
llvm/trunk/test/CodeGen/X86/atomic32.ll
Modified: llvm/trunk/lib/Target/X86/X86.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86.td?rev=340707&r1=340706&r2=340707&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86.td (original)
+++ llvm/trunk/lib/Target/X86/X86.td Sun Aug 26 11:29:33 2018
@@ -59,10 +59,7 @@ def FeatureXSAVES : SubtargetFeature<"x
"Support xsaves instructions">;
def FeatureSSE1 : SubtargetFeature<"sse", "X86SSELevel", "SSE1",
- "Enable SSE instructions",
- // SSE codegen depends on cmovs, and all
- // SSE1+ processors support them.
- [FeatureCMOV]>;
+ "Enable SSE instructions">;
def FeatureSSE2 : SubtargetFeature<"sse2", "X86SSELevel", "SSE2",
"Enable SSE2 instructions",
[FeatureSSE1]>;
@@ -93,8 +90,7 @@ def Feature3DNowA : SubtargetFeature<"3
// feature, because SSE2 can be disabled (e.g. for compiling OS kernels)
// without disabling 64-bit mode.
def Feature64Bit : SubtargetFeature<"64bit", "HasX86_64", "true",
- "Support 64-bit instructions",
- [FeatureCMOV]>;
+ "Support 64-bit instructions">;
def FeatureCMPXCHG16B : SubtargetFeature<"cx16", "HasCmpxchg16b", "true",
"64-bit with cmpxchg16b",
[Feature64Bit]>;
@@ -481,7 +477,7 @@ def : Proc<"pentium2", [FeatureX8
foreach P = ["pentium3", "pentium3m"] in {
def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureMMX, FeatureSSE1,
- FeatureFXSR, FeatureNOPL]>;
+ FeatureFXSR, FeatureNOPL, FeatureCMOV]>;
}
// Enable the PostRAScheduler for SSE2 and SSE3 class cpus.
@@ -496,12 +492,12 @@ foreach P = ["pentium3", "pentium3m"] in
def : ProcessorModel<"pentium-m", GenericPostRAModel,
[FeatureX87, FeatureSlowUAMem16, FeatureMMX,
- FeatureSSE2, FeatureFXSR, FeatureNOPL]>;
+ FeatureSSE2, FeatureFXSR, FeatureNOPL, FeatureCMOV]>;
foreach P = ["pentium4", "pentium4m"] in {
def : ProcessorModel<P, GenericPostRAModel,
[FeatureX87, FeatureSlowUAMem16, FeatureMMX,
- FeatureSSE2, FeatureFXSR, FeatureNOPL]>;
+ FeatureSSE2, FeatureFXSR, FeatureNOPL, FeatureCMOV]>;
}
// Intel Quark.
@@ -510,15 +506,16 @@ def : Proc<"lakemont", []>;
// Intel Core Duo.
def : ProcessorModel<"yonah", SandyBridgeModel,
[FeatureX87, FeatureSlowUAMem16, FeatureMMX, FeatureSSE3,
- FeatureFXSR, FeatureNOPL]>;
+ FeatureFXSR, FeatureNOPL, FeatureCMOV]>;
// NetBurst.
def : ProcessorModel<"prescott", GenericPostRAModel,
[FeatureX87, FeatureSlowUAMem16, FeatureMMX, FeatureSSE3,
- FeatureFXSR, FeatureNOPL]>;
+ FeatureFXSR, FeatureNOPL, FeatureCMOV]>;
def : ProcessorModel<"nocona", GenericPostRAModel, [
FeatureX87,
FeatureSlowUAMem16,
+ FeatureCMOV,
FeatureMMX,
FeatureSSE3,
FeatureFXSR,
@@ -530,6 +527,7 @@ def : ProcessorModel<"nocona", GenericPo
def : ProcessorModel<"core2", SandyBridgeModel, [
FeatureX87,
FeatureSlowUAMem16,
+ FeatureCMOV,
FeatureMMX,
FeatureSSSE3,
FeatureFXSR,
@@ -541,6 +539,7 @@ def : ProcessorModel<"core2", SandyBridg
def : ProcessorModel<"penryn", SandyBridgeModel, [
FeatureX87,
FeatureSlowUAMem16,
+ FeatureCMOV,
FeatureMMX,
FeatureSSE41,
FeatureFXSR,
@@ -555,6 +554,7 @@ class BonnellProc<string Name> : Process
ProcIntelAtom,
FeatureX87,
FeatureSlowUAMem16,
+ FeatureCMOV,
FeatureMMX,
FeatureSSSE3,
FeatureFXSR,
@@ -575,6 +575,7 @@ def : BonnellProc<"atom">; // Pin the ge
class SilvermontProc<string Name> : ProcessorModel<Name, SLMModel, [
ProcIntelSLM,
FeatureX87,
+ FeatureCMOV,
FeatureMMX,
FeatureSSE42,
FeatureFXSR,
@@ -609,6 +610,7 @@ class ProcModel<string Name, SchedMachin
def GLMFeatures : ProcessorFeatures<[], [
FeatureX87,
+ FeatureCMOV,
FeatureMMX,
FeatureSSE42,
FeatureFXSR,
@@ -668,6 +670,7 @@ def : TremontProc<"tremont">;
// "Arrandale" along with corei3 and corei5
class NehalemProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [
FeatureX87,
+ FeatureCMOV,
FeatureMMX,
FeatureSSE42,
FeatureFXSR,
@@ -684,6 +687,7 @@ def : NehalemProc<"corei7">;
// Westmere is the corei3/i5/i7 path from nehalem to sandybridge
class WestmereProc<string Name> : ProcessorModel<Name, SandyBridgeModel, [
FeatureX87,
+ FeatureCMOV,
FeatureMMX,
FeatureSSE42,
FeatureFXSR,
@@ -701,6 +705,7 @@ def : WestmereProc<"westmere">;
// rather than a superset.
def SNBFeatures : ProcessorFeatures<[], [
FeatureX87,
+ FeatureCMOV,
FeatureMMX,
FeatureAVX,
FeatureFXSR,
@@ -909,29 +914,32 @@ foreach P = ["athlon", "athlon-tbird"] i
}
foreach P = ["athlon-4", "athlon-xp", "athlon-mp"] in {
- def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureSSE1,
+ def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureCMOV, FeatureSSE1,
Feature3DNowA, FeatureFXSR, FeatureNOPL, FeatureSlowSHLD]>;
}
foreach P = ["k8", "opteron", "athlon64", "athlon-fx"] in {
def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureSSE2, Feature3DNowA,
- FeatureFXSR, FeatureNOPL, Feature64Bit, FeatureSlowSHLD]>;
+ FeatureFXSR, FeatureNOPL, Feature64Bit, FeatureSlowSHLD,
+ FeatureCMOV]>;
}
foreach P = ["k8-sse3", "opteron-sse3", "athlon64-sse3"] in {
def : Proc<P, [FeatureX87, FeatureSlowUAMem16, FeatureSSE3, Feature3DNowA,
- FeatureFXSR, FeatureNOPL, FeatureCMPXCHG16B, FeatureSlowSHLD]>;
+ FeatureFXSR, FeatureNOPL, FeatureCMPXCHG16B, FeatureSlowSHLD,
+ FeatureCMOV]>;
}
foreach P = ["amdfam10", "barcelona"] in {
def : Proc<P, [FeatureX87, FeatureSSE4A, Feature3DNowA, FeatureFXSR,
FeatureNOPL, FeatureCMPXCHG16B, FeatureLZCNT, FeaturePOPCNT,
- FeatureSlowSHLD, FeatureLAHFSAHF]>;
+ FeatureSlowSHLD, FeatureLAHFSAHF, FeatureCMOV]>;
}
// Bobcat
def : Proc<"btver1", [
FeatureX87,
+ FeatureCMOV,
FeatureMMX,
FeatureSSSE3,
FeatureSSE4A,
@@ -949,6 +957,7 @@ def : Proc<"btver1", [
// Jaguar
def : ProcessorModel<"btver2", BtVer2Model, [
FeatureX87,
+ FeatureCMOV,
FeatureMMX,
FeatureAVX,
FeatureFXSR,
@@ -975,6 +984,7 @@ def : ProcessorModel<"btver2", BtVer2Mod
// Bulldozer
def : Proc<"bdver1", [
FeatureX87,
+ FeatureCMOV,
FeatureXOP,
FeatureFMA4,
FeatureCMPXCHG16B,
@@ -998,6 +1008,7 @@ def : Proc<"bdver1", [
// Piledriver
def : Proc<"bdver2", [
FeatureX87,
+ FeatureCMOV,
FeatureXOP,
FeatureFMA4,
FeatureCMPXCHG16B,
@@ -1026,6 +1037,7 @@ def : Proc<"bdver2", [
// Steamroller
def : Proc<"bdver3", [
FeatureX87,
+ FeatureCMOV,
FeatureXOP,
FeatureFMA4,
FeatureCMPXCHG16B,
@@ -1056,6 +1068,7 @@ def : Proc<"bdver3", [
// Excavator
def : Proc<"bdver4", [
FeatureX87,
+ FeatureCMOV,
FeatureMMX,
FeatureAVX2,
FeatureFXSR,
@@ -1093,6 +1106,7 @@ def: ProcessorModel<"znver1", Znver1Mode
FeatureBMI2,
FeatureCLFLUSHOPT,
FeatureCLZERO,
+ FeatureCMOV,
FeatureCMPXCHG16B,
FeatureF16C,
FeatureFMA,
@@ -1127,7 +1141,7 @@ def : Proc<"winchip-c6", [FeatureX8
def : Proc<"winchip2", [FeatureX87, FeatureSlowUAMem16, Feature3DNow]>;
def : Proc<"c3", [FeatureX87, FeatureSlowUAMem16, Feature3DNow]>;
def : Proc<"c3-2", [FeatureX87, FeatureSlowUAMem16, FeatureMMX,
- FeatureSSE1, FeatureFXSR]>;
+ FeatureSSE1, FeatureFXSR, FeatureCMOV]>;
// We also provide a generic 64-bit specific x86 processor model which tries to
// be good for modern chips without enabling instruction set encodings past the
@@ -1141,6 +1155,7 @@ def : Proc<"c3-2", [FeatureX8
// forming a common base for them.
def : ProcessorModel<"x86-64", SandyBridgeModel, [
FeatureX87,
+ FeatureCMOV,
FeatureMMX,
FeatureSSE2,
FeatureFXSR,
Modified: llvm/trunk/lib/Target/X86/X86Subtarget.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86Subtarget.h?rev=340707&r1=340706&r2=340707&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86Subtarget.h (original)
+++ llvm/trunk/lib/Target/X86/X86Subtarget.h Sun Aug 26 11:29:33 2018
@@ -542,7 +542,9 @@ public:
bool hasX87() const { return HasX87; }
bool hasNOPL() const { return HasNOPL; }
- bool hasCMov() const { return HasCMov; }
+ // SSE codegen depends on cmovs, and all SSE1+ processors support them.
+ // All 64-bit processors support cmov.
+ bool hasCMov() const { return HasCMov || X86SSELevel >= SSE1 || is64Bit(); }
bool hasSSE1() const { return X86SSELevel >= SSE1; }
bool hasSSE2() const { return X86SSELevel >= SSE2; }
bool hasSSE3() const { return X86SSELevel >= SSE3; }
Modified: llvm/trunk/test/CodeGen/X86/atomic-minmax-i6432.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/atomic-minmax-i6432.ll?rev=340707&r1=340706&r2=340707&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/atomic-minmax-i6432.ll (original)
+++ llvm/trunk/test/CodeGen/X86/atomic-minmax-i6432.ll Sun Aug 26 11:29:33 2018
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mattr=+cmov,cx16 -mtriple=i386-pc-linux -verify-machineinstrs < %s | FileCheck %s -check-prefix=LINUX
-; RUN: llc -mattr=cx16 -mtriple=i386-macosx -relocation-model=pic -verify-machineinstrs < %s | FileCheck %s -check-prefix=PIC
+; RUN: llc -mattr=+cmov -mtriple=i386-pc-linux -verify-machineinstrs < %s | FileCheck %s -check-prefix=LINUX
+; RUN: llc -mattr=+cmov -mtriple=i386-macosx -relocation-model=pic -verify-machineinstrs < %s | FileCheck %s -check-prefix=PIC
@sc64 = external global i64
Modified: llvm/trunk/test/CodeGen/X86/atomic32.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/atomic32.ll?rev=340707&r1=340706&r2=340707&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/atomic32.ll (original)
+++ llvm/trunk/test/CodeGen/X86/atomic32.ll Sun Aug 26 11:29:33 2018
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -O0 -mtriple=x86_64-unknown-unknown -mcpu=corei7 -verify-machineinstrs | FileCheck %s -check-prefixes=X64,X64-CMOV
; RUN: llc < %s -O0 -mtriple=i686-unknown-unknown -mcpu=corei7 -verify-machineinstrs | FileCheck %s -check-prefixes=X86,X86-CMOV
-; RUN: llc < %s -O0 -mtriple=i686-unknown-unknown -mcpu=corei7 -mattr=-cmov -verify-machineinstrs | FileCheck %s --check-prefixes=X86,X86-NOCMOV
+; RUN: llc < %s -O0 -mtriple=i686-unknown-unknown -mcpu=corei7 -mattr=-cmov,-sse -verify-machineinstrs | FileCheck %s --check-prefixes=X86,X86-NOCMOV
@sc32 = external global i32
More information about the llvm-commits
mailing list