[llvm] 012dd42 - [X86] Support -march=x86-64-v[234]
Fangrui Song via llvm-commits
llvm-commits at lists.llvm.org
Mon Oct 12 10:29:54 PDT 2020
Author: Fangrui Song
Date: 2020-10-12T10:29:46-07:00
New Revision: 012dd42e027e2ff3d183cc9dcf27004cf9711720
URL: https://github.com/llvm/llvm-project/commit/012dd42e027e2ff3d183cc9dcf27004cf9711720
DIFF: https://github.com/llvm/llvm-project/commit/012dd42e027e2ff3d183cc9dcf27004cf9711720.diff
LOG: [X86] Support -march=x86-64-v[234]
PR47686. These micro-architecture levels are defined in the x86-64 psABI:
https://gitlab.com/x86-psABIs/x86-64-ABI/-/commit/77566eb03bc6a326811cb7e9
GCC 11 will support these levels.
Note, -mtune=x86-64-v[234] are invalid and __builtin_cpu_is cannot be
used on them.
Reviewed By: craig.topper, RKSimon
Differential Revision: https://reviews.llvm.org/D89197
Added:
clang/test/Preprocessor/predefined-arch-macros-x86.c
Modified:
clang/docs/ReleaseNotes.rst
clang/docs/UsersManual.rst
clang/lib/Basic/Targets/X86.cpp
clang/lib/Basic/Targets/X86.h
clang/test/CodeGen/attr-target-x86.c
clang/test/Driver/x86-march.c
clang/test/Driver/x86-mtune.c
clang/test/Misc/target-invalid-cpu-note.c
clang/test/Preprocessor/predefined-arch-macros.c
clang/test/Sema/builtin-cpu-supports.c
llvm/docs/ReleaseNotes.rst
llvm/include/llvm/Support/X86TargetParser.h
llvm/lib/Support/X86TargetParser.cpp
llvm/lib/Target/X86/X86.td
llvm/test/CodeGen/X86/cpus-other.ll
Removed:
################################################################################
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 66427f293775..d0ef03acaa4c 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -188,7 +188,10 @@ X86 Support in Clang
- The x86 intrinsics ``__rorb``, ``__rorw``, ``__rord``, ``__rorq`, ``_rotr``,
``_rotwr`` and ``_lrotr`` may now be used within constant expressions.
-- Support for -march=sapphirerapids was added.
+- Support for ``-march=sapphirerapids`` was added.
+
+- Support for ``-march=x86-64-v[234]`` has been added.
+ See :doc:`UsersManual` for details about these micro-architecture levels.
- The -mtune command line option is no longer ignored for X86. This can be used
to request microarchitectural optimizations independent on -march. -march=<cpu>
diff --git a/clang/docs/UsersManual.rst b/clang/docs/UsersManual.rst
index ed6c9e3bc341..f313ce72d8ed 100644
--- a/clang/docs/UsersManual.rst
+++ b/clang/docs/UsersManual.rst
@@ -3201,6 +3201,15 @@ and the ABI remains 32-bit but the assembler emits instructions
appropriate for a CPU running in 16-bit mode, with address-size and
operand-size prefixes to enable 32-bit addressing and operations.
+Several micro-architecture levels as specified by the x86-64 psABI are defined.
+They are cumulative in the sense that features from previous levels are
+implicitly included in later levels.
+
+- ``-march=x86-64``: CMOV, CMPXCHG8B, FPU, FXSR, MMX, FXSR, SCE, SSE, SSE2
+- ``-march=x86-64-v2``: (close to Nehalem) CMPXCHG16B, LAHF-SAHF, POPCNT, SSE3, SSE4.1, SSE4.2, SSSE3
+- ``-march=x86-64-v3``: (close to Haswell) AVX, AVX2, BMI1, BMI2, F16C, FMA, LZCNT, MOVBE, XSAVE
+- ``-march=x86-64-v4``: AVX512F, AVX512BW, AVX512CD, AVX512DQ, AVX512VL
+
ARM
^^^
diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp
index ef83703d6097..98ac13b1ae9b 100644
--- a/clang/lib/Basic/Targets/X86.cpp
+++ b/clang/lib/Basic/Targets/X86.cpp
@@ -506,6 +506,9 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts,
case CK_K8:
case CK_K8SSE3:
case CK_x86_64:
+ case CK_x86_64_v2:
+ case CK_x86_64_v3:
+ case CK_x86_64_v4:
defineCPUMacros(Builder, "k8");
break;
case CK_AMDFAM10:
@@ -1312,6 +1315,9 @@ Optional<unsigned> X86TargetInfo::getCPUCacheLineSize() const {
case CK_ZNVER2:
// Deprecated
case CK_x86_64:
+ case CK_x86_64_v2:
+ case CK_x86_64_v3:
+ case CK_x86_64_v4:
case CK_Yonah:
case CK_Penryn:
case CK_Core2:
@@ -1456,7 +1462,7 @@ void X86TargetInfo::fillValidCPUList(SmallVectorImpl<StringRef> &Values) const {
}
void X86TargetInfo::fillValidTuneCPUList(SmallVectorImpl<StringRef> &Values) const {
- llvm::X86::fillValidCPUArchList(Values);
+ llvm::X86::fillValidTuneCPUList(Values);
}
ArrayRef<const char *> X86TargetInfo::getGCCRegNames() const {
diff --git a/clang/lib/Basic/Targets/X86.h b/clang/lib/Basic/Targets/X86.h
index 7b2b7dcf6460..4fc495a09bbb 100644
--- a/clang/lib/Basic/Targets/X86.h
+++ b/clang/lib/Basic/Targets/X86.h
@@ -314,7 +314,7 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo {
// Allow 32-bit only CPUs regardless of 64-bit mode unlike isValidCPUName.
// NOTE: gcc rejects 32-bit mtune CPUs in 64-bit mode. But being lenient
// since mtune was ignored by clang for so long.
- return llvm::X86::parseArchX86(Name) != llvm::X86::CK_None;
+ return llvm::X86::parseTuneCPU(Name) != llvm::X86::CK_None;
}
void fillValidCPUList(SmallVectorImpl<StringRef> &Values) const override;
diff --git a/clang/test/CodeGen/attr-target-x86.c b/clang/test/CodeGen/attr-target-x86.c
index 738b65b11131..99cca0b4d9dd 100644
--- a/clang/test/CodeGen/attr-target-x86.c
+++ b/clang/test/CodeGen/attr-target-x86.c
@@ -32,6 +32,10 @@ int __attribute__((target("arch=lakemont,mmx"))) use_before_def(void) {
int __attribute__((target("tune=sandybridge"))) walrus(int a) { return 4; }
+void __attribute__((target("arch=x86-64-v2"))) x86_64_v2() {}
+void __attribute__((target("arch=x86-64-v3"))) x86_64_v3() {}
+void __attribute__((target("arch=x86-64-v4"))) x86_64_v4() {}
+
// Check that we emit the additional subtarget and cpu features for foo and not for baz or bar.
// CHECK: baz{{.*}} #0
// CHECK: foo{{.*}} #1
@@ -59,3 +63,10 @@ int __attribute__((target("tune=sandybridge"))) walrus(int a) { return 4; }
// CHECK: #7 = {{.*}}"target-cpu"="lakemont" "target-features"="+cx8,+mmx"
// CHECK-NOT: tune-cpu
// CHECK: #8 = {{.*}}"target-cpu"="i686" "target-features"="+cx8,+x87" "tune-cpu"="sandybridge"
+
+// CHECK: "target-cpu"="x86-64-v2"
+// CHECK-SAME: "target-features"="+cx16,+cx8,+fxsr,+mmx,+popcnt,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87"
+// CHECK: "target-cpu"="x86-64-v3"
+// CHECK-SAME: "target-features"="+avx,+avx2,+bmi,+bmi2,+cx16,+cx8,+f16c,+fma,+fxsr,+lzcnt,+mmx,+movbe,+popcnt,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave"
+// CHECK: "target-cpu"="x86-64-v4"
+// CHECK-SAME: "target-features"="+avx,+avx2,+avx512bw,+avx512cd,+avx512dq,+avx512f,+avx512vl,+bmi,+bmi2,+cx16,+cx8,+f16c,+fma,+fxsr,+lzcnt,+mmx,+movbe,+popcnt,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave"
diff --git a/clang/test/Driver/x86-march.c b/clang/test/Driver/x86-march.c
index 57882a5bf637..26ffc161c273 100644
--- a/clang/test/Driver/x86-march.c
+++ b/clang/test/Driver/x86-march.c
@@ -175,3 +175,12 @@
// RUN: %clang -target x86_64-unknown-unknown -c -### %s -march=znver2 2>&1 \
// RUN: | FileCheck %s -check-prefix=znver2
// znver2: "-target-cpu" "znver2"
+
+// RUN: %clang -target x86_64 -c -### %s -march=x86-64 2>&1 | FileCheck %s --check-prefix=x86-64
+// x86-64: "-target-cpu" "x86-64"
+// RUN: %clang -target x86_64 -c -### %s -march=x86-64-v2 2>&1 | FileCheck %s --check-prefix=x86-64-v2
+// x86-64-v2: "-target-cpu" "x86-64-v2"
+// RUN: %clang -target x86_64 -c -### %s -march=x86-64-v3 2>&1 | FileCheck %s --check-prefix=x86-64-v3
+// x86-64-v3: "-target-cpu" "x86-64-v3"
+// RUN: %clang -target x86_64 -c -### %s -march=x86-64-v4 2>&1 | FileCheck %s --check-prefix=x86-64-v4
+// x86-64-v4: "-target-cpu" "x86-64-v4"
diff --git a/clang/test/Driver/x86-mtune.c b/clang/test/Driver/x86-mtune.c
index a313412b6ab2..9ab4bf10df8e 100644
--- a/clang/test/Driver/x86-mtune.c
+++ b/clang/test/Driver/x86-mtune.c
@@ -40,3 +40,8 @@
// RUN: | FileCheck %s -check-prefix=marchmtune
// marchmtune: "-target-cpu" "core2"
// mmarchmtune: "-tune-cpu" "nehalem"
+
+// RUN: not %clang %s -target x86_64 -E -mtune=x86-64-v2 2>&1 | FileCheck %s --check-prefix=INVALID
+// RUN: not %clang %s -target x86_64 -E -mtune=x86-64-v3 2>&1 | FileCheck %s --check-prefix=INVALID
+// RUN: not %clang %s -target x86_64 -E -mtune=x86-64-v4 2>&1 | FileCheck %s --check-prefix=INVALID
+// INVALID: error: unknown target CPU '{{.*}}'
diff --git a/clang/test/Misc/target-invalid-cpu-note.c b/clang/test/Misc/target-invalid-cpu-note.c
index 4f5dbac133cb..c35e6c2c8e62 100644
--- a/clang/test/Misc/target-invalid-cpu-note.c
+++ b/clang/test/Misc/target-invalid-cpu-note.c
@@ -25,7 +25,7 @@
// X86-SAME: athlon, athlon-tbird, athlon-xp, athlon-mp, athlon-4, k8, athlon64,
// X86-SAME: athlon-fx, opteron, k8-sse3, athlon64-sse3, opteron-sse3, amdfam10,
// X86-SAME: barcelona, btver1, btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2,
-// X86-SAME: x86-64, geode
+// X86-SAME: x86-64, x86-64-v2, x86-64-v3, x86-64-v4, geode{{$}}
// RUN: not %clang_cc1 -triple x86_64--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix X86_64
// X86_64: error: unknown target CPU 'not-a-cpu'
@@ -35,7 +35,8 @@
// X86_64-SAME: core-avx2, broadwell, skylake, skylake-avx512, skx, cascadelake, cooperlake, cannonlake,
// X86_64-SAME: icelake-client, icelake-server, tigerlake, sapphirerapids, knl, knm, k8, athlon64, athlon-fx, opteron, k8-sse3,
// X86_64-SAME: athlon64-sse3, opteron-sse3, amdfam10, barcelona, btver1,
-// X86_64-SAME: btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2, x86-64
+// X86_64-SAME: btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2,
+// X86_64-SAME: x86-64, x86-64-v2, x86-64-v3, x86-64-v4{{$}}
// RUN: not %clang_cc1 -triple i386--- -tune-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix TUNE_X86
// TUNE_X86: error: unknown target CPU 'not-a-cpu'
@@ -49,7 +50,7 @@
// TUNE_X86-SAME: athlon, athlon-tbird, athlon-xp, athlon-mp, athlon-4, k8, athlon64,
// TUNE_X86-SAME: athlon-fx, opteron, k8-sse3, athlon64-sse3, opteron-sse3, amdfam10,
// TUNE_X86-SAME: barcelona, btver1, btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2,
-// TUNE_X86-SAME: x86-64, geode
+// TUNE_X86-SAME: x86-64, geode{{$}}
// RUN: not %clang_cc1 -triple x86_64--- -tune-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix TUNE_X86_64
// TUNE_X86_64: error: unknown target CPU 'not-a-cpu'
@@ -63,7 +64,7 @@
// TUNE_X86_64-SAME: athlon, athlon-tbird, athlon-xp, athlon-mp, athlon-4, k8, athlon64,
// TUNE_X86_64-SAME: athlon-fx, opteron, k8-sse3, athlon64-sse3, opteron-sse3, amdfam10,
// TUNE_X86_64-SAME: barcelona, btver1, btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2,
-// TUNE_X86_64-SAME: x86-64, geode
+// TUNE_X86_64-SAME: x86-64, geode{{$}}
// RUN: not %clang_cc1 -triple nvptx--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix NVPTX
// NVPTX: error: unknown target CPU 'not-a-cpu'
diff --git a/clang/test/Preprocessor/predefined-arch-macros-x86.c b/clang/test/Preprocessor/predefined-arch-macros-x86.c
new file mode 100644
index 000000000000..37b7c612b491
--- /dev/null
+++ b/clang/test/Preprocessor/predefined-arch-macros-x86.c
@@ -0,0 +1,54 @@
+// RUN: %clang -target x86_64 -march=x86-64 -E -dM %s > %tv1
+// RUN: FileCheck %s --check-prefix=X86_64_V1 < %tv1
+
+// X86_64_V1: #define __MMX__ 1
+// X86_64_V1: #define __SSE2_MATH__ 1
+// X86_64_V1: #define __SSE2__ 1
+// X86_64_V1: #define __SSE_MATH__ 1
+// X86_64_V1: #define __SSE__ 1
+// X86_64_V1: #define __amd64 1
+// X86_64_V1: #define __amd64__ 1
+// X86_64_V1: #define __k8 1
+// X86_64_V1: #define __k8__ 1
+// X86_64_V1: #define __x86_64 1
+// X86_64_V1: #define __x86_64__ 1
+
+// RUN: %clang -target x86_64 -march=x86-64-v2 -E -dM %s > %tv2
+// RUN:
diff %tv1 %tv2 > %t.txt || true
+// RUN: FileCheck %s --check-prefix=X86_64_V2 < %t.txt
+
+/// v2 is close to Nehalem.
+// X86_64_V2: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_16 1
+// X86_64_V2: #define __LAHF_SAHF__ 1
+// X86_64_V2: #define __POPCNT__ 1
+// X86_64_V2: #define __SSE3__ 1
+// X86_64_V2-NEXT: #define __SSE4_1__ 1
+// X86_64_V2-NEXT: #define __SSE4_2__ 1
+// X86_64_V2: #define __SSSE3__ 1
+
+/// v3 is close to Haswell.
+// RUN: %clang -target x86_64 -march=x86-64-v3 -E -dM %s > %tv3
+// RUN:
diff %tv2 %tv3 > %t.txt || true
+// RUN: FileCheck %s --check-prefix=X86_64_V3 < %t.txt
+
+// X86_64_V3: #define __AVX2__ 1
+// X86_64_V3-NEXT: #define __AVX__ 1
+// X86_64_V3: #define __BMI2__ 1
+// X86_64_V3-NEXT: #define __BMI__ 1
+// X86_64_V3: #define __F16C__ 1
+// X86_64_V3: #define __FMA__ 1
+// X86_64_V3: #define __LZCNT__ 1
+// X86_64_V3: #define __MOVBE__ 1
+// X86_64_V3: #define __XSAVE__ 1
+
+/// v4 is close to the AVX-512 level implemented by Xeon Scalable Processors.
+// RUN: %clang -target x86_64 -march=x86-64-v4 -E -dM %s > %tv4
+// RUN:
diff %tv3 %tv4 > %t.txt || true
+// RUN: FileCheck %s --check-prefix=X86_64_V4 < %t.txt
+
+// X86_64_V4: #define __AVX512BW__ 1
+// X86_64_V4-NEXT: #define __AVX512CD__ 1
+// X86_64_V4-NEXT: #define __AVX512DQ__ 1
+// X86_64_V4-NEXT: #define __AVX512F__ 1
+// X86_64_V4-NEXT: #define __AVX512VL__ 1
+// X86_64_V4-NOT: #define __AVX512{{.*}}
diff --git a/clang/test/Preprocessor/predefined-arch-macros.c b/clang/test/Preprocessor/predefined-arch-macros.c
index 7993c587efcd..820a242955c2 100644
--- a/clang/test/Preprocessor/predefined-arch-macros.c
+++ b/clang/test/Preprocessor/predefined-arch-macros.c
@@ -2263,21 +2263,6 @@
// CHECK_X86_64_M32: #define __k8__ 1
// CHECK_X86_64_M32: #define i386 1
-// RUN: %clang -march=x86-64 -m64 -E -dM %s -o - 2>&1 \
-// RUN: -target i386-unknown-linux \
-// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_X86_64_M64
-// CHECK_X86_64_M64: #define __MMX__ 1
-// CHECK_X86_64_M64: #define __SSE2_MATH__ 1
-// CHECK_X86_64_M64: #define __SSE2__ 1
-// CHECK_X86_64_M64: #define __SSE_MATH__ 1
-// CHECK_X86_64_M64: #define __SSE__ 1
-// CHECK_X86_64_M64: #define __amd64 1
-// CHECK_X86_64_M64: #define __amd64__ 1
-// CHECK_X86_64_M64: #define __k8 1
-// CHECK_X86_64_M64: #define __k8__ 1
-// CHECK_X86_64_M64: #define __x86_64 1
-// CHECK_X86_64_M64: #define __x86_64__ 1
-
// RUN: %clang -march=k8 -m32 -E -dM %s -o - 2>&1 \
// RUN: -target i386-unknown-linux \
// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_K8_M32
diff --git a/clang/test/Sema/builtin-cpu-supports.c b/clang/test/Sema/builtin-cpu-supports.c
index 026b5b7e38e9..05b58314f093 100644
--- a/clang/test/Sema/builtin-cpu-supports.c
+++ b/clang/test/Sema/builtin-cpu-supports.c
@@ -15,6 +15,11 @@ int main() {
if (__builtin_cpu_is("int")) // expected-error {{invalid cpu name for builtin}}
a("intel");
+
+ (void)__builtin_cpu_is("x86-64"); // expected-error {{invalid cpu name for builtin}}
+ (void)__builtin_cpu_is("x86-64-v2"); // expected-error {{invalid cpu name for builtin}}
+ (void)__builtin_cpu_is("x86-64-v3"); // expected-error {{invalid cpu name for builtin}}
+ (void)__builtin_cpu_is("x86-64-v4"); // expected-error {{invalid cpu name for builtin}}
#else
if (__builtin_cpu_supports("vsx")) // expected-error {{use of unknown builtin}}
a("vsx");
diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst
index b81547dca6df..d75245e7055d 100644
--- a/llvm/docs/ReleaseNotes.rst
+++ b/llvm/docs/ReleaseNotes.rst
@@ -105,7 +105,7 @@ During this release ...
* The 'mpx' feature was removed from the backend. It had been removed from clang
frontend in 10.0. Mention of the 'mpx' feature in an IR file will print a
message to stderr, but IR should still compile.
-* Support for -march=sapphirerapids was added.
+* Support for ``-march=sapphirerapids`` and ``-march=x86-64-v[234]`` has been added.
* The assembler now has support for {disp32} and {disp8} pseudo prefixes for
controlling displacement size for memory operands and jump displacements. The
assembler also supports the .d32 and .d8 mnemonic suffixes to do the same.
diff --git a/llvm/include/llvm/Support/X86TargetParser.h b/llvm/include/llvm/Support/X86TargetParser.h
index d97f620419e1..e66a074aa000 100644
--- a/llvm/include/llvm/Support/X86TargetParser.h
+++ b/llvm/include/llvm/Support/X86TargetParser.h
@@ -121,17 +121,24 @@ enum CPUKind {
CK_ZNVER1,
CK_ZNVER2,
CK_x86_64,
+ CK_x86_64_v2,
+ CK_x86_64_v3,
+ CK_x86_64_v4,
CK_Geode,
};
/// Parse \p CPU string into a CPUKind. Will only accept 64-bit capable CPUs if
/// \p Only64Bit is true.
CPUKind parseArchX86(StringRef CPU, bool Only64Bit = false);
+CPUKind parseTuneCPU(StringRef CPU, bool Only64Bit = false);
/// Provide a list of valid CPU names. If \p Only64Bit is true, the list will
/// only contain 64-bit capable CPUs.
void fillValidCPUArchList(SmallVectorImpl<StringRef> &Values,
bool Only64Bit = false);
+/// Provide a list of valid -mtune names.
+void fillValidTuneCPUList(SmallVectorImpl<StringRef> &Values,
+ bool Only64Bit = false);
/// Get the key feature prioritizing target multiversioning.
ProcessorFeatures getKeyFeature(CPUKind Kind);
diff --git a/llvm/lib/Support/X86TargetParser.cpp b/llvm/lib/Support/X86TargetParser.cpp
index 1e8569bcd540..35582a9b277c 100644
--- a/llvm/lib/Support/X86TargetParser.cpp
+++ b/llvm/lib/Support/X86TargetParser.cpp
@@ -137,6 +137,15 @@ constexpr FeatureBitset FeaturesNocona =
// Basic 64-bit capable CPU.
constexpr FeatureBitset FeaturesX86_64 = FeaturesPentium4 | Feature64BIT;
+constexpr FeatureBitset FeaturesX86_64_V2 = FeaturesX86_64 | FeatureSAHF |
+ FeaturePOPCNT | FeatureSSE4_2 |
+ FeatureCMPXCHG16B;
+constexpr FeatureBitset FeaturesX86_64_V3 =
+ FeaturesX86_64_V2 | FeatureAVX2 | FeatureBMI | FeatureBMI2 | FeatureF16C |
+ FeatureFMA | FeatureLZCNT | FeatureMOVBE | FeatureXSAVE;
+constexpr FeatureBitset FeaturesX86_64_V4 = FeaturesX86_64_V3 |
+ FeatureAVX512BW | FeatureAVX512CD |
+ FeatureAVX512DQ | FeatureAVX512VL;
// Intel Core CPUs
constexpr FeatureBitset FeaturesCore2 =
@@ -383,10 +392,15 @@ constexpr ProcInfo Processors[] = {
{ {"znver2"}, CK_ZNVER2, FEATURE_AVX2, FeaturesZNVER2 },
// Generic 64-bit processor.
{ {"x86-64"}, CK_x86_64, ~0U, FeaturesX86_64 },
+ { {"x86-64-v2"}, CK_x86_64_v2, ~0U, FeaturesX86_64_V2 },
+ { {"x86-64-v3"}, CK_x86_64_v3, ~0U, FeaturesX86_64_V3 },
+ { {"x86-64-v4"}, CK_x86_64_v4, ~0U, FeaturesX86_64_V4 },
// Geode processors.
{ {"geode"}, CK_Geode, ~0U, FeaturesGeode },
};
+constexpr const char *NoTuneList[] = {"x86-64-v2", "x86-64-v3", "x86-64-v4"};
+
X86::CPUKind llvm::X86::parseArchX86(StringRef CPU, bool Only64Bit) {
for (const auto &P : Processors)
if (P.Name == CPU && (P.Features[FEATURE_64BIT] || !Only64Bit))
@@ -395,6 +409,12 @@ X86::CPUKind llvm::X86::parseArchX86(StringRef CPU, bool Only64Bit) {
return CK_None;
}
+X86::CPUKind llvm::X86::parseTuneCPU(StringRef CPU, bool Only64Bit) {
+ if (llvm::is_contained(NoTuneList, CPU))
+ return CK_None;
+ return parseArchX86(CPU, Only64Bit);
+}
+
void llvm::X86::fillValidCPUArchList(SmallVectorImpl<StringRef> &Values,
bool Only64Bit) {
for (const auto &P : Processors)
@@ -402,6 +422,14 @@ void llvm::X86::fillValidCPUArchList(SmallVectorImpl<StringRef> &Values,
Values.emplace_back(P.Name);
}
+void llvm::X86::fillValidTuneCPUList(SmallVectorImpl<StringRef> &Values,
+ bool Only64Bit) {
+ for (const ProcInfo &P : Processors)
+ if (!P.Name.empty() && (P.Features[FEATURE_64BIT] || !Only64Bit) &&
+ !llvm::is_contained(NoTuneList, P.Name))
+ Values.emplace_back(P.Name);
+}
+
ProcessorFeatures llvm::X86::getKeyFeature(X86::CPUKind Kind) {
// FIXME: Can we avoid a linear search here? The table might be sorted by
// CPUKind so we could binary search?
diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td
index e5d47a0ac325..5419c35502c6 100644
--- a/llvm/lib/Target/X86/X86.td
+++ b/llvm/lib/Target/X86/X86.td
@@ -558,18 +558,27 @@ include "X86SchedSkylakeServer.td"
//===----------------------------------------------------------------------===//
def ProcessorFeatures {
+ // x86-64 and x86-64-v[234]
+ list<SubtargetFeature> X86_64V1Features = [
+ FeatureX87, FeatureCMPXCHG8B, FeatureCMOV, FeatureMMX, FeatureSSE2,
+ FeatureFXSR, FeatureNOPL, Feature64Bit
+ ];
+ list<SubtargetFeature> X86_64V2Features = !listconcat(
+ X86_64V1Features,
+ [FeatureCMPXCHG16B, FeatureLAHFSAHF, FeaturePOPCNT, FeatureSSE42]);
+ list<SubtargetFeature> X86_64V3Features = !listconcat(X86_64V2Features, [
+ FeatureAVX2, FeatureBMI, FeatureBMI2, FeatureF16C, FeatureFMA, FeatureLZCNT,
+ FeatureMOVBE, FeatureXSAVE
+ ]);
+ list<SubtargetFeature> X86_64V4Features = !listconcat(X86_64V3Features, [
+ FeatureBWI,
+ FeatureCDI,
+ FeatureDQI,
+ FeatureVLX,
+ ]);
+
// Nehalem
- list<SubtargetFeature> NHMFeatures = [FeatureX87,
- FeatureCMPXCHG8B,
- FeatureCMOV,
- FeatureMMX,
- FeatureSSE42,
- FeatureFXSR,
- FeatureNOPL,
- Feature64Bit,
- FeatureCMPXCHG16B,
- FeaturePOPCNT,
- FeatureLAHFSAHF];
+ list<SubtargetFeature> NHMFeatures = X86_64V2Features;
list<SubtargetFeature> NHMTuning = [FeatureMacroFusion,
FeatureInsertVZEROUPPER];
@@ -1350,16 +1359,7 @@ def : Proc<"c3-2", [FeatureX87, FeatureCMPXCHG8B, FeatureMMX,
// covers a huge swath of x86 processors. If there are specific scheduling
// knobs which need to be tuned
diff erently for AMD chips, we might consider
// forming a common base for them.
-def : ProcModel<"x86-64", SandyBridgeModel, [
- FeatureX87,
- FeatureCMPXCHG8B,
- FeatureCMOV,
- FeatureMMX,
- FeatureSSE2,
- FeatureFXSR,
- FeatureNOPL,
- Feature64Bit,
-],
+def : ProcModel<"x86-64", SandyBridgeModel, ProcessorFeatures.X86_64V1Features,
[
FeatureSlow3OpsLEA,
FeatureSlowDivide64,
@@ -1368,6 +1368,16 @@ def : ProcModel<"x86-64", SandyBridgeModel, [
FeatureInsertVZEROUPPER
]>;
+// x86-64 micro-architecture levels.
+def : ProcModel<"x86-64-v2", SandyBridgeModel, ProcessorFeatures.X86_64V2Features,
+ ProcessorFeatures.SNBTuning>;
+// Close to Haswell.
+def : ProcModel<"x86-64-v3", HaswellModel, ProcessorFeatures.X86_64V3Features,
+ ProcessorFeatures.HSWTuning>;
+// Close to the AVX-512 level implemented by Xeon Scalable Processors.
+def : ProcModel<"x86-64-v4", HaswellModel, ProcessorFeatures.X86_64V4Features,
+ ProcessorFeatures.SKXTuning>;
+
//===----------------------------------------------------------------------===//
// Calling Conventions
//===----------------------------------------------------------------------===//
diff --git a/llvm/test/CodeGen/X86/cpus-other.ll b/llvm/test/CodeGen/X86/cpus-other.ll
index 89231bf7665a..0b97be4b6999 100644
--- a/llvm/test/CodeGen/X86/cpus-other.ll
+++ b/llvm/test/CodeGen/X86/cpus-other.ll
@@ -16,6 +16,11 @@
; RUN: llc < %s -o /dev/null -mtriple=i686-unknown-unknown -mcpu=c3 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
; RUN: llc < %s -o /dev/null -mtriple=i686-unknown-unknown -mcpu=c3-2 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
+;; x86-64 micro-architecture levels.
+; RUN: llc %s -filetype=null -mtriple=x86_64 -mcpu=x86-64-v2
+; RUN: llc %s -filetype=null -mtriple=x86_64 -mcpu=x86-64-v3
+; RUN: llc %s -filetype=null -mtriple=x86_64 -mcpu=x86-64-v4
+
define void @foo() {
ret void
}
More information about the llvm-commits
mailing list