[clang] [compiler-rt] [llvm] [X86] Support -march=diamondrapids (PR #113881)
Freddy Ye via cfe-commits
cfe-commits at lists.llvm.org
Thu Nov 14 21:52:09 PST 2024
https://github.com/FreddyLeaf updated https://github.com/llvm/llvm-project/pull/113881
>From 4a7f17f29a007ce7af0893670a362f738b0d8d6e Mon Sep 17 00:00:00 2001
From: Freddy Ye <freddy.ye at intel.com>
Date: Mon, 21 Oct 2024 15:27:24 +0800
Subject: [PATCH 1/5] [X86] Support -march=diamondrapids
Ref.: https://cdrdv2.intel.com/v1/dl/getContent/671368
---
clang/lib/Basic/Targets/X86.cpp | 2 +
clang/test/CodeGen/attr-cpuspecific-cpus.c | 1 +
clang/test/CodeGen/attr-target-mv.c | 1 +
clang/test/CodeGen/target-builtin-noerror.c | 1 +
clang/test/Driver/x86-march.c | 4 ++
clang/test/Misc/target-invalid-cpu-note/x86.c | 4 ++
.../Preprocessor/predefined-arch-macros.c | 42 +++++++++++++++++++
compiler-rt/lib/builtins/cpu_model/x86.c | 14 +++++++
.../llvm/TargetParser/X86TargetParser.def | 1 +
.../llvm/TargetParser/X86TargetParser.h | 1 +
llvm/lib/Target/X86/X86.td | 24 +++++++++++
llvm/lib/TargetParser/Host.cpp | 13 ++++++
llvm/lib/TargetParser/X86TargetParser.cpp | 8 ++++
llvm/test/CodeGen/X86/cpus-intel.ll | 2 +
14 files changed, 118 insertions(+)
diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp
index d067ec218b5270..7787bbde513dcf 100644
--- a/clang/lib/Basic/Targets/X86.cpp
+++ b/clang/lib/Basic/Targets/X86.cpp
@@ -649,6 +649,7 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts,
case CK_GraniterapidsD:
case CK_Emeraldrapids:
case CK_Clearwaterforest:
+ case CK_DiamondRapids:
// FIXME: Historically, we defined this legacy name, it would be nice to
// remove it at some point. We've never exposed fine-grained names for
// recent primary x86 CPUs, and we should keep it that way.
@@ -1613,6 +1614,7 @@ std::optional<unsigned> X86TargetInfo::getCPUCacheLineSize() const {
case CK_GraniterapidsD:
case CK_Emeraldrapids:
case CK_Clearwaterforest:
+ case CK_DiamondRapids:
case CK_KNL:
case CK_KNM:
// K7
diff --git a/clang/test/CodeGen/attr-cpuspecific-cpus.c b/clang/test/CodeGen/attr-cpuspecific-cpus.c
index dd154fd227b25b..1dd095ec9e191f 100644
--- a/clang/test/CodeGen/attr-cpuspecific-cpus.c
+++ b/clang/test/CodeGen/attr-cpuspecific-cpus.c
@@ -43,6 +43,7 @@ ATTR(cpu_specific(icelake_client)) void CPU(void){}
ATTR(cpu_specific(tigerlake)) void CPU(void){}
ATTR(cpu_specific(alderlake)) void CPU(void){}
ATTR(cpu_specific(sapphirerapids)) void CPU(void){}
+ATTR(cpu_specific(diamondrapids)) void CPU(void){}
// ALIAS CPUs
ATTR(cpu_specific(pentium_iii_no_xmm_regs)) void CPU0(void){}
diff --git a/clang/test/CodeGen/attr-target-mv.c b/clang/test/CodeGen/attr-target-mv.c
index 2c4b95ca04370a..6911b55203b7e7 100644
--- a/clang/test/CodeGen/attr-target-mv.c
+++ b/clang/test/CodeGen/attr-target-mv.c
@@ -29,6 +29,7 @@ int __attribute__((target("arch=lunarlake"))) foo(void) {return 23;}
int __attribute__((target("arch=gracemont"))) foo(void) {return 24;}
int __attribute__((target("arch=pantherlake"))) foo(void) {return 25;}
int __attribute__((target("arch=clearwaterforest"))) foo(void) {return 26;}
+int __attribute__((target("arch=diamondrapids"))) foo(void) {return 27;}
int __attribute__((target("default"))) foo(void) { return 2; }
int bar(void) {
diff --git a/clang/test/CodeGen/target-builtin-noerror.c b/clang/test/CodeGen/target-builtin-noerror.c
index 1e53621bc6b5ae..0bbd8c3e5ddd81 100644
--- a/clang/test/CodeGen/target-builtin-noerror.c
+++ b/clang/test/CodeGen/target-builtin-noerror.c
@@ -209,4 +209,5 @@ void verifycpustrings(void) {
(void)__builtin_cpu_is("znver3");
(void)__builtin_cpu_is("znver4");
(void)__builtin_cpu_is("znver5");
+ (void)__builtin_cpu_is("diamondrapids");
}
diff --git a/clang/test/Driver/x86-march.c b/clang/test/Driver/x86-march.c
index 3bc2a82ae778d6..341f01c8d668df 100644
--- a/clang/test/Driver/x86-march.c
+++ b/clang/test/Driver/x86-march.c
@@ -120,6 +120,10 @@
// RUN: | FileCheck %s -check-prefix=clearwaterforest
// clearwaterforest: "-target-cpu" "clearwaterforest"
//
+// RUN: %clang -target x86_64-unknown-unknown -c -### %s -march=diamondrapids 2>&1 \
+// RUN: | FileCheck %s -check-prefix=diamondrapids
+// diamondrapids: "-target-cpu" "diamondrapids"
+//
// RUN: %clang -target x86_64-unknown-unknown -c -### %s -march=lakemont 2>&1 \
// RUN: | FileCheck %s -check-prefix=lakemont
// lakemont: "-target-cpu" "lakemont"
diff --git a/clang/test/Misc/target-invalid-cpu-note/x86.c b/clang/test/Misc/target-invalid-cpu-note/x86.c
index 7879676040af46..f89cdc2aa573ff 100644
--- a/clang/test/Misc/target-invalid-cpu-note/x86.c
+++ b/clang/test/Misc/target-invalid-cpu-note/x86.c
@@ -69,6 +69,7 @@
// X86-SAME: {{^}}, graniterapids-d
// X86-SAME: {{^}}, emeraldrapids
// X86-SAME: {{^}}, clearwaterforest
+// X86-SAME: {{^}}, diamondrapids
// X86-SAME: {{^}}, knl
// X86-SAME: {{^}}, knm
// X86-SAME: {{^}}, lakemont
@@ -155,6 +156,7 @@
// X86_64-SAME: {{^}}, graniterapids-d
// X86_64-SAME: {{^}}, emeraldrapids
// X86_64-SAME: {{^}}, clearwaterforest
+// X86_64-SAME: {{^}}, diamondrapids
// X86_64-SAME: {{^}}, knl
// X86_64-SAME: {{^}}, knm
// X86_64-SAME: {{^}}, k8
@@ -250,6 +252,7 @@
// TUNE_X86-SAME: {{^}}, graniterapids-d
// TUNE_X86-SAME: {{^}}, emeraldrapids
// TUNE_X86-SAME: {{^}}, clearwaterforest
+// TUNE_X86-SAME: {{^}}, diamondrapids
// TUNE_X86-SAME: {{^}}, knl
// TUNE_X86-SAME: {{^}}, knm
// TUNE_X86-SAME: {{^}}, lakemont
@@ -352,6 +355,7 @@
// TUNE_X86_64-SAME: {{^}}, graniterapids-d
// TUNE_X86_64-SAME: {{^}}, emeraldrapids
// TUNE_X86_64-SAME: {{^}}, clearwaterforest
+// TUNE_X86_64-SAME: {{^}}, diamondrapids
// TUNE_X86_64-SAME: {{^}}, knl
// TUNE_X86_64-SAME: {{^}}, knm
// TUNE_X86_64-SAME: {{^}}, lakemont
diff --git a/clang/test/Preprocessor/predefined-arch-macros.c b/clang/test/Preprocessor/predefined-arch-macros.c
index 35801e758cc58a..41bec61079d827 100644
--- a/clang/test/Preprocessor/predefined-arch-macros.c
+++ b/clang/test/Preprocessor/predefined-arch-macros.c
@@ -1867,6 +1867,9 @@
// RUN: %clang -march=graniterapids-d -m32 -E -dM %s -o - 2>&1 \
// RUN: --target=i386 \
// RUN: | FileCheck -match-full-lines %s -check-prefixes=CHECK_GNR_M32,CHECK_GNRD_M32
+// RUN: %clang -march=diamondrapids -m32 -E -dM %s -o - 2>&1 \
+// RUN: --target=i386 \
+// RUN: | FileCheck -match-full-lines %s -check-prefixes=CHECK_GNR_M32,CHECK_GNRD_M32,CHECK_DMR_M32
// CHECK_GNR_M32: #define __AES__ 1
// CHECK_GNR_M32: #define __AMX_BF16__ 1
// CHECK_GNR_M32-NOT: #define __AMX_COMPLEX__ 1
@@ -1874,6 +1877,8 @@
// CHECK_GNR_M32: #define __AMX_FP16__ 1
// CHECK_GNR_M32: #define __AMX_INT8__ 1
// CHECK_GNR_M32: #define __AMX_TILE__ 1
+// CHECK_DMR_M32: #define __AVX10_2_512__ 1
+// CHECK_DMR_M32: #define __AVX10_2__ 1
// CHECK_GNR_M32: #define __AVX2__ 1
// CHECK_GNR_M32: #define __AVX512BF16__ 1
// CHECK_GNR_M32: #define __AVX512BITALG__ 1
@@ -1888,13 +1893,21 @@
// CHECK_GNR_M32: #define __AVX512VL__ 1
// CHECK_GNR_M32: #define __AVX512VNNI__ 1
// CHECK_GNR_M32: #define __AVX512VPOPCNTDQ__ 1
+// CHECK_DMR_M32: #define __AVXIFMA__ 1
+// CHECK_DMR_M32: #define __AVXNECONVERT__ 1
+// CHECK_DMR_M32: #define __AVXVNNIINT16__ 1
+// CHECK_DMR_M32: #define __AVXVNNIINT8__ 1
// CHECK_GNR_M32: #define __AVXVNNI__ 1
// CHECK_GNR_M32: #define __AVX__ 1
// CHECK_GNR_M32: #define __BMI2__ 1
// CHECK_GNR_M32: #define __BMI__ 1
+// CHECK_DMR_M32: #define __CCMP__ 1
+// CHECK_DMR_M32: #define __CF__ 1
// CHECK_GNR_M32: #define __CLDEMOTE__ 1
// CHECK_GNR_M32: #define __CLFLUSHOPT__ 1
// CHECK_GNR_M32: #define __CLWB__ 1
+// CHECK_DMR_M32: #define __CMPCCXADD__ 1
+// CHECK_DMR_M32: #define __EGPR__ 1
// CHECK_GNR_M32: #define __ENQCMD__ 1
// CHECK_GNR_M32: #define __EVEX256__ 1
// CHECK_GNR_M32: #define __EVEX512__ 1
@@ -1905,20 +1918,27 @@
// CHECK_GNR_M32: #define __LZCNT__ 1
// CHECK_GNR_M32: #define __MMX__ 1
// CHECK_GNR_M32: #define __MOVBE__ 1
+// CHECK_DMR_M32: #define __NDD__ 1
+// CHECK_DMR_M32: #define __NF__ 1
// CHECK_GNR_M32: #define __PCLMUL__ 1
// CHECK_GNR_M32: #define __PCONFIG__ 1
// CHECK_GNR_M32: #define __PKU__ 1
// CHECK_GNR_M32: #define __POPCNT__ 1
+// CHECK_DMR_M32: #define __PPX__ 1
// CHECK_GNR_M32: #define __PREFETCHI__ 1
// CHECK_GNR_M32: #define __PRFCHW__ 1
// CHECK_GNR_M32: #define __PTWRITE__ 1
+// CHECK_DMR_M32: #define __PUSH2POP2__ 1
// CHECK_GNR_M32: #define __RDPID__ 1
// CHECK_GNR_M32: #define __RDRND__ 1
// CHECK_GNR_M32: #define __RDSEED__ 1
// CHECK_GNR_M32: #define __SERIALIZE__ 1
// CHECK_GNR_M32: #define __SGX__ 1
+// CHECK_DMR_M32: #define __SHA512__ 1
// CHECK_GNR_M32: #define __SHA__ 1
// CHECK_GNR_M32: #define __SHSTK__ 1
+// CHECK_DMR_M32: #define __SM3__ 1
+// CHECK_DMR_M32: #define __SM4__ 1
// CHECK_GNR_M32: #define __SSE2__ 1
// CHECK_GNR_M32: #define __SSE3__ 1
// CHECK_GNR_M32: #define __SSE4_1__ 1
@@ -1935,6 +1955,7 @@
// CHECK_GNR_M32: #define __XSAVEOPT__ 1
// CHECK_GNR_M32: #define __XSAVES__ 1
// CHECK_GNR_M32: #define __XSAVE__ 1
+// CHECK_DMR_M32: #define __ZU__ 1
// CHECK_GNR_M32: #define __corei7 1
// CHECK_GNR_M32: #define __corei7__ 1
// CHECK_GNR_M32: #define __i386 1
@@ -1948,6 +1969,9 @@
// RUN: %clang -march=graniterapids-d -m64 -E -dM %s -o - 2>&1 \
// RUN: --target=x86_64 \
// RUN: | FileCheck -match-full-lines %s -check-prefixes=CHECK_GNR_M64,CHECK_GNRD_M64
+// RUN: %clang -march=diamondrapids -m64 -E -dM %s -o - 2>&1 \
+// RUN: --target=x86_64 \
+// RUN: | FileCheck -match-full-lines %s -check-prefixes=CHECK_GNR_M64,CHECK_GNRD_M64,CHECK_DMR_M64
// CHECK_GNR_M64: #define __AES__ 1
// CHECK_GNR_M64: #define __AMX_BF16__ 1
// CHECK_GNR_M64-NOT: #define __AMX_COMPLEX__ 1
@@ -1955,6 +1979,8 @@
// CHECK_GNR_M64: #define __AMX_FP16__ 1
// CHECK_GNR_M64: #define __AMX_INT8__ 1
// CHECK_GNR_M64: #define __AMX_TILE__ 1
+// CHECK_DMR_M64: #define __AVX10_2_512__ 1
+// CHECK_DMR_M64: #define __AVX10_2__ 1
// CHECK_GNR_M64: #define __AVX2__ 1
// CHECK_GNR_M64: #define __AVX512BF16__ 1
// CHECK_GNR_M64: #define __AVX512BITALG__ 1
@@ -1969,13 +1995,21 @@
// CHECK_GNR_M64: #define __AVX512VL__ 1
// CHECK_GNR_M64: #define __AVX512VNNI__ 1
// CHECK_GNR_M64: #define __AVX512VPOPCNTDQ__ 1
+// CHECK_DMR_M64: #define __AVXIFMA__ 1
+// CHECK_DMR_M64: #define __AVXNECONVERT__ 1
+// CHECK_DMR_M64: #define __AVXVNNIINT16__ 1
+// CHECK_DMR_M64: #define __AVXVNNIINT8__ 1
// CHECK_GNR_M64: #define __AVXVNNI__ 1
// CHECK_GNR_M64: #define __AVX__ 1
// CHECK_GNR_M64: #define __BMI2__ 1
// CHECK_GNR_M64: #define __BMI__ 1
+// CHECK_DMR_M64: #define __CCMP__ 1
+// CHECK_DMR_M64: #define __CF__ 1
// CHECK_GNR_M64: #define __CLDEMOTE__ 1
// CHECK_GNR_M64: #define __CLFLUSHOPT__ 1
// CHECK_GNR_M64: #define __CLWB__ 1
+// CHECK_DMR_M64: #define __CMPCCXADD__ 1
+// CHECK_DMR_M64: #define __EGPR__ 1
// CHECK_GNR_M64: #define __ENQCMD__ 1
// CHECK_GNR_M64: #define __EVEX256__ 1
// CHECK_GNR_M64: #define __EVEX512__ 1
@@ -1986,20 +2020,27 @@
// CHECK_GNR_M64: #define __LZCNT__ 1
// CHECK_GNR_M64: #define __MMX__ 1
// CHECK_GNR_M64: #define __MOVBE__ 1
+// CHECK_DMR_M64: #define __NDD__ 1
+// CHECK_DMR_M64: #define __NF__ 1
// CHECK_GNR_M64: #define __PCLMUL__ 1
// CHECK_GNR_M64: #define __PCONFIG__ 1
// CHECK_GNR_M64: #define __PKU__ 1
// CHECK_GNR_M64: #define __POPCNT__ 1
+// CHECK_DMR_M64: #define __PPX__ 1
// CHECK_GNR_M64: #define __PREFETCHI__ 1
// CHECK_GNR_M64: #define __PRFCHW__ 1
// CHECK_GNR_M64: #define __PTWRITE__ 1
+// CHECK_DMR_M64: #define __PUSH2POP2__ 1
// CHECK_GNR_M64: #define __RDPID__ 1
// CHECK_GNR_M64: #define __RDRND__ 1
// CHECK_GNR_M64: #define __RDSEED__ 1
// CHECK_GNR_M64: #define __SERIALIZE__ 1
// CHECK_GNR_M64: #define __SGX__ 1
+// CHECK_DMR_M64: #define __SHA512__ 1
// CHECK_GNR_M64: #define __SHA__ 1
// CHECK_GNR_M64: #define __SHSTK__ 1
+// CHECK_DMR_M64: #define __SM3__ 1
+// CHECK_DMR_M64: #define __SM4__ 1
// CHECK_GNR_M64: #define __SSE2__ 1
// CHECK_GNR_M64: #define __SSE3__ 1
// CHECK_GNR_M64: #define __SSE4_1__ 1
@@ -2016,6 +2057,7 @@
// CHECK_GNR_M64: #define __XSAVEOPT__ 1
// CHECK_GNR_M64: #define __XSAVES__ 1
// CHECK_GNR_M64: #define __XSAVE__ 1
+// CHECK_DMR_M64: #define __ZU__ 1
// CHECK_GNR_M64: #define __amd64 1
// CHECK_GNR_M64: #define __amd64__ 1
// CHECK_GNR_M64: #define __corei7 1
diff --git a/compiler-rt/lib/builtins/cpu_model/x86.c b/compiler-rt/lib/builtins/cpu_model/x86.c
index bfa478c4427a5b..6dd3f69f5bcbbb 100644
--- a/compiler-rt/lib/builtins/cpu_model/x86.c
+++ b/compiler-rt/lib/builtins/cpu_model/x86.c
@@ -103,6 +103,7 @@ enum ProcessorSubtypes {
INTEL_COREI7_ARROWLAKE_S,
INTEL_COREI7_PANTHERLAKE,
AMDFAM1AH_ZNVER5,
+ INTEL_COREI7_DIAMONDRAPIDS,
CPU_SUBTYPE_MAX
};
@@ -600,6 +601,19 @@ static const char *getIntelProcessorTypeAndSubtype(unsigned Family,
break;
}
break;
+ case 19:
+ switch (Model) {
+ // Diamondrapids:
+ case 0x01:
+ CPU = "diamondrapids";
+ *Type = INTEL_COREI7;
+ *Subtype = INTEL_COREI7_DIAMONDRAPIDS;
+ break;
+
+ default: // Unknown family 0x13 CPU.
+ break;
+ }
+ break;
default:
break; // Unknown.
}
diff --git a/llvm/include/llvm/TargetParser/X86TargetParser.def b/llvm/include/llvm/TargetParser/X86TargetParser.def
index 073e19f8187c65..9ba667d0e9187e 100644
--- a/llvm/include/llvm/TargetParser/X86TargetParser.def
+++ b/llvm/include/llvm/TargetParser/X86TargetParser.def
@@ -107,6 +107,7 @@ X86_CPU_SUBTYPE(INTEL_COREI7_ARROWLAKE, "arrowlake")
X86_CPU_SUBTYPE(INTEL_COREI7_ARROWLAKE_S, "arrowlake-s")
X86_CPU_SUBTYPE(INTEL_COREI7_PANTHERLAKE, "pantherlake")
X86_CPU_SUBTYPE(AMDFAM1AH_ZNVER5, "znver5")
+X86_CPU_SUBTYPE(INTEL_COREI7_DIAMONDRAPIDS, "diamondrapids")
// Alternate names supported by __builtin_cpu_is and target multiversioning.
X86_CPU_SUBTYPE_ALIAS(INTEL_COREI7_ALDERLAKE, "raptorlake")
diff --git a/llvm/include/llvm/TargetParser/X86TargetParser.h b/llvm/include/llvm/TargetParser/X86TargetParser.h
index 0e17c4674719cf..aafa77175b90e7 100644
--- a/llvm/include/llvm/TargetParser/X86TargetParser.h
+++ b/llvm/include/llvm/TargetParser/X86TargetParser.h
@@ -121,6 +121,7 @@ enum CPUKind {
CK_GraniterapidsD,
CK_Emeraldrapids,
CK_Clearwaterforest,
+ CK_DiamondRapids,
CK_KNL,
CK_KNM,
CK_Lakemont,
diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td
index 6bedf9e1d13ac3..764f62f2ab5ccb 100644
--- a/llvm/lib/Target/X86/X86.td
+++ b/llvm/lib/Target/X86/X86.td
@@ -1139,6 +1139,28 @@ def ProcessorFeatures {
list<SubtargetFeature> GNRDFeatures =
!listconcat(GNRFeatures, GNRDAdditionalFeatures);
+ // Diamondrapids
+ list<SubtargetFeature> DMRAdditionalFeatures = [FeatureAVX10_2_512,
+ FeatureAMXCOMPLEX,
+ FeatureSM4,
+ FeatureCMPCCXADD,
+ FeatureAVXIFMA,
+ FeatureAVXNECONVERT,
+ FeatureAVXVNNIINT8,
+ FeatureAVXVNNIINT16,
+ FeatureSHA512,
+ FeatureSM3,
+ FeatureEGPR,
+ FeatureZU,
+ FeatureCCMP,
+ FeaturePush2Pop2,
+ FeaturePPX,
+ FeatureNDD,
+ FeatureNF,
+ FeatureCF];
+ list<SubtargetFeature> DMRFeatures =
+ !listconcat(GNRDFeatures, DMRAdditionalFeatures);
+
// Atom
list<SubtargetFeature> AtomFeatures = [FeatureX87,
FeatureCX8,
@@ -1840,6 +1862,8 @@ foreach P = ["graniterapids-d", "graniterapids_d"] in {
def : ProcModel<P, SapphireRapidsModel,
ProcessorFeatures.GNRDFeatures, ProcessorFeatures.GNRTuning>;
}
+def : ProcModel<"diamondrapids", SapphireRapidsModel,
+ ProcessorFeatures.DMRFeatures, ProcessorFeatures.GNRTuning>;
// AMD CPUs.
diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp
index 5c4e3a9dc52b0f..88ee2787c141f6 100644
--- a/llvm/lib/TargetParser/Host.cpp
+++ b/llvm/lib/TargetParser/Host.cpp
@@ -1006,6 +1006,19 @@ static StringRef getIntelProcessorTypeAndSubtype(unsigned Family,
CPU = "pentium4";
break;
}
+ case 19:
+ switch (Model) {
+ // Diamondrapids:
+ case 0x01:
+ CPU = "diamondrapids";
+ *Type = X86::INTEL_COREI7;
+ *Subtype = X86::INTEL_COREI7_DIAMONDRAPIDS;
+ break;
+
+ default: // Unknown family 0x13 CPU.
+ break;
+ }
+ break;
default:
break; // Unknown.
}
diff --git a/llvm/lib/TargetParser/X86TargetParser.cpp b/llvm/lib/TargetParser/X86TargetParser.cpp
index 586df5748aa822..f21ccf2b4b8f6e 100644
--- a/llvm/lib/TargetParser/X86TargetParser.cpp
+++ b/llvm/lib/TargetParser/X86TargetParser.cpp
@@ -138,6 +138,12 @@ constexpr FeatureBitset FeaturesSapphireRapids =
FeatureWAITPKG;
constexpr FeatureBitset FeaturesGraniteRapids =
FeaturesSapphireRapids | FeatureAMX_FP16 | FeaturePREFETCHI;
+constexpr FeatureBitset FeaturesDiamondRapids =
+ FeaturesGraniteRapids | FeatureAMX_COMPLEX | FeatureAVX10_2_512 |
+ FeatureCMPCCXADD | FeatureAVXIFMA | FeatureAVXNECONVERT | FeatureAVXVNNIINT8 |
+ FeatureAVXVNNIINT16 | FeatureSHA512 | FeatureSM3 | FeatureSM4 | FeatureEGPR |
+ FeatureZU | FeatureCCMP | FeaturePush2Pop2 | FeaturePPX | FeatureNDD | FeatureNF |
+ FeatureCF;
// Intel Atom processors.
// Bonnell has feature parity with Core2 and adds MOVBE.
@@ -381,6 +387,8 @@ constexpr ProcInfo Processors[] = {
{ {"emeraldrapids"}, CK_Emeraldrapids, FEATURE_AVX512FP16, FeaturesSapphireRapids, 'n', false },
// Clearwaterforest microarchitecture based processors.
{ {"clearwaterforest"}, CK_Lunarlake, FEATURE_AVX2, FeaturesClearwaterforest, 'p', false },
+ // Diamondrapids microarchitecture based processors.
+ { {"diamondrapids"}, CK_DiamondRapids, FEATURE_AVX10_2_512, FeaturesDiamondRapids, 'z', false },
// Knights Landing processor.
{ {"knl"}, CK_KNL, FEATURE_AVX512F, FeaturesKNL, 'Z', false },
{ {"mic_avx512"}, CK_KNL, FEATURE_AVX512F, FeaturesKNL, 'Z', true },
diff --git a/llvm/test/CodeGen/X86/cpus-intel.ll b/llvm/test/CodeGen/X86/cpus-intel.ll
index 5e4d09e081fec9..40c38c2e828498 100644
--- a/llvm/test/CodeGen/X86/cpus-intel.ll
+++ b/llvm/test/CodeGen/X86/cpus-intel.ll
@@ -39,6 +39,7 @@
; RUN: llc < %s -o /dev/null -mtriple=i686-unknown-unknown -mcpu=gracemont 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
; RUN: llc < %s -o /dev/null -mtriple=i686-unknown-unknown -mcpu=pantherlake 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
; RUN: llc < %s -o /dev/null -mtriple=i686-unknown-unknown -mcpu=clearwaterforest 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
+; RUN: llc < %s -o /dev/null -mtriple=i686-unknown-unknown -mcpu=diamondrapids 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=nocona 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=core2 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
@@ -104,6 +105,7 @@
; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=gracemont 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=pantherlake 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=clearwaterforest 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
+; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=diamondrapids 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
define void @foo() {
ret void
>From eef0752518ec31f0e7160da3e68c75020e87e7b1 Mon Sep 17 00:00:00 2001
From: Freddy Ye <freddy.ye at intel.com>
Date: Mon, 28 Oct 2024 16:33:01 +0800
Subject: [PATCH 2/5] clang-format
---
llvm/lib/TargetParser/X86TargetParser.cpp | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/TargetParser/X86TargetParser.cpp b/llvm/lib/TargetParser/X86TargetParser.cpp
index f21ccf2b4b8f6e..bbe214fe577c0e 100644
--- a/llvm/lib/TargetParser/X86TargetParser.cpp
+++ b/llvm/lib/TargetParser/X86TargetParser.cpp
@@ -140,10 +140,10 @@ constexpr FeatureBitset FeaturesGraniteRapids =
FeaturesSapphireRapids | FeatureAMX_FP16 | FeaturePREFETCHI;
constexpr FeatureBitset FeaturesDiamondRapids =
FeaturesGraniteRapids | FeatureAMX_COMPLEX | FeatureAVX10_2_512 |
- FeatureCMPCCXADD | FeatureAVXIFMA | FeatureAVXNECONVERT | FeatureAVXVNNIINT8 |
- FeatureAVXVNNIINT16 | FeatureSHA512 | FeatureSM3 | FeatureSM4 | FeatureEGPR |
- FeatureZU | FeatureCCMP | FeaturePush2Pop2 | FeaturePPX | FeatureNDD | FeatureNF |
- FeatureCF;
+ FeatureCMPCCXADD | FeatureAVXIFMA | FeatureAVXNECONVERT |
+ FeatureAVXVNNIINT8 | FeatureAVXVNNIINT16 | FeatureSHA512 | FeatureSM3 |
+ FeatureSM4 | FeatureEGPR | FeatureZU | FeatureCCMP | FeaturePush2Pop2 |
+ FeaturePPX | FeatureNDD | FeatureNF | FeatureCF;
// Intel Atom processors.
// Bonnell has feature parity with Core2 and adds MOVBE.
>From cb420a3a48e93af8fd5f5352e33a070b1ca313d0 Mon Sep 17 00:00:00 2001
From: Freddy Ye <freddy.ye at intel.com>
Date: Mon, 28 Oct 2024 16:36:19 +0800
Subject: [PATCH 3/5] release note changes
---
clang/docs/ReleaseNotes.rst | 2 ++
llvm/docs/ReleaseNotes.md | 2 ++
2 files changed, 4 insertions(+)
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 31ee4f7e516fed..387471f5c3d41f 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -647,6 +647,8 @@ X86 Support
- Supported intrinsics for ``MOVRS AND AVX10.2``.
* Supported intrinsics of ``_mm(256|512)_(mask(z))_loadrs_epi(8|16|32|64)``.
+- Supported ``-march=diamondrapids`` and ``-mtune=diamondrapids``.
+
Arm and AArch64 Support
^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/llvm/docs/ReleaseNotes.md b/llvm/docs/ReleaseNotes.md
index f7215279940d69..59c40df100c3d9 100644
--- a/llvm/docs/ReleaseNotes.md
+++ b/llvm/docs/ReleaseNotes.md
@@ -224,6 +224,8 @@ Changes to the X86 Backend
* Supported ISA of `MSR_IMM`.
+* Supported `-mcpu=diamondrapids`.
+
Changes to the OCaml bindings
-----------------------------
>From 52e63a0f4b48ad106471343f2f48976bd9b84661 Mon Sep 17 00:00:00 2001
From: Freddy Ye <freddy.ye at intel.com>
Date: Fri, 15 Nov 2024 13:14:26 +0800
Subject: [PATCH 4/5] address comments
---
compiler-rt/lib/builtins/cpu_model/x86.c | 4 ++--
llvm/lib/Target/X86/X86.td | 2 +-
llvm/lib/TargetParser/Host.cpp | 4 ++--
llvm/lib/TargetParser/X86TargetParser.cpp | 2 +-
4 files changed, 6 insertions(+), 6 deletions(-)
diff --git a/compiler-rt/lib/builtins/cpu_model/x86.c b/compiler-rt/lib/builtins/cpu_model/x86.c
index 6dd3f69f5bcbbb..3315dceb7f5622 100644
--- a/compiler-rt/lib/builtins/cpu_model/x86.c
+++ b/compiler-rt/lib/builtins/cpu_model/x86.c
@@ -603,14 +603,14 @@ static const char *getIntelProcessorTypeAndSubtype(unsigned Family,
break;
case 19:
switch (Model) {
- // Diamondrapids:
+ // Diamond Rapids:
case 0x01:
CPU = "diamondrapids";
*Type = INTEL_COREI7;
*Subtype = INTEL_COREI7_DIAMONDRAPIDS;
break;
- default: // Unknown family 0x13 CPU.
+ default: // Unknown family 19 CPU.
break;
}
break;
diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td
index 764f62f2ab5ccb..ccb033ef220a02 100644
--- a/llvm/lib/Target/X86/X86.td
+++ b/llvm/lib/Target/X86/X86.td
@@ -1139,7 +1139,7 @@ def ProcessorFeatures {
list<SubtargetFeature> GNRDFeatures =
!listconcat(GNRFeatures, GNRDAdditionalFeatures);
- // Diamondrapids
+ // Diamond Rapids
list<SubtargetFeature> DMRAdditionalFeatures = [FeatureAVX10_2_512,
FeatureAMXCOMPLEX,
FeatureSM4,
diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp
index 88ee2787c141f6..91fc2e6e896dd1 100644
--- a/llvm/lib/TargetParser/Host.cpp
+++ b/llvm/lib/TargetParser/Host.cpp
@@ -1008,14 +1008,14 @@ static StringRef getIntelProcessorTypeAndSubtype(unsigned Family,
}
case 19:
switch (Model) {
- // Diamondrapids:
+ // Diamond Rapids:
case 0x01:
CPU = "diamondrapids";
*Type = X86::INTEL_COREI7;
*Subtype = X86::INTEL_COREI7_DIAMONDRAPIDS;
break;
- default: // Unknown family 0x13 CPU.
+ default: // Unknown family 19 CPU.
break;
}
break;
diff --git a/llvm/lib/TargetParser/X86TargetParser.cpp b/llvm/lib/TargetParser/X86TargetParser.cpp
index bbe214fe577c0e..b5897cccb1b0aa 100644
--- a/llvm/lib/TargetParser/X86TargetParser.cpp
+++ b/llvm/lib/TargetParser/X86TargetParser.cpp
@@ -387,7 +387,7 @@ constexpr ProcInfo Processors[] = {
{ {"emeraldrapids"}, CK_Emeraldrapids, FEATURE_AVX512FP16, FeaturesSapphireRapids, 'n', false },
// Clearwaterforest microarchitecture based processors.
{ {"clearwaterforest"}, CK_Lunarlake, FEATURE_AVX2, FeaturesClearwaterforest, 'p', false },
- // Diamondrapids microarchitecture based processors.
+ // Diamond Rapids microarchitecture based processors.
{ {"diamondrapids"}, CK_DiamondRapids, FEATURE_AVX10_2_512, FeaturesDiamondRapids, 'z', false },
// Knights Landing processor.
{ {"knl"}, CK_KNL, FEATURE_AVX512F, FeaturesKNL, 'Z', false },
>From d584d06fc5ff85e827a041e107a4df0b546733c0 Mon Sep 17 00:00:00 2001
From: Freddy Ye <freddy.ye at intel.com>
Date: Fri, 15 Nov 2024 13:35:21 +0800
Subject: [PATCH 5/5] Update with recently merged ISAs.
---
clang/test/Preprocessor/predefined-arch-macros.c | 12 ++++++++++++
llvm/lib/Target/X86/X86.td | 8 +++++++-
llvm/lib/TargetParser/X86TargetParser.cpp | 4 +++-
3 files changed, 22 insertions(+), 2 deletions(-)
diff --git a/clang/test/Preprocessor/predefined-arch-macros.c b/clang/test/Preprocessor/predefined-arch-macros.c
index 41bec61079d827..20aa2d4e0a54cb 100644
--- a/clang/test/Preprocessor/predefined-arch-macros.c
+++ b/clang/test/Preprocessor/predefined-arch-macros.c
@@ -1871,12 +1871,17 @@
// RUN: --target=i386 \
// RUN: | FileCheck -match-full-lines %s -check-prefixes=CHECK_GNR_M32,CHECK_GNRD_M32,CHECK_DMR_M32
// CHECK_GNR_M32: #define __AES__ 1
+// CHECK_DMR_M32: #define __AMX_AVX512__ 1
// CHECK_GNR_M32: #define __AMX_BF16__ 1
// CHECK_GNR_M32-NOT: #define __AMX_COMPLEX__ 1
// CHECK_GNRD_M32: #define __AMX_COMPLEX__ 1
// CHECK_GNR_M32: #define __AMX_FP16__ 1
+// CHECK_DMR_M32: #define __AMX_FP8__ 1
// CHECK_GNR_M32: #define __AMX_INT8__ 1
+// CHECK_DMR_M32: #define __AMX_MOVRS__ 1
+// CHECK_DMR_M32: #define __AMX_TF32__ 1
// CHECK_GNR_M32: #define __AMX_TILE__ 1
+// CHECK_DMR_M32: #define __AMX_TRANSPOSE__ 1
// CHECK_DMR_M32: #define __AVX10_2_512__ 1
// CHECK_DMR_M32: #define __AVX10_2__ 1
// CHECK_GNR_M32: #define __AVX2__ 1
@@ -1918,6 +1923,7 @@
// CHECK_GNR_M32: #define __LZCNT__ 1
// CHECK_GNR_M32: #define __MMX__ 1
// CHECK_GNR_M32: #define __MOVBE__ 1
+// CHECK_DMR_M32: #define __MOVRS__ 1
// CHECK_DMR_M32: #define __NDD__ 1
// CHECK_DMR_M32: #define __NF__ 1
// CHECK_GNR_M32: #define __PCLMUL__ 1
@@ -1973,12 +1979,17 @@
// RUN: --target=x86_64 \
// RUN: | FileCheck -match-full-lines %s -check-prefixes=CHECK_GNR_M64,CHECK_GNRD_M64,CHECK_DMR_M64
// CHECK_GNR_M64: #define __AES__ 1
+// CHECK_DMR_M64: #define __AMX_AVX512__ 1
// CHECK_GNR_M64: #define __AMX_BF16__ 1
// CHECK_GNR_M64-NOT: #define __AMX_COMPLEX__ 1
// CHECK_GNRD_M64: #define __AMX_COMPLEX__ 1
// CHECK_GNR_M64: #define __AMX_FP16__ 1
+// CHECK_DMR_M64: #define __AMX_FP8__ 1
// CHECK_GNR_M64: #define __AMX_INT8__ 1
+// CHECK_DMR_M64: #define __AMX_MOVRS__ 1
+// CHECK_DMR_M64: #define __AMX_TF32__ 1
// CHECK_GNR_M64: #define __AMX_TILE__ 1
+// CHECK_DMR_M64: #define __AMX_TRANSPOSE__ 1
// CHECK_DMR_M64: #define __AVX10_2_512__ 1
// CHECK_DMR_M64: #define __AVX10_2__ 1
// CHECK_GNR_M64: #define __AVX2__ 1
@@ -2020,6 +2031,7 @@
// CHECK_GNR_M64: #define __LZCNT__ 1
// CHECK_GNR_M64: #define __MMX__ 1
// CHECK_GNR_M64: #define __MOVBE__ 1
+// CHECK_DMR_M64: #define __MOVRS__ 1
// CHECK_DMR_M64: #define __NDD__ 1
// CHECK_DMR_M64: #define __NF__ 1
// CHECK_GNR_M64: #define __PCLMUL__ 1
diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td
index 23f03dc9ad0a0d..11a6e06ba47d82 100644
--- a/llvm/lib/Target/X86/X86.td
+++ b/llvm/lib/Target/X86/X86.td
@@ -1173,7 +1173,13 @@ def ProcessorFeatures {
FeaturePPX,
FeatureNDD,
FeatureNF,
- FeatureCF];
+ FeatureCF,
+ FeatureMOVRS,
+ FeatureAMXMOVRS,
+ FeatureAMXAVX512,
+ FeatureAMXFP8,
+ FeatureAMXTF32,
+ FeatureAMXTRANSPOSE];
list<SubtargetFeature> DMRFeatures =
!listconcat(GNRDFeatures, DMRAdditionalFeatures);
diff --git a/llvm/lib/TargetParser/X86TargetParser.cpp b/llvm/lib/TargetParser/X86TargetParser.cpp
index 5e4dc33e5fb80b..c84f3d917e7cb1 100644
--- a/llvm/lib/TargetParser/X86TargetParser.cpp
+++ b/llvm/lib/TargetParser/X86TargetParser.cpp
@@ -143,7 +143,9 @@ constexpr FeatureBitset FeaturesDiamondRapids =
FeatureCMPCCXADD | FeatureAVXIFMA | FeatureAVXNECONVERT |
FeatureAVXVNNIINT8 | FeatureAVXVNNIINT16 | FeatureSHA512 | FeatureSM3 |
FeatureSM4 | FeatureEGPR | FeatureZU | FeatureCCMP | FeaturePush2Pop2 |
- FeaturePPX | FeatureNDD | FeatureNF | FeatureCF;
+ FeaturePPX | FeatureNDD | FeatureNF | FeatureCF | FeatureMOVRS |
+ FeatureAMX_MOVRS | FeatureAMX_AVX512 | FeatureAMX_FP8 | FeatureAMX_TF32 |
+ FeatureAMX_TRANSPOSE;
// Intel Atom processors.
// Bonnell has feature parity with Core2 and adds MOVBE.
More information about the cfe-commits mailing list