[clang] [compiler-rt] [llvm] [X86] Support -march=diamondrapids (PR #113881)

Freddy Ye via cfe-commits cfe-commits at lists.llvm.org
Thu Nov 14 21:52:09 PST 2024


https://github.com/FreddyLeaf updated https://github.com/llvm/llvm-project/pull/113881

>From 4a7f17f29a007ce7af0893670a362f738b0d8d6e Mon Sep 17 00:00:00 2001
From: Freddy Ye <freddy.ye at intel.com>
Date: Mon, 21 Oct 2024 15:27:24 +0800
Subject: [PATCH 1/5] [X86] Support -march=diamondrapids

Ref.: https://cdrdv2.intel.com/v1/dl/getContent/671368
---
 clang/lib/Basic/Targets/X86.cpp               |  2 +
 clang/test/CodeGen/attr-cpuspecific-cpus.c    |  1 +
 clang/test/CodeGen/attr-target-mv.c           |  1 +
 clang/test/CodeGen/target-builtin-noerror.c   |  1 +
 clang/test/Driver/x86-march.c                 |  4 ++
 clang/test/Misc/target-invalid-cpu-note/x86.c |  4 ++
 .../Preprocessor/predefined-arch-macros.c     | 42 +++++++++++++++++++
 compiler-rt/lib/builtins/cpu_model/x86.c      | 14 +++++++
 .../llvm/TargetParser/X86TargetParser.def     |  1 +
 .../llvm/TargetParser/X86TargetParser.h       |  1 +
 llvm/lib/Target/X86/X86.td                    | 24 +++++++++++
 llvm/lib/TargetParser/Host.cpp                | 13 ++++++
 llvm/lib/TargetParser/X86TargetParser.cpp     |  8 ++++
 llvm/test/CodeGen/X86/cpus-intel.ll           |  2 +
 14 files changed, 118 insertions(+)

diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp
index d067ec218b5270..7787bbde513dcf 100644
--- a/clang/lib/Basic/Targets/X86.cpp
+++ b/clang/lib/Basic/Targets/X86.cpp
@@ -649,6 +649,7 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts,
   case CK_GraniterapidsD:
   case CK_Emeraldrapids:
   case CK_Clearwaterforest:
+  case CK_DiamondRapids:
     // FIXME: Historically, we defined this legacy name, it would be nice to
     // remove it at some point. We've never exposed fine-grained names for
     // recent primary x86 CPUs, and we should keep it that way.
@@ -1613,6 +1614,7 @@ std::optional<unsigned> X86TargetInfo::getCPUCacheLineSize() const {
     case CK_GraniterapidsD:
     case CK_Emeraldrapids:
     case CK_Clearwaterforest:
+    case CK_DiamondRapids:
     case CK_KNL:
     case CK_KNM:
     // K7
diff --git a/clang/test/CodeGen/attr-cpuspecific-cpus.c b/clang/test/CodeGen/attr-cpuspecific-cpus.c
index dd154fd227b25b..1dd095ec9e191f 100644
--- a/clang/test/CodeGen/attr-cpuspecific-cpus.c
+++ b/clang/test/CodeGen/attr-cpuspecific-cpus.c
@@ -43,6 +43,7 @@ ATTR(cpu_specific(icelake_client)) void CPU(void){}
 ATTR(cpu_specific(tigerlake)) void CPU(void){}
 ATTR(cpu_specific(alderlake)) void CPU(void){}
 ATTR(cpu_specific(sapphirerapids)) void CPU(void){}
+ATTR(cpu_specific(diamondrapids)) void CPU(void){}
 
 // ALIAS CPUs
 ATTR(cpu_specific(pentium_iii_no_xmm_regs)) void CPU0(void){}
diff --git a/clang/test/CodeGen/attr-target-mv.c b/clang/test/CodeGen/attr-target-mv.c
index 2c4b95ca04370a..6911b55203b7e7 100644
--- a/clang/test/CodeGen/attr-target-mv.c
+++ b/clang/test/CodeGen/attr-target-mv.c
@@ -29,6 +29,7 @@ int __attribute__((target("arch=lunarlake"))) foo(void) {return 23;}
 int __attribute__((target("arch=gracemont"))) foo(void) {return 24;}
 int __attribute__((target("arch=pantherlake"))) foo(void) {return 25;}
 int __attribute__((target("arch=clearwaterforest"))) foo(void) {return 26;}
+int __attribute__((target("arch=diamondrapids"))) foo(void) {return 27;}
 int __attribute__((target("default"))) foo(void) { return 2; }
 
 int bar(void) {
diff --git a/clang/test/CodeGen/target-builtin-noerror.c b/clang/test/CodeGen/target-builtin-noerror.c
index 1e53621bc6b5ae..0bbd8c3e5ddd81 100644
--- a/clang/test/CodeGen/target-builtin-noerror.c
+++ b/clang/test/CodeGen/target-builtin-noerror.c
@@ -209,4 +209,5 @@ void verifycpustrings(void) {
   (void)__builtin_cpu_is("znver3");
   (void)__builtin_cpu_is("znver4");
   (void)__builtin_cpu_is("znver5");
+  (void)__builtin_cpu_is("diamondrapids");
 }
diff --git a/clang/test/Driver/x86-march.c b/clang/test/Driver/x86-march.c
index 3bc2a82ae778d6..341f01c8d668df 100644
--- a/clang/test/Driver/x86-march.c
+++ b/clang/test/Driver/x86-march.c
@@ -120,6 +120,10 @@
 // RUN:   | FileCheck %s -check-prefix=clearwaterforest
 // clearwaterforest: "-target-cpu" "clearwaterforest"
 //
+// RUN: %clang -target x86_64-unknown-unknown -c -### %s -march=diamondrapids 2>&1 \
+// RUN:   | FileCheck %s -check-prefix=diamondrapids
+// diamondrapids: "-target-cpu" "diamondrapids"
+//
 // RUN: %clang -target x86_64-unknown-unknown -c -### %s -march=lakemont 2>&1 \
 // RUN:   | FileCheck %s -check-prefix=lakemont
 // lakemont: "-target-cpu" "lakemont"
diff --git a/clang/test/Misc/target-invalid-cpu-note/x86.c b/clang/test/Misc/target-invalid-cpu-note/x86.c
index 7879676040af46..f89cdc2aa573ff 100644
--- a/clang/test/Misc/target-invalid-cpu-note/x86.c
+++ b/clang/test/Misc/target-invalid-cpu-note/x86.c
@@ -69,6 +69,7 @@
 // X86-SAME: {{^}}, graniterapids-d
 // X86-SAME: {{^}}, emeraldrapids
 // X86-SAME: {{^}}, clearwaterforest
+// X86-SAME: {{^}}, diamondrapids
 // X86-SAME: {{^}}, knl
 // X86-SAME: {{^}}, knm
 // X86-SAME: {{^}}, lakemont
@@ -155,6 +156,7 @@
 // X86_64-SAME: {{^}}, graniterapids-d
 // X86_64-SAME: {{^}}, emeraldrapids
 // X86_64-SAME: {{^}}, clearwaterforest
+// X86_64-SAME: {{^}}, diamondrapids
 // X86_64-SAME: {{^}}, knl
 // X86_64-SAME: {{^}}, knm
 // X86_64-SAME: {{^}}, k8
@@ -250,6 +252,7 @@
 // TUNE_X86-SAME: {{^}}, graniterapids-d
 // TUNE_X86-SAME: {{^}}, emeraldrapids
 // TUNE_X86-SAME: {{^}}, clearwaterforest
+// TUNE_X86-SAME: {{^}}, diamondrapids
 // TUNE_X86-SAME: {{^}}, knl
 // TUNE_X86-SAME: {{^}}, knm
 // TUNE_X86-SAME: {{^}}, lakemont
@@ -352,6 +355,7 @@
 // TUNE_X86_64-SAME: {{^}}, graniterapids-d
 // TUNE_X86_64-SAME: {{^}}, emeraldrapids
 // TUNE_X86_64-SAME: {{^}}, clearwaterforest
+// TUNE_X86_64-SAME: {{^}}, diamondrapids
 // TUNE_X86_64-SAME: {{^}}, knl
 // TUNE_X86_64-SAME: {{^}}, knm
 // TUNE_X86_64-SAME: {{^}}, lakemont
diff --git a/clang/test/Preprocessor/predefined-arch-macros.c b/clang/test/Preprocessor/predefined-arch-macros.c
index 35801e758cc58a..41bec61079d827 100644
--- a/clang/test/Preprocessor/predefined-arch-macros.c
+++ b/clang/test/Preprocessor/predefined-arch-macros.c
@@ -1867,6 +1867,9 @@
 // RUN: %clang -march=graniterapids-d -m32 -E -dM %s -o - 2>&1 \
 // RUN:     --target=i386 \
 // RUN:   | FileCheck -match-full-lines %s -check-prefixes=CHECK_GNR_M32,CHECK_GNRD_M32
+// RUN: %clang -march=diamondrapids -m32 -E -dM %s -o - 2>&1 \
+// RUN:     --target=i386 \
+// RUN:   | FileCheck -match-full-lines %s -check-prefixes=CHECK_GNR_M32,CHECK_GNRD_M32,CHECK_DMR_M32
 // CHECK_GNR_M32: #define __AES__ 1
 // CHECK_GNR_M32: #define __AMX_BF16__ 1
 // CHECK_GNR_M32-NOT: #define __AMX_COMPLEX__ 1
@@ -1874,6 +1877,8 @@
 // CHECK_GNR_M32: #define __AMX_FP16__ 1
 // CHECK_GNR_M32: #define __AMX_INT8__ 1
 // CHECK_GNR_M32: #define __AMX_TILE__ 1
+// CHECK_DMR_M32: #define __AVX10_2_512__ 1
+// CHECK_DMR_M32: #define __AVX10_2__ 1
 // CHECK_GNR_M32: #define __AVX2__ 1
 // CHECK_GNR_M32: #define __AVX512BF16__ 1
 // CHECK_GNR_M32: #define __AVX512BITALG__ 1
@@ -1888,13 +1893,21 @@
 // CHECK_GNR_M32: #define __AVX512VL__ 1
 // CHECK_GNR_M32: #define __AVX512VNNI__ 1
 // CHECK_GNR_M32: #define __AVX512VPOPCNTDQ__ 1
+// CHECK_DMR_M32: #define __AVXIFMA__ 1
+// CHECK_DMR_M32: #define __AVXNECONVERT__ 1
+// CHECK_DMR_M32: #define __AVXVNNIINT16__ 1
+// CHECK_DMR_M32: #define __AVXVNNIINT8__ 1
 // CHECK_GNR_M32: #define __AVXVNNI__ 1
 // CHECK_GNR_M32: #define __AVX__ 1
 // CHECK_GNR_M32: #define __BMI2__ 1
 // CHECK_GNR_M32: #define __BMI__ 1
+// CHECK_DMR_M32: #define __CCMP__ 1
+// CHECK_DMR_M32: #define __CF__ 1
 // CHECK_GNR_M32: #define __CLDEMOTE__ 1
 // CHECK_GNR_M32: #define __CLFLUSHOPT__ 1
 // CHECK_GNR_M32: #define __CLWB__ 1
+// CHECK_DMR_M32: #define __CMPCCXADD__ 1
+// CHECK_DMR_M32: #define __EGPR__ 1
 // CHECK_GNR_M32: #define __ENQCMD__ 1
 // CHECK_GNR_M32: #define __EVEX256__ 1
 // CHECK_GNR_M32: #define __EVEX512__ 1
@@ -1905,20 +1918,27 @@
 // CHECK_GNR_M32: #define __LZCNT__ 1
 // CHECK_GNR_M32: #define __MMX__ 1
 // CHECK_GNR_M32: #define __MOVBE__ 1
+// CHECK_DMR_M32: #define __NDD__ 1
+// CHECK_DMR_M32: #define __NF__ 1
 // CHECK_GNR_M32: #define __PCLMUL__ 1
 // CHECK_GNR_M32: #define __PCONFIG__ 1
 // CHECK_GNR_M32: #define __PKU__ 1
 // CHECK_GNR_M32: #define __POPCNT__ 1
+// CHECK_DMR_M32: #define __PPX__ 1
 // CHECK_GNR_M32: #define __PREFETCHI__ 1
 // CHECK_GNR_M32: #define __PRFCHW__ 1
 // CHECK_GNR_M32: #define __PTWRITE__ 1
+// CHECK_DMR_M32: #define __PUSH2POP2__ 1
 // CHECK_GNR_M32: #define __RDPID__ 1
 // CHECK_GNR_M32: #define __RDRND__ 1
 // CHECK_GNR_M32: #define __RDSEED__ 1
 // CHECK_GNR_M32: #define __SERIALIZE__ 1
 // CHECK_GNR_M32: #define __SGX__ 1
+// CHECK_DMR_M32: #define __SHA512__ 1
 // CHECK_GNR_M32: #define __SHA__ 1
 // CHECK_GNR_M32: #define __SHSTK__ 1
+// CHECK_DMR_M32: #define __SM3__ 1
+// CHECK_DMR_M32: #define __SM4__ 1
 // CHECK_GNR_M32: #define __SSE2__ 1
 // CHECK_GNR_M32: #define __SSE3__ 1
 // CHECK_GNR_M32: #define __SSE4_1__ 1
@@ -1935,6 +1955,7 @@
 // CHECK_GNR_M32: #define __XSAVEOPT__ 1
 // CHECK_GNR_M32: #define __XSAVES__ 1
 // CHECK_GNR_M32: #define __XSAVE__ 1
+// CHECK_DMR_M32: #define __ZU__ 1
 // CHECK_GNR_M32: #define __corei7 1
 // CHECK_GNR_M32: #define __corei7__ 1
 // CHECK_GNR_M32: #define __i386 1
@@ -1948,6 +1969,9 @@
 // RUN: %clang -march=graniterapids-d -m64 -E -dM %s -o - 2>&1 \
 // RUN:     --target=x86_64 \
 // RUN:   | FileCheck -match-full-lines %s -check-prefixes=CHECK_GNR_M64,CHECK_GNRD_M64
+// RUN: %clang -march=diamondrapids -m64 -E -dM %s -o - 2>&1 \
+// RUN:     --target=x86_64 \
+// RUN:   | FileCheck -match-full-lines %s -check-prefixes=CHECK_GNR_M64,CHECK_GNRD_M64,CHECK_DMR_M64
 // CHECK_GNR_M64: #define __AES__ 1
 // CHECK_GNR_M64: #define __AMX_BF16__ 1
 // CHECK_GNR_M64-NOT: #define __AMX_COMPLEX__ 1
@@ -1955,6 +1979,8 @@
 // CHECK_GNR_M64: #define __AMX_FP16__ 1
 // CHECK_GNR_M64: #define __AMX_INT8__ 1
 // CHECK_GNR_M64: #define __AMX_TILE__ 1
+// CHECK_DMR_M64: #define __AVX10_2_512__ 1
+// CHECK_DMR_M64: #define __AVX10_2__ 1
 // CHECK_GNR_M64: #define __AVX2__ 1
 // CHECK_GNR_M64: #define __AVX512BF16__ 1
 // CHECK_GNR_M64: #define __AVX512BITALG__ 1
@@ -1969,13 +1995,21 @@
 // CHECK_GNR_M64: #define __AVX512VL__ 1
 // CHECK_GNR_M64: #define __AVX512VNNI__ 1
 // CHECK_GNR_M64: #define __AVX512VPOPCNTDQ__ 1
+// CHECK_DMR_M64: #define __AVXIFMA__ 1
+// CHECK_DMR_M64: #define __AVXNECONVERT__ 1
+// CHECK_DMR_M64: #define __AVXVNNIINT16__ 1
+// CHECK_DMR_M64: #define __AVXVNNIINT8__ 1
 // CHECK_GNR_M64: #define __AVXVNNI__ 1
 // CHECK_GNR_M64: #define __AVX__ 1
 // CHECK_GNR_M64: #define __BMI2__ 1
 // CHECK_GNR_M64: #define __BMI__ 1
+// CHECK_DMR_M64: #define __CCMP__ 1
+// CHECK_DMR_M64: #define __CF__ 1
 // CHECK_GNR_M64: #define __CLDEMOTE__ 1
 // CHECK_GNR_M64: #define __CLFLUSHOPT__ 1
 // CHECK_GNR_M64: #define __CLWB__ 1
+// CHECK_DMR_M64: #define __CMPCCXADD__ 1
+// CHECK_DMR_M64: #define __EGPR__ 1
 // CHECK_GNR_M64: #define __ENQCMD__ 1
 // CHECK_GNR_M64: #define __EVEX256__ 1
 // CHECK_GNR_M64: #define __EVEX512__ 1
@@ -1986,20 +2020,27 @@
 // CHECK_GNR_M64: #define __LZCNT__ 1
 // CHECK_GNR_M64: #define __MMX__ 1
 // CHECK_GNR_M64: #define __MOVBE__ 1
+// CHECK_DMR_M64: #define __NDD__ 1
+// CHECK_DMR_M64: #define __NF__ 1
 // CHECK_GNR_M64: #define __PCLMUL__ 1
 // CHECK_GNR_M64: #define __PCONFIG__ 1
 // CHECK_GNR_M64: #define __PKU__ 1
 // CHECK_GNR_M64: #define __POPCNT__ 1
+// CHECK_DMR_M64: #define __PPX__ 1
 // CHECK_GNR_M64: #define __PREFETCHI__ 1
 // CHECK_GNR_M64: #define __PRFCHW__ 1
 // CHECK_GNR_M64: #define __PTWRITE__ 1
+// CHECK_DMR_M64: #define __PUSH2POP2__ 1
 // CHECK_GNR_M64: #define __RDPID__ 1
 // CHECK_GNR_M64: #define __RDRND__ 1
 // CHECK_GNR_M64: #define __RDSEED__ 1
 // CHECK_GNR_M64: #define __SERIALIZE__ 1
 // CHECK_GNR_M64: #define __SGX__ 1
+// CHECK_DMR_M64: #define __SHA512__ 1
 // CHECK_GNR_M64: #define __SHA__ 1
 // CHECK_GNR_M64: #define __SHSTK__ 1
+// CHECK_DMR_M64: #define __SM3__ 1
+// CHECK_DMR_M64: #define __SM4__ 1
 // CHECK_GNR_M64: #define __SSE2__ 1
 // CHECK_GNR_M64: #define __SSE3__ 1
 // CHECK_GNR_M64: #define __SSE4_1__ 1
@@ -2016,6 +2057,7 @@
 // CHECK_GNR_M64: #define __XSAVEOPT__ 1
 // CHECK_GNR_M64: #define __XSAVES__ 1
 // CHECK_GNR_M64: #define __XSAVE__ 1
+// CHECK_DMR_M64: #define __ZU__ 1
 // CHECK_GNR_M64: #define __amd64 1
 // CHECK_GNR_M64: #define __amd64__ 1
 // CHECK_GNR_M64: #define __corei7 1
diff --git a/compiler-rt/lib/builtins/cpu_model/x86.c b/compiler-rt/lib/builtins/cpu_model/x86.c
index bfa478c4427a5b..6dd3f69f5bcbbb 100644
--- a/compiler-rt/lib/builtins/cpu_model/x86.c
+++ b/compiler-rt/lib/builtins/cpu_model/x86.c
@@ -103,6 +103,7 @@ enum ProcessorSubtypes {
   INTEL_COREI7_ARROWLAKE_S,
   INTEL_COREI7_PANTHERLAKE,
   AMDFAM1AH_ZNVER5,
+  INTEL_COREI7_DIAMONDRAPIDS,
   CPU_SUBTYPE_MAX
 };
 
@@ -600,6 +601,19 @@ static const char *getIntelProcessorTypeAndSubtype(unsigned Family,
       break;
     }
     break;
+  case 19:
+    switch (Model) {
+    // Diamondrapids:
+    case 0x01:
+      CPU = "diamondrapids";
+      *Type = INTEL_COREI7;
+      *Subtype = INTEL_COREI7_DIAMONDRAPIDS;
+      break;
+
+    default: // Unknown family 0x13 CPU.
+      break;
+    }
+    break;
   default:
     break; // Unknown.
   }
diff --git a/llvm/include/llvm/TargetParser/X86TargetParser.def b/llvm/include/llvm/TargetParser/X86TargetParser.def
index 073e19f8187c65..9ba667d0e9187e 100644
--- a/llvm/include/llvm/TargetParser/X86TargetParser.def
+++ b/llvm/include/llvm/TargetParser/X86TargetParser.def
@@ -107,6 +107,7 @@ X86_CPU_SUBTYPE(INTEL_COREI7_ARROWLAKE,      "arrowlake")
 X86_CPU_SUBTYPE(INTEL_COREI7_ARROWLAKE_S,    "arrowlake-s")
 X86_CPU_SUBTYPE(INTEL_COREI7_PANTHERLAKE,    "pantherlake")
 X86_CPU_SUBTYPE(AMDFAM1AH_ZNVER5,            "znver5")
+X86_CPU_SUBTYPE(INTEL_COREI7_DIAMONDRAPIDS,  "diamondrapids")
 
 // Alternate names supported by __builtin_cpu_is and target multiversioning.
 X86_CPU_SUBTYPE_ALIAS(INTEL_COREI7_ALDERLAKE, "raptorlake")
diff --git a/llvm/include/llvm/TargetParser/X86TargetParser.h b/llvm/include/llvm/TargetParser/X86TargetParser.h
index 0e17c4674719cf..aafa77175b90e7 100644
--- a/llvm/include/llvm/TargetParser/X86TargetParser.h
+++ b/llvm/include/llvm/TargetParser/X86TargetParser.h
@@ -121,6 +121,7 @@ enum CPUKind {
   CK_GraniterapidsD,
   CK_Emeraldrapids,
   CK_Clearwaterforest,
+  CK_DiamondRapids,
   CK_KNL,
   CK_KNM,
   CK_Lakemont,
diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td
index 6bedf9e1d13ac3..764f62f2ab5ccb 100644
--- a/llvm/lib/Target/X86/X86.td
+++ b/llvm/lib/Target/X86/X86.td
@@ -1139,6 +1139,28 @@ def ProcessorFeatures {
   list<SubtargetFeature> GNRDFeatures =
     !listconcat(GNRFeatures, GNRDAdditionalFeatures);
 
+  // Diamondrapids
+  list<SubtargetFeature> DMRAdditionalFeatures = [FeatureAVX10_2_512,
+                                                  FeatureAMXCOMPLEX,
+                                                  FeatureSM4,
+                                                  FeatureCMPCCXADD,
+                                                  FeatureAVXIFMA,
+                                                  FeatureAVXNECONVERT,
+                                                  FeatureAVXVNNIINT8,
+                                                  FeatureAVXVNNIINT16,
+                                                  FeatureSHA512,
+                                                  FeatureSM3,
+                                                  FeatureEGPR,
+                                                  FeatureZU,
+                                                  FeatureCCMP,
+                                                  FeaturePush2Pop2,
+                                                  FeaturePPX,
+                                                  FeatureNDD,
+                                                  FeatureNF,
+                                                  FeatureCF];
+  list<SubtargetFeature> DMRFeatures =
+    !listconcat(GNRDFeatures, DMRAdditionalFeatures);
+
   // Atom
   list<SubtargetFeature> AtomFeatures = [FeatureX87,
                                          FeatureCX8,
@@ -1840,6 +1862,8 @@ foreach P = ["graniterapids-d", "graniterapids_d"] in {
 def : ProcModel<P, SapphireRapidsModel,
                 ProcessorFeatures.GNRDFeatures, ProcessorFeatures.GNRTuning>;
 }
+def : ProcModel<"diamondrapids", SapphireRapidsModel,
+                ProcessorFeatures.DMRFeatures, ProcessorFeatures.GNRTuning>;
 
 // AMD CPUs.
 
diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp
index 5c4e3a9dc52b0f..88ee2787c141f6 100644
--- a/llvm/lib/TargetParser/Host.cpp
+++ b/llvm/lib/TargetParser/Host.cpp
@@ -1006,6 +1006,19 @@ static StringRef getIntelProcessorTypeAndSubtype(unsigned Family,
     CPU = "pentium4";
     break;
   }
+  case 19:
+    switch (Model) {
+    // Diamondrapids:
+    case 0x01:
+      CPU = "diamondrapids";
+      *Type = X86::INTEL_COREI7;
+      *Subtype = X86::INTEL_COREI7_DIAMONDRAPIDS;
+      break;
+
+    default: // Unknown family 0x13 CPU.
+      break;
+    }
+    break;
   default:
     break; // Unknown.
   }
diff --git a/llvm/lib/TargetParser/X86TargetParser.cpp b/llvm/lib/TargetParser/X86TargetParser.cpp
index 586df5748aa822..f21ccf2b4b8f6e 100644
--- a/llvm/lib/TargetParser/X86TargetParser.cpp
+++ b/llvm/lib/TargetParser/X86TargetParser.cpp
@@ -138,6 +138,12 @@ constexpr FeatureBitset FeaturesSapphireRapids =
     FeatureWAITPKG;
 constexpr FeatureBitset FeaturesGraniteRapids =
     FeaturesSapphireRapids | FeatureAMX_FP16 | FeaturePREFETCHI;
+constexpr FeatureBitset FeaturesDiamondRapids =
+    FeaturesGraniteRapids | FeatureAMX_COMPLEX | FeatureAVX10_2_512 |
+    FeatureCMPCCXADD | FeatureAVXIFMA | FeatureAVXNECONVERT | FeatureAVXVNNIINT8 |
+    FeatureAVXVNNIINT16 | FeatureSHA512 | FeatureSM3 | FeatureSM4 | FeatureEGPR |
+    FeatureZU | FeatureCCMP | FeaturePush2Pop2 | FeaturePPX | FeatureNDD | FeatureNF |
+    FeatureCF;
 
 // Intel Atom processors.
 // Bonnell has feature parity with Core2 and adds MOVBE.
@@ -381,6 +387,8 @@ constexpr ProcInfo Processors[] = {
   { {"emeraldrapids"}, CK_Emeraldrapids, FEATURE_AVX512FP16, FeaturesSapphireRapids, 'n', false },
   // Clearwaterforest microarchitecture based processors.
   { {"clearwaterforest"}, CK_Lunarlake, FEATURE_AVX2, FeaturesClearwaterforest, 'p', false },
+  // Diamondrapids microarchitecture based processors.
+  { {"diamondrapids"}, CK_DiamondRapids, FEATURE_AVX10_2_512, FeaturesDiamondRapids, 'z', false },
   // Knights Landing processor.
   { {"knl"}, CK_KNL, FEATURE_AVX512F, FeaturesKNL, 'Z', false },
   { {"mic_avx512"}, CK_KNL, FEATURE_AVX512F, FeaturesKNL, 'Z', true },
diff --git a/llvm/test/CodeGen/X86/cpus-intel.ll b/llvm/test/CodeGen/X86/cpus-intel.ll
index 5e4d09e081fec9..40c38c2e828498 100644
--- a/llvm/test/CodeGen/X86/cpus-intel.ll
+++ b/llvm/test/CodeGen/X86/cpus-intel.ll
@@ -39,6 +39,7 @@
 ; RUN: llc < %s -o /dev/null -mtriple=i686-unknown-unknown -mcpu=gracemont 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
 ; RUN: llc < %s -o /dev/null -mtriple=i686-unknown-unknown -mcpu=pantherlake 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
 ; RUN: llc < %s -o /dev/null -mtriple=i686-unknown-unknown -mcpu=clearwaterforest 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
+; RUN: llc < %s -o /dev/null -mtriple=i686-unknown-unknown -mcpu=diamondrapids 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
 
 ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=nocona 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
 ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=core2 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
@@ -104,6 +105,7 @@
 ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=gracemont 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
 ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=pantherlake 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
 ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=clearwaterforest 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
+; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=diamondrapids 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
 
 define void @foo() {
   ret void

>From eef0752518ec31f0e7160da3e68c75020e87e7b1 Mon Sep 17 00:00:00 2001
From: Freddy Ye <freddy.ye at intel.com>
Date: Mon, 28 Oct 2024 16:33:01 +0800
Subject: [PATCH 2/5] clang-format

---
 llvm/lib/TargetParser/X86TargetParser.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/TargetParser/X86TargetParser.cpp b/llvm/lib/TargetParser/X86TargetParser.cpp
index f21ccf2b4b8f6e..bbe214fe577c0e 100644
--- a/llvm/lib/TargetParser/X86TargetParser.cpp
+++ b/llvm/lib/TargetParser/X86TargetParser.cpp
@@ -140,10 +140,10 @@ constexpr FeatureBitset FeaturesGraniteRapids =
     FeaturesSapphireRapids | FeatureAMX_FP16 | FeaturePREFETCHI;
 constexpr FeatureBitset FeaturesDiamondRapids =
     FeaturesGraniteRapids | FeatureAMX_COMPLEX | FeatureAVX10_2_512 |
-    FeatureCMPCCXADD | FeatureAVXIFMA | FeatureAVXNECONVERT | FeatureAVXVNNIINT8 |
-    FeatureAVXVNNIINT16 | FeatureSHA512 | FeatureSM3 | FeatureSM4 | FeatureEGPR |
-    FeatureZU | FeatureCCMP | FeaturePush2Pop2 | FeaturePPX | FeatureNDD | FeatureNF |
-    FeatureCF;
+    FeatureCMPCCXADD | FeatureAVXIFMA | FeatureAVXNECONVERT |
+    FeatureAVXVNNIINT8 | FeatureAVXVNNIINT16 | FeatureSHA512 | FeatureSM3 |
+    FeatureSM4 | FeatureEGPR | FeatureZU | FeatureCCMP | FeaturePush2Pop2 |
+    FeaturePPX | FeatureNDD | FeatureNF | FeatureCF;
 
 // Intel Atom processors.
 // Bonnell has feature parity with Core2 and adds MOVBE.

>From cb420a3a48e93af8fd5f5352e33a070b1ca313d0 Mon Sep 17 00:00:00 2001
From: Freddy Ye <freddy.ye at intel.com>
Date: Mon, 28 Oct 2024 16:36:19 +0800
Subject: [PATCH 3/5] release note changes

---
 clang/docs/ReleaseNotes.rst | 2 ++
 llvm/docs/ReleaseNotes.md   | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 31ee4f7e516fed..387471f5c3d41f 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -647,6 +647,8 @@ X86 Support
 - Supported intrinsics for ``MOVRS AND AVX10.2``.
   * Supported intrinsics of ``_mm(256|512)_(mask(z))_loadrs_epi(8|16|32|64)``.
 
+- Supported ``-march/tune=diamondrapids``
+
 Arm and AArch64 Support
 ^^^^^^^^^^^^^^^^^^^^^^^
 
diff --git a/llvm/docs/ReleaseNotes.md b/llvm/docs/ReleaseNotes.md
index f7215279940d69..59c40df100c3d9 100644
--- a/llvm/docs/ReleaseNotes.md
+++ b/llvm/docs/ReleaseNotes.md
@@ -224,6 +224,8 @@ Changes to the X86 Backend
 
 * Supported ISA of `MSR_IMM`.
 
+* Supported ``-mcpu=diamondrapids``
+
 Changes to the OCaml bindings
 -----------------------------
 

>From 52e63a0f4b48ad106471343f2f48976bd9b84661 Mon Sep 17 00:00:00 2001
From: Freddy Ye <freddy.ye at intel.com>
Date: Fri, 15 Nov 2024 13:14:26 +0800
Subject: [PATCH 4/5] address comments

---
 compiler-rt/lib/builtins/cpu_model/x86.c  | 4 ++--
 llvm/lib/Target/X86/X86.td                | 2 +-
 llvm/lib/TargetParser/Host.cpp            | 4 ++--
 llvm/lib/TargetParser/X86TargetParser.cpp | 2 +-
 4 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/compiler-rt/lib/builtins/cpu_model/x86.c b/compiler-rt/lib/builtins/cpu_model/x86.c
index 6dd3f69f5bcbbb..3315dceb7f5622 100644
--- a/compiler-rt/lib/builtins/cpu_model/x86.c
+++ b/compiler-rt/lib/builtins/cpu_model/x86.c
@@ -603,14 +603,14 @@ static const char *getIntelProcessorTypeAndSubtype(unsigned Family,
     break;
   case 19:
     switch (Model) {
-    // Diamondrapids:
+    // Diamond Rapids:
     case 0x01:
       CPU = "diamondrapids";
       *Type = INTEL_COREI7;
       *Subtype = INTEL_COREI7_DIAMONDRAPIDS;
       break;
 
-    default: // Unknown family 0x13 CPU.
+    default: // Unknown family 19 CPU.
       break;
     }
     break;
diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td
index 764f62f2ab5ccb..ccb033ef220a02 100644
--- a/llvm/lib/Target/X86/X86.td
+++ b/llvm/lib/Target/X86/X86.td
@@ -1139,7 +1139,7 @@ def ProcessorFeatures {
   list<SubtargetFeature> GNRDFeatures =
     !listconcat(GNRFeatures, GNRDAdditionalFeatures);
 
-  // Diamondrapids
+  // Diamond Rapids
   list<SubtargetFeature> DMRAdditionalFeatures = [FeatureAVX10_2_512,
                                                   FeatureAMXCOMPLEX,
                                                   FeatureSM4,
diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp
index 88ee2787c141f6..91fc2e6e896dd1 100644
--- a/llvm/lib/TargetParser/Host.cpp
+++ b/llvm/lib/TargetParser/Host.cpp
@@ -1008,14 +1008,14 @@ static StringRef getIntelProcessorTypeAndSubtype(unsigned Family,
   }
   case 19:
     switch (Model) {
-    // Diamondrapids:
+    // Diamond Rapids:
     case 0x01:
       CPU = "diamondrapids";
       *Type = X86::INTEL_COREI7;
       *Subtype = X86::INTEL_COREI7_DIAMONDRAPIDS;
       break;
 
-    default: // Unknown family 0x13 CPU.
+    default: // Unknown family 19 CPU.
       break;
     }
     break;
diff --git a/llvm/lib/TargetParser/X86TargetParser.cpp b/llvm/lib/TargetParser/X86TargetParser.cpp
index bbe214fe577c0e..b5897cccb1b0aa 100644
--- a/llvm/lib/TargetParser/X86TargetParser.cpp
+++ b/llvm/lib/TargetParser/X86TargetParser.cpp
@@ -387,7 +387,7 @@ constexpr ProcInfo Processors[] = {
   { {"emeraldrapids"}, CK_Emeraldrapids, FEATURE_AVX512FP16, FeaturesSapphireRapids, 'n', false },
   // Clearwaterforest microarchitecture based processors.
   { {"clearwaterforest"}, CK_Lunarlake, FEATURE_AVX2, FeaturesClearwaterforest, 'p', false },
-  // Diamondrapids microarchitecture based processors.
+  // Diamond Rapids microarchitecture based processors.
   { {"diamondrapids"}, CK_DiamondRapids, FEATURE_AVX10_2_512, FeaturesDiamondRapids, 'z', false },
   // Knights Landing processor.
   { {"knl"}, CK_KNL, FEATURE_AVX512F, FeaturesKNL, 'Z', false },

>From d584d06fc5ff85e827a041e107a4df0b546733c0 Mon Sep 17 00:00:00 2001
From: Freddy Ye <freddy.ye at intel.com>
Date: Fri, 15 Nov 2024 13:35:21 +0800
Subject: [PATCH 5/5] Update with recently merged ISAs.

---
 clang/test/Preprocessor/predefined-arch-macros.c | 12 ++++++++++++
 llvm/lib/Target/X86/X86.td                       |  8 +++++++-
 llvm/lib/TargetParser/X86TargetParser.cpp        |  4 +++-
 3 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/clang/test/Preprocessor/predefined-arch-macros.c b/clang/test/Preprocessor/predefined-arch-macros.c
index 41bec61079d827..20aa2d4e0a54cb 100644
--- a/clang/test/Preprocessor/predefined-arch-macros.c
+++ b/clang/test/Preprocessor/predefined-arch-macros.c
@@ -1871,12 +1871,17 @@
 // RUN:     --target=i386 \
 // RUN:   | FileCheck -match-full-lines %s -check-prefixes=CHECK_GNR_M32,CHECK_GNRD_M32,CHECK_DMR_M32
 // CHECK_GNR_M32: #define __AES__ 1
+// CHECK_DMR_M32: #define __AMX_AVX512__ 1
 // CHECK_GNR_M32: #define __AMX_BF16__ 1
 // CHECK_GNR_M32-NOT: #define __AMX_COMPLEX__ 1
 // CHECK_GNRD_M32: #define __AMX_COMPLEX__ 1
 // CHECK_GNR_M32: #define __AMX_FP16__ 1
+// CHECK_DMR_M32: #define __AMX_FP8__ 1
 // CHECK_GNR_M32: #define __AMX_INT8__ 1
+// CHECK_DMR_M32: #define __AMX_MOVRS__ 1
+// CHECK_DMR_M32: #define __AMX_TF32__ 1
 // CHECK_GNR_M32: #define __AMX_TILE__ 1
+// CHECK_DMR_M32: #define __AMX_TRANSPOSE__ 1
 // CHECK_DMR_M32: #define __AVX10_2_512__ 1
 // CHECK_DMR_M32: #define __AVX10_2__ 1
 // CHECK_GNR_M32: #define __AVX2__ 1
@@ -1918,6 +1923,7 @@
 // CHECK_GNR_M32: #define __LZCNT__ 1
 // CHECK_GNR_M32: #define __MMX__ 1
 // CHECK_GNR_M32: #define __MOVBE__ 1
+// CHECK_DMR_M32: #define __MOVRS__ 1
 // CHECK_DMR_M32: #define __NDD__ 1
 // CHECK_DMR_M32: #define __NF__ 1
 // CHECK_GNR_M32: #define __PCLMUL__ 1
@@ -1973,12 +1979,17 @@
 // RUN:     --target=x86_64 \
 // RUN:   | FileCheck -match-full-lines %s -check-prefixes=CHECK_GNR_M64,CHECK_GNRD_M64,CHECK_DMR_M64
 // CHECK_GNR_M64: #define __AES__ 1
+// CHECK_DMR_M64: #define __AMX_AVX512__ 1
 // CHECK_GNR_M64: #define __AMX_BF16__ 1
 // CHECK_GNR_M64-NOT: #define __AMX_COMPLEX__ 1
 // CHECK_GNRD_M64: #define __AMX_COMPLEX__ 1
 // CHECK_GNR_M64: #define __AMX_FP16__ 1
+// CHECK_DMR_M64: #define __AMX_FP8__ 1
 // CHECK_GNR_M64: #define __AMX_INT8__ 1
+// CHECK_DMR_M64: #define __AMX_MOVRS__ 1
+// CHECK_DMR_M64: #define __AMX_TF32__ 1
 // CHECK_GNR_M64: #define __AMX_TILE__ 1
+// CHECK_DMR_M64: #define __AMX_TRANSPOSE__ 1
 // CHECK_DMR_M64: #define __AVX10_2_512__ 1
 // CHECK_DMR_M64: #define __AVX10_2__ 1
 // CHECK_GNR_M64: #define __AVX2__ 1
@@ -2020,6 +2031,7 @@
 // CHECK_GNR_M64: #define __LZCNT__ 1
 // CHECK_GNR_M64: #define __MMX__ 1
 // CHECK_GNR_M64: #define __MOVBE__ 1
+// CHECK_DMR_M64: #define __MOVRS__ 1
 // CHECK_DMR_M64: #define __NDD__ 1
 // CHECK_DMR_M64: #define __NF__ 1
 // CHECK_GNR_M64: #define __PCLMUL__ 1
diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td
index 23f03dc9ad0a0d..11a6e06ba47d82 100644
--- a/llvm/lib/Target/X86/X86.td
+++ b/llvm/lib/Target/X86/X86.td
@@ -1173,7 +1173,13 @@ def ProcessorFeatures {
                                                   FeaturePPX,
                                                   FeatureNDD,
                                                   FeatureNF,
-                                                  FeatureCF];
+                                                  FeatureCF,
+                                                  FeatureMOVRS,
+                                                  FeatureAMXMOVRS,
+                                                  FeatureAMXAVX512,
+                                                  FeatureAMXFP8,
+                                                  FeatureAMXTF32,
+                                                  FeatureAMXTRANSPOSE];
   list<SubtargetFeature> DMRFeatures =
     !listconcat(GNRDFeatures, DMRAdditionalFeatures);
 
diff --git a/llvm/lib/TargetParser/X86TargetParser.cpp b/llvm/lib/TargetParser/X86TargetParser.cpp
index 5e4dc33e5fb80b..c84f3d917e7cb1 100644
--- a/llvm/lib/TargetParser/X86TargetParser.cpp
+++ b/llvm/lib/TargetParser/X86TargetParser.cpp
@@ -143,7 +143,9 @@ constexpr FeatureBitset FeaturesDiamondRapids =
     FeatureCMPCCXADD | FeatureAVXIFMA | FeatureAVXNECONVERT |
     FeatureAVXVNNIINT8 | FeatureAVXVNNIINT16 | FeatureSHA512 | FeatureSM3 |
     FeatureSM4 | FeatureEGPR | FeatureZU | FeatureCCMP | FeaturePush2Pop2 |
-    FeaturePPX | FeatureNDD | FeatureNF | FeatureCF;
+    FeaturePPX | FeatureNDD | FeatureNF | FeatureCF | FeatureMOVRS |
+    FeatureAMX_MOVRS | FeatureAMX_AVX512 | FeatureAMX_FP8 | FeatureAMX_TF32 |
+    FeatureAMX_TRANSPOSE;
 
 // Intel Atom processors.
 // Bonnell has feature parity with Core2 and adds MOVBE.



More information about the cfe-commits mailing list