[clang] [compiler-rt] [llvm] [X86] AMD Zen 6 Initial enablement (PR #179150)
via cfe-commits
cfe-commits at lists.llvm.org
Mon Feb 2 09:47:52 PST 2026
https://github.com/ganeshgit updated https://github.com/llvm/llvm-project/pull/179150
>From 019e139b587fe0980e4c1fa64b0c3e98806bef3f Mon Sep 17 00:00:00 2001
From: Ganesh Gopalasubramanian <Ganesh.Gopalasubramanian at amd.com>
Date: Mon, 2 Feb 2026 12:36:42 +0000
Subject: [PATCH] [X86] Add basic AMD Zen 6 (znver6) CPU support
This patch adds initial support for AMD Zen 6 architecture (znver6):
- Added znver6 CPU target recognition in Clang and LLVM
- Added znver6 to target parser and host CPU detection
- Reused the existing znver4 scheduling model and PFM counters for znver6
- Added znver6 to various optimizer tests
- Updated compiler-rt CPU model detection for znver6
Features znver6 adds on top of znver5: AVX512-FP16, AVXVNNIINT8, AVXNECONVERT, AVXIFMA (without BMM).
Change-Id: I06958a56cacc7538a42045bd4ff2df81ae966ab1
---
clang/docs/ReleaseNotes.rst | 1 +
clang/lib/Basic/Targets/X86.cpp | 4 +
clang/test/CodeGen/target-builtin-noerror.c | 1 +
clang/test/Driver/x86-march.c | 4 +
clang/test/Frontend/x86-target-cpu.c | 1 +
clang/test/Misc/target-invalid-cpu-note/x86.c | 4 +
.../Preprocessor/predefined-arch-macros.c | 151 ++++++++++++++++++
compiler-rt/lib/builtins/cpu_model/x86.c | 19 ++-
.../llvm/TargetParser/X86TargetParser.def | 1 +
.../llvm/TargetParser/X86TargetParser.h | 1 +
llvm/lib/Target/X86/X86.td | 11 ++
llvm/lib/Target/X86/X86PfmCounters.td | 1 +
llvm/lib/Target/X86/X86ScheduleZnver4.td | 32 ++--
llvm/lib/TargetParser/Host.cpp | 6 +
llvm/lib/TargetParser/X86TargetParser.cpp | 5 +
.../CodeGen/X86/bypass-slow-division-64.ll | 1 +
llvm/test/CodeGen/X86/cmp16.ll | 1 +
llvm/test/CodeGen/X86/cpus-amd.ll | 1 +
llvm/test/CodeGen/X86/rdpru.ll | 1 +
llvm/test/CodeGen/X86/shuffle-as-shifts.ll | 1 +
llvm/test/CodeGen/X86/slow-unaligned-mem.ll | 1 +
llvm/test/CodeGen/X86/sqrt-fastmath-tune.ll | 1 +
.../X86/tuning-shuffle-permilpd-avx512.ll | 1 +
.../X86/tuning-shuffle-permilps-avx512.ll | 1 +
.../X86/tuning-shuffle-unpckpd-avx512.ll | 1 +
.../X86/tuning-shuffle-unpckps-avx512.ll | 1 +
.../X86/vector-shuffle-fast-per-lane.ll | 1 +
llvm/test/CodeGen/X86/vpdpwssd.ll | 1 +
.../CodeGen/X86/x86-64-double-shifts-var.ll | 1 +
llvm/test/MC/X86/x86_long_nop.s | 2 +
.../Transforms/LoopUnroll/X86/call-remark.ll | 1 +
.../Transforms/SLPVectorizer/X86/pr63668.ll | 1 +
32 files changed, 234 insertions(+), 26 deletions(-)
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 3a3d76112a02b..a510610741c2a 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -265,6 +265,7 @@ NVPTX Support
X86 Support
^^^^^^^^^^^
+- ``-march=znver6`` is now supported.
Arm and AArch64 Support
^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp
index f00d435937b92..881a06adf5ea5 100644
--- a/clang/lib/Basic/Targets/X86.cpp
+++ b/clang/lib/Basic/Targets/X86.cpp
@@ -716,6 +716,9 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts,
case CK_ZNVER5:
defineCPUMacros(Builder, "znver5");
break;
+ case CK_ZNVER6:
+ defineCPUMacros(Builder, "znver6");
+ break;
case CK_Geode:
defineCPUMacros(Builder, "geode");
break;
@@ -1641,6 +1644,7 @@ std::optional<unsigned> X86TargetInfo::getCPUCacheLineSize() const {
case CK_ZNVER3:
case CK_ZNVER4:
case CK_ZNVER5:
+ case CK_ZNVER6:
// Deprecated
case CK_x86_64:
case CK_x86_64_v2:
diff --git a/clang/test/CodeGen/target-builtin-noerror.c b/clang/test/CodeGen/target-builtin-noerror.c
index 47d5ae51d643a..a65a07d81b8c0 100644
--- a/clang/test/CodeGen/target-builtin-noerror.c
+++ b/clang/test/CodeGen/target-builtin-noerror.c
@@ -209,5 +209,6 @@ void verifycpustrings(void) {
(void)__builtin_cpu_is("znver3");
(void)__builtin_cpu_is("znver4");
(void)__builtin_cpu_is("znver5");
+ (void)__builtin_cpu_is("znver6");
(void)__builtin_cpu_is("diamondrapids");
}
diff --git a/clang/test/Driver/x86-march.c b/clang/test/Driver/x86-march.c
index 15f82547892c2..6a3ef5be67d8a 100644
--- a/clang/test/Driver/x86-march.c
+++ b/clang/test/Driver/x86-march.c
@@ -258,6 +258,10 @@
// RUN: %clang -target x86_64-unknown-unknown -c -### %s -march=znver5 2>&1 \
// RUN: | FileCheck %s -check-prefix=znver5
// znver5: "-target-cpu" "znver5"
+//
+// RUN: %clang -target x86_64-unknown-unknown -c -### %s -march=znver6 2>&1 \
+// RUN: | FileCheck %s -check-prefix=znver6
+// znver6: "-target-cpu" "znver6"
// RUN: %clang -target x86_64 -c -### %s -march=x86-64 2>&1 | FileCheck %s --check-prefix=x86-64
// x86-64: "-target-cpu" "x86-64"
diff --git a/clang/test/Frontend/x86-target-cpu.c b/clang/test/Frontend/x86-target-cpu.c
index f2885a040c370..7dc7f5474687e 100644
--- a/clang/test/Frontend/x86-target-cpu.c
+++ b/clang/test/Frontend/x86-target-cpu.c
@@ -39,5 +39,6 @@
// RUN: %clang_cc1 -triple x86_64-unknown-unknown -target-cpu znver3 -verify %s
// RUN: %clang_cc1 -triple x86_64-unknown-unknown -target-cpu znver4 -verify %s
// RUN: %clang_cc1 -triple x86_64-unknown-unknown -target-cpu znver5 -verify %s
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown -target-cpu znver6 -verify %s
//
// expected-no-diagnostics
diff --git a/clang/test/Misc/target-invalid-cpu-note/x86.c b/clang/test/Misc/target-invalid-cpu-note/x86.c
index 4a70e9bff3fef..766bd679796f5 100644
--- a/clang/test/Misc/target-invalid-cpu-note/x86.c
+++ b/clang/test/Misc/target-invalid-cpu-note/x86.c
@@ -103,6 +103,7 @@
// X86-SAME: {{^}}, znver3
// X86-SAME: {{^}}, znver4
// X86-SAME: {{^}}, znver5
+// X86-SAME: {{^}}, znver6
// X86-SAME: {{^}}, x86-64
// X86-SAME: {{^}}, x86-64-v2
// X86-SAME: {{^}}, x86-64-v3
@@ -183,6 +184,7 @@
// X86_64-SAME: {{^}}, znver3
// X86_64-SAME: {{^}}, znver4
// X86_64-SAME: {{^}}, znver5
+// X86_64-SAME: {{^}}, znver6
// X86_64-SAME: {{^}}, x86-64
// X86_64-SAME: {{^}}, x86-64-v2
// X86_64-SAME: {{^}}, x86-64-v3
@@ -290,6 +292,7 @@
// TUNE_X86-SAME: {{^}}, znver3
// TUNE_X86-SAME: {{^}}, znver4
// TUNE_X86-SAME: {{^}}, znver5
+// TUNE_X86-SAME: {{^}}, znver6
// TUNE_X86-SAME: {{^}}, x86-64
// TUNE_X86-SAME: {{^}}, geode
// TUNE_X86-SAME: {{$}}
@@ -395,6 +398,7 @@
// TUNE_X86_64-SAME: {{^}}, znver3
// TUNE_X86_64-SAME: {{^}}, znver4
// TUNE_X86_64-SAME: {{^}}, znver5
+// TUNE_X86_64-SAME: {{^}}, znver6
// TUNE_X86_64-SAME: {{^}}, x86-64
// TUNE_X86_64-SAME: {{^}}, geode
// TUNE_X86_64-SAME: {{$}}
diff --git a/clang/test/Preprocessor/predefined-arch-macros.c b/clang/test/Preprocessor/predefined-arch-macros.c
index 1e38b4d3ba350..be94eb064cf91 100644
--- a/clang/test/Preprocessor/predefined-arch-macros.c
+++ b/clang/test/Preprocessor/predefined-arch-macros.c
@@ -4133,6 +4133,157 @@
// CHECK_ZNVER5_M64: #define __znver5 1
// CHECK_ZNVER5_M64: #define __znver5__ 1
+// RUN: %clang -march=znver6 -m32 -E -dM %s -o - 2>&1 \
+// RUN: -target i386-unknown-linux \
+// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_ZNVER6_M32
+// CHECK_ZNVER6_M32-NOT: #define __3dNOW_A__ 1
+// CHECK_ZNVER6_M32-NOT: #define __3dNOW__ 1
+// CHECK_ZNVER6_M32: #define __ADX__ 1
+// CHECK_ZNVER6_M32: #define __AES__ 1
+// CHECK_ZNVER6_M32: #define __AVX2__ 1
+// CHECK_ZNVER6_M32: #define __AVX512BF16__ 1
+// CHECK_ZNVER6_M32: #define __AVX512BITALG__ 1
+// CHECK_ZNVER6_M32: #define __AVX512BW__ 1
+// CHECK_ZNVER6_M32: #define __AVX512CD__ 1
+// CHECK_ZNVER6_M32: #define __AVX512DQ__ 1
+// CHECK_ZNVER6_M32: #define __AVX512FP16__ 1
+// CHECK_ZNVER6_M32: #define __AVX512F__ 1
+// CHECK_ZNVER6_M32: #define __AVX512IFMA__ 1
+// CHECK_ZNVER6_M32: #define __AVX512VBMI2__ 1
+// CHECK_ZNVER6_M32: #define __AVX512VBMI__ 1
+// CHECK_ZNVER6_M32: #define __AVX512VL__ 1
+// CHECK_ZNVER6_M32: #define __AVX512VNNI__ 1
+// CHECK_ZNVER6_M32: #define __AVX512VP2INTERSECT__ 1
+// CHECK_ZNVER6_M32: #define __AVX512VPOPCNTDQ__ 1
+// CHECK_ZNVER6_M32: #define __AVXIFMA__ 1
+// CHECK_ZNVER6_M32: #define __AVXNECONVERT__ 1
+// CHECK_ZNVER6_M32: #define __AVXVNNIINT8__ 1
+// CHECK_ZNVER6_M32: #define __AVXVNNI__ 1
+// CHECK_ZNVER6_M32: #define __AVX__ 1
+// CHECK_ZNVER6_M32: #define __BMI2__ 1
+// CHECK_ZNVER6_M32: #define __BMI__ 1
+// CHECK_ZNVER6_M32: #define __CLFLUSHOPT__ 1
+// CHECK_ZNVER6_M32: #define __CLWB__ 1
+// CHECK_ZNVER6_M32: #define __CLZERO__ 1
+// CHECK_ZNVER6_M32: #define __F16C__ 1
+// CHECK_ZNVER6_M32-NOT: #define __FMA4__ 1
+// CHECK_ZNVER6_M32: #define __FMA__ 1
+// CHECK_ZNVER6_M32: #define __FSGSBASE__ 1
+// CHECK_ZNVER6_M32: #define __GFNI__ 1
+// CHECK_ZNVER6_M32: #define __LZCNT__ 1
+// CHECK_ZNVER6_M32: #define __MMX__ 1
+// CHECK_ZNVER6_M32: #define __MOVDIR64B__ 1
+// CHECK_ZNVER6_M32: #define __MOVDIRI__ 1
+// CHECK_ZNVER6_M32: #define __PCLMUL__ 1
+// CHECK_ZNVER6_M32: #define __PKU__ 1
+// CHECK_ZNVER6_M32: #define __POPCNT__ 1
+// CHECK_ZNVER6_M32: #define __PREFETCHI__ 1
+// CHECK_ZNVER6_M32: #define __PRFCHW__ 1
+// CHECK_ZNVER6_M32: #define __RDPID__ 1
+// CHECK_ZNVER6_M32: #define __RDPRU__ 1
+// CHECK_ZNVER6_M32: #define __RDRND__ 1
+// CHECK_ZNVER6_M32: #define __RDSEED__ 1
+// CHECK_ZNVER6_M32: #define __SHA__ 1
+// CHECK_ZNVER6_M32: #define __SSE2_MATH__ 1
+// CHECK_ZNVER6_M32: #define __SSE2__ 1
+// CHECK_ZNVER6_M32: #define __SSE3__ 1
+// CHECK_ZNVER6_M32: #define __SSE4A__ 1
+// CHECK_ZNVER6_M32: #define __SSE4_1__ 1
+// CHECK_ZNVER6_M32: #define __SSE4_2__ 1
+// CHECK_ZNVER6_M32: #define __SSE_MATH__ 1
+// CHECK_ZNVER6_M32: #define __SSE__ 1
+// CHECK_ZNVER6_M32: #define __SSSE3__ 1
+// CHECK_ZNVER6_M32-NOT: #define __TBM__ 1
+// CHECK_ZNVER6_M32: #define __WBNOINVD__ 1
+// CHECK_ZNVER6_M32-NOT: #define __XOP__ 1
+// CHECK_ZNVER6_M32: #define __XSAVEC__ 1
+// CHECK_ZNVER6_M32: #define __XSAVEOPT__ 1
+// CHECK_ZNVER6_M32: #define __XSAVES__ 1
+// CHECK_ZNVER6_M32: #define __XSAVE__ 1
+// CHECK_ZNVER6_M32: #define __i386 1
+// CHECK_ZNVER6_M32: #define __i386__ 1
+// CHECK_ZNVER6_M32: #define __tune_znver6__ 1
+// CHECK_ZNVER6_M32: #define __znver6 1
+// CHECK_ZNVER6_M32: #define __znver6__ 1
+
+// RUN: %clang -march=znver6 -m64 -E -dM %s -o - 2>&1 \
+// RUN: -target i386-unknown-linux \
+// RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_ZNVER6_M64
+// CHECK_ZNVER6_M64-NOT: #define __3dNOW_A__ 1
+// CHECK_ZNVER6_M64-NOT: #define __3dNOW__ 1
+// CHECK_ZNVER6_M64: #define __ADX__ 1
+// CHECK_ZNVER6_M64: #define __AES__ 1
+// CHECK_ZNVER6_M64: #define __AVX2__ 1
+// CHECK_ZNVER6_M64: #define __AVX512BF16__ 1
+// CHECK_ZNVER6_M64: #define __AVX512BITALG__ 1
+// CHECK_ZNVER6_M64: #define __AVX512BW__ 1
+// CHECK_ZNVER6_M64: #define __AVX512CD__ 1
+// CHECK_ZNVER6_M64: #define __AVX512DQ__ 1
+// CHECK_ZNVER6_M64: #define __AVX512FP16__ 1
+// CHECK_ZNVER6_M64: #define __AVX512F__ 1
+// CHECK_ZNVER6_M64: #define __AVX512IFMA__ 1
+// CHECK_ZNVER6_M64: #define __AVX512VBMI2__ 1
+// CHECK_ZNVER6_M64: #define __AVX512VBMI__ 1
+// CHECK_ZNVER6_M64: #define __AVX512VL__ 1
+// CHECK_ZNVER6_M64: #define __AVX512VNNI__ 1
+// CHECK_ZNVER6_M64: #define __AVX512VP2INTERSECT__ 1
+// CHECK_ZNVER6_M64: #define __AVX512VPOPCNTDQ__ 1
+// CHECK_ZNVER6_M64: #define __AVXIFMA__ 1
+// CHECK_ZNVER6_M64: #define __AVXNECONVERT__ 1
+// CHECK_ZNVER6_M64: #define __AVXVNNIINT8__ 1
+// CHECK_ZNVER6_M64: #define __AVXVNNI__ 1
+// CHECK_ZNVER6_M64: #define __AVX__ 1
+// CHECK_ZNVER6_M64: #define __BMI2__ 1
+// CHECK_ZNVER6_M64: #define __BMI__ 1
+// CHECK_ZNVER6_M64: #define __CLFLUSHOPT__ 1
+// CHECK_ZNVER6_M64: #define __CLWB__ 1
+// CHECK_ZNVER6_M64: #define __CLZERO__ 1
+// CHECK_ZNVER6_M64: #define __F16C__ 1
+// CHECK_ZNVER6_M64-NOT: #define __FMA4__ 1
+// CHECK_ZNVER6_M64: #define __FMA__ 1
+// CHECK_ZNVER6_M64: #define __FSGSBASE__ 1
+// CHECK_ZNVER6_M64: #define __GFNI__ 1
+// CHECK_ZNVER6_M64: #define __LZCNT__ 1
+// CHECK_ZNVER6_M64: #define __MMX__ 1
+// CHECK_ZNVER6_M64: #define __MOVDIR64B__ 1
+// CHECK_ZNVER6_M64: #define __MOVDIRI__ 1
+// CHECK_ZNVER6_M64: #define __PCLMUL__ 1
+// CHECK_ZNVER6_M64: #define __PKU__ 1
+// CHECK_ZNVER6_M64: #define __POPCNT__ 1
+// CHECK_ZNVER6_M64: #define __PREFETCHI__ 1
+// CHECK_ZNVER6_M64: #define __PRFCHW__ 1
+// CHECK_ZNVER6_M64: #define __RDPID__ 1
+// CHECK_ZNVER6_M64: #define __RDPRU__ 1
+// CHECK_ZNVER6_M64: #define __RDRND__ 1
+// CHECK_ZNVER6_M64: #define __RDSEED__ 1
+// CHECK_ZNVER6_M64: #define __SHA__ 1
+// CHECK_ZNVER6_M64: #define __SSE2_MATH__ 1
+// CHECK_ZNVER6_M64: #define __SSE2__ 1
+// CHECK_ZNVER6_M64: #define __SSE3__ 1
+// CHECK_ZNVER6_M64: #define __SSE4A__ 1
+// CHECK_ZNVER6_M64: #define __SSE4_1__ 1
+// CHECK_ZNVER6_M64: #define __SSE4_2__ 1
+// CHECK_ZNVER6_M64: #define __SSE_MATH__ 1
+// CHECK_ZNVER6_M64: #define __SSE__ 1
+// CHECK_ZNVER6_M64: #define __SSSE3__ 1
+// CHECK_ZNVER6_M64-NOT: #define __TBM__ 1
+// CHECK_ZNVER6_M64: #define __VAES__ 1
+// CHECK_ZNVER6_M64: #define __VPCLMULQDQ__ 1
+// CHECK_ZNVER6_M64: #define __WBNOINVD__ 1
+// CHECK_ZNVER6_M64-NOT: #define __XOP__ 1
+// CHECK_ZNVER6_M64: #define __XSAVEC__ 1
+// CHECK_ZNVER6_M64: #define __XSAVEOPT__ 1
+// CHECK_ZNVER6_M64: #define __XSAVES__ 1
+// CHECK_ZNVER6_M64: #define __XSAVE__ 1
+// CHECK_ZNVER6_M64: #define __amd64 1
+// CHECK_ZNVER6_M64: #define __amd64__ 1
+// CHECK_ZNVER6_M64: #define __tune_znver6__ 1
+// CHECK_ZNVER6_M64: #define __x86_64 1
+// CHECK_ZNVER6_M64: #define __x86_64__ 1
+// CHECK_ZNVER6_M64: #define __znver6 1
+// CHECK_ZNVER6_M64: #define __znver6__ 1
+
+
// End X86/GCC/Linux tests ------------------
// Begin PPC/GCC/Linux tests ----------------
diff --git a/compiler-rt/lib/builtins/cpu_model/x86.c b/compiler-rt/lib/builtins/cpu_model/x86.c
index 55eb2b0958450..f4b95fa0b5a07 100644
--- a/compiler-rt/lib/builtins/cpu_model/x86.c
+++ b/compiler-rt/lib/builtins/cpu_model/x86.c
@@ -105,6 +105,7 @@ enum ProcessorSubtypes {
INTEL_COREI7_ARROWLAKE_S,
INTEL_COREI7_PANTHERLAKE,
AMDFAM1AH_ZNVER5,
+ AMDFAM1AH_ZNVER6,
INTEL_COREI7_DIAMONDRAPIDS,
INTEL_COREI7_NOVALAKE,
CPU_SUBTYPE_MAX
@@ -837,20 +838,18 @@ getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model,
case 26:
CPU = "znver5";
Type = AMDFAM1AH;
- if (Model <= 0x77) {
- // Models 00h-0Fh (Breithorn).
- // Models 10h-1Fh (Breithorn-Dense).
- // Models 20h-2Fh (Strix 1).
- // Models 30h-37h (Strix 2).
- // Models 38h-3Fh (Strix 3).
- // Models 40h-4Fh (Granite Ridge).
- // Models 50h-5Fh (Weisshorn).
- // Models 60h-6Fh (Krackan1).
- // Models 70h-77h (Sarlak).
+ if (Model <= 0x4f || (Model >= 0x60 && Model <= 0x77) ||
+ (Model >= 0xd0 && Model <= 0xd7)) {
CPU = "znver5";
Subtype = AMDFAM1AH_ZNVER5;
break; // "znver5"
}
+ if ((Model >= 0x50 && Model <= 0x5f) || (Model >= 0x80 && Model <= 0xcf) ||
+ (Model >= 0xd8 && Model <= 0xe7)) {
+ CPU = "znver6";
+ Subtype = AMDFAM1AH_ZNVER6;
+ break; // "znver6"
+ }
break;
default:
break; // Unknown AMD CPU.
diff --git a/llvm/include/llvm/TargetParser/X86TargetParser.def b/llvm/include/llvm/TargetParser/X86TargetParser.def
index 09592bcea27f4..bc05452400458 100644
--- a/llvm/include/llvm/TargetParser/X86TargetParser.def
+++ b/llvm/include/llvm/TargetParser/X86TargetParser.def
@@ -107,6 +107,7 @@ X86_CPU_SUBTYPE(INTEL_COREI7_ARROWLAKE, "arrowlake")
X86_CPU_SUBTYPE(INTEL_COREI7_ARROWLAKE_S, "arrowlake-s")
X86_CPU_SUBTYPE(INTEL_COREI7_PANTHERLAKE, "pantherlake")
X86_CPU_SUBTYPE(AMDFAM1AH_ZNVER5, "znver5")
+X86_CPU_SUBTYPE(AMDFAM1AH_ZNVER6, "znver6")
X86_CPU_SUBTYPE(INTEL_COREI7_DIAMONDRAPIDS, "diamondrapids")
X86_CPU_SUBTYPE(INTEL_COREI7_NOVALAKE, "novalake")
diff --git a/llvm/include/llvm/TargetParser/X86TargetParser.h b/llvm/include/llvm/TargetParser/X86TargetParser.h
index 46061f9d1fc7d..31d13ce29f7fc 100644
--- a/llvm/include/llvm/TargetParser/X86TargetParser.h
+++ b/llvm/include/llvm/TargetParser/X86TargetParser.h
@@ -146,6 +146,7 @@ enum CPUKind {
CK_ZNVER3,
CK_ZNVER4,
CK_ZNVER5,
+ CK_ZNVER6,
CK_x86_64,
CK_x86_64_v2,
CK_x86_64_v3,
diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td
index 1b9a6ee2b4ef4..fa41d7a7a7c5e 100644
--- a/llvm/lib/Target/X86/X86.td
+++ b/llvm/lib/Target/X86/X86.td
@@ -1631,6 +1631,15 @@ def ProcessorFeatures {
];
list<SubtargetFeature> ZN5Features =
!listconcat(ZN4Features, ZN5AdditionalFeatures);
+
+ list<SubtargetFeature> ZN6Tuning = ZN5Tuning;
+ list<SubtargetFeature> ZN6AdditionalFeatures = [FeatureFP16,
+ FeatureAVXVNNIINT8,
+ FeatureAVXNECONVERT,
+ FeatureAVXIFMA
+ ];
+ list<SubtargetFeature> ZN6Features =
+ !listconcat(ZN5Features, ZN6AdditionalFeatures);
}
//===----------------------------------------------------------------------===//
@@ -1993,6 +2002,8 @@ def : ProcModel<"znver4", Znver4Model, ProcessorFeatures.ZN4Features,
ProcessorFeatures.ZN4Tuning>;
def : ProcModel<"znver5", Znver4Model, ProcessorFeatures.ZN5Features,
ProcessorFeatures.ZN5Tuning>;
+def : ProcModel<"znver6", Znver4Model, ProcessorFeatures.ZN6Features,
+ ProcessorFeatures.ZN6Tuning>;
def : Proc<"geode", [FeatureX87, FeatureCX8, FeatureMMX, FeaturePRFCHW],
[TuningSlowUAMem16, TuningInsertVZEROUPPER]>;
diff --git a/llvm/lib/Target/X86/X86PfmCounters.td b/llvm/lib/Target/X86/X86PfmCounters.td
index b299633446410..9d2a1ce01c273 100644
--- a/llvm/lib/Target/X86/X86PfmCounters.td
+++ b/llvm/lib/Target/X86/X86PfmCounters.td
@@ -387,3 +387,4 @@ def ZnVer4PfmCounters : ProcPfmCounters {
}
def : PfmCountersBinding<"znver4", ZnVer4PfmCounters>;
def : PfmCountersBinding<"znver5", ZnVer4PfmCounters>;
+def : PfmCountersBinding<"znver6", ZnVer4PfmCounters>;
diff --git a/llvm/lib/Target/X86/X86ScheduleZnver4.td b/llvm/lib/Target/X86/X86ScheduleZnver4.td
index ac4d31de8dbfe..bffcf35d6b65d 100644
--- a/llvm/lib/Target/X86/X86ScheduleZnver4.td
+++ b/llvm/lib/Target/X86/X86ScheduleZnver4.td
@@ -9,7 +9,7 @@
// This file defines the machine model for Znver4 to support instruction
// scheduling and other instruction cost heuristics.
// Based on:
-// * AMD Software Optimization Guide for the AMD Family 19h (Zen4)
+// * AMD Software Optimization Guide for the AMD Family 19h (Zen4)
// Microarchitecture
// https://www.amd.com/system/files/TechDocs/57647.zip
//===----------------------------------------------------------------------===//
@@ -1550,7 +1550,7 @@ def Zn4WriteVFIXUPIMMPDZrr_VRANGESDrr : SchedWriteRes<[Zn4FPFMisc01]> {
let NumMicroOps = 1;
}
def : InstRW<[Zn4WriteVFIXUPIMMPDZrr_VRANGESDrr], (instregex
- "VFIXUPIMM(S|P)(S|D)(Z|Z128|Z256?)rrik", "VFIXUPIMM(S|P)(S|D)(Z?|Z128?|Z256?)rrikz",
+ "VFIXUPIMM(S|P)(S|D)(Z|Z128|Z256?)rrik", "VFIXUPIMM(S|P)(S|D)(Z?|Z128?|Z256?)rrikz",
"VFIXUPIMM(S|P)(S|D)(Z128|Z256?)rri", "VRANGE(S|P)(S|D)(Z?|Z128?|Z256?)rri(b?)",
"VRANGE(S|P)(S|D)(Z|Z128|Z256?)rri(b?)k","VRANGE(S|P)(S|D)(Z?|Z128?|Z256?)rri(b?)kz"
)>;
@@ -1824,20 +1824,20 @@ def Zn4VecALUZSlow: SchedWriteRes<[Zn4FPFMisc01]> {
let ReleaseAtCycles = [2];
let NumMicroOps = 1;
}
-def : InstRW<[Zn4VecALUZSlow], (instrs
- VPABSBZ128rr, VPABSBZ128rrk, VPABSBZ128rrkz, VPABSDZ128rr,
- VPABSDZ128rrk, VPABSDZ128rrkz, VPABSQZ128rr, VPABSQZ128rrk,
- VPABSQZ128rrkz, VPABSWZ128rr, VPABSWZ128rrk, VPABSWZ128rrkz,
- VPADDSBZ128rr, VPADDSBZ128rrk, VPADDSBZ128rrkz, VPADDSWZ128rr,
- VPADDSWZ128rrk, VPADDSWZ128rrkz,VPADDUSBZ128rr, VPADDUSBZ128rrk,
- VPADDUSBZ128rrkz, VPADDUSWZ128rr, VPADDUSWZ128rrk, VPADDUSWZ128rrkz,
- VPAVGBZ128rr, VPAVGBZ128rrk, VPAVGBZ128rrkz, VPAVGWZ128rr,
- VPAVGWZ128rrk, VPAVGWZ128rrkz, VPOPCNTBZ128rr, VPOPCNTBZ128rrk,
- VPOPCNTBZ128rrkz, VPOPCNTDZ128rr, VPOPCNTDZ128rrk, VPOPCNTDZ128rrkz,
- VPOPCNTQZ128rr, VPOPCNTQZ128rrk,VPOPCNTQZ128rrkz, VPOPCNTWZ128rr,
- VPOPCNTWZ128rrk, VPOPCNTWZ128rrkz,VPSUBSBZ128rr, VPSUBSBZ128rrk,
- VPSUBSBZ128rrkz, VPSUBSWZ128rr, VPSUBSWZ128rrk, VPSUBSWZ128rrkz,
- VPSUBUSBZ128rr, VPSUBUSBZ128rrk, VPSUBUSBZ128rrkz,VPSUBUSWZ128rr,
+def : InstRW<[Zn4VecALUZSlow], (instrs
+ VPABSBZ128rr, VPABSBZ128rrk, VPABSBZ128rrkz, VPABSDZ128rr,
+ VPABSDZ128rrk, VPABSDZ128rrkz, VPABSQZ128rr, VPABSQZ128rrk,
+ VPABSQZ128rrkz, VPABSWZ128rr, VPABSWZ128rrk, VPABSWZ128rrkz,
+ VPADDSBZ128rr, VPADDSBZ128rrk, VPADDSBZ128rrkz, VPADDSWZ128rr,
+ VPADDSWZ128rrk, VPADDSWZ128rrkz,VPADDUSBZ128rr, VPADDUSBZ128rrk,
+ VPADDUSBZ128rrkz, VPADDUSWZ128rr, VPADDUSWZ128rrk, VPADDUSWZ128rrkz,
+ VPAVGBZ128rr, VPAVGBZ128rrk, VPAVGBZ128rrkz, VPAVGWZ128rr,
+ VPAVGWZ128rrk, VPAVGWZ128rrkz, VPOPCNTBZ128rr, VPOPCNTBZ128rrk,
+ VPOPCNTBZ128rrkz, VPOPCNTDZ128rr, VPOPCNTDZ128rrk, VPOPCNTDZ128rrkz,
+ VPOPCNTQZ128rr, VPOPCNTQZ128rrk,VPOPCNTQZ128rrkz, VPOPCNTWZ128rr,
+ VPOPCNTWZ128rrk, VPOPCNTWZ128rrkz,VPSUBSBZ128rr, VPSUBSBZ128rrk,
+ VPSUBSBZ128rrkz, VPSUBSWZ128rr, VPSUBSWZ128rrk, VPSUBSWZ128rrkz,
+ VPSUBUSBZ128rr, VPSUBUSBZ128rrk, VPSUBUSBZ128rrkz,VPSUBUSWZ128rr,
VPSUBUSWZ128rrk, VPSUBUSWZ128rrkz
)>;
diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp
index 5d4fa2c88153c..362c151ca5bad 100644
--- a/llvm/lib/TargetParser/Host.cpp
+++ b/llvm/lib/TargetParser/Host.cpp
@@ -1340,6 +1340,12 @@ static const char *getAMDProcessorTypeAndSubtype(unsigned Family,
*Subtype = X86::AMDFAM1AH_ZNVER5;
break; // "znver5"
}
+ if ((Model >= 0x50 && Model <= 0x5f) || (Model >= 0x80 && Model <= 0xcf) ||
+ (Model >= 0xd8 && Model <= 0xe7)) {
+ CPU = "znver6";
+ *Subtype = X86::AMDFAM1AH_ZNVER6;
+ break; // "znver6"
+ }
break;
default:
diff --git a/llvm/lib/TargetParser/X86TargetParser.cpp b/llvm/lib/TargetParser/X86TargetParser.cpp
index 2810849e4af9e..f848b1ac08607 100644
--- a/llvm/lib/TargetParser/X86TargetParser.cpp
+++ b/llvm/lib/TargetParser/X86TargetParser.cpp
@@ -255,6 +255,10 @@ static constexpr FeatureBitset FeaturesZNVER5 =
FeaturesZNVER4 | FeatureAVXVNNI | FeatureMOVDIRI | FeatureMOVDIR64B |
FeatureAVX512VP2INTERSECT | FeaturePREFETCHI | FeatureAVXVNNI;
+static constexpr FeatureBitset FeaturesZNVER6 =
+ FeaturesZNVER5 | FeatureAVXVNNIINT8 | FeatureAVX512FP16 | FeatureAVXIFMA |
+ FeatureAVXNECONVERT;
+
// D151696 tranplanted Mangling and OnlyForCPUDispatchSpecific from
// X86TargetParser.def to here. They are assigned by following ways:
// 1. Copy the mangling from the original CPU_SPEICIFC MACROs. If no, assign
@@ -440,6 +444,7 @@ constexpr ProcInfo Processors[] = {
{ {"znver3"}, CK_ZNVER3, FEATURE_AVX2, FeaturesZNVER3, '\0', false },
{ {"znver4"}, CK_ZNVER4, FEATURE_AVX512VBMI2, FeaturesZNVER4, '\0', false },
{ {"znver5"}, CK_ZNVER5, FEATURE_AVX512VP2INTERSECT, FeaturesZNVER5, '\0', false },
+ { {"znver6"}, CK_ZNVER6, FEATURE_AVX512FP16, FeaturesZNVER6, '\0', false },
// Generic 64-bit processor.
{ {"x86-64"}, CK_x86_64, FEATURE_SSE2 , FeaturesX86_64, '\0', false },
{ {"x86-64-v2"}, CK_x86_64_v2, FEATURE_SSE4_2 , FeaturesX86_64_V2, '\0', false },
diff --git a/llvm/test/CodeGen/X86/bypass-slow-division-64.ll b/llvm/test/CodeGen/X86/bypass-slow-division-64.ll
index b0ca0069a526b..821b7b8e4144f 100644
--- a/llvm/test/CodeGen/X86/bypass-slow-division-64.ll
+++ b/llvm/test/CodeGen/X86/bypass-slow-division-64.ll
@@ -24,6 +24,7 @@
; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver3 | FileCheck %s --check-prefixes=CHECK,SLOW-DIVQ
; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver4 | FileCheck %s --check-prefixes=CHECK,SLOW-DIVQ
; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver5 | FileCheck %s --check-prefixes=CHECK,SLOW-DIVQ
+; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver6 | FileCheck %s --check-prefixes=CHECK,SLOW-DIVQ
; Additional tests for 64-bit divide bypass
diff --git a/llvm/test/CodeGen/X86/cmp16.ll b/llvm/test/CodeGen/X86/cmp16.ll
index 8c14a78d9e113..ff6ee68074088 100644
--- a/llvm/test/CodeGen/X86/cmp16.ll
+++ b/llvm/test/CodeGen/X86/cmp16.ll
@@ -14,6 +14,7 @@
; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver3 | FileCheck %s --check-prefixes=X64,X64-FAST
; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver4 | FileCheck %s --check-prefixes=X64,X64-FAST
; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver5 | FileCheck %s --check-prefixes=X64,X64-FAST
+; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver6 | FileCheck %s --check-prefixes=X64,X64-FAST
define i1 @cmp16_reg_eq_reg(i16 %a0, i16 %a1) {
; X86-GENERIC-LABEL: cmp16_reg_eq_reg:
diff --git a/llvm/test/CodeGen/X86/cpus-amd.ll b/llvm/test/CodeGen/X86/cpus-amd.ll
index 33b2cf3731478..33cbc71b41ecd 100644
--- a/llvm/test/CodeGen/X86/cpus-amd.ll
+++ b/llvm/test/CodeGen/X86/cpus-amd.ll
@@ -30,6 +30,7 @@
; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=znver3 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=znver4 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=znver5 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
+; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=znver6 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
define void @foo() {
ret void
diff --git a/llvm/test/CodeGen/X86/rdpru.ll b/llvm/test/CodeGen/X86/rdpru.ll
index be79a4499a338..067ae31142c39 100644
--- a/llvm/test/CodeGen/X86/rdpru.ll
+++ b/llvm/test/CodeGen/X86/rdpru.ll
@@ -7,6 +7,7 @@
; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver3 -fast-isel | FileCheck %s --check-prefix=X64
; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver4 -fast-isel | FileCheck %s --check-prefix=X64
; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver5 -fast-isel | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver6 -fast-isel | FileCheck %s --check-prefix=X64
define void @rdpru_asm() {
; X86-LABEL: rdpru_asm:
diff --git a/llvm/test/CodeGen/X86/shuffle-as-shifts.ll b/llvm/test/CodeGen/X86/shuffle-as-shifts.ll
index 4b8f78d36c3f5..021f8d6fb971d 100644
--- a/llvm/test/CodeGen/X86/shuffle-as-shifts.ll
+++ b/llvm/test/CodeGen/X86/shuffle-as-shifts.ll
@@ -4,6 +4,7 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=CHECK,CHECK-V4
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=znver4 | FileCheck %s --check-prefixes=CHECK,CHECK-ZNVER4
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=znver5 | FileCheck %s --check-prefixes=CHECK,CHECK-ZNVER4
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=znver6 | FileCheck %s --check-prefixes=CHECK,CHECK-ZNVER4
define <4 x i32> @shuf_rot_v4i32_1032(<4 x i32> %x) {
diff --git a/llvm/test/CodeGen/X86/slow-unaligned-mem.ll b/llvm/test/CodeGen/X86/slow-unaligned-mem.ll
index ceef3fb4bb188..a215b60055dd5 100644
--- a/llvm/test/CodeGen/X86/slow-unaligned-mem.ll
+++ b/llvm/test/CodeGen/X86/slow-unaligned-mem.ll
@@ -51,6 +51,7 @@
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=znver3 2>&1 | FileCheck %s --check-prefixes=FAST,FAST-AVX256
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=znver4 2>&1 | FileCheck %s --check-prefixes=FAST,FAST-AVX512
; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=znver5 2>&1 | FileCheck %s --check-prefixes=FAST,FAST-AVX512
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=znver6 2>&1 | FileCheck %s --check-prefixes=FAST,FAST-AVX512
; Other chips with slow unaligned memory accesses
diff --git a/llvm/test/CodeGen/X86/sqrt-fastmath-tune.ll b/llvm/test/CodeGen/X86/sqrt-fastmath-tune.ll
index 74b51ac21dc1f..9d2708674c3ff 100644
--- a/llvm/test/CodeGen/X86/sqrt-fastmath-tune.ll
+++ b/llvm/test/CodeGen/X86/sqrt-fastmath-tune.ll
@@ -7,6 +7,7 @@
; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver3 | FileCheck %s --check-prefixes=FAST-SCALAR,FAST-VECTOR
; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver4 | FileCheck %s --check-prefixes=FAST-SCALAR,FAST-VECTOR
; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver5 | FileCheck %s --check-prefixes=FAST-SCALAR,FAST-VECTOR
+; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver6 | FileCheck %s --check-prefixes=FAST-SCALAR,FAST-VECTOR
; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64 | FileCheck %s --check-prefixes=X86-64
define float @f32_no_daz(float %f) #0 {
diff --git a/llvm/test/CodeGen/X86/tuning-shuffle-permilpd-avx512.ll b/llvm/test/CodeGen/X86/tuning-shuffle-permilpd-avx512.ll
index 162ab71fc00d4..e2c8b6df6e744 100644
--- a/llvm/test/CodeGen/X86/tuning-shuffle-permilpd-avx512.ll
+++ b/llvm/test/CodeGen/X86/tuning-shuffle-permilpd-avx512.ll
@@ -5,6 +5,7 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512bw,+avx512dq | FileCheck %s --check-prefixes=CHECK,CHECK-AVX512
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=znver4 | FileCheck %s --check-prefixes=CHECK,CHECK-ZNVER4
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=znver5 | FileCheck %s --check-prefixes=CHECK,CHECK-ZNVER4
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=znver6 | FileCheck %s --check-prefixes=CHECK,CHECK-ZNVER4
define <8 x double> @transform_VPERMILPSZrr(<8 x double> %a) nounwind {
; CHECK-LABEL: transform_VPERMILPSZrr:
diff --git a/llvm/test/CodeGen/X86/tuning-shuffle-permilps-avx512.ll b/llvm/test/CodeGen/X86/tuning-shuffle-permilps-avx512.ll
index cd97946da248f..53bad74552f8a 100644
--- a/llvm/test/CodeGen/X86/tuning-shuffle-permilps-avx512.ll
+++ b/llvm/test/CodeGen/X86/tuning-shuffle-permilps-avx512.ll
@@ -5,6 +5,7 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512bw,+avx512dq | FileCheck %s --check-prefixes=CHECK,CHECK-AVX512
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=znver4 | FileCheck %s --check-prefixes=CHECK,CHECK-ZNVER4
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=znver5 | FileCheck %s --check-prefixes=CHECK,CHECK-ZNVER4
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=znver6 | FileCheck %s --check-prefixes=CHECK,CHECK-ZNVER4
define <16 x float> @transform_VPERMILPSZrr(<16 x float> %a) nounwind {
; CHECK-LABEL: transform_VPERMILPSZrr:
diff --git a/llvm/test/CodeGen/X86/tuning-shuffle-unpckpd-avx512.ll b/llvm/test/CodeGen/X86/tuning-shuffle-unpckpd-avx512.ll
index 5ea991f85523e..39a072eeeea4c 100644
--- a/llvm/test/CodeGen/X86/tuning-shuffle-unpckpd-avx512.ll
+++ b/llvm/test/CodeGen/X86/tuning-shuffle-unpckpd-avx512.ll
@@ -6,6 +6,7 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512bw,+avx512dq | FileCheck %s --check-prefixes=CHECK,CHECK-AVX512
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=znver4 | FileCheck %s --check-prefixes=CHECK,CHECK-ZNVER4
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=znver5 | FileCheck %s --check-prefixes=CHECK,CHECK-ZNVER4
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=znver6 | FileCheck %s --check-prefixes=CHECK,CHECK-ZNVER4
define <16 x float> @transform_VUNPCKLPDZrr(<16 x float> %a, <16 x float> %b) nounwind {
diff --git a/llvm/test/CodeGen/X86/tuning-shuffle-unpckps-avx512.ll b/llvm/test/CodeGen/X86/tuning-shuffle-unpckps-avx512.ll
index 96155f0300d2d..f8b9dac4c7ba8 100644
--- a/llvm/test/CodeGen/X86/tuning-shuffle-unpckps-avx512.ll
+++ b/llvm/test/CodeGen/X86/tuning-shuffle-unpckps-avx512.ll
@@ -6,6 +6,7 @@
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512bw,+avx512dq | FileCheck %s --check-prefixes=CHECK,CHECK-AVX512
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=znver4 | FileCheck %s --check-prefixes=CHECK,CHECK-ZNVER4
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=znver5 | FileCheck %s --check-prefixes=CHECK,CHECK-ZNVER4
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=znver6 | FileCheck %s --check-prefixes=CHECK,CHECK-ZNVER4
define <16 x float> @transform_VUNPCKLPSZrr(<16 x float> %a, <16 x float> %b) nounwind {
; CHECK-LABEL: transform_VUNPCKLPSZrr:
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-fast-per-lane.ll b/llvm/test/CodeGen/X86/vector-shuffle-fast-per-lane.ll
index 4021b1bf292bb..5bf936c6e5cec 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-fast-per-lane.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-fast-per-lane.ll
@@ -9,6 +9,7 @@
; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=znver3 | FileCheck %s --check-prefixes=FAST
; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=znver4 | FileCheck %s --check-prefixes=FAST
; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=znver5 | FileCheck %s --check-prefixes=FAST
+; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=znver6 | FileCheck %s --check-prefixes=FAST
; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=haswell | FileCheck %s --check-prefixes=FAST
; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=skx | FileCheck %s --check-prefixes=FAST
diff --git a/llvm/test/CodeGen/X86/vpdpwssd.ll b/llvm/test/CodeGen/X86/vpdpwssd.ll
index 2ac2b48af4ce7..ea97800505bc2 100644
--- a/llvm/test/CodeGen/X86/vpdpwssd.ll
+++ b/llvm/test/CodeGen/X86/vpdpwssd.ll
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=znver4 | FileCheck %s --check-prefixes=CHECK,AVX512VL-VNNI
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=znver5 | FileCheck %s --check-prefixes=CHECK,AVX-VNNI
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=znver6 | FileCheck %s --check-prefixes=CHECK,AVX-VNNI
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vnni,+fast-dpwssd | FileCheck %s --check-prefixes=CHECK,AVX512-VNNI
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vnni,+avx512vl,+fast-dpwssd | FileCheck %s --check-prefixes=CHECK,AVX512VL-VNNI
diff --git a/llvm/test/CodeGen/X86/x86-64-double-shifts-var.ll b/llvm/test/CodeGen/X86/x86-64-double-shifts-var.ll
index c5e879c0135f4..bb1a4e5fcb75b 100644
--- a/llvm/test/CodeGen/X86/x86-64-double-shifts-var.ll
+++ b/llvm/test/CodeGen/X86/x86-64-double-shifts-var.ll
@@ -18,6 +18,7 @@
; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver3 | FileCheck %s --check-prefixes=BMI2-FAST
; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver4 | FileCheck %s --check-prefixes=BMI2-FAST
; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver5 | FileCheck %s --check-prefixes=BMI2-FAST
+; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver6 | FileCheck %s --check-prefixes=BMI2-FAST
; Verify that for the X86_64 processors that are known to have poor latency
; double precision shift instructions we do not generate 'shld' or 'shrd'
diff --git a/llvm/test/MC/X86/x86_long_nop.s b/llvm/test/MC/X86/x86_long_nop.s
index b79403bb5f1ec..2c5fe3acde26c 100644
--- a/llvm/test/MC/X86/x86_long_nop.s
+++ b/llvm/test/MC/X86/x86_long_nop.s
@@ -21,6 +21,8 @@
# RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu %s -mcpu=znver4 | llvm-objdump -d --no-show-raw-insn - | FileCheck %s --check-prefix=LNOP15
# RUN: llvm-mc -filetype=obj -arch=x86 -triple=x86_64-pc-linux-gnu -mcpu=znver5 %s | llvm-objdump -d --no-show-raw-insn - | FileCheck %s --check-prefix=LNOP15
# RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu %s -mcpu=znver5 | llvm-objdump -d --no-show-raw-insn - | FileCheck %s --check-prefix=LNOP15
+# RUN: llvm-mc -filetype=obj -arch=x86 -triple=x86_64-pc-linux-gnu -mcpu=znver6 %s | llvm-objdump -d --no-show-raw-insn - | FileCheck %s --check-prefix=LNOP15
+# RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu %s -mcpu=znver6 | llvm-objdump -d --no-show-raw-insn - | FileCheck %s --check-prefix=LNOP15
# RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu -mcpu=nehalem %s | llvm-objdump -d --no-show-raw-insn - | FileCheck --check-prefix=LNOP10 %s
# RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu -mcpu=westmere %s | llvm-objdump -d --no-show-raw-insn - | FileCheck --check-prefix=LNOP10 %s
# RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu -mcpu=sandybridge %s | llvm-objdump -d --no-show-raw-insn - | FileCheck --check-prefix=LNOP15 %s
diff --git a/llvm/test/Transforms/LoopUnroll/X86/call-remark.ll b/llvm/test/Transforms/LoopUnroll/X86/call-remark.ll
index b0f4385b7913d..f9768141e5d9c 100644
--- a/llvm/test/Transforms/LoopUnroll/X86/call-remark.ll
+++ b/llvm/test/Transforms/LoopUnroll/X86/call-remark.ll
@@ -2,6 +2,7 @@
; RUN: opt -passes=debugify,loop-unroll -mcpu=znver3 -pass-remarks=TTI -pass-remarks-analysis=TTI < %s -S 2>&1 | FileCheck --check-prefixes=ALL,TTI %s
; RUN: opt -passes=debugify,loop-unroll -mcpu=znver4 -pass-remarks=loop-unroll -pass-remarks-analysis=loop-unroll < %s -S 2>&1 | FileCheck --check-prefixes=ALL,UNROLL %s
; RUN: opt -passes=debugify,loop-unroll -mcpu=znver5 -pass-remarks=loop-unroll -pass-remarks-analysis=loop-unroll < %s -S 2>&1 | FileCheck --check-prefixes=ALL,UNROLL %s
+; RUN: opt -passes=debugify,loop-unroll -mcpu=znver6 -pass-remarks=loop-unroll -pass-remarks-analysis=loop-unroll < %s -S 2>&1 | FileCheck --check-prefixes=ALL,UNROLL %s
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/pr63668.ll b/llvm/test/Transforms/SLPVectorizer/X86/pr63668.ll
index 037e073de9d59..a1ee268392d0e 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/pr63668.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/pr63668.ll
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
; RUN: opt -passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -mcpu=znver4 -S < %s | FileCheck %s
; RUN: opt -passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -mcpu=znver5 -S < %s | FileCheck %s
+; RUN: opt -passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu -mcpu=znver6 -S < %s | FileCheck %s
define internal i32 @testfunc() {
; CHECK-LABEL: define internal i32 @testfunc
More information about the cfe-commits mailing list