[polly] [clang] [llvm] [X86] Remove Intel Xeon Phi Supports. (PR #76383)

Freddy Ye via cfe-commits cfe-commits at lists.llvm.org
Mon Dec 25 21:48:57 PST 2023


https://github.com/FreddyLeaf created https://github.com/llvm/llvm-project/pull/76383

- [CFE][X86] Remove Xeon Phi CPU names supports.
- [X86][BE] Remove Xeon Phi CPU names supports.
- [X86][BE] Workaround backend tests to work.
- [X86][BE] Workaround special tests to work.
- Remove AVX512ER intrinsic supports.
- Remove AVX512PF intrinsic supports.
- Remove PREFETCHWT1 intrinsic supports.


>From 1af65a7e7bbc3bd17d638afc1219e8266674b0c2 Mon Sep 17 00:00:00 2001
From: Freddy Ye <freddy.ye at intel.com>
Date: Wed, 20 Dec 2023 14:17:46 +0800
Subject: [PATCH 1/7] [CFE][X86] Remove Xeon Phi CPU names supports.

---
 clang/lib/Basic/Targets/X86.cpp               |   7 -
 clang/lib/Driver/ToolChains/Arch/X86.cpp      |   2 +-
 .../CodeGen/X86/x86-inline-asm-v-constraint.c |   1 -
 clang/test/CodeGen/attr-cpuspecific-cpus.c    |   3 -
 clang/test/CodeGen/attr-cpuspecific.c         |  90 +++++-----
 clang/test/Driver/cl-x86-flags.c              |   4 +-
 clang/test/Frontend/x86-target-cpu.c          |   2 -
 clang/test/Misc/target-invalid-cpu-note.c     |   8 +-
 .../Preprocessor/predefined-arch-macros.c     | 158 ------------------
 clang/test/Preprocessor/x86_target_features.c |   2 -
 .../llvm/TargetParser/X86TargetParser.h       |   2 -
 llvm/lib/TargetParser/X86TargetParser.cpp     |  12 --
 12 files changed, 52 insertions(+), 239 deletions(-)

diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp
index b97f88647fa49f..2483bc8b08f014 100644
--- a/clang/lib/Basic/Targets/X86.cpp
+++ b/clang/lib/Basic/Targets/X86.cpp
@@ -633,11 +633,6 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts,
     // recent primary x86 CPUs, and we should keep it that way.
     defineCPUMacros(Builder, "corei7");
     break;
-  case CK_KNL:
-    defineCPUMacros(Builder, "knl");
-    break;
-  case CK_KNM:
-    break;
   case CK_Lakemont:
     defineCPUMacros(Builder, "i586", /*Tuning*/false);
     defineCPUMacros(Builder, "pentium", /*Tuning*/false);
@@ -1569,8 +1564,6 @@ std::optional<unsigned> X86TargetInfo::getCPUCacheLineSize() const {
     case CK_GraniterapidsD:
     case CK_Emeraldrapids:
     case CK_Clearwaterforest:
-    case CK_KNL:
-    case CK_KNM:
     // K7
     case CK_Athlon:
     case CK_AthlonXP:
diff --git a/clang/lib/Driver/ToolChains/Arch/X86.cpp b/clang/lib/Driver/ToolChains/Arch/X86.cpp
index fef0522aaf45b8..eea339c842431f 100644
--- a/clang/lib/Driver/ToolChains/Arch/X86.cpp
+++ b/clang/lib/Driver/ToolChains/Arch/X86.cpp
@@ -42,7 +42,7 @@ std::string x86::getX86TargetCPU(const Driver &D, const ArgList &Args,
     llvm::StringMap<StringRef> ArchMap({
         {"AVX", "sandybridge"},
         {"AVX2", "haswell"},
-        {"AVX512F", "knl"},
+        {"AVX512F", "skylake-avx512"},
         {"AVX512", "skylake-avx512"},
     });
     if (Triple.getArch() == llvm::Triple::x86) {
diff --git a/clang/test/CodeGen/X86/x86-inline-asm-v-constraint.c b/clang/test/CodeGen/X86/x86-inline-asm-v-constraint.c
index b75a84d7a7bcbf..07b5650470dbd8 100644
--- a/clang/test/CodeGen/X86/x86-inline-asm-v-constraint.c
+++ b/clang/test/CodeGen/X86/x86-inline-asm-v-constraint.c
@@ -1,7 +1,6 @@
 // RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -emit-llvm -target-cpu x86-64 -o - |FileCheck %s --check-prefix SSE
 // RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -emit-llvm -target-cpu skylake -D AVX -o - | FileCheck %s --check-prefixes AVX,SSE
 // RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -emit-llvm -target-cpu skylake-avx512 -D AVX512 -D AVX -o - | FileCheck %s --check-prefixes AVX512,AVX,SSE
-// RUN: %clang_cc1 %s -triple x86_64-unknown-linux-gnu -emit-llvm -target-cpu knl -D AVX -D AVX512 -o - | FileCheck %s --check-prefixes AVX512,AVX,SSE
 
 typedef float __m128 __attribute__ ((vector_size (16)));
 typedef float __m256 __attribute__ ((vector_size (32)));
diff --git a/clang/test/CodeGen/attr-cpuspecific-cpus.c b/clang/test/CodeGen/attr-cpuspecific-cpus.c
index dd154fd227b25b..f8afe0baadd28b 100644
--- a/clang/test/CodeGen/attr-cpuspecific-cpus.c
+++ b/clang/test/CodeGen/attr-cpuspecific-cpus.c
@@ -32,11 +32,9 @@ ATTR(cpu_specific(haswell)) void CPU(void){}
 ATTR(cpu_specific(core_4th_gen_avx_tsx)) void CPU(void){}
 ATTR(cpu_specific(broadwell)) void CPU(void){}
 ATTR(cpu_specific(core_5th_gen_avx_tsx)) void CPU(void){}
-ATTR(cpu_specific(knl)) void CPU(void){}
 ATTR(cpu_specific(skylake)) void CPU(void){}
 ATTR(cpu_specific(skylake_avx512)) void CPU(void){}
 ATTR(cpu_specific(cannonlake)) void CPU(void){}
-ATTR(cpu_specific(knm)) void CPU(void){}
 ATTR(cpu_specific(cascadelake)) void CPU(void){}
 ATTR(cpu_specific(cooperlake)) void CPU(void){}
 ATTR(cpu_specific(icelake_client)) void CPU(void){}
@@ -50,7 +48,6 @@ ATTR(cpu_specific(core_2nd_gen_avx)) void CPU1(void){}
 ATTR(cpu_specific(core_3rd_gen_avx)) void CPU2(void){}
 ATTR(cpu_specific(core_4th_gen_avx)) void CPU3(void){}
 ATTR(cpu_specific(core_5th_gen_avx)) void CPU4(void){}
-ATTR(cpu_specific(mic_avx512)) void CPU5(void){}
 ATTR(cpu_specific(pentiumpro)) void CPU6(void){}
 ATTR(cpu_specific(pentium3)) void CPU7(void){}
 ATTR(cpu_specific(pentium3m)) void CPU8(void){}
diff --git a/clang/test/CodeGen/attr-cpuspecific.c b/clang/test/CodeGen/attr-cpuspecific.c
index 2c3e6931800cd8..478480aa24996c 100644
--- a/clang/test/CodeGen/attr-cpuspecific.c
+++ b/clang/test/CodeGen/attr-cpuspecific.c
@@ -70,14 +70,14 @@ void NotCalled(void){}
 // declaration.
 void TwoVersions(void);
 
-ATTR(cpu_dispatch(ivybridge, knl))
+ATTR(cpu_dispatch(ivybridge, skx))
 void TwoVersions(void);
 // LINUX: define weak_odr ptr @TwoVersions.resolver()
 // LINUX: call void @__cpu_indicator_init
 // LINUX: %[[FEAT_INIT:.+]] = load i32, ptr getelementptr inbounds ({ i32, i32, i32, [1 x i32] }, ptr @__cpu_model, i32 0, i32 3, i32 0), align 4
-// LINUX: %[[FEAT_JOIN:.+]] = and i32 %[[FEAT_INIT]], 59754495
-// LINUX: %[[FEAT_CHECK:.+]] = icmp eq i32 %[[FEAT_JOIN]], 59754495
-// LINUX: ret ptr @TwoVersions.Z
+// LINUX: %[[FEAT_JOIN:.+]] = and i32 %[[FEAT_INIT]], 16762879
+// LINUX: %[[FEAT_CHECK:.+]] = icmp eq i32 %[[FEAT_JOIN]], 16762879
+// LINUX: ret ptr @TwoVersions.a
 // LINUX: ret ptr @TwoVersions.S
 // LINUX: call void @llvm.trap
 // LINUX: unreachable
@@ -85,9 +85,9 @@ void TwoVersions(void);
 // WINDOWS: define weak_odr dso_local void @TwoVersions() comdat
 // WINDOWS: call void @__cpu_indicator_init()
 // WINDOWS: %[[FEAT_INIT:.+]] = load i32, ptr getelementptr inbounds ({ i32, i32, i32, [1 x i32] }, ptr @__cpu_model, i32 0, i32 3, i32 0), align 4
-// WINDOWS: %[[FEAT_JOIN:.+]] = and i32 %[[FEAT_INIT]], 59754495
-// WINDOWS: %[[FEAT_CHECK:.+]] = icmp eq i32 %[[FEAT_JOIN]], 59754495
-// WINDOWS: call void @TwoVersions.Z()
+// WINDOWS: %[[FEAT_JOIN:.+]] = and i32 %[[FEAT_INIT]], 16762879
+// WINDOWS: %[[FEAT_CHECK:.+]] = icmp eq i32 %[[FEAT_JOIN]], 16762879
+// WINDOWS: call void @TwoVersions.a()
 // WINDOWS-NEXT: ret void
 // WINDOWS: call void @TwoVersions.S()
 // WINDOWS-NEXT: ret void
@@ -98,45 +98,45 @@ ATTR(cpu_specific(ivybridge))
 void TwoVersions(void){}
 // CHECK: define {{.*}}void @TwoVersions.S() #[[S]]
 
-ATTR(cpu_specific(knl))
+ATTR(cpu_specific(skx))
 void TwoVersions(void){}
-// CHECK: define {{.*}}void @TwoVersions.Z() #[[K:[0-9]+]]
+// CHECK: define {{.*}}void @TwoVersions.a() #[[K:[0-9]+]]
 
-ATTR(cpu_specific(ivybridge, knl))
+ATTR(cpu_specific(ivybridge, skx))
 void TwoVersionsSameAttr(void){}
 // CHECK: define {{.*}}void @TwoVersionsSameAttr.S() #[[S]]
-// CHECK: define {{.*}}void @TwoVersionsSameAttr.Z() #[[K]]
+// CHECK: define {{.*}}void @TwoVersionsSameAttr.a() #[[K]]
 
-ATTR(cpu_specific(atom, ivybridge, knl))
+ATTR(cpu_specific(atom, ivybridge, skx))
 void ThreeVersionsSameAttr(void){}
 // CHECK: define {{.*}}void @ThreeVersionsSameAttr.O() #[[O:[0-9]+]]
 // CHECK: define {{.*}}void @ThreeVersionsSameAttr.S() #[[S]]
-// CHECK: define {{.*}}void @ThreeVersionsSameAttr.Z() #[[K]]
+// CHECK: define {{.*}}void @ThreeVersionsSameAttr.a() #[[K]]
 
-ATTR(cpu_specific(knl))
+ATTR(cpu_specific(skx))
 void CpuSpecificNoDispatch(void) {}
-// CHECK: define {{.*}}void @CpuSpecificNoDispatch.Z() #[[K:[0-9]+]]
+// CHECK: define {{.*}}void @CpuSpecificNoDispatch.a() #[[K:[0-9]+]]
 
-ATTR(cpu_dispatch(knl))
+ATTR(cpu_dispatch(skx))
 void OrderDispatchUsageSpecific(void);
 // LINUX: define weak_odr ptr @OrderDispatchUsageSpecific.resolver()
 // LINUX: call void @__cpu_indicator_init
-// LINUX: ret ptr @OrderDispatchUsageSpecific.Z
+// LINUX: ret ptr @OrderDispatchUsageSpecific.a
 // LINUX: call void @llvm.trap
 // LINUX: unreachable
 
 // WINDOWS: define weak_odr dso_local void @OrderDispatchUsageSpecific() comdat
 // WINDOWS: call void @__cpu_indicator_init()
-// WINDOWS: call void @OrderDispatchUsageSpecific.Z()
+// WINDOWS: call void @OrderDispatchUsageSpecific.a()
 // WINDOWS-NEXT: ret void
 // WINDOWS: call void @llvm.trap
 // WINDOWS: unreachable
 
-// CHECK: define {{.*}}void @OrderDispatchUsageSpecific.Z()
+// CHECK: define {{.*}}void @OrderDispatchUsageSpecific.a()
 
-ATTR(cpu_specific(knl))
+ATTR(cpu_specific(skx))
 void OrderSpecificUsageDispatch(void) {}
-// CHECK: define {{.*}}void @OrderSpecificUsageDispatch.Z() #[[K:[0-9]+]]
+// CHECK: define {{.*}}void @OrderSpecificUsageDispatch.a() #[[K:[0-9]+]]
 
 void usages(void) {
   SingleVersion();
@@ -165,17 +165,17 @@ void usages(void) {
 // LINUX: declare void @CpuSpecificNoDispatch.ifunc()
 
 // has an extra config to emit!
-ATTR(cpu_dispatch(ivybridge, knl, atom))
+ATTR(cpu_dispatch(ivybridge, skx, atom))
 void TwoVersionsSameAttr(void);
 // LINUX: define weak_odr ptr @TwoVersionsSameAttr.resolver()
-// LINUX: ret ptr @TwoVersionsSameAttr.Z
+// LINUX: ret ptr @TwoVersionsSameAttr.a
 // LINUX: ret ptr @TwoVersionsSameAttr.S
 // LINUX: ret ptr @TwoVersionsSameAttr.O
 // LINUX: call void @llvm.trap
 // LINUX: unreachable
 
 // WINDOWS: define weak_odr dso_local void @TwoVersionsSameAttr() comdat
-// WINDOWS: call void @TwoVersionsSameAttr.Z
+// WINDOWS: call void @TwoVersionsSameAttr.a
 // WINDOWS-NEXT: ret void
 // WINDOWS: call void @TwoVersionsSameAttr.S
 // WINDOWS-NEXT: ret void
@@ -184,11 +184,11 @@ void TwoVersionsSameAttr(void);
 // WINDOWS: call void @llvm.trap
 // WINDOWS: unreachable
 
-ATTR(cpu_dispatch(atom, ivybridge, knl))
+ATTR(cpu_dispatch(atom, ivybridge, skx))
 void ThreeVersionsSameAttr(void){}
 // LINUX: define weak_odr ptr @ThreeVersionsSameAttr.resolver()
 // LINUX: call void @__cpu_indicator_init
-// LINUX: ret ptr @ThreeVersionsSameAttr.Z
+// LINUX: ret ptr @ThreeVersionsSameAttr.a
 // LINUX: ret ptr @ThreeVersionsSameAttr.S
 // LINUX: ret ptr @ThreeVersionsSameAttr.O
 // LINUX: call void @llvm.trap
@@ -196,7 +196,7 @@ void ThreeVersionsSameAttr(void){}
 
 // WINDOWS: define weak_odr dso_local void @ThreeVersionsSameAttr() comdat
 // WINDOWS: call void @__cpu_indicator_init
-// WINDOWS: call void @ThreeVersionsSameAttr.Z
+// WINDOWS: call void @ThreeVersionsSameAttr.a
 // WINDOWS-NEXT: ret void
 // WINDOWS: call void @ThreeVersionsSameAttr.S
 // WINDOWS-NEXT: ret void
@@ -205,22 +205,22 @@ void ThreeVersionsSameAttr(void){}
 // WINDOWS: call void @llvm.trap
 // WINDOWS: unreachable
 
-ATTR(cpu_dispatch(knl))
+ATTR(cpu_dispatch(skx))
 void OrderSpecificUsageDispatch(void);
 // LINUX: define weak_odr ptr @OrderSpecificUsageDispatch.resolver()
-// LINUX: ret ptr @OrderSpecificUsageDispatch.Z
+// LINUX: ret ptr @OrderSpecificUsageDispatch.a
 
 // WINDOWS: define weak_odr dso_local void @OrderSpecificUsageDispatch() comdat
 // WINDOWS: call void @__cpu_indicator_init
-// WINDOWS: call void @OrderSpecificUsageDispatch.Z
+// WINDOWS: call void @OrderSpecificUsageDispatch.a
 // WINDOWS-NEXT: ret void
 
 // No Cpu Specific options.
-ATTR(cpu_dispatch(atom, ivybridge, knl))
+ATTR(cpu_dispatch(atom, ivybridge, skx))
 void NoSpecifics(void);
 // LINUX: define weak_odr ptr @NoSpecifics.resolver()
 // LINUX: call void @__cpu_indicator_init
-// LINUX: ret ptr @NoSpecifics.Z
+// LINUX: ret ptr @NoSpecifics.a
 // LINUX: ret ptr @NoSpecifics.S
 // LINUX: ret ptr @NoSpecifics.O
 // LINUX: call void @llvm.trap
@@ -228,7 +228,7 @@ void NoSpecifics(void);
 
 // WINDOWS: define weak_odr dso_local void @NoSpecifics() comdat
 // WINDOWS: call void @__cpu_indicator_init
-// WINDOWS: call void @NoSpecifics.Z
+// WINDOWS: call void @NoSpecifics.a
 // WINDOWS-NEXT: ret void
 // WINDOWS: call void @NoSpecifics.S
 // WINDOWS-NEXT: ret void
@@ -237,11 +237,11 @@ void NoSpecifics(void);
 // WINDOWS: call void @llvm.trap
 // WINDOWS: unreachable
 
-ATTR(cpu_dispatch(atom, generic, ivybridge, knl))
+ATTR(cpu_dispatch(atom, generic, ivybridge, skx))
 void HasGeneric(void);
 // LINUX: define weak_odr ptr @HasGeneric.resolver()
 // LINUX: call void @__cpu_indicator_init
-// LINUX: ret ptr @HasGeneric.Z
+// LINUX: ret ptr @HasGeneric.a
 // LINUX: ret ptr @HasGeneric.S
 // LINUX: ret ptr @HasGeneric.O
 // LINUX: ret ptr @HasGeneric.A
@@ -249,7 +249,7 @@ void HasGeneric(void);
 
 // WINDOWS: define weak_odr dso_local void @HasGeneric() comdat
 // WINDOWS: call void @__cpu_indicator_init
-// WINDOWS: call void @HasGeneric.Z
+// WINDOWS: call void @HasGeneric.a
 // WINDOWS-NEXT: ret void
 // WINDOWS: call void @HasGeneric.S
 // WINDOWS-NEXT: ret void
@@ -259,11 +259,11 @@ void HasGeneric(void);
 // WINDOWS-NEXT: ret void
 // WINDOWS-NOT: call void @llvm.trap
 
-ATTR(cpu_dispatch(atom, generic, ivybridge, knl))
+ATTR(cpu_dispatch(atom, generic, ivybridge, skx))
 void HasParams(int i, double d);
 // LINUX: define weak_odr ptr @HasParams.resolver()
 // LINUX: call void @__cpu_indicator_init
-// LINUX: ret ptr @HasParams.Z
+// LINUX: ret ptr @HasParams.a
 // LINUX: ret ptr @HasParams.S
 // LINUX: ret ptr @HasParams.O
 // LINUX: ret ptr @HasParams.A
@@ -271,7 +271,7 @@ void HasParams(int i, double d);
 
 // WINDOWS: define weak_odr dso_local void @HasParams(i32 %0, double %1) comdat
 // WINDOWS: call void @__cpu_indicator_init
-// WINDOWS: call void @HasParams.Z(i32 %0, double %1)
+// WINDOWS: call void @HasParams.a(i32 %0, double %1)
 // WINDOWS-NEXT: ret void
 // WINDOWS: call void @HasParams.S(i32 %0, double %1)
 // WINDOWS-NEXT: ret void
@@ -281,11 +281,11 @@ void HasParams(int i, double d);
 // WINDOWS-NEXT: ret void
 // WINDOWS-NOT: call void @llvm.trap
 
-ATTR(cpu_dispatch(atom, generic, ivybridge, knl))
+ATTR(cpu_dispatch(atom, generic, ivybridge, skx))
 int HasParamsAndReturn(int i, double d);
 // LINUX: define weak_odr ptr @HasParamsAndReturn.resolver()
 // LINUX: call void @__cpu_indicator_init
-// LINUX: ret ptr @HasParamsAndReturn.Z
+// LINUX: ret ptr @HasParamsAndReturn.a
 // LINUX: ret ptr @HasParamsAndReturn.S
 // LINUX: ret ptr @HasParamsAndReturn.O
 // LINUX: ret ptr @HasParamsAndReturn.A
@@ -293,7 +293,7 @@ int HasParamsAndReturn(int i, double d);
 
 // WINDOWS: define weak_odr dso_local i32 @HasParamsAndReturn(i32 %0, double %1) comdat
 // WINDOWS: call void @__cpu_indicator_init
-// WINDOWS: %[[RET:.+]] = musttail call i32 @HasParamsAndReturn.Z(i32 %0, double %1)
+// WINDOWS: %[[RET:.+]] = musttail call i32 @HasParamsAndReturn.a(i32 %0, double %1)
 // WINDOWS-NEXT: ret i32 %[[RET]]
 // WINDOWS: %[[RET:.+]] = musttail call i32 @HasParamsAndReturn.S(i32 %0, double %1)
 // WINDOWS-NEXT: ret i32 %[[RET]]
@@ -349,12 +349,12 @@ int DispatchFirst(void) {return 1;}
 // WINDOWS: define dso_local i32 @DispatchFirst.B
 // WINDOWS: ret i32 1
 
-ATTR(cpu_specific(knl))
+ATTR(cpu_specific(skx))
 void OrderDispatchUsageSpecific(void) {}
 
 // CHECK: attributes #[[S]] = {{.*}}"target-features"="+avx,+cmov,+crc32,+cx16,+cx8,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt"
 // CHECK-SAME: "tune-cpu"="ivybridge"
-// CHECK: attributes #[[K]] = {{.*}}"target-features"="+adx,+aes,+avx,+avx2,+avx512cd,+avx512er,+avx512f,+avx512pf,+bmi,+bmi2,+cmov,+crc32,+cx16,+cx8,+evex512,+f16c,+fma,+fsgsbase,+fxsr,+invpcid,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+prefetchwt1,+prfchw,+rdrnd,+rdseed,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt"
-// CHECK-SAME: "tune-cpu"="knl"
+// CHECK: attributes #[[K]] = {{.*}}"target-features"="+adx,+aes,+avx,+avx2,+avx512bw,+avx512cd,+avx512dq,+avx512f,+avx512vl,+bmi,+bmi2,+clflushopt,+clwb,+cmov,+crc32,+cx16,+cx8,+evex512,+f16c,+fma,+fsgsbase,+fxsr,+invpcid,+lzcnt,+mmx,+movbe,+pclmul,+pku,+popcnt,+prfchw,+rdrnd,+rdseed,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsavec,+xsaveopt,+xsaves"
+// CHECK-SAME: "tune-cpu"="skx"
 // CHECK: attributes #[[O]] = {{.*}}"target-features"="+cmov,+cx16,+cx8,+fxsr,+mmx,+movbe,+sahf,+sse,+sse2,+sse3,+ssse3,+x87"
 // CHECK-SAME: "tune-cpu"="atom"
diff --git a/clang/test/Driver/cl-x86-flags.c b/clang/test/Driver/cl-x86-flags.c
index 51b16f0ce35463..ae3201db6deecd 100644
--- a/clang/test/Driver/cl-x86-flags.c
+++ b/clang/test/Driver/cl-x86-flags.c
@@ -71,7 +71,7 @@
 
 // RUN: %clang_cl -m32 -arch:AVX512F --target=i386-pc-windows /c /Fo%t.obj -Xclang -verify -DTEST_32_ARCH_AVX512F -- %s
 #if defined(TEST_32_ARCH_AVX512F)
-#if _M_IX86_FP != 2 || !__AVX__ || !__AVX2__ || !__AVX512F__  || __AVX512BW__
+#if _M_IX86_FP != 2 || !__AVX__ || !__AVX2__ || !__AVX512F__  || !__AVX512BW__
 #error fail
 #endif
 #endif
@@ -111,7 +111,7 @@
 
 // RUN: %clang_cl -m64 -arch:AVX512F --target=i386-pc-windows /c /Fo%t.obj -Xclang -verify -DTEST_64_ARCH_AVX512F -- %s
 #if defined(TEST_64_ARCH_AVX512F)
-#if _M_IX86_FP || !__AVX__ || !__AVX2__ || !__AVX512F__  || __AVX512BW__
+#if _M_IX86_FP || !__AVX__ || !__AVX2__ || !__AVX512F__  || !__AVX512BW__
 #error fail
 #endif
 #endif
diff --git a/clang/test/Frontend/x86-target-cpu.c b/clang/test/Frontend/x86-target-cpu.c
index 6c8502ac2c21ee..116647164dcc5c 100644
--- a/clang/test/Frontend/x86-target-cpu.c
+++ b/clang/test/Frontend/x86-target-cpu.c
@@ -15,8 +15,6 @@
 // RUN: %clang_cc1 -triple x86_64-unknown-unknown -target-cpu cannonlake -verify %s
 // RUN: %clang_cc1 -triple x86_64-unknown-unknown -target-cpu icelake-client -verify %s
 // RUN: %clang_cc1 -triple x86_64-unknown-unknown -target-cpu icelake-server -verify %s
-// RUN: %clang_cc1 -triple x86_64-unknown-unknown -target-cpu knl -verify %s
-// RUN: %clang_cc1 -triple x86_64-unknown-unknown -target-cpu knm -verify %s
 // RUN: %clang_cc1 -triple x86_64-unknown-unknown -target-cpu bonnell -verify %s
 // RUN: %clang_cc1 -triple x86_64-unknown-unknown -target-cpu silvermont -verify %s
 // RUN: %clang_cc1 -triple x86_64-unknown-unknown -target-cpu k8 -verify %s
diff --git a/clang/test/Misc/target-invalid-cpu-note.c b/clang/test/Misc/target-invalid-cpu-note.c
index 48e9f05d9b03de..6aa8d82ebc67b7 100644
--- a/clang/test/Misc/target-invalid-cpu-note.c
+++ b/clang/test/Misc/target-invalid-cpu-note.c
@@ -13,19 +13,19 @@
 
 // RUN: not %clang_cc1 -triple i386--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix X86
 // X86: error: unknown target CPU 'not-a-cpu'
-// X86-NEXT: note: valid target CPU values are: i386, i486, winchip-c6, winchip2, c3, i586, pentium, pentium-mmx, pentiumpro, i686, pentium2, pentium3, pentium3m, pentium-m, c3-2, yonah, pentium4, pentium4m, prescott, nocona, core2, penryn, bonnell, atom, silvermont, slm, goldmont, goldmont-plus, tremont, nehalem, corei7, westmere, sandybridge, corei7-avx, ivybridge, core-avx-i, haswell, core-avx2, broadwell, skylake, skylake-avx512, skx, cascadelake, cooperlake, cannonlake, icelake-client, rocketlake, icelake-server, tigerlake, sapphirerapids, alderlake, raptorlake, meteorlake, arrowlake, arrowlake-s, lunarlake, gracemont, pantherlake, sierraforest, grandridge, graniterapids, graniterapids-d, emeraldrapids, clearwaterforest, knl, knm, lakemont, k6, k6-2, k6-3, athlon, athlon-tbird, athlon-xp, athlon-mp, athlon-4, k8, athlon64, athlon-fx, opteron, k8-sse3, athlon64-sse3, opteron-sse3, amdfam10, barcelona, btver1, btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2, znver3, znver4, x86-64, x86-64-v2, x86-64-v3, x86-64-v4, geode{{$}}
+// X86-NEXT: note: valid target CPU values are: i386, i486, winchip-c6, winchip2, c3, i586, pentium, pentium-mmx, pentiumpro, i686, pentium2, pentium3, pentium3m, pentium-m, c3-2, yonah, pentium4, pentium4m, prescott, nocona, core2, penryn, bonnell, atom, silvermont, slm, goldmont, goldmont-plus, tremont, nehalem, corei7, westmere, sandybridge, corei7-avx, ivybridge, core-avx-i, haswell, core-avx2, broadwell, skylake, skylake-avx512, skx, cascadelake, cooperlake, cannonlake, icelake-client, rocketlake, icelake-server, tigerlake, sapphirerapids, alderlake, raptorlake, meteorlake, arrowlake, arrowlake-s, lunarlake, gracemont, pantherlake, sierraforest, grandridge, graniterapids, graniterapids-d, emeraldrapids, clearwaterforest, lakemont, k6, k6-2, k6-3, athlon, athlon-tbird, athlon-xp, athlon-mp, athlon-4, k8, athlon64, athlon-fx, opteron, k8-sse3, athlon64-sse3, opteron-sse3, amdfam10, barcelona, btver1, btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2, znver3, znver4, x86-64, x86-64-v2, x86-64-v3, x86-64-v4, geode{{$}}
 
 // RUN: not %clang_cc1 -triple x86_64--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix X86_64
 // X86_64: error: unknown target CPU 'not-a-cpu'
-// X86_64-NEXT: note: valid target CPU values are: nocona, core2, penryn, bonnell, atom, silvermont, slm, goldmont, goldmont-plus, tremont, nehalem, corei7, westmere, sandybridge, corei7-avx, ivybridge, core-avx-i, haswell, core-avx2, broadwell, skylake, skylake-avx512, skx, cascadelake, cooperlake, cannonlake, icelake-client, rocketlake, icelake-server, tigerlake, sapphirerapids, alderlake, raptorlake, meteorlake, arrowlake, arrowlake-s, lunarlake, gracemont, pantherlake, sierraforest, grandridge, graniterapids, graniterapids-d, emeraldrapids, clearwaterforest, knl, knm, k8, athlon64, athlon-fx, opteron, k8-sse3, athlon64-sse3, opteron-sse3, amdfam10, barcelona, btver1, btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2, znver3, znver4, x86-64, x86-64-v2, x86-64-v3, x86-64-v4{{$}}
+// X86_64-NEXT: note: valid target CPU values are: nocona, core2, penryn, bonnell, atom, silvermont, slm, goldmont, goldmont-plus, tremont, nehalem, corei7, westmere, sandybridge, corei7-avx, ivybridge, core-avx-i, haswell, core-avx2, broadwell, skylake, skylake-avx512, skx, cascadelake, cooperlake, cannonlake, icelake-client, rocketlake, icelake-server, tigerlake, sapphirerapids, alderlake, raptorlake, meteorlake, arrowlake, arrowlake-s, lunarlake, gracemont, pantherlake, sierraforest, grandridge, graniterapids, graniterapids-d, emeraldrapids, clearwaterforest, k8, athlon64, athlon-fx, opteron, k8-sse3, athlon64-sse3, opteron-sse3, amdfam10, barcelona, btver1, btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2, znver3, znver4, x86-64, x86-64-v2, x86-64-v3, x86-64-v4{{$}}
 
 // RUN: not %clang_cc1 -triple i386--- -tune-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix TUNE_X86
 // TUNE_X86: error: unknown target CPU 'not-a-cpu'
-// TUNE_X86-NEXT: note: valid target CPU values are: i386, i486, winchip-c6, winchip2, c3, i586, pentium, pentium-mmx, pentiumpro, i686, pentium2, pentium3, pentium3m, pentium-m, c3-2, yonah, pentium4, pentium4m, prescott, nocona, core2, penryn, bonnell, atom, silvermont, slm, goldmont, goldmont-plus, tremont, nehalem, corei7, westmere, sandybridge, corei7-avx, ivybridge, core-avx-i, haswell, core-avx2, broadwell, skylake, skylake-avx512, skx, cascadelake, cooperlake, cannonlake, icelake-client, rocketlake, icelake-server, tigerlake, sapphirerapids, alderlake, raptorlake, meteorlake, arrowlake, arrowlake-s, lunarlake, gracemont, pantherlake, sierraforest, grandridge, graniterapids, graniterapids-d, emeraldrapids, clearwaterforest, knl, knm, lakemont, k6, k6-2, k6-3, athlon, athlon-tbird, athlon-xp, athlon-mp, athlon-4, k8, athlon64, athlon-fx, opteron, k8-sse3, athlon64-sse3, opteron-sse3, amdfam10, barcelona, btver1, btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2, znver3, znver4, x86-64, geode{{$}}
+// TUNE_X86-NEXT: note: valid target CPU values are: i386, i486, winchip-c6, winchip2, c3, i586, pentium, pentium-mmx, pentiumpro, i686, pentium2, pentium3, pentium3m, pentium-m, c3-2, yonah, pentium4, pentium4m, prescott, nocona, core2, penryn, bonnell, atom, silvermont, slm, goldmont, goldmont-plus, tremont, nehalem, corei7, westmere, sandybridge, corei7-avx, ivybridge, core-avx-i, haswell, core-avx2, broadwell, skylake, skylake-avx512, skx, cascadelake, cooperlake, cannonlake, icelake-client, rocketlake, icelake-server, tigerlake, sapphirerapids, alderlake, raptorlake, meteorlake, arrowlake, arrowlake-s, lunarlake, gracemont, pantherlake, sierraforest, grandridge, graniterapids, graniterapids-d, emeraldrapids, clearwaterforest, lakemont, k6, k6-2, k6-3, athlon, athlon-tbird, athlon-xp, athlon-mp, athlon-4, k8, athlon64, athlon-fx, opteron, k8-sse3, athlon64-sse3, opteron-sse3, amdfam10, barcelona, btver1, btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2, znver3, znver4, x86-64, geode{{$}}
 
 // RUN: not %clang_cc1 -triple x86_64--- -tune-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix TUNE_X86_64
 // TUNE_X86_64: error: unknown target CPU 'not-a-cpu'
-// TUNE_X86_64-NEXT: note: valid target CPU values are: i386, i486, winchip-c6, winchip2, c3, i586, pentium, pentium-mmx, pentiumpro, i686, pentium2, pentium3, pentium3m, pentium-m, c3-2, yonah, pentium4, pentium4m, prescott, nocona, core2, penryn, bonnell, atom, silvermont, slm, goldmont, goldmont-plus, tremont, nehalem, corei7, westmere, sandybridge, corei7-avx, ivybridge, core-avx-i, haswell, core-avx2, broadwell, skylake, skylake-avx512, skx, cascadelake, cooperlake, cannonlake, icelake-client, rocketlake, icelake-server, tigerlake, sapphirerapids, alderlake, raptorlake, meteorlake, arrowlake, arrowlake-s, lunarlake, gracemont, pantherlake, sierraforest, grandridge, graniterapids, graniterapids-d, emeraldrapids, clearwaterforest, knl, knm, lakemont, k6, k6-2, k6-3, athlon, athlon-tbird, athlon-xp, athlon-mp, athlon-4, k8, athlon64, athlon-fx, opteron, k8-sse3, athlon64-sse3, opteron-sse3, amdfam10, barcelona, btver1, btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2, znver3, znver4, x86-64, geode{{$}}
+// TUNE_X86_64-NEXT: note: valid target CPU values are: i386, i486, winchip-c6, winchip2, c3, i586, pentium, pentium-mmx, pentiumpro, i686, pentium2, pentium3, pentium3m, pentium-m, c3-2, yonah, pentium4, pentium4m, prescott, nocona, core2, penryn, bonnell, atom, silvermont, slm, goldmont, goldmont-plus, tremont, nehalem, corei7, westmere, sandybridge, corei7-avx, ivybridge, core-avx-i, haswell, core-avx2, broadwell, skylake, skylake-avx512, skx, cascadelake, cooperlake, cannonlake, icelake-client, rocketlake, icelake-server, tigerlake, sapphirerapids, alderlake, raptorlake, meteorlake, arrowlake, arrowlake-s, lunarlake, gracemont, pantherlake, sierraforest, grandridge, graniterapids, graniterapids-d, emeraldrapids, clearwaterforest, lakemont, k6, k6-2, k6-3, athlon, athlon-tbird, athlon-xp, athlon-mp, athlon-4, k8, athlon64, athlon-fx, opteron, k8-sse3, athlon64-sse3, opteron-sse3, amdfam10, barcelona, btver1, btver2, bdver1, bdver2, bdver3, bdver4, znver1, znver2, znver3, znver4, x86-64, geode{{$}}
 
 // RUN: not %clang_cc1 -triple nvptx--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix NVPTX
 // NVPTX: error: unknown target CPU 'not-a-cpu'
diff --git a/clang/test/Preprocessor/predefined-arch-macros.c b/clang/test/Preprocessor/predefined-arch-macros.c
index 1ae6faea776785..305d6363b9c00b 100644
--- a/clang/test/Preprocessor/predefined-arch-macros.c
+++ b/clang/test/Preprocessor/predefined-arch-macros.c
@@ -787,164 +787,6 @@
 // CHECK_SKL_M64: #define __x86_64 1
 // CHECK_SKL_M64: #define __x86_64__ 1
 
-// RUN: %clang -march=knl -m32 -E -dM %s -o - 2>&1 \
-// RUN:     -target i386-unknown-linux \
-// RUN:   | FileCheck -match-full-lines %s -check-prefix=CHECK_KNL_M32
-// CHECK_KNL_M32: #define __AES__ 1
-// CHECK_KNL_M32: #define __AVX2__ 1
-// CHECK_KNL_M32: #define __AVX512CD__ 1
-// CHECK_KNL_M32: #define __AVX512ER__ 1
-// CHECK_KNL_M32: #define __AVX512F__ 1
-// CHECK_KNL_M32: #define __AVX512PF__ 1
-// CHECK_KNL_M32: #define __AVX__ 1
-// CHECK_KNL_M32: #define __BMI2__ 1
-// CHECK_KNL_M32: #define __BMI__ 1
-// CHECK_KNL_M32-NOT: #define __EVEX256__ 1
-// CHECK_KNL_M32: #define __EVEX512__ 1
-// CHECK_KNL_M32: #define __F16C__ 1
-// CHECK_KNL_M32: #define __FMA__ 1
-// CHECK_KNL_M32: #define __LZCNT__ 1
-// CHECK_KNL_M32: #define __MMX__ 1
-// CHECK_KNL_M32: #define __MOVBE__ 1
-// CHECK_KNL_M32: #define __PCLMUL__ 1
-// CHECK_KNL_M32: #define __POPCNT__ 1
-// CHECK_KNL_M32: #define __PREFETCHWT1__ 1
-// CHECK_KNL_M32: #define __PRFCHW__ 1
-// CHECK_KNL_M32: #define __RDRND__ 1
-// CHECK_KNL_M32: #define __SSE2__ 1
-// CHECK_KNL_M32: #define __SSE3__ 1
-// CHECK_KNL_M32: #define __SSE4_1__ 1
-// CHECK_KNL_M32: #define __SSE4_2__ 1
-// CHECK_KNL_M32: #define __SSE__ 1
-// CHECK_KNL_M32: #define __SSSE3__ 1
-// CHECK_KNL_M32: #define __XSAVEOPT__ 1
-// CHECK_KNL_M32: #define __XSAVE__ 1
-// CHECK_KNL_M32: #define __i386 1
-// CHECK_KNL_M32: #define __i386__ 1
-// CHECK_KNL_M32: #define __knl 1
-// CHECK_KNL_M32: #define __knl__ 1
-// CHECK_KNL_M32: #define __tune_knl__ 1
-// CHECK_KNL_M32: #define i386 1
-
-// RUN: %clang -march=knl -m64 -E -dM %s -o - 2>&1 \
-// RUN:     -target i386-unknown-linux \
-// RUN:   | FileCheck -match-full-lines %s -check-prefix=CHECK_KNL_M64
-// CHECK_KNL_M64: #define __AES__ 1
-// CHECK_KNL_M64: #define __AVX2__ 1
-// CHECK_KNL_M64: #define __AVX512CD__ 1
-// CHECK_KNL_M64: #define __AVX512ER__ 1
-// CHECK_KNL_M64: #define __AVX512F__ 1
-// CHECK_KNL_M64: #define __AVX512PF__ 1
-// CHECK_KNL_M64: #define __AVX__ 1
-// CHECK_KNL_M64: #define __BMI2__ 1
-// CHECK_KNL_M64: #define __BMI__ 1
-// CHECK_KNL_M64-NOT: #define __EVEX256__ 1
-// CHECK_KNL_M64: #define __EVEX512__ 1
-// CHECK_KNL_M64: #define __F16C__ 1
-// CHECK_KNL_M64: #define __FMA__ 1
-// CHECK_KNL_M64: #define __LZCNT__ 1
-// CHECK_KNL_M64: #define __MMX__ 1
-// CHECK_KNL_M64: #define __MOVBE__ 1
-// CHECK_KNL_M64: #define __PCLMUL__ 1
-// CHECK_KNL_M64: #define __POPCNT__ 1
-// CHECK_KNL_M64: #define __PREFETCHWT1__ 1
-// CHECK_KNL_M64: #define __PRFCHW__ 1
-// CHECK_KNL_M64: #define __RDRND__ 1
-// CHECK_KNL_M64: #define __SSE2_MATH__ 1
-// CHECK_KNL_M64: #define __SSE2__ 1
-// CHECK_KNL_M64: #define __SSE3__ 1
-// CHECK_KNL_M64: #define __SSE4_1__ 1
-// CHECK_KNL_M64: #define __SSE4_2__ 1
-// CHECK_KNL_M64: #define __SSE_MATH__ 1
-// CHECK_KNL_M64: #define __SSE__ 1
-// CHECK_KNL_M64: #define __SSSE3__ 1
-// CHECK_KNL_M64: #define __XSAVEOPT__ 1
-// CHECK_KNL_M64: #define __XSAVE__ 1
-// CHECK_KNL_M64: #define __amd64 1
-// CHECK_KNL_M64: #define __amd64__ 1
-// CHECK_KNL_M64: #define __knl 1
-// CHECK_KNL_M64: #define __knl__ 1
-// CHECK_KNL_M64: #define __tune_knl__ 1
-// CHECK_KNL_M64: #define __x86_64 1
-// CHECK_KNL_M64: #define __x86_64__ 1
-
-// RUN: %clang -march=knm -m32 -E -dM %s -o - 2>&1 \
-// RUN:     -target i386-unknown-linux \
-// RUN:   | FileCheck -match-full-lines %s -check-prefix=CHECK_KNM_M32
-// CHECK_KNM_M32: #define __AES__ 1
-// CHECK_KNM_M32: #define __AVX2__ 1
-// CHECK_KNM_M32: #define __AVX512CD__ 1
-// CHECK_KNM_M32: #define __AVX512ER__ 1
-// CHECK_KNM_M32: #define __AVX512F__ 1
-// CHECK_KNM_M32: #define __AVX512PF__ 1
-// CHECK_KNM_M32: #define __AVX512VPOPCNTDQ__ 1
-// CHECK_KNM_M32: #define __AVX__ 1
-// CHECK_KNM_M32: #define __BMI2__ 1
-// CHECK_KNM_M32: #define __BMI__ 1
-// CHECK_KNM_M32-NOT: #define __EVEX256__ 1
-// CHECK_KNM_M32: #define __EVEX512__ 1
-// CHECK_KNM_M32: #define __F16C__ 1
-// CHECK_KNM_M32: #define __FMA__ 1
-// CHECK_KNM_M32: #define __LZCNT__ 1
-// CHECK_KNM_M32: #define __MMX__ 1
-// CHECK_KNM_M32: #define __MOVBE__ 1
-// CHECK_KNM_M32: #define __PCLMUL__ 1
-// CHECK_KNM_M32: #define __POPCNT__ 1
-// CHECK_KNM_M32: #define __PREFETCHWT1__ 1
-// CHECK_KNM_M32: #define __PRFCHW__ 1
-// CHECK_KNM_M32: #define __RDRND__ 1
-// CHECK_KNM_M32: #define __SSE2__ 1
-// CHECK_KNM_M32: #define __SSE3__ 1
-// CHECK_KNM_M32: #define __SSE4_1__ 1
-// CHECK_KNM_M32: #define __SSE4_2__ 1
-// CHECK_KNM_M32: #define __SSE__ 1
-// CHECK_KNM_M32: #define __SSSE3__ 1
-// CHECK_KNM_M32: #define __XSAVEOPT__ 1
-// CHECK_KNM_M32: #define __XSAVE__ 1
-// CHECK_KNM_M32: #define __i386 1
-// CHECK_KNM_M32: #define __i386__ 1
-// CHECK_KNM_M32: #define i386 1
-
-// RUN: %clang -march=knm -m64 -E -dM %s -o - 2>&1 \
-// RUN:     -target i386-unknown-linux \
-// RUN:   | FileCheck -match-full-lines %s -check-prefix=CHECK_KNM_M64
-// CHECK_KNM_M64: #define __AES__ 1
-// CHECK_KNM_M64: #define __AVX2__ 1
-// CHECK_KNM_M64: #define __AVX512CD__ 1
-// CHECK_KNM_M64: #define __AVX512ER__ 1
-// CHECK_KNM_M64: #define __AVX512F__ 1
-// CHECK_KNM_M64: #define __AVX512PF__ 1
-// CHECK_KNM_M64: #define __AVX512VPOPCNTDQ__ 1
-// CHECK_KNM_M64: #define __AVX__ 1
-// CHECK_KNM_M64: #define __BMI2__ 1
-// CHECK_KNM_M64: #define __BMI__ 1
-// CHECK_KNM_M64-NOT: #define __EVEX256__ 1
-// CHECK_KNM_M64: #define __EVEX512__ 1
-// CHECK_KNM_M64: #define __F16C__ 1
-// CHECK_KNM_M64: #define __FMA__ 1
-// CHECK_KNM_M64: #define __LZCNT__ 1
-// CHECK_KNM_M64: #define __MMX__ 1
-// CHECK_KNM_M64: #define __MOVBE__ 1
-// CHECK_KNM_M64: #define __PCLMUL__ 1
-// CHECK_KNM_M64: #define __POPCNT__ 1
-// CHECK_KNM_M64: #define __PREFETCHWT1__ 1
-// CHECK_KNM_M64: #define __PRFCHW__ 1
-// CHECK_KNM_M64: #define __RDRND__ 1
-// CHECK_KNM_M64: #define __SSE2_MATH__ 1
-// CHECK_KNM_M64: #define __SSE2__ 1
-// CHECK_KNM_M64: #define __SSE3__ 1
-// CHECK_KNM_M64: #define __SSE4_1__ 1
-// CHECK_KNM_M64: #define __SSE4_2__ 1
-// CHECK_KNM_M64: #define __SSE_MATH__ 1
-// CHECK_KNM_M64: #define __SSE__ 1
-// CHECK_KNM_M64: #define __SSSE3__ 1
-// CHECK_KNM_M64: #define __XSAVEOPT__ 1
-// CHECK_KNM_M64: #define __XSAVE__ 1
-// CHECK_KNM_M64: #define __amd64 1
-// CHECK_KNM_M64: #define __amd64__ 1
-// CHECK_KNM_M64: #define __x86_64 1
-// CHECK_KNM_M64: #define __x86_64__ 1
-
 // RUN: %clang -march=skylake-avx512 -m32 -E -dM %s -o - 2>&1 \
 // RUN:     -target i386-unknown-linux \
 // RUN:   | FileCheck -match-full-lines %s -check-prefix=CHECK_SKX_M32
diff --git a/clang/test/Preprocessor/x86_target_features.c b/clang/test/Preprocessor/x86_target_features.c
index 888eecd08d84a2..8ef565ccbc85c3 100644
--- a/clang/test/Preprocessor/x86_target_features.c
+++ b/clang/test/Preprocessor/x86_target_features.c
@@ -287,7 +287,6 @@
 
 // RUN: %clang -target i386-unknown-unknown -march=nehalem -mno-sse4.2 -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=CPUPOPCNT %s
 // RUN: %clang -target i386-unknown-unknown -march=silvermont -mno-sse4.2 -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=CPUPOPCNT %s
-// RUN: %clang -target i386-unknown-unknown -march=knl -mno-sse4.2 -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=CPUPOPCNT %s
 
 // CPUPOPCNT: #define __POPCNT__ 1
 
@@ -305,7 +304,6 @@
 
 // RUN: %clang -target i386-unknown-unknown -march=pentium3 -mno-sse -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=MARCHMMXNOSSE %s
 // RUN: %clang -target i386-unknown-unknown -march=atom -mno-sse -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=MARCHMMXNOSSE %s
-// RUN: %clang -target i386-unknown-unknown -march=knl -mno-sse -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=MARCHMMXNOSSE %s
 // RUN: %clang -target i386-unknown-unknown -march=btver1 -mno-sse -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=MARCHMMXNOSSE %s
 // RUN: %clang -target i386-unknown-unknown -march=znver1 -mno-sse -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=MARCHMMXNOSSE %s
 
diff --git a/llvm/include/llvm/TargetParser/X86TargetParser.h b/llvm/include/llvm/TargetParser/X86TargetParser.h
index 2083e585af4ac8..f3b5b767f56e42 100644
--- a/llvm/include/llvm/TargetParser/X86TargetParser.h
+++ b/llvm/include/llvm/TargetParser/X86TargetParser.h
@@ -121,8 +121,6 @@ enum CPUKind {
   CK_GraniterapidsD,
   CK_Emeraldrapids,
   CK_Clearwaterforest,
-  CK_KNL,
-  CK_KNM,
   CK_Lakemont,
   CK_K6,
   CK_K6_2,
diff --git a/llvm/lib/TargetParser/X86TargetParser.cpp b/llvm/lib/TargetParser/X86TargetParser.cpp
index 085554f18b2b6b..df900aed145ff9 100644
--- a/llvm/lib/TargetParser/X86TargetParser.cpp
+++ b/llvm/lib/TargetParser/X86TargetParser.cpp
@@ -93,13 +93,6 @@ constexpr FeatureBitset FeaturesHaswell =
 constexpr FeatureBitset FeaturesBroadwell =
     FeaturesHaswell | FeatureADX | FeaturePRFCHW | FeatureRDSEED;
 
-// Intel Knights Landing and Knights Mill
-// Knights Landing has feature parity with Broadwell.
-constexpr FeatureBitset FeaturesKNL =
-    FeaturesBroadwell | FeatureAES | FeatureAVX512F | FeatureEVEX512 |
-    FeatureAVX512CD | FeatureAVX512ER | FeatureAVX512PF | FeaturePREFETCHWT1;
-constexpr FeatureBitset FeaturesKNM = FeaturesKNL | FeatureAVX512VPOPCNTDQ;
-
 // Intel Skylake processors.
 constexpr FeatureBitset FeaturesSkylakeClient =
     FeaturesBroadwell | FeatureAES | FeatureCLFLUSHOPT | FeatureXSAVEC |
@@ -379,11 +372,6 @@ constexpr ProcInfo Processors[] = {
   { {"emeraldrapids"}, CK_Emeraldrapids, FEATURE_AVX512BF16, FeaturesSapphireRapids, 'n', false },
   // Clearwaterforest microarchitecture based processors.
   { {"clearwaterforest"}, CK_Lunarlake, FEATURE_AVX2, FeaturesClearwaterforest, 'p', false },
-  // Knights Landing processor.
-  { {"knl"}, CK_KNL, FEATURE_AVX512F, FeaturesKNL, 'Z', false },
-  { {"mic_avx512"}, CK_KNL, FEATURE_AVX512F, FeaturesKNL, 'Z', true },
-  // Knights Mill processor.
-  { {"knm"}, CK_KNM, FEATURE_AVX5124FMAPS, FeaturesKNM, 'j', false },
   // Lakemont microarchitecture based processors.
   { {"lakemont"}, CK_Lakemont, ~0U, FeatureCMPXCHG8B, '\0', false },
   // K6 architecture processors.

>From 62f5c0ce3f8946f432ecf8e3535574c0d99615d5 Mon Sep 17 00:00:00 2001
From: Freddy Ye <freddy.ye at intel.com>
Date: Thu, 21 Dec 2023 20:42:21 +0800
Subject: [PATCH 2/7] [X86][BE] Remove Xeon Phi CPU names supports.

---
 llvm/lib/Target/X86/X86.td | 53 --------------------------------------
 1 file changed, 53 deletions(-)

diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td
index 5fd6828f43129d..d13dc9a271e59b 100644
--- a/llvm/lib/Target/X86/X86.td
+++ b/llvm/lib/Target/X86/X86.td
@@ -1281,51 +1281,6 @@ def ProcessorFeatures {
   list<SubtargetFeature> CWFFeatures =
     !listconcat(ARLSFeatures, CWFAdditionalFeatures);
 
-  // Knights Landing
-  list<SubtargetFeature> KNLFeatures = [FeatureX87,
-                                        FeatureCX8,
-                                        FeatureCMOV,
-                                        FeatureMMX,
-                                        FeatureFXSR,
-                                        FeatureNOPL,
-                                        FeatureX86_64,
-                                        FeatureCX16,
-                                        FeatureCRC32,
-                                        FeaturePOPCNT,
-                                        FeaturePCLMUL,
-                                        FeatureXSAVE,
-                                        FeatureXSAVEOPT,
-                                        FeatureLAHFSAHF64,
-                                        FeatureAES,
-                                        FeatureRDRAND,
-                                        FeatureF16C,
-                                        FeatureFSGSBase,
-                                        FeatureAVX512,
-                                        FeatureEVEX512,
-                                        FeatureERI,
-                                        FeatureCDI,
-                                        FeaturePFI,
-                                        FeaturePREFETCHWT1,
-                                        FeatureADX,
-                                        FeatureRDSEED,
-                                        FeatureMOVBE,
-                                        FeatureLZCNT,
-                                        FeatureBMI,
-                                        FeatureBMI2,
-                                        FeatureFMA,
-                                        FeaturePRFCHW];
-  list<SubtargetFeature> KNLTuning = [TuningSlowDivide64,
-                                      TuningSlow3OpsLEA,
-                                      TuningSlowIncDec,
-                                      TuningSlowTwoMemOps,
-                                      TuningPreferMaskRegisters,
-                                      TuningFastGather,
-                                      TuningFastMOVBE,
-                                      TuningSlowPMADDWD];
-  // TODO Add AVX5124FMAPS/AVX5124VNNIW features
-  list<SubtargetFeature> KNMFeatures =
-    !listconcat(KNLFeatures, [FeatureVPOPCNTDQ]);
-
   // Barcelona
   list<SubtargetFeature> BarcelonaFeatures = [FeatureX87,
                                               FeatureCX8,
@@ -1746,14 +1701,6 @@ foreach P = ["broadwell", "core_5th_gen_avx", "core_5th_gen_avx_tsx"] in {
 def : ProcModel<"skylake", SkylakeClientModel, ProcessorFeatures.SKLFeatures,
                 ProcessorFeatures.SKLTuning>;
 
-// FIXME: define KNL scheduler model
-foreach P = ["knl", "mic_avx512"] in {
-  def : ProcModel<P, HaswellModel, ProcessorFeatures.KNLFeatures,
-                  ProcessorFeatures.KNLTuning>;
-}
-def : ProcModel<"knm", HaswellModel, ProcessorFeatures.KNMFeatures,
-                ProcessorFeatures.KNLTuning>;
-
 foreach P = ["skylake-avx512", "skx", "skylake_avx512"] in {
   def : ProcModel<P, SkylakeServerModel, ProcessorFeatures.SKXFeatures,
                   ProcessorFeatures.SKXTuning>;

>From c5031694c2c6995daf60286436d0ef8e10e24157 Mon Sep 17 00:00:00 2001
From: Freddy Ye <freddy.ye at intel.com>
Date: Thu, 21 Dec 2023 16:34:53 +0800
Subject: [PATCH 3/7] [X86][BE] Workaround backend tests to work.

Replace '-mcpu=knl' with '-mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper' in the test RUN lines.
---
 .../X86/masked-intrinsic-cost-inseltpoison.ll          |  2 +-
 .../Analysis/CostModel/X86/masked-intrinsic-cost.ll    |  2 +-
 llvm/test/Analysis/CostModel/X86/mul32.ll              |  2 +-
 llvm/test/Analysis/CostModel/X86/mul64.ll              |  2 +-
 llvm/test/CodeGen/X86/all-ones-vector.ll               |  4 ++--
 llvm/test/CodeGen/X86/atomic-idempotent.ll             |  2 +-
 llvm/test/CodeGen/X86/avx-isa-check.ll                 |  8 ++++----
 llvm/test/CodeGen/X86/avx512-bugfix-23634.ll           |  2 +-
 llvm/test/CodeGen/X86/avx512-bugfix-25270.ll           |  2 +-
 llvm/test/CodeGen/X86/avx512-build-vector.ll           |  2 +-
 llvm/test/CodeGen/X86/avx512-calling-conv.ll           | 10 +++++-----
 llvm/test/CodeGen/X86/avx512-hadd-hsub.ll              |  2 +-
 llvm/test/CodeGen/X86/avx512-inc-dec.ll                |  2 +-
 llvm/test/CodeGen/X86/avx512-intel-ocl.ll              |  8 ++++----
 llvm/test/CodeGen/X86/avx512-logic.ll                  |  2 +-
 llvm/test/CodeGen/X86/avx512-memfold.ll                |  2 +-
 llvm/test/CodeGen/X86/avx512-mov.ll                    |  2 +-
 llvm/test/CodeGen/X86/avx512-rotate.ll                 |  2 +-
 llvm/test/CodeGen/X86/avx512-scalar.ll                 |  2 +-
 llvm/test/CodeGen/X86/avx512-scalarIntrinsics.ll       |  2 +-
 llvm/test/CodeGen/X86/avx512-scalar_mask.ll            |  2 +-
 llvm/test/CodeGen/X86/avx512-shift.ll                  |  2 +-
 llvm/test/CodeGen/X86/avx512-vselect-crash.ll          |  2 +-
 llvm/test/CodeGen/X86/avx512-vselect.ll                |  2 +-
 llvm/test/CodeGen/X86/avx512bw-intrinsics-canonical.ll |  4 ++--
 llvm/test/CodeGen/X86/avx512bw-mov.ll                  |  2 +-
 .../CodeGen/X86/avx512bwvl-intrinsics-canonical.ll     |  2 +-
 llvm/test/CodeGen/X86/avx512bwvl-mov.ll                |  2 +-
 llvm/test/CodeGen/X86/avx512cfma-intrinsics.ll         |  2 +-
 llvm/test/CodeGen/X86/avx512cfmul-intrinsics.ll        |  2 +-
 llvm/test/CodeGen/X86/avx512cfmulsh-instrinsics.ll     |  2 +-
 llvm/test/CodeGen/X86/avx512vl-arith.ll                |  4 ++--
 llvm/test/CodeGen/X86/avx512vl-logic.ll                |  2 +-
 llvm/test/CodeGen/X86/avx512vl-mov.ll                  |  2 +-
 llvm/test/CodeGen/X86/avx512vl-vec-cmp.ll              |  2 +-
 llvm/test/CodeGen/X86/combine-select.ll                |  2 +-
 llvm/test/CodeGen/X86/cpus-intel.ll                    |  3 ---
 llvm/test/CodeGen/X86/i1narrowfail.ll                  |  2 +-
 llvm/test/CodeGen/X86/memcpy.ll                        |  2 +-
 llvm/test/CodeGen/X86/memset-zero.ll                   |  2 +-
 llvm/test/CodeGen/X86/pku.ll                           |  4 ++--
 llvm/test/CodeGen/X86/pr32451.ll                       |  2 +-
 llvm/test/CodeGen/X86/pr34139.ll                       |  2 +-
 llvm/test/CodeGen/X86/recip-fastmath.ll                |  2 +-
 llvm/test/CodeGen/X86/recip-fastmath2.ll               |  2 +-
 llvm/test/CodeGen/X86/setcc-lowering.ll                |  2 +-
 llvm/test/CodeGen/X86/slow-pmulld.ll                   |  4 ++--
 llvm/test/CodeGen/X86/slow-unaligned-mem.ll            |  2 +-
 llvm/test/CodeGen/X86/vector-shuffle-512-v32.ll        |  2 +-
 llvm/test/CodeGen/X86/vselect-minmax.ll                |  2 +-
 llvm/test/CodeGen/X86/x86-interrupt_cc.ll              |  4 ++--
 llvm/test/CodeGen/X86/xaluo.ll                         |  2 +-
 llvm/test/CodeGen/X86/xmulo.ll                         |  2 +-
 llvm/test/MC/Disassembler/X86/avx-512.txt              |  2 +-
 llvm/test/MC/X86/x86_long_nop.s                        |  2 +-
 .../Transforms/LoopVectorize/X86/gather_scatter.ll     |  4 ++--
 .../Transforms/LoopVectorize/X86/masked_load_store.ll  |  2 +-
 .../SLPVectorizer/X86/alternate-calls-inseltpoison.ll  |  2 +-
 .../Transforms/SLPVectorizer/X86/alternate-calls.ll    |  2 +-
 .../SLPVectorizer/X86/alternate-cast-inseltpoison.ll   |  2 +-
 .../Transforms/SLPVectorizer/X86/alternate-cast.ll     |  2 +-
 .../SLPVectorizer/X86/alternate-fp-inseltpoison.ll     |  2 +-
 llvm/test/Transforms/SLPVectorizer/X86/alternate-fp.ll |  2 +-
 .../SLPVectorizer/X86/alternate-int-inseltpoison.ll    |  2 +-
 .../test/Transforms/SLPVectorizer/X86/alternate-int.ll |  2 +-
 llvm/test/Transforms/SLPVectorizer/X86/arith-abs.ll    |  2 +-
 .../Transforms/SLPVectorizer/X86/arith-add-saddo.ll    |  2 +-
 .../Transforms/SLPVectorizer/X86/arith-add-ssat.ll     |  2 +-
 .../Transforms/SLPVectorizer/X86/arith-add-uaddo.ll    |  2 +-
 .../Transforms/SLPVectorizer/X86/arith-add-usat.ll     |  2 +-
 llvm/test/Transforms/SLPVectorizer/X86/arith-add.ll    |  2 +-
 llvm/test/Transforms/SLPVectorizer/X86/arith-div.ll    |  2 +-
 llvm/test/Transforms/SLPVectorizer/X86/arith-fix.ll    |  2 +-
 .../Transforms/SLPVectorizer/X86/arith-fshl-rot.ll     |  2 +-
 llvm/test/Transforms/SLPVectorizer/X86/arith-fshl.ll   |  2 +-
 .../Transforms/SLPVectorizer/X86/arith-fshr-rot.ll     |  2 +-
 llvm/test/Transforms/SLPVectorizer/X86/arith-fshr.ll   |  2 +-
 .../Transforms/SLPVectorizer/X86/arith-mul-smulo.ll    |  2 +-
 .../Transforms/SLPVectorizer/X86/arith-mul-umulo.ll    |  2 +-
 llvm/test/Transforms/SLPVectorizer/X86/arith-mul.ll    |  2 +-
 llvm/test/Transforms/SLPVectorizer/X86/arith-smax.ll   |  2 +-
 llvm/test/Transforms/SLPVectorizer/X86/arith-smin.ll   |  2 +-
 .../Transforms/SLPVectorizer/X86/arith-sub-ssat.ll     |  2 +-
 .../Transforms/SLPVectorizer/X86/arith-sub-ssubo.ll    |  2 +-
 .../Transforms/SLPVectorizer/X86/arith-sub-usat.ll     |  2 +-
 .../Transforms/SLPVectorizer/X86/arith-sub-usubo.ll    |  2 +-
 llvm/test/Transforms/SLPVectorizer/X86/arith-sub.ll    |  2 +-
 llvm/test/Transforms/SLPVectorizer/X86/arith-umax.ll   |  2 +-
 llvm/test/Transforms/SLPVectorizer/X86/arith-umin.ll   |  2 +-
 .../Transforms/SLPVectorizer/X86/hadd-inseltpoison.ll  |  2 +-
 llvm/test/Transforms/SLPVectorizer/X86/hadd.ll         |  2 +-
 .../Transforms/SLPVectorizer/X86/hsub-inseltpoison.ll  |  2 +-
 llvm/test/Transforms/SLPVectorizer/X86/hsub.ll         |  2 +-
 .../Transforms/SLPVectorizer/X86/sext-inseltpoison.ll  |  2 +-
 llvm/test/Transforms/SLPVectorizer/X86/sext.ll         |  2 +-
 llvm/test/Transforms/SLPVectorizer/X86/shift-ashr.ll   |  2 +-
 llvm/test/Transforms/SLPVectorizer/X86/shift-lshr.ll   |  2 +-
 llvm/test/Transforms/SLPVectorizer/X86/shift-shl.ll    |  2 +-
 .../Transforms/SLPVectorizer/X86/zext-inseltpoison.ll  |  2 +-
 llvm/test/Transforms/SLPVectorizer/X86/zext.ll         |  2 +-
 llvm/test/tools/llvm-mca/X86/cpus.s                    |  2 +-
 .../test/tools/llvm-mca/X86/register-file-statistics.s |  2 +-
 llvm/test/tools/llvm-mca/X86/scheduler-queue-usage.s   |  2 +-
 103 files changed, 119 insertions(+), 122 deletions(-)

diff --git a/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost-inseltpoison.ll b/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost-inseltpoison.ll
index 897344d622d0f8..81b37cfa977c2d 100644
--- a/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost-inseltpoison.ll
+++ b/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost-inseltpoison.ll
@@ -5,7 +5,7 @@
 ; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -mattr=+avx2 -passes="print<cost-model>" 2>&1 -disable-output | FileCheck %s --check-prefixes=AVX,AVX2
 ;
 ; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -mcpu=skylake -passes="print<cost-model>" 2>&1 -disable-output | FileCheck %s --check-prefixes=AVX,SKL
-; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -mcpu=knl -passes="print<cost-model>" 2>&1 -disable-output | FileCheck %s --check-prefixes=AVX512,KNL
+; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -passes="print<cost-model>" 2>&1 -disable-output | FileCheck %s --check-prefixes=AVX512,KNL
 ; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -mcpu=skx -passes="print<cost-model>" 2>&1 -disable-output | FileCheck %s --check-prefixes=AVX512,SKX
 
 define i32 @masked_load() {
diff --git a/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll b/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll
index 5f22b2e39f9470..49a83d6fe1e26e 100644
--- a/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll
+++ b/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll
@@ -5,7 +5,7 @@
 ; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -mattr=+avx2 -passes="print<cost-model>" 2>&1 -disable-output | FileCheck %s --check-prefixes=AVX,AVX2
 ;
 ; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -mcpu=skylake -passes="print<cost-model>" 2>&1 -disable-output | FileCheck %s --check-prefixes=AVX,SKL
-; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -mcpu=knl -passes="print<cost-model>" 2>&1 -disable-output | FileCheck %s --check-prefixes=AVX512,KNL
+; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -passes="print<cost-model>" 2>&1 -disable-output | FileCheck %s --check-prefixes=AVX512,KNL
 ; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -mcpu=skx -passes="print<cost-model>" 2>&1 -disable-output | FileCheck %s --check-prefixes=AVX512,SKX
 
 define i32 @masked_load() {
diff --git a/llvm/test/Analysis/CostModel/X86/mul32.ll b/llvm/test/Analysis/CostModel/X86/mul32.ll
index d50fc41e7b049a..4db2167a6dad51 100644
--- a/llvm/test/Analysis/CostModel/X86/mul32.ll
+++ b/llvm/test/Analysis/CostModel/X86/mul32.ll
@@ -11,7 +11,7 @@
 ; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=x86_64-apple-macosx10.8.0 -mcpu=slm | FileCheck %s --check-prefixes=SLM
 ; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=x86_64-apple-macosx10.8.0 -mcpu=goldmont | FileCheck %s --check-prefixes=GLM
 ; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=x86_64-apple-macosx10.8.0 -mcpu=btver2 | FileCheck %s --check-prefixes=AVX1
-; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=x86_64-apple-macosx10.8.0 -mcpu=knl | FileCheck %s --check-prefixes=AVX512
+; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=x86_64-apple-macosx10.8.0 -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper | FileCheck %s --check-prefixes=AVX512
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.8.0"
diff --git a/llvm/test/Analysis/CostModel/X86/mul64.ll b/llvm/test/Analysis/CostModel/X86/mul64.ll
index 718972093e8e51..e598974d993bb6 100644
--- a/llvm/test/Analysis/CostModel/X86/mul64.ll
+++ b/llvm/test/Analysis/CostModel/X86/mul64.ll
@@ -11,7 +11,7 @@
 ; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=x86_64-apple-macosx10.8.0 -mcpu=slm | FileCheck %s --check-prefixes=SLM
 ; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=x86_64-apple-macosx10.8.0 -mcpu=goldmont | FileCheck %s --check-prefixes=GLM
 ; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=x86_64-apple-macosx10.8.0 -mcpu=btver2 | FileCheck %s --check-prefixes=AVX1
-; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=x86_64-apple-macosx10.8.0 -mcpu=knl | FileCheck %s --check-prefixes=AVX512,AVX512F
+; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -mtriple=x86_64-apple-macosx10.8.0 -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper | FileCheck %s --check-prefixes=AVX512,AVX512F
 
 ;
 ; mul vXi8 -> mXi64
diff --git a/llvm/test/CodeGen/X86/all-ones-vector.ll b/llvm/test/CodeGen/X86/all-ones-vector.ll
index d624f6c13e3670..ee9df562d1f2f9 100644
--- a/llvm/test/CodeGen/X86/all-ones-vector.ll
+++ b/llvm/test/CodeGen/X86/all-ones-vector.ll
@@ -2,12 +2,12 @@
 ; RUN: llc < %s -mtriple=i386-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE
 ; RUN: llc < %s -mtriple=i386-unknown -mattr=+avx  | FileCheck %s --check-prefixes=AVX,AVX1
 ; RUN: llc < %s -mtriple=i386-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX256,AVX2
-; RUN: llc < %s -mtriple=i386-unknown -mcpu=knl | FileCheck %s --check-prefixes=AVX,AVX256,AVX512
+; RUN: llc < %s -mtriple=i386-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper | FileCheck %s --check-prefixes=AVX,AVX256,AVX512
 ; RUN: llc < %s -mtriple=i386-unknown -mcpu=skx | FileCheck %s --check-prefixes=AVX,AVX256,AVX512
 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE
 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx  | FileCheck %s --check-prefixes=AVX,AVX1
 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX256,AVX2
-; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=knl | FileCheck %s --check-prefixes=AVX,AVX256,AVX512
+; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper | FileCheck %s --check-prefixes=AVX,AVX256,AVX512
 ; RUN: llc < %s -mtriple=x86_64-unknown -mcpu=skx | FileCheck %s --check-prefixes=AVX,AVX256,AVX512
 
 define <16 x i8> @allones_v16i8() nounwind {
diff --git a/llvm/test/CodeGen/X86/atomic-idempotent.ll b/llvm/test/CodeGen/X86/atomic-idempotent.ll
index d5c46485068a64..ff41b451ac3477 100644
--- a/llvm/test/CodeGen/X86/atomic-idempotent.ll
+++ b/llvm/test/CodeGen/X86/atomic-idempotent.ll
@@ -3,7 +3,7 @@
 ; RUN: llc < %s -mtriple=i686-- -verify-machineinstrs           -mattr=+sse2      | FileCheck %s --check-prefixes=X86,X86-GENERIC,X86-SSE2
 ; RUN: llc < %s -mtriple=i686-- -verify-machineinstrs -mcpu=slm -mattr=-sse2      | FileCheck %s --check-prefixes=X86,X86-GENERIC,X86-SLM
 ; RUN: llc < %s -mtriple=i686-- -verify-machineinstrs -mcpu=goldmont -mattr=-sse2 | FileCheck %s --check-prefixes=X86,X86-GENERIC,X86-SLM
-; RUN: llc < %s -mtriple=i686-- -verify-machineinstrs -mcpu=knl -mattr=-sse2      | FileCheck %s --check-prefixes=X86,X86-GENERIC,X86-SLM
+; RUN: llc < %s -mtriple=i686-- -verify-machineinstrs -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -mattr=-sse2      | FileCheck %s --check-prefixes=X86,X86-GENERIC,X86-SLM
 ; RUN: llc < %s -mtriple=i686-- -verify-machineinstrs -mcpu=atom -mattr=-sse2     | FileCheck %s --check-prefixes=X86,X86-ATOM
 
 ; On x86, an atomic rmw operation that does not modify the value in memory
diff --git a/llvm/test/CodeGen/X86/avx-isa-check.ll b/llvm/test/CodeGen/X86/avx-isa-check.ll
index c82a37781ab052..e6b17d64f5180c 100644
--- a/llvm/test/CodeGen/X86/avx-isa-check.ll
+++ b/llvm/test/CodeGen/X86/avx-isa-check.ll
@@ -2,10 +2,10 @@
 
 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=corei7-avx                             -o /dev/null
 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=core-avx2 -mattr=+avx2                 -o /dev/null
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=knl                                    -o /dev/null
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=knl  -mattr=+avx512vl                  -o /dev/null
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=knl  -mattr=+avx512bw                  -o /dev/null
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=knl  -mattr=+avx512vl -mattr=+avx512bw -o /dev/null
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper                                    -o /dev/null
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper  -mattr=+avx512vl                  -o /dev/null
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper  -mattr=+avx512bw                  -o /dev/null
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper  -mattr=+avx512vl -mattr=+avx512bw -o /dev/null
 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=skx                                    -o /dev/null
 
 define <4 x i64> @vpand_256(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
diff --git a/llvm/test/CodeGen/X86/avx512-bugfix-23634.ll b/llvm/test/CodeGen/X86/avx512-bugfix-23634.ll
index ee20747e23dcc0..5eb0f80f42264e 100644
--- a/llvm/test/CodeGen/X86/avx512-bugfix-23634.ll
+++ b/llvm/test/CodeGen/X86/avx512-bugfix-23634.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper | FileCheck %s
 
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
diff --git a/llvm/test/CodeGen/X86/avx512-bugfix-25270.ll b/llvm/test/CodeGen/X86/avx512-bugfix-25270.ll
index 60390958b3e470..d672320d89b6ca 100644
--- a/llvm/test/CodeGen/X86/avx512-bugfix-25270.ll
+++ b/llvm/test/CodeGen/X86/avx512-bugfix-25270.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper | FileCheck %s
 
 declare void @Print__512(<16 x i32>) #0
 
diff --git a/llvm/test/CodeGen/X86/avx512-build-vector.ll b/llvm/test/CodeGen/X86/avx512-build-vector.ll
index b001ebf4d19b79..60a035ccbbfa58 100644
--- a/llvm/test/CodeGen/X86/avx512-build-vector.ll
+++ b/llvm/test/CodeGen/X86/avx512-build-vector.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper | FileCheck %s
 
 define <16 x i32> @test2(<16 x i32> %x) {
 ; CHECK-LABEL: test2:
diff --git a/llvm/test/CodeGen/X86/avx512-calling-conv.ll b/llvm/test/CodeGen/X86/avx512-calling-conv.ll
index b39b089faa2a5e..6efd692762db8c 100644
--- a/llvm/test/CodeGen/X86/avx512-calling-conv.ll
+++ b/llvm/test/CodeGen/X86/avx512-calling-conv.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-apple-darwin9 -mcpu=knl | FileCheck %s --check-prefix=ALL_X64 --check-prefix=KNL
+; RUN: llc < %s -mtriple=x86_64-apple-darwin9 -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper | FileCheck %s --check-prefix=ALL_X64 --check-prefix=KNL
 ; RUN: llc < %s -mtriple=x86_64-apple-darwin9 -mcpu=skx | FileCheck %s --check-prefix=ALL_X64 --check-prefix=SKX
-; RUN: llc < %s -mtriple=i686-apple-darwin9 -mcpu=knl | FileCheck %s --check-prefix=KNL_X32
+; RUN: llc < %s -mtriple=i686-apple-darwin9 -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper | FileCheck %s --check-prefix=KNL_X32
 ; RUN: llc < %s -mtriple=x86_64-apple-darwin9 -mcpu=skx -fast-isel | FileCheck %s --check-prefix=FASTISEL
 
 define <16 x i1> @test1() {
@@ -681,12 +681,12 @@ define <17 x i1> @test16(<17 x i1> %a, <17 x i1> %b) nounwind {
 ; KNL-NEXT:    pushq %rbx
 ; KNL-NEXT:    xorl %r10d, %r10d
 ; KNL-NEXT:    testb $1, {{[0-9]+}}(%rsp)
-; KNL-NEXT:    movl $65535, %eax ## imm = 0xFFFF
+; KNL-NEXT:    movl $65535, %ebx ## imm = 0xFFFF
 ; KNL-NEXT:    movl $0, %r11d
-; KNL-NEXT:    cmovnel %eax, %r11d
+; KNL-NEXT:    cmovnel %ebx, %r11d
 ; KNL-NEXT:    testb $1, {{[0-9]+}}(%rsp)
-; KNL-NEXT:    cmovnel %eax, %r10d
 ; KNL-NEXT:    movq %rdi, %rax
+; KNL-NEXT:    cmovnel %ebx, %r10d
 ; KNL-NEXT:    movzbl {{[0-9]+}}(%rsp), %edi
 ; KNL-NEXT:    andl $1, %edi
 ; KNL-NEXT:    kmovw %edi, %k0
diff --git a/llvm/test/CodeGen/X86/avx512-hadd-hsub.ll b/llvm/test/CodeGen/X86/avx512-hadd-hsub.ll
index ae3777453c68d5..d323db2f96d973 100644
--- a/llvm/test/CodeGen/X86/avx512-hadd-hsub.ll
+++ b/llvm/test/CodeGen/X86/avx512-hadd-hsub.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-;RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=knl | FileCheck %s --check-prefixes=CHECK,KNL
+;RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper | FileCheck %s --check-prefixes=CHECK,KNL
 ;RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=skx | FileCheck %s --check-prefixes=CHECK,SKX
 
 define i32 @hadd_16(<16 x i32> %x225) {
diff --git a/llvm/test/CodeGen/X86/avx512-inc-dec.ll b/llvm/test/CodeGen/X86/avx512-inc-dec.ll
index beef481315d759..803deb9bc4947f 100644
--- a/llvm/test/CodeGen/X86/avx512-inc-dec.ll
+++ b/llvm/test/CodeGen/X86/avx512-inc-dec.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper | FileCheck %s
 
 define i32 @test(i32 %a, i32 %b) {
 ; CHECK-LABEL: test:
diff --git a/llvm/test/CodeGen/X86/avx512-intel-ocl.ll b/llvm/test/CodeGen/X86/avx512-intel-ocl.ll
index 25d182afd66e71..24985ad6eb4279 100644
--- a/llvm/test/CodeGen/X86/avx512-intel-ocl.ll
+++ b/llvm/test/CodeGen/X86/avx512-intel-ocl.ll
@@ -1,11 +1,11 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=i686-apple-darwin -mcpu=knl | FileCheck %s -check-prefix=X32
+; RUN: llc < %s -mtriple=i686-apple-darwin -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper | FileCheck %s -check-prefix=X32
 ; RUN: llc < %s -mtriple=i686-apple-darwin -mcpu=skx | FileCheck %s -check-prefix=X32
-; RUN: llc < %s -mtriple=i386-pc-win32 -mcpu=knl | FileCheck %s -check-prefix=WIN32
+; RUN: llc < %s -mtriple=i386-pc-win32 -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper | FileCheck %s -check-prefix=WIN32
 ; RUN: llc < %s -mtriple=i386-pc-win32 -mcpu=skx | FileCheck %s -check-prefix=WIN32
-; RUN: llc < %s -mtriple=x86_64-win32 -mcpu=knl | FileCheck %s -check-prefixes=WIN64,WIN64-KNL
+; RUN: llc < %s -mtriple=x86_64-win32 -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper | FileCheck %s -check-prefixes=WIN64,WIN64-KNL
 ; RUN: llc < %s -mtriple=x86_64-win32 -mcpu=skx | FileCheck %s -check-prefixes=WIN64,WIN64-SKX
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s -check-prefixes=X64,X64-KNL
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper | FileCheck %s -check-prefixes=X64,X64-KNL
 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s -check-prefixes=X64,X64-SKX
 
 declare <16 x float> @func_float16_ptr(<16 x float>, ptr)
diff --git a/llvm/test/CodeGen/X86/avx512-logic.ll b/llvm/test/CodeGen/X86/avx512-logic.ll
index e53e194ba05c2a..8267eef1bcc0ca 100644
--- a/llvm/test/CodeGen/X86/avx512-logic.ll
+++ b/llvm/test/CodeGen/X86/avx512-logic.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s --check-prefix=ALL --check-prefix=KNL
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper | FileCheck %s --check-prefix=ALL --check-prefix=KNL
 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s --check-prefix=ALL --check-prefix=SKX
 
 
diff --git a/llvm/test/CodeGen/X86/avx512-memfold.ll b/llvm/test/CodeGen/X86/avx512-memfold.ll
index 906687569529d8..b874fe7e36a197 100644
--- a/llvm/test/CodeGen/X86/avx512-memfold.ll
+++ b/llvm/test/CodeGen/X86/avx512-memfold.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
+; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper | FileCheck %s
 
 define i8 @test_int_x86_avx512_mask_cmp_ss(<4 x float> %a, ptr %b, i8 %mask) {
 ; CHECK-LABEL: test_int_x86_avx512_mask_cmp_ss:
diff --git a/llvm/test/CodeGen/X86/avx512-mov.ll b/llvm/test/CodeGen/X86/avx512-mov.ll
index 88682cea754663..e463d032294c43 100644
--- a/llvm/test/CodeGen/X86/avx512-mov.ll
+++ b/llvm/test/CodeGen/X86/avx512-mov.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl --show-mc-encoding| FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper --show-mc-encoding| FileCheck %s
 
 define i32 @test1(float %x) {
 ; CHECK-LABEL: test1:
diff --git a/llvm/test/CodeGen/X86/avx512-rotate.ll b/llvm/test/CodeGen/X86/avx512-rotate.ll
index fcf1a5c3e13c2e..cd47d4007c4d2e 100644
--- a/llvm/test/CodeGen/X86/avx512-rotate.ll
+++ b/llvm/test/CodeGen/X86/avx512-rotate.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=KNL
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper | FileCheck %s --check-prefix=CHECK --check-prefix=KNL
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
 
 declare <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
diff --git a/llvm/test/CodeGen/X86/avx512-scalar.ll b/llvm/test/CodeGen/X86/avx512-scalar.ll
index 200d36d1167703..7a5bab932af190 100644
--- a/llvm/test/CodeGen/X86/avx512-scalar.ll
+++ b/llvm/test/CodeGen/X86/avx512-scalar.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl --show-mc-encoding | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512-KNL
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper --show-mc-encoding | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512-KNL
 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx --show-mc-encoding | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512-SKX
 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx --show-mc-encoding | FileCheck %s --check-prefix=AVX
 
diff --git a/llvm/test/CodeGen/X86/avx512-scalarIntrinsics.ll b/llvm/test/CodeGen/X86/avx512-scalarIntrinsics.ll
index 9767237c4818e2..e4084a867f3a51 100644
--- a/llvm/test/CodeGen/X86/avx512-scalarIntrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512-scalarIntrinsics.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=KNL
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper | FileCheck %s --check-prefix=CHECK --check-prefix=KNL
 
 
 define <4 x float> @test_rsqrt14_ss(<4 x float> %a0) {
diff --git a/llvm/test/CodeGen/X86/avx512-scalar_mask.ll b/llvm/test/CodeGen/X86/avx512-scalar_mask.ll
index 9e9fc57cf7b67d..88461a91e79298 100644
--- a/llvm/test/CodeGen/X86/avx512-scalar_mask.ll
+++ b/llvm/test/CodeGen/X86/avx512-scalar_mask.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper | FileCheck %s
 
 declare <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32)
 declare <4 x float> @llvm.x86.avx512.maskz.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32)
diff --git a/llvm/test/CodeGen/X86/avx512-shift.ll b/llvm/test/CodeGen/X86/avx512-shift.ll
index 7a337c75d0d0c3..82a66147e4ef5e 100644
--- a/llvm/test/CodeGen/X86/avx512-shift.ll
+++ b/llvm/test/CodeGen/X86/avx512-shift.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-;RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=knl | FileCheck %s --check-prefixes=CHECK,KNL
+;RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper | FileCheck %s --check-prefixes=CHECK,KNL
 ;RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=skx | FileCheck %s --check-prefixes=CHECK,SKX
 
 define <16 x i32> @ashr_16_i32(<16 x i32> %a) {
diff --git a/llvm/test/CodeGen/X86/avx512-vselect-crash.ll b/llvm/test/CodeGen/X86/avx512-vselect-crash.ll
index 31ccf867f7aaf2..e6790b51fa7385 100644
--- a/llvm/test/CodeGen/X86/avx512-vselect-crash.ll
+++ b/llvm/test/CodeGen/X86/avx512-vselect-crash.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper | FileCheck %s
 
 define <16 x i32> @test() {
 ; CHECK-LABEL: test:
diff --git a/llvm/test/CodeGen/X86/avx512-vselect.ll b/llvm/test/CodeGen/X86/avx512-vselect.ll
index c402e8d7b7714f..e272190593c79b 100644
--- a/llvm/test/CodeGen/X86/avx512-vselect.ll
+++ b/llvm/test/CodeGen/X86/avx512-vselect.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mcpu=skx | FileCheck %s --check-prefixes=CHECK,CHECK-SKX
-; RUN: llc < %s -mcpu=knl | FileCheck %s --check-prefixes=CHECK,CHECK-KNL
+; RUN: llc < %s -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper | FileCheck %s --check-prefixes=CHECK,CHECK-KNL
 
 target triple = "x86_64-unknown-unknown"
 
diff --git a/llvm/test/CodeGen/X86/avx512bw-intrinsics-canonical.ll b/llvm/test/CodeGen/X86/avx512bw-intrinsics-canonical.ll
index 21ae182acc6b05..98b8fc4e5f02a6 100644
--- a/llvm/test/CodeGen/X86/avx512bw-intrinsics-canonical.ll
+++ b/llvm/test/CodeGen/X86/avx512bw-intrinsics-canonical.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512bw | FileCheck %s --check-prefix=AVX512BW
-; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -mcpu=knl -mattr=+avx512bw | FileCheck %s --check-prefix=AVX512F-32
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -mattr=+avx512bw | FileCheck %s --check-prefix=AVX512BW
+; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -mattr=+avx512bw | FileCheck %s --check-prefix=AVX512F-32
 
 ; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/avx512bw-builtins.c
 
diff --git a/llvm/test/CodeGen/X86/avx512bw-mov.ll b/llvm/test/CodeGen/X86/avx512bw-mov.ll
index 7e2f3620863d7a..82cee05acb5f1d 100644
--- a/llvm/test/CodeGen/X86/avx512bw-mov.ll
+++ b/llvm/test/CodeGen/X86/avx512bw-mov.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512bw | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -mattr=+avx512bw | FileCheck %s
 
 define <64 x i8> @test1(ptr %addr) {
 ; CHECK-LABEL: test1:
diff --git a/llvm/test/CodeGen/X86/avx512bwvl-intrinsics-canonical.ll b/llvm/test/CodeGen/X86/avx512bwvl-intrinsics-canonical.ll
index e931b5206e4236..d8ea4e7af967b8 100644
--- a/llvm/test/CodeGen/X86/avx512bwvl-intrinsics-canonical.ll
+++ b/llvm/test/CodeGen/X86/avx512bwvl-intrinsics-canonical.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512bw -mattr=+avx512vl --show-mc-encoding| FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -mattr=+avx512bw -mattr=+avx512vl --show-mc-encoding| FileCheck %s
 
 ; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/avx512vlbw-builtins.c
 
diff --git a/llvm/test/CodeGen/X86/avx512bwvl-mov.ll b/llvm/test/CodeGen/X86/avx512bwvl-mov.ll
index 71b26588833de3..e0dc53f9d38d1b 100644
--- a/llvm/test/CodeGen/X86/avx512bwvl-mov.ll
+++ b/llvm/test/CodeGen/X86/avx512bwvl-mov.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512bw -mattr=+avx512vl --show-mc-encoding| FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -mattr=+avx512bw -mattr=+avx512vl --show-mc-encoding| FileCheck %s
 
 define <32 x i8> @test_256_1(ptr %addr) {
 ; CHECK-LABEL: test_256_1:
diff --git a/llvm/test/CodeGen/X86/avx512cfma-intrinsics.ll b/llvm/test/CodeGen/X86/avx512cfma-intrinsics.ll
index 8d9a927818ce9e..2c0fef7f589baa 100644
--- a/llvm/test/CodeGen/X86/avx512cfma-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512cfma-intrinsics.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512bw -mattr=+avx512fp16 -mattr=+avx512vl | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -mattr=+avx512bw -mattr=+avx512fp16 -mattr=+avx512vl | FileCheck %s
 
 declare <4 x float> @llvm.x86.avx512fp16.mask.vfmadd.cph.128(<4 x float>, <4 x float>, <4 x float>, i8)
 declare <4 x float> @llvm.x86.avx512fp16.maskz.vfmadd.cph.128(<4 x float>, <4 x float>, <4 x float>, i8)
diff --git a/llvm/test/CodeGen/X86/avx512cfmul-intrinsics.ll b/llvm/test/CodeGen/X86/avx512cfmul-intrinsics.ll
index e37c649a4786d4..c745e7693ac8e2 100644
--- a/llvm/test/CodeGen/X86/avx512cfmul-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512cfmul-intrinsics.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512bw -mattr=+avx512fp16 -mattr=+avx512vl | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -mattr=+avx512bw -mattr=+avx512fp16 -mattr=+avx512vl | FileCheck %s
 
 declare <4 x float> @llvm.x86.avx512fp16.mask.vfmul.cph.128(<4 x float>, <4 x float>, <4 x float>, i8)
 
diff --git a/llvm/test/CodeGen/X86/avx512cfmulsh-instrinsics.ll b/llvm/test/CodeGen/X86/avx512cfmulsh-instrinsics.ll
index 43e085f37ff67b..6225ce71b73e9d 100644
--- a/llvm/test/CodeGen/X86/avx512cfmulsh-instrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512cfmulsh-instrinsics.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512bw -mattr=+avx512fp16 -mattr=+avx512vl | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -mattr=+avx512bw -mattr=+avx512fp16 -mattr=+avx512vl | FileCheck %s
 
 declare <4 x float> @llvm.x86.avx512fp16.mask.vfmul.csh(<4 x float>, <4 x float>, <4 x float>, i8, i32)
 declare <4 x float> @llvm.x86.avx512fp16.maskz.vfmul.csh(<4 x float>, <4 x float>, <4 x float>, i8, i32)
diff --git a/llvm/test/CodeGen/X86/avx512vl-arith.ll b/llvm/test/CodeGen/X86/avx512vl-arith.ll
index 1006c5625e26c9..5fdb9c7f05e317 100644
--- a/llvm/test/CodeGen/X86/avx512vl-arith.ll
+++ b/llvm/test/CodeGen/X86/avx512vl-arith.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512vl --show-mc-encoding| FileCheck %s
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512vl,-evex512 --show-mc-encoding| FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -mattr=+avx512vl --show-mc-encoding| FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -mattr=+avx512vl,-evex512 --show-mc-encoding| FileCheck %s
 
 ; 256-bit
 
diff --git a/llvm/test/CodeGen/X86/avx512vl-logic.ll b/llvm/test/CodeGen/X86/avx512vl-logic.ll
index 58621967e2aca6..531cc64faf0ec4 100644
--- a/llvm/test/CodeGen/X86/avx512vl-logic.ll
+++ b/llvm/test/CodeGen/X86/avx512vl-logic.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512vl | FileCheck %s --check-prefix=CHECK --check-prefix=KNL
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -mattr=+avx512vl | FileCheck %s --check-prefix=CHECK --check-prefix=KNL
 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx -mattr=+avx512vl | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
 
 ; 256-bit
diff --git a/llvm/test/CodeGen/X86/avx512vl-mov.ll b/llvm/test/CodeGen/X86/avx512vl-mov.ll
index a86c6a726f0169..15da395570fda4 100644
--- a/llvm/test/CodeGen/X86/avx512vl-mov.ll
+++ b/llvm/test/CodeGen/X86/avx512vl-mov.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512vl --show-mc-encoding| FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -mattr=+avx512vl --show-mc-encoding| FileCheck %s
 
 define <8 x i32> @test_256_1(ptr %addr) {
 ; CHECK-LABEL: test_256_1:
diff --git a/llvm/test/CodeGen/X86/avx512vl-vec-cmp.ll b/llvm/test/CodeGen/X86/avx512vl-vec-cmp.ll
index 5b09e45b6fcf17..3f1087b6f84797 100644
--- a/llvm/test/CodeGen/X86/avx512vl-vec-cmp.ll
+++ b/llvm/test/CodeGen/X86/avx512vl-vec-cmp.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=skx | FileCheck %s --check-prefix=VLX
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=knl | FileCheck %s --check-prefix=NoVLX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper | FileCheck %s --check-prefix=NoVLX
 
 define <4 x i64> @test256_1(<4 x i64> %x, <4 x i64> %y) nounwind {
 ; VLX-LABEL: test256_1:
diff --git a/llvm/test/CodeGen/X86/combine-select.ll b/llvm/test/CodeGen/X86/combine-select.ll
index 7cbb9831ba9ad2..9410088dd92bea 100644
--- a/llvm/test/CodeGen/X86/combine-select.ll
+++ b/llvm/test/CodeGen/X86/combine-select.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper | FileCheck %s
 
 define <4 x float> @select_mask_add_ss(<4 x float> %w, i8 zeroext %u, <4 x float> %a, <4 x float> %b) {
 ; CHECK-LABEL: select_mask_add_ss:
diff --git a/llvm/test/CodeGen/X86/cpus-intel.ll b/llvm/test/CodeGen/X86/cpus-intel.ll
index 5e4d09e081fec9..1375ae4585640c 100644
--- a/llvm/test/CodeGen/X86/cpus-intel.ll
+++ b/llvm/test/CodeGen/X86/cpus-intel.ll
@@ -88,9 +88,6 @@
 ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=goldmont-plus 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
 ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=goldmont_plus 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
 ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=tremont 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
-; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=knl 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
-; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=mic_avx512 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
-; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=knm 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
 ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=raptorlake 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
 ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=meteorlake 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
 ; RUN: llc < %s -o /dev/null -mtriple=x86_64-unknown-unknown -mcpu=sierraforest 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ERROR --allow-empty
diff --git a/llvm/test/CodeGen/X86/i1narrowfail.ll b/llvm/test/CodeGen/X86/i1narrowfail.ll
index 0bd091d4a412bb..1b4df8c37cd068 100644
--- a/llvm/test/CodeGen/X86/i1narrowfail.ll
+++ b/llvm/test/CodeGen/X86/i1narrowfail.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper | FileCheck %s
 
 define void @foo(ptr %ptr) {
 ; CHECK-LABEL: foo:
diff --git a/llvm/test/CodeGen/X86/memcpy.ll b/llvm/test/CodeGen/X86/memcpy.ll
index 6ec9b20163051b..6b79d7b582c348 100644
--- a/llvm/test/CodeGen/X86/memcpy.ll
+++ b/llvm/test/CodeGen/X86/memcpy.ll
@@ -3,7 +3,7 @@
 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=core2     | FileCheck %s -check-prefix=LINUX
 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=skylake   | FileCheck %s -check-prefix=LINUX-SKL
 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=skx       | FileCheck %s -check-prefix=LINUX-SKX
-; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=knl       | FileCheck %s -check-prefix=LINUX-KNL
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper       | FileCheck %s -check-prefix=LINUX-KNL
 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=avx512bw | FileCheck %s -check-prefix=LINUX-AVX512BW
 
 declare void @llvm.memcpy.p0.p0.i64(ptr nocapture, ptr nocapture, i64, i1) nounwind
diff --git a/llvm/test/CodeGen/X86/memset-zero.ll b/llvm/test/CodeGen/X86/memset-zero.ll
index 8c33a22a57e994..b17f06424481a3 100644
--- a/llvm/test/CodeGen/X86/memset-zero.ll
+++ b/llvm/test/CodeGen/X86/memset-zero.ll
@@ -4,7 +4,7 @@
 ; RUN: llc < %s -mtriple=x86_64-unknown-linux -mcpu=nehalem | FileCheck %s --check-prefix=NEHALEM
 ; RUN: llc < %s -mtriple=x86_64-unknown-linux -mcpu=sandybridge | FileCheck %s --check-prefix=SANDYBRIDGE
 ; RUN: llc < %s -mtriple=x86_64-unknown-linux -mcpu=skylake | FileCheck %s --check-prefix=SKYLAKE
-; RUN: llc < %s -mtriple=x86_64-unknown-linux -mcpu=knl | FileCheck %s --check-prefix=KNL
+; RUN: llc < %s -mtriple=x86_64-unknown-linux -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper | FileCheck %s --check-prefix=KNL
 
 
 declare void @llvm.memset.p0.i64(ptr nocapture, i8, i64, i1) nounwind
diff --git a/llvm/test/CodeGen/X86/pku.ll b/llvm/test/CodeGen/X86/pku.ll
index b6b2f98e299969..e6d93822412f8d 100644
--- a/llvm/test/CodeGen/X86/pku.ll
+++ b/llvm/test/CodeGen/X86/pku.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=i686-apple-darwin -mcpu=knl --show-mc-encoding -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,X86
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl --show-mc-encoding -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,X64
+; RUN: llc < %s -mtriple=i686-apple-darwin -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper --show-mc-encoding -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,X86
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper --show-mc-encoding -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK,X64
 
 declare i32 @llvm.x86.rdpkru()
 declare void @llvm.x86.wrpkru(i32)
diff --git a/llvm/test/CodeGen/X86/pr32451.ll b/llvm/test/CodeGen/X86/pr32451.ll
index 0abc87f832ee83..cec2a9a43b48d0 100644
--- a/llvm/test/CodeGen/X86/pr32451.ll
+++ b/llvm/test/CodeGen/X86/pr32451.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -fast-isel -mtriple=i686-unknown-unknown -O0 -mcpu=knl | FileCheck %s
+; RUN: llc < %s -fast-isel -mtriple=i686-unknown-unknown -O0 -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper | FileCheck %s
 
 ; ModuleID = 'convert'
 source_filename = "convert"
diff --git a/llvm/test/CodeGen/X86/pr34139.ll b/llvm/test/CodeGen/X86/pr34139.ll
index 93427e2e6cce25..472a6f166615d5 100644
--- a/llvm/test/CodeGen/X86/pr34139.ll
+++ b/llvm/test/CodeGen/X86/pr34139.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=knl | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper | FileCheck %s
 
 define void @f_f(ptr %ptr) {
 ; CHECK-LABEL: f_f:
diff --git a/llvm/test/CodeGen/X86/recip-fastmath.ll b/llvm/test/CodeGen/X86/recip-fastmath.ll
index 7e9bbc55564248..98be0fba82010d 100644
--- a/llvm/test/CodeGen/X86/recip-fastmath.ll
+++ b/llvm/test/CodeGen/X86/recip-fastmath.ll
@@ -7,7 +7,7 @@
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=sandybridge | FileCheck %s --check-prefixes=AVX,SANDY
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=haswell | FileCheck %s --check-prefixes=AVX,HASWELL
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=haswell -mattr=-fma | FileCheck %s --check-prefixes=AVX,HASWELL-NO-FMA
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=knl | FileCheck %s --check-prefixes=AVX,AVX512,KNL
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper | FileCheck %s --check-prefixes=AVX,AVX512,KNL
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=skx | FileCheck %s --check-prefixes=AVX,AVX512,SKX
 
 ; If the target's divss/divps instructions are substantially
diff --git a/llvm/test/CodeGen/X86/recip-fastmath2.ll b/llvm/test/CodeGen/X86/recip-fastmath2.ll
index 2a5e46bba2c009..202cbd144ed047 100644
--- a/llvm/test/CodeGen/X86/recip-fastmath2.ll
+++ b/llvm/test/CodeGen/X86/recip-fastmath2.ll
@@ -7,7 +7,7 @@
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=sandybridge | FileCheck %s --check-prefixes=AVX,SANDY
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=haswell     | FileCheck %s --check-prefixes=AVX,HASWELL
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=haswell -mattr=-fma | FileCheck %s --check-prefixes=AVX,HASWELL-NO-FMA
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=knl         | FileCheck %s --check-prefixes=AVX,AVX512,KNL
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper         | FileCheck %s --check-prefixes=AVX,AVX512,KNL
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=skx         | FileCheck %s --check-prefixes=AVX,AVX512,SKX
 
 ; It's the extra tests coverage for recip as discussed on D26855.
diff --git a/llvm/test/CodeGen/X86/setcc-lowering.ll b/llvm/test/CodeGen/X86/setcc-lowering.ll
index 705e48ca4c9c9b..c315df214e359e 100644
--- a/llvm/test/CodeGen/X86/setcc-lowering.ll
+++ b/llvm/test/CodeGen/X86/setcc-lowering.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+avx < %s | FileCheck %s --check-prefix=AVX
-; RUN: llc -mtriple=i386-unknown-linux-gnu -mcpu=knl < %s   | FileCheck %s --check-prefix=KNL-32
+; RUN: llc -mtriple=i386-unknown-linux-gnu -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper < %s   | FileCheck %s --check-prefix=KNL-32
 
 
 ; Verify that we don't crash during codegen due to a wrong lowering
diff --git a/llvm/test/CodeGen/X86/slow-pmulld.ll b/llvm/test/CodeGen/X86/slow-pmulld.ll
index 8e330c3bfc676a..a2785e170436d1 100644
--- a/llvm/test/CodeGen/X86/slow-pmulld.ll
+++ b/llvm/test/CodeGen/X86/slow-pmulld.ll
@@ -13,8 +13,8 @@
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq | FileCheck %s --check-prefixes=AVX2,AVX-64,AVX512-64,AVX512DQ-64
 ; RUN: llc < %s -mtriple=i386-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefixes=AVX2,AVX-32,AVX512-32,AVX512BW-32
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefixes=AVX2,AVX-64,AVX512-64,AVX512BW-64
-; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=knl | FileCheck %s --check-prefixes=AVX2,AVX-32,AVX512-32,KNL-32
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=knl | FileCheck %s --check-prefixes=AVX2,AVX-64,AVX512-64,KNL-64
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper | FileCheck %s --check-prefixes=AVX2,AVX-32,AVX512-32,KNL-32
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper | FileCheck %s --check-prefixes=AVX2,AVX-64,AVX512-64,KNL-64
 
 ; Make sure that the slow-pmulld feature can be used without SSE4.1.
 ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=silvermont -mattr=-sse4.1
diff --git a/llvm/test/CodeGen/X86/slow-unaligned-mem.ll b/llvm/test/CodeGen/X86/slow-unaligned-mem.ll
index d74d195439bdae..e06fc2fae56118 100644
--- a/llvm/test/CodeGen/X86/slow-unaligned-mem.ll
+++ b/llvm/test/CodeGen/X86/slow-unaligned-mem.ll
@@ -21,7 +21,7 @@
 ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=ivybridge      2>&1 | FileCheck %s --check-prefixes=FAST,FAST-AVX128
 ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=haswell        2>&1 | FileCheck %s --check-prefixes=FAST,FAST-AVX256
 ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=broadwell      2>&1 | FileCheck %s --check-prefixes=FAST,FAST-AVX256
-; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=knl            2>&1 | FileCheck %s --check-prefixes=FAST,FAST-AVX512
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper            2>&1 | FileCheck %s --check-prefixes=FAST,FAST-AVX512
 ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=skylake-avx512 2>&1 | FileCheck %s --check-prefixes=FAST,FAST-AVX256
 
 ; AMD chips with slow unaligned memory accesses
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-512-v32.ll b/llvm/test/CodeGen/X86/vector-shuffle-512-v32.ll
index 4668d7b6870ef2..372da59a5d8df0 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-512-v32.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-512-v32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck --check-prefixes=ALL,KNL %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper | FileCheck --check-prefixes=ALL,KNL %s
 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck --check-prefixes=ALL,SKX %s
 
 target triple = "x86_64-unknown-unknown"
diff --git a/llvm/test/CodeGen/X86/vselect-minmax.ll b/llvm/test/CodeGen/X86/vselect-minmax.ll
index cb0542ca7cea8b..4d553cc04f7a2c 100644
--- a/llvm/test/CodeGen/X86/vselect-minmax.ll
+++ b/llvm/test/CodeGen/X86/vselect-minmax.ll
@@ -3,7 +3,7 @@
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE,SSE4
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=knl | FileCheck %s --check-prefixes=AVX,AVX512,AVX512F
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper | FileCheck %s --check-prefixes=AVX,AVX512,AVX512F
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=skx | FileCheck %s --check-prefixes=AVX,AVX512,AVX512BW
 
 define <16 x i8> @test1(<16 x i8> %a, <16 x i8> %b) {
diff --git a/llvm/test/CodeGen/X86/x86-interrupt_cc.ll b/llvm/test/CodeGen/X86/x86-interrupt_cc.ll
index cf8b7096816af7..fdd2f824f2ff4b 100644
--- a/llvm/test/CodeGen/X86/x86-interrupt_cc.ll
+++ b/llvm/test/CodeGen/X86/x86-interrupt_cc.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -verify-machineinstrs -mtriple=x86_64-apple-macosx -show-mc-encoding -mcpu=knl < %s | FileCheck %s -check-prefix=CHECK64-KNL
+; RUN: llc -verify-machineinstrs -mtriple=x86_64-apple-macosx -show-mc-encoding -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper < %s | FileCheck %s -check-prefix=CHECK64-KNL
 ; RUN: llc -verify-machineinstrs -mtriple=x86_64-apple-macosx -show-mc-encoding -mcpu=skx < %s | FileCheck %s -check-prefix=CHECK64-SKX
-; RUN: llc -verify-machineinstrs -mtriple=i386-apple-macosx -show-mc-encoding -mcpu=knl < %s | FileCheck %s -check-prefix=CHECK32-KNL
+; RUN: llc -verify-machineinstrs -mtriple=i386-apple-macosx -show-mc-encoding -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper < %s | FileCheck %s -check-prefix=CHECK32-KNL
 ; RUN: llc -verify-machineinstrs -mtriple=i386-apple-macosx -show-mc-encoding -mcpu=skx < %s | FileCheck %s -check-prefix=CHECK32-SKX
 
 ; Make sure we spill the high numbered zmm registers and K registers with the right encoding.
diff --git a/llvm/test/CodeGen/X86/xaluo.ll b/llvm/test/CodeGen/X86/xaluo.ll
index 5796e485f63148..e06f5332936525 100644
--- a/llvm/test/CodeGen/X86/xaluo.ll
+++ b/llvm/test/CodeGen/X86/xaluo.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=x86_64-darwin-unknown < %s | FileCheck %s --check-prefixes=CHECK,SDAG,GENERIC
 ; RUN: llc -mtriple=x86_64-darwin-unknown -fast-isel -fast-isel-abort=1 < %s | FileCheck %s --check-prefixes=CHECK,FAST
-; RUN: llc -mtriple=x86_64-darwin-unknown -mcpu=knl < %s | FileCheck %s --check-prefixes=CHECK,SDAG,KNL
+; RUN: llc -mtriple=x86_64-darwin-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper < %s | FileCheck %s --check-prefixes=CHECK,SDAG,KNL
 
 ;
 ; Get the actual value of the overflow bit.
diff --git a/llvm/test/CodeGen/X86/xmulo.ll b/llvm/test/CodeGen/X86/xmulo.ll
index 6eb34b4e773e8f..b605307d393154 100644
--- a/llvm/test/CodeGen/X86/xmulo.ll
+++ b/llvm/test/CodeGen/X86/xmulo.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -disable-peephole -mtriple=x86_64-linux-unknown < %s | FileCheck %s --check-prefixes=CHECK,LINUX,SDAG
 ; RUN: llc -disable-peephole -mtriple=x86_64-linux-unknown -fast-isel -fast-isel-abort=1 < %s | FileCheck %s --check-prefixes=CHECK,LINUX,FAST
-; RUN: llc -disable-peephole -mtriple=x86_64-linux-unknown -mcpu=knl < %s | FileCheck %s --check-prefixes=CHECK,LINUX,SDAG
+; RUN: llc -disable-peephole -mtriple=x86_64-linux-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper < %s | FileCheck %s --check-prefixes=CHECK,LINUX,SDAG
 ; RUN: llc -disable-peephole -mtriple=x86_64-pc-win32 < %s | FileCheck %s --check-prefixes=CHECK,WIN64
 ; RUN: llc -disable-peephole -mtriple=i386-pc-win32 < %s | FileCheck %s --check-prefix=WIN32
 
diff --git a/llvm/test/MC/Disassembler/X86/avx-512.txt b/llvm/test/MC/Disassembler/X86/avx-512.txt
index 7c6f9d79ebd9b0..7ebce9e391757c 100644
--- a/llvm/test/MC/Disassembler/X86/avx-512.txt
+++ b/llvm/test/MC/Disassembler/X86/avx-512.txt
@@ -1,4 +1,4 @@
-# RUN: llvm-mc --disassemble %s -triple=x86_64-apple-darwin9 -mcpu=knl | FileCheck %s
+# RUN: llvm-mc --disassemble %s -triple=x86_64-apple-darwin9 -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper | FileCheck %s
 # RUN: llvm-mc --disassemble %s -triple=x86_64-apple-darwin9 -mcpu=skx | FileCheck --check-prefix=CHECK-SKX %s
 
 # CHECK: vpbroadcastd    %xmm18, %zmm28 {%k7} {z}
diff --git a/llvm/test/MC/X86/x86_long_nop.s b/llvm/test/MC/X86/x86_long_nop.s
index 6136c3db9a3daa..e427f730bf8740 100644
--- a/llvm/test/MC/X86/x86_long_nop.s
+++ b/llvm/test/MC/X86/x86_long_nop.s
@@ -27,7 +27,7 @@
 # RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu -mcpu=broadwell %s | llvm-objdump -d --no-show-raw-insn - | FileCheck --check-prefix=LNOP15 %s
 # RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu -mcpu=skylake %s | llvm-objdump -d --no-show-raw-insn - | FileCheck --check-prefix=LNOP15 %s
 # RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu -mcpu=skx %s | llvm-objdump -d --no-show-raw-insn - | FileCheck --check-prefix=LNOP15 %s
-# RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu -mcpu=knl %s | llvm-objdump -d --no-show-raw-insn - | FileCheck --check-prefix=LNOP10 %s
+# RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper %s | llvm-objdump -d --no-show-raw-insn - | FileCheck --check-prefix=LNOP10 %s
 # RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu -mcpu=knm %s | llvm-objdump -d --no-show-raw-insn - | FileCheck --check-prefix=LNOP10 %s
 
 # Ensure alignment directives also emit sequences of 10, 11 and 15-byte NOPs on processors
diff --git a/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll b/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll
index e9541c1ee035f2..8d81c59a9b0ef7 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -passes=loop-vectorize,simplifycfg -mcpu=knl -S | FileCheck %s -check-prefix=AVX512
-; RUN: opt < %s -passes=loop-vectorize,simplifycfg -mcpu=knl -force-vector-width=2 -force-target-max-vector-interleave=1 -S | FileCheck %s -check-prefix=FVW2
+; RUN: opt < %s -passes=loop-vectorize,simplifycfg -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -S | FileCheck %s -check-prefix=AVX512
+; RUN: opt < %s -passes=loop-vectorize,simplifycfg -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -force-vector-width=2 -force-target-max-vector-interleave=1 -S | FileCheck %s -check-prefix=FVW2
 
 ; With a force-vector-width, it is sometimes more profitable to generate
 ; scalarized and predicated stores instead of masked scatter. Disable
diff --git a/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll b/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll
index 5cc4d43ec2e49f..b48965e366d9d6 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/masked_load_store.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -passes=loop-vectorize -mcpu=corei7-avx -S | FileCheck %s -check-prefix=AVX -check-prefix=AVX1
 ; RUN: opt < %s -passes=loop-vectorize -mcpu=core-avx2 -S | FileCheck %s -check-prefix=AVX -check-prefix=AVX2
-; RUN: opt < %s -passes=loop-vectorize -mcpu=knl -S | FileCheck %s -check-prefix=AVX512
+; RUN: opt < %s -passes=loop-vectorize -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -S | FileCheck %s -check-prefix=AVX512
 
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-pc_linux"
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/alternate-calls-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/alternate-calls-inseltpoison.ll
index 6c21cc1cfc5be8..c216d57c5317c0 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/alternate-calls-inseltpoison.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/alternate-calls-inseltpoison.ll
@@ -3,7 +3,7 @@
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=SLM
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=AVX
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=AVX2
-; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=AVX2
+; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=AVX2
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=AVX2
 
 define <8 x float> @ceil_floor(<8 x float> %a) {
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/alternate-calls.ll b/llvm/test/Transforms/SLPVectorizer/X86/alternate-calls.ll
index bc5bcee361168a..18363fe3887253 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/alternate-calls.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/alternate-calls.ll
@@ -3,7 +3,7 @@
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=SLM
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=AVX
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=AVX2
-; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=AVX2
+; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=AVX2
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=AVX2
 
 define <8 x float> @ceil_floor(<8 x float> %a) {
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/alternate-cast-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/alternate-cast-inseltpoison.ll
index e24c52ba81ddf4..bc0f84b865ac8c 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/alternate-cast-inseltpoison.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/alternate-cast-inseltpoison.ll
@@ -3,7 +3,7 @@
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer,instcombine -S | FileCheck %s
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer,instcombine -S | FileCheck %s
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer,instcombine -S | FileCheck %s
-; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer,instcombine -S | FileCheck %s
+; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -passes=slp-vectorizer,instcombine -S | FileCheck %s
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -passes=slp-vectorizer,instcombine -S | FileCheck %s
 
 define <8 x float> @sitofp_uitofp(<8 x i32> %a) {
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/alternate-cast.ll b/llvm/test/Transforms/SLPVectorizer/X86/alternate-cast.ll
index 0f8751a6da7f57..03e31913872e8c 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/alternate-cast.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/alternate-cast.ll
@@ -3,7 +3,7 @@
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer,instcombine -S | FileCheck %s
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer,instcombine -S | FileCheck %s
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer,instcombine -S | FileCheck %s
-; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer,instcombine -S | FileCheck %s
+; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -passes=slp-vectorizer,instcombine -S | FileCheck %s
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -passes=slp-vectorizer,instcombine -S | FileCheck %s
 
 define <8 x float> @sitofp_uitofp(<8 x i32> %a) {
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/alternate-fp-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/alternate-fp-inseltpoison.ll
index 5a1de4f3e3d7fd..9aa273d793510e 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/alternate-fp-inseltpoison.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/alternate-fp-inseltpoison.ll
@@ -3,7 +3,7 @@
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=SLM
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
-; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512
+; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512
 
 define <8 x float> @fadd_fsub_v8f32(<8 x float> %a, <8 x float> %b) {
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/alternate-fp.ll b/llvm/test/Transforms/SLPVectorizer/X86/alternate-fp.ll
index 046ed781f4c8d0..be3b86cbdd9d87 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/alternate-fp.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/alternate-fp.ll
@@ -3,7 +3,7 @@
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=SLM
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
-; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512
+; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512
 
 define <8 x float> @fadd_fsub_v8f32(<8 x float> %a, <8 x float> %b) {
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/alternate-int-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/alternate-int-inseltpoison.ll
index 8e878f3f8b80fa..8aeead6a7bea06 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/alternate-int-inseltpoison.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/alternate-int-inseltpoison.ll
@@ -3,7 +3,7 @@
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,SLM
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,AVX1
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,AVX2
-; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,AVX512
+; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,AVX512
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,AVX512
 
 define <8 x i32> @add_sub_v8i32(<8 x i32> %a, <8 x i32> %b) {
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/alternate-int.ll b/llvm/test/Transforms/SLPVectorizer/X86/alternate-int.ll
index 0b17e19e4fadd8..a2620181e3c22c 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/alternate-int.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/alternate-int.ll
@@ -3,7 +3,7 @@
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,SLM
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,AVX1
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,AVX2
-; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,AVX512
+; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,AVX512
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,AVX512
 
 define <8 x i32> @add_sub_v8i32(<8 x i32> %a, <8 x i32> %b) {
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-abs.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-abs.ll
index c37f260fad015f..0fbef6ccfe3a14 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/arith-abs.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-abs.ll
@@ -3,7 +3,7 @@
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer -S | FileCheck %s --check-prefix=SLM
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX
-; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX512
+; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX512
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=-prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX512
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=+prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX
 
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-add-saddo.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-add-saddo.ll
index 8d7dd9b9621c89..57e5a5b8da2870 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/arith-add-saddo.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-add-saddo.ll
@@ -3,7 +3,7 @@
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer -S | FileCheck %s
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer -S | FileCheck %s
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer -S | FileCheck %s
-; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer -S | FileCheck %s
+; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -passes=slp-vectorizer -S | FileCheck %s
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=-prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=+prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s
 
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-add-ssat.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-add-ssat.ll
index 24c5fcb0680865..61a4810a0bcf89 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/arith-add-ssat.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-add-ssat.ll
@@ -3,7 +3,7 @@
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=SLM
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX,AVX1
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX,AVX2
-; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX512
+; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX512
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=-prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX512
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=+prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX
 
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-add-uaddo.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-add-uaddo.ll
index fc67cec60f1772..fbcb7f9c39463a 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/arith-add-uaddo.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-add-uaddo.ll
@@ -3,7 +3,7 @@
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer -S | FileCheck %s
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer -S | FileCheck %s
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer -S | FileCheck %s
-; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer -S | FileCheck %s
+; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -passes=slp-vectorizer -S | FileCheck %s
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=-prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=+prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s
 
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-add-usat.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-add-usat.ll
index fab022d691c07a..a0d3a3e92392e5 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/arith-add-usat.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-add-usat.ll
@@ -3,7 +3,7 @@
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=SSE
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX
-; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX512
+; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX512
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=-prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX512
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=+prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX
 
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-add.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-add.ll
index dafed43e6e71c1..b84d2a56a78e91 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/arith-add.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-add.ll
@@ -5,7 +5,7 @@
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -mattr=+prefer-128-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefix=SSE
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -mattr=-prefer-128-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -mattr=+prefer-128-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefix=SSE
-; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX512
+; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX512
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=-prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX512
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=+prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX
 
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-div.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-div.ll
index 33fd3e6dc0e09f..eabf866f57624a 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/arith-div.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-div.ll
@@ -5,7 +5,7 @@
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -mattr=+prefer-128-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefix=SSE
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -mattr=-prefer-128-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -mattr=+prefer-128-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefix=SSE
-; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX512
+; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX512
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=-prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX512
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=+prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX
 
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-fix.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-fix.ll
index e4c76daddb02e4..57222f791f72e2 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/arith-fix.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-fix.ll
@@ -3,7 +3,7 @@
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=SLM
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX,AVX1
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX,AVX2
-; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX512
+; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX512
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=-prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX512
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=+prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX,AVX256BW
 
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-fshl-rot.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-fshl-rot.ll
index 7f986c74f207f4..ec970e3761657f 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/arith-fshl-rot.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-fshl-rot.ll
@@ -5,7 +5,7 @@
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX,AVX2
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=+prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX,AVX256
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=-prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX512
-; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX512
+; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX512
 
 @a64 = common global [8 x i64] zeroinitializer, align 64
 @b64 = common global [8 x i64] zeroinitializer, align 64
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-fshl.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-fshl.ll
index 5153dc34e7a4ff..d5b6a27f115a06 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/arith-fshl.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-fshl.ll
@@ -5,7 +5,7 @@
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX,AVX2
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=+prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX,AVX256
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=-prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX512
-; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX512
+; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX512
 
 @a64 = common global [8 x i64] zeroinitializer, align 64
 @b64 = common global [8 x i64] zeroinitializer, align 64
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-fshr-rot.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-fshr-rot.ll
index 52c6f14f28e180..5db12906207786 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/arith-fshr-rot.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-fshr-rot.ll
@@ -5,7 +5,7 @@
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX,AVX2
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=+prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX,AVX256
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=-prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX512
-; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX512
+; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX512
 
 @a64 = common global [8 x i64] zeroinitializer, align 64
 @b64 = common global [8 x i64] zeroinitializer, align 64
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-fshr.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-fshr.ll
index b456742337abd8..4edb6742f04ce1 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/arith-fshr.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-fshr.ll
@@ -5,7 +5,7 @@
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX,AVX2
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=+prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX,AVX256
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=-prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX512
-; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX512
+; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX512
 
 @a64 = common global [8 x i64] zeroinitializer, align 64
 @b64 = common global [8 x i64] zeroinitializer, align 64
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-mul-smulo.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-mul-smulo.ll
index 72a3ddd0bb7471..6cdec593043947 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/arith-mul-smulo.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-mul-smulo.ll
@@ -3,7 +3,7 @@
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer -S | FileCheck %s
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer -S | FileCheck %s
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer -S | FileCheck %s
-; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer -S | FileCheck %s
+; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -passes=slp-vectorizer -S | FileCheck %s
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=-prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=+prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s
 
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-mul-umulo.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-mul-umulo.ll
index 4126f06e8ca818..e2f62663f77188 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/arith-mul-umulo.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-mul-umulo.ll
@@ -3,7 +3,7 @@
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer -S | FileCheck %s
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer -S | FileCheck %s
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer -S | FileCheck %s
-; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer -S | FileCheck %s
+; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -passes=slp-vectorizer -S | FileCheck %s
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=-prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=+prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s
 
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-mul.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-mul.ll
index 94976a8cdee252..3678b0389f6593 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/arith-mul.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-mul.ll
@@ -5,7 +5,7 @@
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -mattr=-prefer-128-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX256
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -mattr=+prefer-128-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX128
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -mattr=-prefer-128-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX256
-; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX512
+; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX512
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=-prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX512
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=+prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX256
 
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-smax.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-smax.ll
index c63b672f4187cd..34470ecca3f0f0 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/arith-smax.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-smax.ll
@@ -3,7 +3,7 @@
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer -S | FileCheck %s --check-prefix=SSE
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX
-; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX512
+; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX512
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=-prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX512
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=+prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX
 
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-smin.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-smin.ll
index 826f97f2a2d895..eddcfd0e22d6e3 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/arith-smin.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-smin.ll
@@ -3,7 +3,7 @@
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer -S | FileCheck %s --check-prefix=SSE
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX
-; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX512
+; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX512
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=-prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX512
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=+prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX
 
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-sub-ssat.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-sub-ssat.ll
index afaab8b8ca642b..7cf590955c991b 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/arith-sub-ssat.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-sub-ssat.ll
@@ -3,7 +3,7 @@
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=SLM
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX,AVX1
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX,AVX2
-; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX512
+; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX512
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=-prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX512
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=+prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX
 
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-sub-ssubo.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-sub-ssubo.ll
index d628dddd16cb19..1c265a057f118b 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/arith-sub-ssubo.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-sub-ssubo.ll
@@ -3,7 +3,7 @@
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer -S | FileCheck %s
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer -S | FileCheck %s
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer -S | FileCheck %s
-; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer -S | FileCheck %s
+; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -passes=slp-vectorizer -S | FileCheck %s
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=-prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=+prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s
 
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-sub-usat.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-sub-usat.ll
index 3510863c889301..ffdb05e5e43836 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/arith-sub-usat.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-sub-usat.ll
@@ -3,7 +3,7 @@
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=SSE
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX
-; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX512
+; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX512
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=-prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX512
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=+prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX
 
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-sub-usubo.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-sub-usubo.ll
index 11a68a5dfbccaf..ccf9a4651dfcaa 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/arith-sub-usubo.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-sub-usubo.ll
@@ -3,7 +3,7 @@
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer -S | FileCheck %s
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer -S | FileCheck %s
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer -S | FileCheck %s
-; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer -S | FileCheck %s
+; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -passes=slp-vectorizer -S | FileCheck %s
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=-prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=+prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s
 
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-sub.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-sub.ll
index be54c1e04ca39a..5028999d5f25f6 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/arith-sub.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-sub.ll
@@ -5,7 +5,7 @@
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -mattr=+prefer-128-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefix=SSE
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -mattr=-prefer-128-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -mattr=+prefer-128-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefix=SSE
-; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX512
+; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX512
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=-prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX512
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=+prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX
 
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-umax.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-umax.ll
index 3a187930055f0e..0f9b07e03b90f5 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/arith-umax.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-umax.ll
@@ -3,7 +3,7 @@
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer -S | FileCheck %s --check-prefix=SSE
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX
-; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX512
+; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX512
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=-prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX512
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=+prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX
 
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/arith-umin.ll b/llvm/test/Transforms/SLPVectorizer/X86/arith-umin.ll
index 15119a96280673..7102e6de4a083f 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/arith-umin.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/arith-umin.ll
@@ -3,7 +3,7 @@
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer -S | FileCheck %s --check-prefix=SSE
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX
-; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX512
+; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX512
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=-prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX512
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=+prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX
 
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/hadd-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/hadd-inseltpoison.ll
index 4a9f717918a029..21a82cf169d112 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/hadd-inseltpoison.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/hadd-inseltpoison.ll
@@ -3,7 +3,7 @@
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,SLM
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX1
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX2
-; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX512
+; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX512
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX512
 
 ;
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/hadd.ll b/llvm/test/Transforms/SLPVectorizer/X86/hadd.ll
index 31e3e6aa0a833d..0efa6e021d0019 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/hadd.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/hadd.ll
@@ -3,7 +3,7 @@
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,SLM
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX1
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX2
-; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX512
+; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX512
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX512
 
 ;
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/hsub-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/hsub-inseltpoison.ll
index 40b6a8c32f5d02..d14facddab9923 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/hsub-inseltpoison.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/hsub-inseltpoison.ll
@@ -3,7 +3,7 @@
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,SLM
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX1
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX2
-; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX512
+; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX512
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX512
 
 ;
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/hsub.ll b/llvm/test/Transforms/SLPVectorizer/X86/hsub.ll
index 09113323d3ab77..3a8d35d4d09eb5 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/hsub.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/hsub.ll
@@ -3,7 +3,7 @@
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,SLM
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX1
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX2
-; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX512
+; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX512
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -passes=slp-vectorizer,instcombine -S | FileCheck %s --check-prefixes=CHECK,AVX,AVX512
 
 ;
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/sext-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/sext-inseltpoison.ll
index 5ae0ad932fdddb..71c65868576339 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/sext-inseltpoison.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/sext-inseltpoison.ll
@@ -3,7 +3,7 @@
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=SSE,SLM
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX
-; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX
+; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=+avx512bw -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX
 
 ;
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/sext.ll b/llvm/test/Transforms/SLPVectorizer/X86/sext.ll
index 7d38aeb0c36357..a5751c31780dbd 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/sext.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/sext.ll
@@ -3,7 +3,7 @@
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=SSE,SLM
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX
-; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX
+; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=+avx512bw -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX
 
 ;
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/shift-ashr.ll b/llvm/test/Transforms/SLPVectorizer/X86/shift-ashr.ll
index 51798deae694a3..806ca1c2efc5e7 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/shift-ashr.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/shift-ashr.ll
@@ -2,7 +2,7 @@
 ; RUN: opt < %s -mtriple=x86_64-unknown -passes=slp-vectorizer -S | FileCheck %s --check-prefix=SSE
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX,AVX1
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX,AVX2
-; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX512
+; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX512
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=-prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX512
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=+prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX,AVX2
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=bdver4 -passes=slp-vectorizer -S | FileCheck %s --check-prefix=XOP
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/shift-lshr.ll b/llvm/test/Transforms/SLPVectorizer/X86/shift-lshr.ll
index 7583561bbecf90..df597b18646c15 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/shift-lshr.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/shift-lshr.ll
@@ -2,7 +2,7 @@
 ; RUN: opt < %s -mtriple=x86_64-unknown -passes=slp-vectorizer -S | FileCheck %s --check-prefix=SSE
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX
-; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX512
+; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX512
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=-prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX512
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=+prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=bdver4 -passes=slp-vectorizer -S | FileCheck %s --check-prefix=XOP
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/shift-shl.ll b/llvm/test/Transforms/SLPVectorizer/X86/shift-shl.ll
index 5ec327c131fb78..dc8f37ea211189 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/shift-shl.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/shift-shl.ll
@@ -2,7 +2,7 @@
 ; RUN: opt < %s -mtriple=x86_64-unknown -passes=slp-vectorizer -S | FileCheck %s --check-prefix=SSE
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX
-; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX512
+; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX512
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=-prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefix=AVX512
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=+prefer-256-bit -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=bdver4 -passes=slp-vectorizer -S | FileCheck %s --check-prefix=XOP
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/zext-inseltpoison.ll b/llvm/test/Transforms/SLPVectorizer/X86/zext-inseltpoison.ll
index d1f6c41e5c30ec..ea98d3cd0917da 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/zext-inseltpoison.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/zext-inseltpoison.ll
@@ -3,7 +3,7 @@
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=SLM
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX
-; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX
+; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=+avx512bw -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX
 
 ;
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/zext.ll b/llvm/test/Transforms/SLPVectorizer/X86/zext.ll
index 829e4bab20ffaf..b881d489daf9d4 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/zext.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/zext.ll
@@ -3,7 +3,7 @@
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=slm -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=SLM
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX
-; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=knl -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX
+; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX
 ; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skx -mattr=+avx512bw -passes=slp-vectorizer -S | FileCheck %s --check-prefixes=AVX
 
 ;
diff --git a/llvm/test/tools/llvm-mca/X86/cpus.s b/llvm/test/tools/llvm-mca/X86/cpus.s
index 38e0365dc80eb2..9db78a75dc2d58 100644
--- a/llvm/test/tools/llvm-mca/X86/cpus.s
+++ b/llvm/test/tools/llvm-mca/X86/cpus.s
@@ -10,7 +10,7 @@
 # RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=ivybridge -resource-pressure=false -instruction-info=false < %s | FileCheck --check-prefixes=ALL,IVYBRIDGE %s
 # RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=haswell -resource-pressure=false -instruction-info=false < %s | FileCheck --check-prefixes=ALL,HASWELL %s
 # RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=broadwell -resource-pressure=false -instruction-info=false < %s | FileCheck --check-prefixes=ALL,BROADWELL %s
-# RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=knl -resource-pressure=false -instruction-info=false < %s | FileCheck --check-prefixes=ALL,KNL %s
+# RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -resource-pressure=false -instruction-info=false < %s | FileCheck --check-prefixes=ALL,KNL %s
 # RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=skylake -resource-pressure=false -instruction-info=false < %s | FileCheck --check-prefixes=ALL,SKX %s
 # RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=skylake-avx512 -resource-pressure=false -instruction-info=false < %s | FileCheck --check-prefixes=ALL,SKX-AVX512 %s
 # RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=icelake-client -resource-pressure=false -instruction-info=false < %s | FileCheck --check-prefixes=ALL,ICX %s
diff --git a/llvm/test/tools/llvm-mca/X86/register-file-statistics.s b/llvm/test/tools/llvm-mca/X86/register-file-statistics.s
index fd90a6ec59977c..3613b29cf6303f 100644
--- a/llvm/test/tools/llvm-mca/X86/register-file-statistics.s
+++ b/llvm/test/tools/llvm-mca/X86/register-file-statistics.s
@@ -11,7 +11,7 @@
 # RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=ivybridge -iterations=1 -all-stats=false -all-views=false -register-file-stats < %s | FileCheck --check-prefixes=ALL %s
 # RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=haswell -iterations=1 -all-stats=false -all-views=false -register-file-stats < %s | FileCheck --check-prefixes=ALL %s
 # RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=broadwell -iterations=1 -all-stats=false -all-views=false -register-file-stats < %s | FileCheck --check-prefixes=ALL %s
-# RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=knl -iterations=1 -all-stats=false -all-views=false -register-file-stats < %s | FileCheck --check-prefixes=ALL %s
+# RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -iterations=1 -all-stats=false -all-views=false -register-file-stats < %s | FileCheck --check-prefixes=ALL %s
 # RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=skylake -iterations=1 -all-stats=false -all-views=false -register-file-stats < %s | FileCheck --check-prefixes=ALL %s
 # RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=skylake-avx512 -iterations=1 -all-stats=false -all-views=false -register-file-stats < %s | FileCheck --check-prefixes=ALL %s
 # RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=icelake-client -iterations=1 -all-stats=false -all-views=false -register-file-stats < %s | FileCheck --check-prefixes=ALL %s
diff --git a/llvm/test/tools/llvm-mca/X86/scheduler-queue-usage.s b/llvm/test/tools/llvm-mca/X86/scheduler-queue-usage.s
index 2d5c0483de7dfb..1f70a2628f9f08 100644
--- a/llvm/test/tools/llvm-mca/X86/scheduler-queue-usage.s
+++ b/llvm/test/tools/llvm-mca/X86/scheduler-queue-usage.s
@@ -10,7 +10,7 @@
 # RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=ivybridge -iterations=1 -all-stats=false -all-views=false -scheduler-stats < %s | FileCheck --check-prefixes=ALL,IVB %s
 # RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=haswell -iterations=1 -all-stats=false -all-views=false -scheduler-stats < %s | FileCheck --check-prefixes=ALL,HSW %s
 # RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=broadwell -iterations=1 -all-stats=false -all-views=false -scheduler-stats < %s | FileCheck --check-prefixes=ALL,BDW %s
-# RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=knl -iterations=1 -all-stats=false -all-views=false -scheduler-stats < %s | FileCheck --check-prefixes=ALL,KNL %s
+# RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -iterations=1 -all-stats=false -all-views=false -scheduler-stats < %s | FileCheck --check-prefixes=ALL,KNL %s
 # RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=skylake -iterations=1 -all-stats=false -all-views=false -scheduler-stats < %s | FileCheck --check-prefixes=ALL,SKX %s
 # RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=skylake-avx512 -iterations=1 -all-stats=false -all-views=false -scheduler-stats < %s | FileCheck --check-prefixes=ALL,SKX-AVX512 %s
 # RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=icelake-client -iterations=1 -all-stats=false -all-views=false -scheduler-stats < %s | FileCheck --check-prefixes=ALL,ICX %s

>From edd636d923b35aa424023d48896ee89740971d54 Mon Sep 17 00:00:00 2001
From: Freddy Ye <freddy.ye at intel.com>
Date: Thu, 21 Dec 2023 17:07:12 +0800
Subject: [PATCH 4/7] [X86][BE] Workaround special tests to work.

---
 llvm/test/CodeGen/X86/avx512-cmp-kor-sequence.ll       |  2 +-
 llvm/test/CodeGen/X86/recip-fastmath2.ll               |  2 +-
 llvm/test/CodeGen/X86/slow-pmulld.ll                   |  4 ++--
 llvm/test/CodeGen/X86/vector-shuffle-512-v32.ll        |  2 +-
 llvm/test/CodeGen/X86/xaluo.ll                         |  2 +-
 llvm/test/MC/X86/x86_long_nop.s                        |  2 --
 .../LoadStoreVectorizer/X86/load-width-inseltpoison.ll |  4 ++--
 .../Transforms/LoadStoreVectorizer/X86/load-width.ll   |  4 ++--
 .../test/Transforms/LoopVectorize/X86/scatter_crash.ll |  2 +-
 llvm/test/Transforms/SLPVectorizer/X86/vector_gep.ll   |  2 +-
 llvm/test/tools/llvm-mca/X86/scheduler-queue-usage.s   | 10 ----------
 11 files changed, 12 insertions(+), 24 deletions(-)

diff --git a/llvm/test/CodeGen/X86/avx512-cmp-kor-sequence.ll b/llvm/test/CodeGen/X86/avx512-cmp-kor-sequence.ll
index bb86f307afa814..7676a65b735e06 100644
--- a/llvm/test/CodeGen/X86/avx512-cmp-kor-sequence.ll
+++ b/llvm/test/CodeGen/X86/avx512-cmp-kor-sequence.ll
@@ -48,5 +48,5 @@ entry:
 ; Function Attrs: nounwind readnone
 declare <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float>, <16 x float>, i32, <16 x i1>, i32) #1
 
-attributes #0 = { nounwind readnone uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="knl" "target-features"="+adx,+aes,+avx,+avx2,+avx512cd,+avx512er,+avx512f,+avx512pf,+bmi,+bmi2,+cx16,+f16c,+fma,+fsgsbase,+fxsr,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+prefetchwt1,+rdrnd,+rdseed,+rtm,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind readnone uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="broadwell" "target-features"="+adx,+aes,+avx,+avx2,+avx512cd,+avx512f,+bmi,+bmi2,+cx16,+f16c,+fma,+fsgsbase,+fxsr,+evex512,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+prefetchwt1,+rdrnd,+rdseed,+rtm,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt,-vzeroupper" "unsafe-fp-math"="false" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/X86/recip-fastmath2.ll b/llvm/test/CodeGen/X86/recip-fastmath2.ll
index 202cbd144ed047..cc5c2b21216222 100644
--- a/llvm/test/CodeGen/X86/recip-fastmath2.ll
+++ b/llvm/test/CodeGen/X86/recip-fastmath2.ll
@@ -7,7 +7,7 @@
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=sandybridge | FileCheck %s --check-prefixes=AVX,SANDY
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=haswell     | FileCheck %s --check-prefixes=AVX,HASWELL
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=haswell -mattr=-fma | FileCheck %s --check-prefixes=AVX,HASWELL-NO-FMA
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper         | FileCheck %s --check-prefixes=AVX,AVX512,KNL
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=haswell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper         | FileCheck %s --check-prefixes=AVX,AVX512,KNL
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=skx         | FileCheck %s --check-prefixes=AVX,AVX512,SKX
 
 ; It's the extra tests coverage for recip as discussed on D26855.
diff --git a/llvm/test/CodeGen/X86/slow-pmulld.ll b/llvm/test/CodeGen/X86/slow-pmulld.ll
index a2785e170436d1..2e1e4f16962151 100644
--- a/llvm/test/CodeGen/X86/slow-pmulld.ll
+++ b/llvm/test/CodeGen/X86/slow-pmulld.ll
@@ -13,8 +13,8 @@
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq | FileCheck %s --check-prefixes=AVX2,AVX-64,AVX512-64,AVX512DQ-64
 ; RUN: llc < %s -mtriple=i386-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefixes=AVX2,AVX-32,AVX512-32,AVX512BW-32
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefixes=AVX2,AVX-64,AVX512-64,AVX512BW-64
-; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper | FileCheck %s --check-prefixes=AVX2,AVX-32,AVX512-32,KNL-32
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper | FileCheck %s --check-prefixes=AVX2,AVX-64,AVX512-64,KNL-64
+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper,+slow-pmaddwd | FileCheck %s --check-prefixes=AVX2,AVX-32,AVX512-32,KNL-32
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper,+slow-pmaddwd | FileCheck %s --check-prefixes=AVX2,AVX-64,AVX512-64,KNL-64
 
 ; Make sure that the slow-pmulld feature can be used without SSE4.1.
 ; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=silvermont -mattr=-sse4.1
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-512-v32.ll b/llvm/test/CodeGen/X86/vector-shuffle-512-v32.ll
index 372da59a5d8df0..56f45808265131 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-512-v32.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-512-v32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper | FileCheck --check-prefixes=ALL,KNL %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=haswell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper,-fast-variable-perlane-shuffle | FileCheck --check-prefixes=ALL,KNL %s
 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck --check-prefixes=ALL,SKX %s
 
 target triple = "x86_64-unknown-unknown"
diff --git a/llvm/test/CodeGen/X86/xaluo.ll b/llvm/test/CodeGen/X86/xaluo.ll
index e06f5332936525..bfac979519ee5f 100644
--- a/llvm/test/CodeGen/X86/xaluo.ll
+++ b/llvm/test/CodeGen/X86/xaluo.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=x86_64-darwin-unknown < %s | FileCheck %s --check-prefixes=CHECK,SDAG,GENERIC
 ; RUN: llc -mtriple=x86_64-darwin-unknown -fast-isel -fast-isel-abort=1 < %s | FileCheck %s --check-prefixes=CHECK,FAST
-; RUN: llc -mtriple=x86_64-darwin-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper < %s | FileCheck %s --check-prefixes=CHECK,SDAG,KNL
+; RUN: llc -mtriple=x86_64-darwin-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper,+slow-incdec < %s | FileCheck %s --check-prefixes=CHECK,SDAG,KNL
 
 ;
 ; Get the actual value of the overflow bit.
diff --git a/llvm/test/MC/X86/x86_long_nop.s b/llvm/test/MC/X86/x86_long_nop.s
index e427f730bf8740..c0219aca787b38 100644
--- a/llvm/test/MC/X86/x86_long_nop.s
+++ b/llvm/test/MC/X86/x86_long_nop.s
@@ -27,8 +27,6 @@
 # RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu -mcpu=broadwell %s | llvm-objdump -d --no-show-raw-insn - | FileCheck --check-prefix=LNOP15 %s
 # RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu -mcpu=skylake %s | llvm-objdump -d --no-show-raw-insn - | FileCheck --check-prefix=LNOP15 %s
 # RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu -mcpu=skx %s | llvm-objdump -d --no-show-raw-insn - | FileCheck --check-prefix=LNOP15 %s
-# RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper %s | llvm-objdump -d --no-show-raw-insn - | FileCheck --check-prefix=LNOP10 %s
-# RUN: llvm-mc -filetype=obj -arch=x86 -triple=i686-pc-linux-gnu -mcpu=knm %s | llvm-objdump -d --no-show-raw-insn - | FileCheck --check-prefix=LNOP10 %s
 
 # Ensure alignment directives also emit sequences of 10, 11 and 15-byte NOPs on processors
 # capable of using long NOPs.
diff --git a/llvm/test/Transforms/LoadStoreVectorizer/X86/load-width-inseltpoison.ll b/llvm/test/Transforms/LoadStoreVectorizer/X86/load-width-inseltpoison.ll
index a38aacfc3ce0ed..2728ae15befaf9 100644
--- a/llvm/test/Transforms/LoadStoreVectorizer/X86/load-width-inseltpoison.ll
+++ b/llvm/test/Transforms/LoadStoreVectorizer/X86/load-width-inseltpoison.ll
@@ -1,7 +1,7 @@
 ; RUN: opt -mtriple=x86_64-unknown-linux-gnu -passes=load-store-vectorizer -mcpu haswell -S -o - %s | FileCheck --check-prefix=CHECK-HSW %s
-; RUN: opt -mtriple=x86_64-unknown-linux-gnu -passes=load-store-vectorizer -mcpu knl -S -o - %s | FileCheck --check-prefix=CHECK-KNL %s
+; RUN: opt -mtriple=x86_64-unknown-linux-gnu -passes=load-store-vectorizer -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -S -o - %s | FileCheck --check-prefix=CHECK-KNL %s
 ; RUN: opt -mtriple=x86_64-unknown-linux-gnu -aa-pipeline=basic-aa -passes='function(load-store-vectorizer)' -mcpu haswell -S -o - %s | FileCheck --check-prefix=CHECK-HSW %s
-; RUN: opt -mtriple=x86_64-unknown-linux-gnu -aa-pipeline=basic-aa -passes='function(load-store-vectorizer)' -mcpu knl -S -o - %s | FileCheck --check-prefix=CHECK-KNL %s
+; RUN: opt -mtriple=x86_64-unknown-linux-gnu -aa-pipeline=basic-aa -passes='function(load-store-vectorizer)' -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -S -o - %s | FileCheck --check-prefix=CHECK-KNL %s
 
 define <8 x double> @loadwidth_insert_extract(ptr %ptr) {
     %b = getelementptr <2 x double>, ptr %ptr, i32 1
diff --git a/llvm/test/Transforms/LoadStoreVectorizer/X86/load-width.ll b/llvm/test/Transforms/LoadStoreVectorizer/X86/load-width.ll
index f225762d438015..6d7ccd370fcea8 100644
--- a/llvm/test/Transforms/LoadStoreVectorizer/X86/load-width.ll
+++ b/llvm/test/Transforms/LoadStoreVectorizer/X86/load-width.ll
@@ -1,7 +1,7 @@
 ; RUN: opt -mtriple=x86_64-unknown-linux-gnu -passes=load-store-vectorizer -mcpu haswell -S -o - %s | FileCheck --check-prefix=CHECK-HSW %s
-; RUN: opt -mtriple=x86_64-unknown-linux-gnu -passes=load-store-vectorizer -mcpu knl -S -o - %s | FileCheck --check-prefix=CHECK-KNL %s
+; RUN: opt -mtriple=x86_64-unknown-linux-gnu -passes=load-store-vectorizer -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -S -o - %s | FileCheck --check-prefix=CHECK-KNL %s
 ; RUN: opt -mtriple=x86_64-unknown-linux-gnu -aa-pipeline=basic-aa -passes='function(load-store-vectorizer)' -mcpu haswell -S -o - %s | FileCheck --check-prefix=CHECK-HSW %s
-; RUN: opt -mtriple=x86_64-unknown-linux-gnu -aa-pipeline=basic-aa -passes='function(load-store-vectorizer)' -mcpu knl -S -o - %s | FileCheck --check-prefix=CHECK-KNL %s
+; RUN: opt -mtriple=x86_64-unknown-linux-gnu -aa-pipeline=basic-aa -passes='function(load-store-vectorizer)' -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -S -o - %s | FileCheck --check-prefix=CHECK-KNL %s
 
 define <8 x double> @loadwidth_insert_extract(ptr %ptr) {
     %b = getelementptr <2 x double>, ptr %ptr, i32 1
diff --git a/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll b/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll
index bf2b9e2aef85a5..4839e3edf7b4df 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll
@@ -111,4 +111,4 @@ for.body:                                         ; preds = %for.body.preheader,
   br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit99
 }
 
-attributes #0 = { norecurse nounwind ssp uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="knl" "target-features"="+adx,+aes,+avx,+avx2,+avx512cd,+avx512er,+avx512f,+avx512pf,+bmi,+bmi2,+cx16,+f16c,+fma,+fsgsbase,+fxsr,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+prefetchwt1,+rdrnd,+rdseed,+rtm,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { norecurse nounwind ssp uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="broadwell" "target-features"="+adx,+aes,+avx,+avx2,+avx512cd,+avx512f,+bmi,+bmi2,+cx16,+f16c,+fma,+fsgsbase,+fxsr,+evex512,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+prefetchwt1,+rdrnd,+rdseed,+rtm,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt,-vzeroupper" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vector_gep.ll b/llvm/test/Transforms/SLPVectorizer/X86/vector_gep.ll
index b8c551c7b771d2..02bf77a5e103dd 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/vector_gep.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/vector_gep.ll
@@ -26,5 +26,5 @@ entry:
   unreachable
 }
 
-attributes #0 = { noreturn readonly uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="knl" "target-features"="+adx,+aes,+avx,+avx2,+avx512cd,+avx512er,+avx512f,+avx512pf,+bmi,+bmi2,+cx16,+f16c,+fma,+fsgsbase,+fxsr,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+prefetchwt1,+rdrnd,+rdseed,+rtm,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { noreturn readonly uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="broadwell" "target-features"="+adx,+aes,+avx,+avx2,+avx512cd,+avx512f,+bmi,+bmi2,+cx16,+f16c,+fma,+fsgsbase,+fxsr,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+prefetchwt1,+rdrnd,+rdseed,+rtm,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt" "unsafe-fp-math"="false" "use-soft-float"="false" }
 
diff --git a/llvm/test/tools/llvm-mca/X86/scheduler-queue-usage.s b/llvm/test/tools/llvm-mca/X86/scheduler-queue-usage.s
index 1f70a2628f9f08..7f9a501b57a156 100644
--- a/llvm/test/tools/llvm-mca/X86/scheduler-queue-usage.s
+++ b/llvm/test/tools/llvm-mca/X86/scheduler-queue-usage.s
@@ -10,7 +10,6 @@
 # RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=ivybridge -iterations=1 -all-stats=false -all-views=false -scheduler-stats < %s | FileCheck --check-prefixes=ALL,IVB %s
 # RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=haswell -iterations=1 -all-stats=false -all-views=false -scheduler-stats < %s | FileCheck --check-prefixes=ALL,HSW %s
 # RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=broadwell -iterations=1 -all-stats=false -all-views=false -scheduler-stats < %s | FileCheck --check-prefixes=ALL,BDW %s
-# RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=broadwell -mattr=+avx512f,+avx512cd,+evex512,-vzeroupper -iterations=1 -all-stats=false -all-views=false -scheduler-stats < %s | FileCheck --check-prefixes=ALL,KNL %s
 # RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=skylake -iterations=1 -all-stats=false -all-views=false -scheduler-stats < %s | FileCheck --check-prefixes=ALL,SKX %s
 # RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=skylake-avx512 -iterations=1 -all-stats=false -all-views=false -scheduler-stats < %s | FileCheck --check-prefixes=ALL,SKX-AVX512 %s
 # RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=icelake-client -iterations=1 -all-stats=false -all-views=false -scheduler-stats < %s | FileCheck --check-prefixes=ALL,ICX %s
@@ -68,12 +67,6 @@ xor %eax, %ebx
 # IVB-NEXT:        [3] Maximum number of used buffer entries.
 # IVB-NEXT:        [4] Total number of buffer entries.
 
-# KNL:             Scheduler's queue usage:
-# KNL-NEXT:        [1] Resource name.
-# KNL-NEXT:        [2] Average number of used buffer entries.
-# KNL-NEXT:        [3] Maximum number of used buffer entries.
-# KNL-NEXT:        [4] Total number of buffer entries.
-
 # SKX:             Scheduler's queue usage:
 # SKX-NEXT:        [1] Resource name.
 # SKX-NEXT:        [2] Average number of used buffer entries.
@@ -145,9 +138,6 @@ xor %eax, %ebx
 # IVB:              [1]            [2]        [3]        [4]
 # IVB-NEXT:        SBPortAny        0          1          54
 
-# KNL:              [1]            [2]        [3]        [4]
-# KNL-NEXT:        HWPortAny        0          1          60
-
 # SKX:              [1]            [2]        [3]        [4]
 # SKX-NEXT:        SKLPortAny       0          1          60
 

>From 6470ba478914b5e6fbec1857848c957ccf2816e1 Mon Sep 17 00:00:00 2001
From: Freddy Ye <freddy.ye at intel.com>
Date: Tue, 26 Dec 2023 09:51:43 +0800
Subject: [PATCH 5/7] Remove AVX512ER intrinsic support.

---
 clang/include/clang/Basic/BuiltinsX86.def     |  12 -
 clang/include/clang/Driver/Options.td         |   2 -
 clang/lib/Basic/Targets/X86.cpp               |   6 -
 clang/lib/Basic/Targets/X86.h                 |   1 -
 clang/lib/Headers/CMakeLists.txt              |   1 -
 clang/lib/Headers/avx512erintrin.h            | 271 -------------
 clang/lib/Headers/cpuid.h                     |   1 -
 clang/lib/Headers/immintrin.h                 |   5 -
 clang/lib/Sema/SemaChecking.cpp               |  10 -
 clang/test/CodeGen/X86/avx512er-builtins.c    | 347 ----------------
 clang/test/CodeGen/function-target-features.c |   4 +-
 clang/test/CodeGen/target-builtin-noerror.c   |   1 -
 clang/test/Driver/x86-target-features.c       |   5 -
 clang/test/Preprocessor/x86_target_features.c |  18 -
 llvm/include/llvm/IR/IntrinsicsX86.td         |  52 ---
 llvm/lib/Target/X86/X86.td                    |   3 -
 llvm/lib/Target/X86/X86InstrAVX512.td         |  78 +++-
 llvm/lib/Target/X86/X86InstrPredicates.td     |   1 -
 llvm/lib/Target/X86/X86IntrinsicsInfo.h       |  10 -
 llvm/lib/TargetParser/Host.cpp                |   5 -
 llvm/test/CodeGen/X86/avx512er-intrinsics.ll  | 306 --------------
 llvm/test/CodeGen/X86/crc32-target-feature.ll |   4 +-
 llvm/test/CodeGen/X86/unfoldMemoryOperand.mir |   2 +-
 .../LoopStrengthReduce/X86/pr40514.ll         |   2 +-
 .../Transforms/LoopVectorize/X86/pr23997.ll   |   2 +-
 .../Transforms/LoopVectorize/X86/pr54634.ll   |   2 +-
 .../llvm-mca/X86/Generic/resources-avx512er.s | 373 ------------------
 .../gn/secondary/clang/lib/Headers/BUILD.gn   |   1 -
 28 files changed, 71 insertions(+), 1454 deletions(-)
 delete mode 100644 clang/lib/Headers/avx512erintrin.h
 delete mode 100644 clang/test/CodeGen/X86/avx512er-builtins.c
 delete mode 100644 llvm/test/CodeGen/X86/avx512er-intrinsics.ll
 delete mode 100644 llvm/test/tools/llvm-mca/X86/Generic/resources-avx512er.s

diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def
index 60b752ad48548f..00a69b16a51283 100644
--- a/clang/include/clang/Basic/BuiltinsX86.def
+++ b/clang/include/clang/Basic/BuiltinsX86.def
@@ -839,23 +839,11 @@ TARGET_BUILTIN(__builtin_ia32_rsqrt14ss_mask, "V4fV4fV4fV4fUc", "ncV:128:", "avx
 TARGET_BUILTIN(__builtin_ia32_rsqrt14pd512_mask, "V8dV8dV8dUc", "ncV:512:", "avx512f,evex512")
 TARGET_BUILTIN(__builtin_ia32_rsqrt14ps512_mask, "V16fV16fV16fUs", "ncV:512:", "avx512f,evex512")
 
-TARGET_BUILTIN(__builtin_ia32_rsqrt28sd_round_mask, "V2dV2dV2dV2dUcIi", "ncV:128:", "avx512er")
-TARGET_BUILTIN(__builtin_ia32_rsqrt28ss_round_mask, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512er")
-TARGET_BUILTIN(__builtin_ia32_rsqrt28pd_mask, "V8dV8dV8dUcIi", "ncV:512:", "avx512er,evex512")
-TARGET_BUILTIN(__builtin_ia32_rsqrt28ps_mask, "V16fV16fV16fUsIi", "ncV:512:", "avx512er,evex512")
-
 TARGET_BUILTIN(__builtin_ia32_rcp14sd_mask, "V2dV2dV2dV2dUc", "ncV:128:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_rcp14ss_mask, "V4fV4fV4fV4fUc", "ncV:128:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_rcp14pd512_mask, "V8dV8dV8dUc", "ncV:512:", "avx512f,evex512")
 TARGET_BUILTIN(__builtin_ia32_rcp14ps512_mask, "V16fV16fV16fUs", "ncV:512:", "avx512f,evex512")
 
-TARGET_BUILTIN(__builtin_ia32_rcp28sd_round_mask, "V2dV2dV2dV2dUcIi", "ncV:128:", "avx512er")
-TARGET_BUILTIN(__builtin_ia32_rcp28ss_round_mask, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512er")
-TARGET_BUILTIN(__builtin_ia32_rcp28pd_mask, "V8dV8dV8dUcIi", "ncV:512:", "avx512er,evex512")
-TARGET_BUILTIN(__builtin_ia32_rcp28ps_mask, "V16fV16fV16fUsIi", "ncV:512:", "avx512er,evex512")
-TARGET_BUILTIN(__builtin_ia32_exp2pd_mask, "V8dV8dV8dUcIi", "ncV:512:", "avx512er,evex512")
-TARGET_BUILTIN(__builtin_ia32_exp2ps_mask, "V16fV16fV16fUsIi", "ncV:512:", "avx512er,evex512")
-
 TARGET_BUILTIN(__builtin_ia32_cvttps2dq512_mask, "V16iV16fV16iUsIi", "ncV:512:", "avx512f,evex512")
 TARGET_BUILTIN(__builtin_ia32_cvttps2udq512_mask, "V16iV16fV16iUsIi", "ncV:512:", "avx512f,evex512")
 TARGET_BUILTIN(__builtin_ia32_cvttpd2dq512_mask, "V8iV8dV8iUcIi", "ncV:512:", "avx512f,evex512")
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 2b93ddf033499c..a2f26b9ca4c356 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -5865,8 +5865,6 @@ def mavx512cd : Flag<["-"], "mavx512cd">, Group<m_x86_Features_Group>;
 def mno_avx512cd : Flag<["-"], "mno-avx512cd">, Group<m_x86_Features_Group>;
 def mavx512dq : Flag<["-"], "mavx512dq">, Group<m_x86_Features_Group>;
 def mno_avx512dq : Flag<["-"], "mno-avx512dq">, Group<m_x86_Features_Group>;
-def mavx512er : Flag<["-"], "mavx512er">, Group<m_x86_Features_Group>;
-def mno_avx512er : Flag<["-"], "mno-avx512er">, Group<m_x86_Features_Group>;
 def mavx512fp16 : Flag<["-"], "mavx512fp16">, Group<m_x86_Features_Group>;
 def mno_avx512fp16 : Flag<["-"], "mno-avx512fp16">, Group<m_x86_Features_Group>;
 def mavx512ifma : Flag<["-"], "mavx512ifma">, Group<m_x86_Features_Group>;
diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp
index 2483bc8b08f014..8b0e021a488424 100644
--- a/clang/lib/Basic/Targets/X86.cpp
+++ b/clang/lib/Basic/Targets/X86.cpp
@@ -293,8 +293,6 @@ bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
       HasAVX512VNNI = true;
     } else if (Feature == "+avx512bf16") {
       HasAVX512BF16 = true;
-    } else if (Feature == "+avx512er") {
-      HasAVX512ER = true;
     } else if (Feature == "+avx512fp16") {
       HasAVX512FP16 = true;
       HasLegalHalfType = true;
@@ -813,8 +811,6 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts,
     Builder.defineMacro("__AVX512VNNI__");
   if (HasAVX512BF16)
     Builder.defineMacro("__AVX512BF16__");
-  if (HasAVX512ER)
-    Builder.defineMacro("__AVX512ER__");
   if (HasAVX512FP16)
     Builder.defineMacro("__AVX512FP16__");
   if (HasAVX512PF)
@@ -1052,7 +1048,6 @@ bool X86TargetInfo::isValidFeatureName(StringRef Name) const {
       .Case("avx512vpopcntdq", true)
       .Case("avx512vnni", true)
       .Case("avx512bf16", true)
-      .Case("avx512er", true)
       .Case("avx512fp16", true)
       .Case("avx512pf", true)
       .Case("avx512dq", true)
@@ -1168,7 +1163,6 @@ bool X86TargetInfo::hasFeature(StringRef Feature) const {
       .Case("avx512vpopcntdq", HasAVX512VPOPCNTDQ)
       .Case("avx512vnni", HasAVX512VNNI)
       .Case("avx512bf16", HasAVX512BF16)
-      .Case("avx512er", HasAVX512ER)
       .Case("avx512fp16", HasAVX512FP16)
       .Case("avx512pf", HasAVX512PF)
       .Case("avx512dq", HasAVX512DQ)
diff --git a/clang/lib/Basic/Targets/X86.h b/clang/lib/Basic/Targets/X86.h
index 0ab1c10833db26..a890348073e889 100644
--- a/clang/lib/Basic/Targets/X86.h
+++ b/clang/lib/Basic/Targets/X86.h
@@ -103,7 +103,6 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo {
   bool HasAVX512VNNI = false;
   bool HasAVX512FP16 = false;
   bool HasAVX512BF16 = false;
-  bool HasAVX512ER = false;
   bool HasAVX512PF = false;
   bool HasAVX512DQ = false;
   bool HasAVX512BITALG = false;
diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt
index 735e4e4e3be89b..09c62d14085df2 100644
--- a/clang/lib/Headers/CMakeLists.txt
+++ b/clang/lib/Headers/CMakeLists.txt
@@ -151,7 +151,6 @@ set(x86_files
   avx512bwintrin.h
   avx512cdintrin.h
   avx512dqintrin.h
-  avx512erintrin.h
   avx512fintrin.h
   avx512fp16intrin.h
   avx512ifmaintrin.h
diff --git a/clang/lib/Headers/avx512erintrin.h b/clang/lib/Headers/avx512erintrin.h
deleted file mode 100644
index 1c5a2d2d208ff4..00000000000000
--- a/clang/lib/Headers/avx512erintrin.h
+++ /dev/null
@@ -1,271 +0,0 @@
-/*===---- avx512erintrin.h - AVX512ER intrinsics ---------------------------===
- *
- * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
- * See https://llvm.org/LICENSE.txt for license information.
- * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- *
- *===-----------------------------------------------------------------------===
- */
-#ifndef __IMMINTRIN_H
-#error "Never use <avx512erintrin.h> directly; include <immintrin.h> instead."
-#endif
-
-#ifndef __AVX512ERINTRIN_H
-#define __AVX512ERINTRIN_H
-
-/* exp2a23 */
-#define _mm512_exp2a23_round_pd(A, R) \
-  ((__m512d)__builtin_ia32_exp2pd_mask((__v8df)(__m512d)(A), \
-                                       (__v8df)_mm512_setzero_pd(), \
-                                       (__mmask8)-1, (int)(R)))
-
-#define _mm512_mask_exp2a23_round_pd(S, M, A, R) \
-  ((__m512d)__builtin_ia32_exp2pd_mask((__v8df)(__m512d)(A), \
-                                       (__v8df)(__m512d)(S), (__mmask8)(M), \
-                                       (int)(R)))
-
-#define _mm512_maskz_exp2a23_round_pd(M, A, R) \
-  ((__m512d)__builtin_ia32_exp2pd_mask((__v8df)(__m512d)(A), \
-                                       (__v8df)_mm512_setzero_pd(), \
-                                       (__mmask8)(M), (int)(R)))
-
-#define _mm512_exp2a23_pd(A) \
-  _mm512_exp2a23_round_pd((A), _MM_FROUND_CUR_DIRECTION)
-
-#define _mm512_mask_exp2a23_pd(S, M, A) \
-  _mm512_mask_exp2a23_round_pd((S), (M), (A), _MM_FROUND_CUR_DIRECTION)
-
-#define _mm512_maskz_exp2a23_pd(M, A) \
-  _mm512_maskz_exp2a23_round_pd((M), (A), _MM_FROUND_CUR_DIRECTION)
-
-#define _mm512_exp2a23_round_ps(A, R) \
-  ((__m512)__builtin_ia32_exp2ps_mask((__v16sf)(__m512)(A), \
-                                      (__v16sf)_mm512_setzero_ps(), \
-                                      (__mmask16)-1, (int)(R)))
-
-#define _mm512_mask_exp2a23_round_ps(S, M, A, R) \
-  ((__m512)__builtin_ia32_exp2ps_mask((__v16sf)(__m512)(A), \
-                                      (__v16sf)(__m512)(S), (__mmask16)(M), \
-                                      (int)(R)))
-
-#define _mm512_maskz_exp2a23_round_ps(M, A, R) \
-  ((__m512)__builtin_ia32_exp2ps_mask((__v16sf)(__m512)(A), \
-                                      (__v16sf)_mm512_setzero_ps(), \
-                                      (__mmask16)(M), (int)(R)))
-
-#define _mm512_exp2a23_ps(A) \
-  _mm512_exp2a23_round_ps((A), _MM_FROUND_CUR_DIRECTION)
-
-#define _mm512_mask_exp2a23_ps(S, M, A) \
-  _mm512_mask_exp2a23_round_ps((S), (M), (A), _MM_FROUND_CUR_DIRECTION)
-
-#define _mm512_maskz_exp2a23_ps(M, A) \
-  _mm512_maskz_exp2a23_round_ps((M), (A), _MM_FROUND_CUR_DIRECTION)
-
-/* rsqrt28 */
-#define _mm512_rsqrt28_round_pd(A, R) \
-  ((__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)(__m512d)(A), \
-                                          (__v8df)_mm512_setzero_pd(), \
-                                          (__mmask8)-1, (int)(R)))
-
-#define _mm512_mask_rsqrt28_round_pd(S, M, A, R) \
-  ((__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)(__m512d)(A), \
-                                          (__v8df)(__m512d)(S), (__mmask8)(M), \
-                                          (int)(R)))
-
-#define _mm512_maskz_rsqrt28_round_pd(M, A, R) \
-  ((__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)(__m512d)(A), \
-                                          (__v8df)_mm512_setzero_pd(), \
-                                          (__mmask8)(M), (int)(R)))
-
-#define _mm512_rsqrt28_pd(A) \
-  _mm512_rsqrt28_round_pd((A), _MM_FROUND_CUR_DIRECTION)
-
-#define _mm512_mask_rsqrt28_pd(S, M, A) \
-  _mm512_mask_rsqrt28_round_pd((S), (M), (A), _MM_FROUND_CUR_DIRECTION)
-
-#define _mm512_maskz_rsqrt28_pd(M, A) \
-  _mm512_maskz_rsqrt28_round_pd((M), (A), _MM_FROUND_CUR_DIRECTION)
-
-#define _mm512_rsqrt28_round_ps(A, R) \
-  ((__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)(__m512)(A), \
-                                         (__v16sf)_mm512_setzero_ps(), \
-                                         (__mmask16)-1, (int)(R)))
-
-#define _mm512_mask_rsqrt28_round_ps(S, M, A, R) \
-  ((__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)(__m512)(A), \
-                                         (__v16sf)(__m512)(S), (__mmask16)(M), \
-                                         (int)(R)))
-
-#define _mm512_maskz_rsqrt28_round_ps(M, A, R) \
-  ((__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)(__m512)(A), \
-                                         (__v16sf)_mm512_setzero_ps(), \
-                                         (__mmask16)(M), (int)(R)))
-
-#define _mm512_rsqrt28_ps(A) \
-  _mm512_rsqrt28_round_ps((A), _MM_FROUND_CUR_DIRECTION)
-
-#define _mm512_mask_rsqrt28_ps(S, M, A) \
-  _mm512_mask_rsqrt28_round_ps((S), (M), A, _MM_FROUND_CUR_DIRECTION)
-
-#define _mm512_maskz_rsqrt28_ps(M, A) \
-  _mm512_maskz_rsqrt28_round_ps((M), (A), _MM_FROUND_CUR_DIRECTION)
-
-#define _mm_rsqrt28_round_ss(A, B, R) \
-  ((__m128)__builtin_ia32_rsqrt28ss_round_mask((__v4sf)(__m128)(A), \
-                                               (__v4sf)(__m128)(B), \
-                                               (__v4sf)_mm_setzero_ps(), \
-                                               (__mmask8)-1, (int)(R)))
-
-#define _mm_mask_rsqrt28_round_ss(S, M, A, B, R) \
-  ((__m128)__builtin_ia32_rsqrt28ss_round_mask((__v4sf)(__m128)(A), \
-                                               (__v4sf)(__m128)(B), \
-                                               (__v4sf)(__m128)(S), \
-                                               (__mmask8)(M), (int)(R)))
-
-#define _mm_maskz_rsqrt28_round_ss(M, A, B, R) \
-  ((__m128)__builtin_ia32_rsqrt28ss_round_mask((__v4sf)(__m128)(A), \
-                                               (__v4sf)(__m128)(B), \
-                                               (__v4sf)_mm_setzero_ps(), \
-                                               (__mmask8)(M), (int)(R)))
-
-#define _mm_rsqrt28_ss(A, B) \
-  _mm_rsqrt28_round_ss((A), (B), _MM_FROUND_CUR_DIRECTION)
-
-#define _mm_mask_rsqrt28_ss(S, M, A, B) \
-  _mm_mask_rsqrt28_round_ss((S), (M), (A), (B), _MM_FROUND_CUR_DIRECTION)
-
-#define _mm_maskz_rsqrt28_ss(M, A, B) \
-  _mm_maskz_rsqrt28_round_ss((M), (A), (B), _MM_FROUND_CUR_DIRECTION)
-
-#define _mm_rsqrt28_round_sd(A, B, R) \
-  ((__m128d)__builtin_ia32_rsqrt28sd_round_mask((__v2df)(__m128d)(A), \
-                                                (__v2df)(__m128d)(B), \
-                                                (__v2df)_mm_setzero_pd(), \
-                                                (__mmask8)-1, (int)(R)))
-
-#define _mm_mask_rsqrt28_round_sd(S, M, A, B, R) \
-  ((__m128d)__builtin_ia32_rsqrt28sd_round_mask((__v2df)(__m128d)(A), \
-                                                (__v2df)(__m128d)(B), \
-                                                (__v2df)(__m128d)(S), \
-                                                (__mmask8)(M), (int)(R)))
-
-#define _mm_maskz_rsqrt28_round_sd(M, A, B, R) \
-  ((__m128d)__builtin_ia32_rsqrt28sd_round_mask((__v2df)(__m128d)(A), \
-                                                (__v2df)(__m128d)(B), \
-                                                (__v2df)_mm_setzero_pd(), \
-                                                (__mmask8)(M), (int)(R)))
-
-#define _mm_rsqrt28_sd(A, B) \
-  _mm_rsqrt28_round_sd((A), (B), _MM_FROUND_CUR_DIRECTION)
-
-#define _mm_mask_rsqrt28_sd(S, M, A, B) \
-  _mm_mask_rsqrt28_round_sd((S), (M), (A), (B), _MM_FROUND_CUR_DIRECTION)
-
-#define _mm_maskz_rsqrt28_sd(M, A, B) \
-  _mm_maskz_rsqrt28_round_sd((M), (A), (B), _MM_FROUND_CUR_DIRECTION)
-
-/* rcp28 */
-#define _mm512_rcp28_round_pd(A, R) \
-  ((__m512d)__builtin_ia32_rcp28pd_mask((__v8df)(__m512d)(A), \
-                                        (__v8df)_mm512_setzero_pd(), \
-                                        (__mmask8)-1, (int)(R)))
-
-#define _mm512_mask_rcp28_round_pd(S, M, A, R) \
-  ((__m512d)__builtin_ia32_rcp28pd_mask((__v8df)(__m512d)(A), \
-                                        (__v8df)(__m512d)(S), (__mmask8)(M), \
-                                        (int)(R)))
-
-#define _mm512_maskz_rcp28_round_pd(M, A, R) \
-  ((__m512d)__builtin_ia32_rcp28pd_mask((__v8df)(__m512d)(A), \
-                                        (__v8df)_mm512_setzero_pd(), \
-                                        (__mmask8)(M), (int)(R)))
-
-#define _mm512_rcp28_pd(A) \
-  _mm512_rcp28_round_pd((A), _MM_FROUND_CUR_DIRECTION)
-
-#define _mm512_mask_rcp28_pd(S, M, A) \
-  _mm512_mask_rcp28_round_pd((S), (M), (A), _MM_FROUND_CUR_DIRECTION)
-
-#define _mm512_maskz_rcp28_pd(M, A) \
-  _mm512_maskz_rcp28_round_pd((M), (A), _MM_FROUND_CUR_DIRECTION)
-
-#define _mm512_rcp28_round_ps(A, R) \
-  ((__m512)__builtin_ia32_rcp28ps_mask((__v16sf)(__m512)(A), \
-                                       (__v16sf)_mm512_setzero_ps(), \
-                                       (__mmask16)-1, (int)(R)))
-
-#define _mm512_mask_rcp28_round_ps(S, M, A, R) \
-  ((__m512)__builtin_ia32_rcp28ps_mask((__v16sf)(__m512)(A), \
-                                       (__v16sf)(__m512)(S), (__mmask16)(M), \
-                                       (int)(R)))
-
-#define _mm512_maskz_rcp28_round_ps(M, A, R) \
-  ((__m512)__builtin_ia32_rcp28ps_mask((__v16sf)(__m512)(A), \
-                                       (__v16sf)_mm512_setzero_ps(), \
-                                       (__mmask16)(M), (int)(R)))
-
-#define _mm512_rcp28_ps(A) \
-  _mm512_rcp28_round_ps((A), _MM_FROUND_CUR_DIRECTION)
-
-#define _mm512_mask_rcp28_ps(S, M, A) \
-  _mm512_mask_rcp28_round_ps((S), (M), (A), _MM_FROUND_CUR_DIRECTION)
-
-#define _mm512_maskz_rcp28_ps(M, A) \
-  _mm512_maskz_rcp28_round_ps((M), (A), _MM_FROUND_CUR_DIRECTION)
-
-#define _mm_rcp28_round_ss(A, B, R) \
-  ((__m128)__builtin_ia32_rcp28ss_round_mask((__v4sf)(__m128)(A), \
-                                             (__v4sf)(__m128)(B), \
-                                             (__v4sf)_mm_setzero_ps(), \
-                                             (__mmask8)-1, (int)(R)))
-
-#define _mm_mask_rcp28_round_ss(S, M, A, B, R) \
-  ((__m128)__builtin_ia32_rcp28ss_round_mask((__v4sf)(__m128)(A), \
-                                             (__v4sf)(__m128)(B), \
-                                             (__v4sf)(__m128)(S), \
-                                             (__mmask8)(M), (int)(R)))
-
-#define _mm_maskz_rcp28_round_ss(M, A, B, R) \
-  ((__m128)__builtin_ia32_rcp28ss_round_mask((__v4sf)(__m128)(A), \
-                                             (__v4sf)(__m128)(B), \
-                                             (__v4sf)_mm_setzero_ps(), \
-                                             (__mmask8)(M), (int)(R)))
-
-#define _mm_rcp28_ss(A, B) \
-  _mm_rcp28_round_ss((A), (B), _MM_FROUND_CUR_DIRECTION)
-
-#define _mm_mask_rcp28_ss(S, M, A, B) \
-  _mm_mask_rcp28_round_ss((S), (M), (A), (B), _MM_FROUND_CUR_DIRECTION)
-
-#define _mm_maskz_rcp28_ss(M, A, B) \
-  _mm_maskz_rcp28_round_ss((M), (A), (B), _MM_FROUND_CUR_DIRECTION)
-
-#define _mm_rcp28_round_sd(A, B, R) \
-  ((__m128d)__builtin_ia32_rcp28sd_round_mask((__v2df)(__m128d)(A), \
-                                              (__v2df)(__m128d)(B), \
-                                              (__v2df)_mm_setzero_pd(), \
-                                              (__mmask8)-1, (int)(R)))
-
-#define _mm_mask_rcp28_round_sd(S, M, A, B, R) \
-  ((__m128d)__builtin_ia32_rcp28sd_round_mask((__v2df)(__m128d)(A), \
-                                              (__v2df)(__m128d)(B), \
-                                              (__v2df)(__m128d)(S), \
-                                              (__mmask8)(M), (int)(R)))
-
-#define _mm_maskz_rcp28_round_sd(M, A, B, R) \
-  ((__m128d)__builtin_ia32_rcp28sd_round_mask((__v2df)(__m128d)(A), \
-                                              (__v2df)(__m128d)(B), \
-                                              (__v2df)_mm_setzero_pd(), \
-                                              (__mmask8)(M), (int)(R)))
-
-#define _mm_rcp28_sd(A, B) \
-  _mm_rcp28_round_sd((A), (B), _MM_FROUND_CUR_DIRECTION)
-
-#define _mm_mask_rcp28_sd(S, M, A, B) \
-  _mm_mask_rcp28_round_sd((S), (M), (A), (B), _MM_FROUND_CUR_DIRECTION)
-
-#define _mm_maskz_rcp28_sd(M, A, B) \
-  _mm_maskz_rcp28_round_sd((M), (A), (B), _MM_FROUND_CUR_DIRECTION)
-
-#endif /* __AVX512ERINTRIN_H */
diff --git a/clang/lib/Headers/cpuid.h b/clang/lib/Headers/cpuid.h
index 1ad6853a97c9d2..b09ca3585d6067 100644
--- a/clang/lib/Headers/cpuid.h
+++ b/clang/lib/Headers/cpuid.h
@@ -160,7 +160,6 @@
 #define bit_CLFLUSHOPT  0x00800000
 #define bit_CLWB        0x01000000
 #define bit_AVX512PF    0x04000000
-#define bit_AVX512ER    0x08000000
 #define bit_AVX512CD    0x10000000
 #define bit_SHA         0x20000000
 #define bit_AVX512BW    0x40000000
diff --git a/clang/lib/Headers/immintrin.h b/clang/lib/Headers/immintrin.h
index 0149a1cdea633b..876392e9a5daf0 100644
--- a/clang/lib/Headers/immintrin.h
+++ b/clang/lib/Headers/immintrin.h
@@ -174,11 +174,6 @@
 #include <avx512vldqintrin.h>
 #endif
 
-#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
-    defined(__AVX512ER__)
-#include <avx512erintrin.h>
-#endif
-
 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
     defined(__AVX512IFMA__)
 #include <avx512ifmaintrin.h>
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 66dac99b8d9922..e14bb9b1287b12 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -6300,15 +6300,9 @@ bool Sema::CheckX86BuiltinRoundingOrSAE(unsigned BuiltinID, CallExpr *TheCall) {
   case X86::BI__builtin_ia32_vcvttph2udq512_mask:
   case X86::BI__builtin_ia32_vcvttph2qq512_mask:
   case X86::BI__builtin_ia32_vcvttph2uqq512_mask:
-  case X86::BI__builtin_ia32_exp2pd_mask:
-  case X86::BI__builtin_ia32_exp2ps_mask:
   case X86::BI__builtin_ia32_getexppd512_mask:
   case X86::BI__builtin_ia32_getexpps512_mask:
   case X86::BI__builtin_ia32_getexpph512_mask:
-  case X86::BI__builtin_ia32_rcp28pd_mask:
-  case X86::BI__builtin_ia32_rcp28ps_mask:
-  case X86::BI__builtin_ia32_rsqrt28pd_mask:
-  case X86::BI__builtin_ia32_rsqrt28ps_mask:
   case X86::BI__builtin_ia32_vcomisd:
   case X86::BI__builtin_ia32_vcomiss:
   case X86::BI__builtin_ia32_vcomish:
@@ -6335,16 +6329,12 @@ bool Sema::CheckX86BuiltinRoundingOrSAE(unsigned BuiltinID, CallExpr *TheCall) {
   case X86::BI__builtin_ia32_minsd_round_mask:
   case X86::BI__builtin_ia32_minss_round_mask:
   case X86::BI__builtin_ia32_minsh_round_mask:
-  case X86::BI__builtin_ia32_rcp28sd_round_mask:
-  case X86::BI__builtin_ia32_rcp28ss_round_mask:
   case X86::BI__builtin_ia32_reducepd512_mask:
   case X86::BI__builtin_ia32_reduceps512_mask:
   case X86::BI__builtin_ia32_reduceph512_mask:
   case X86::BI__builtin_ia32_rndscalepd_mask:
   case X86::BI__builtin_ia32_rndscaleps_mask:
   case X86::BI__builtin_ia32_rndscaleph_mask:
-  case X86::BI__builtin_ia32_rsqrt28sd_round_mask:
-  case X86::BI__builtin_ia32_rsqrt28ss_round_mask:
     ArgNum = 4;
     break;
   case X86::BI__builtin_ia32_fixupimmpd512_mask:
diff --git a/clang/test/CodeGen/X86/avx512er-builtins.c b/clang/test/CodeGen/X86/avx512er-builtins.c
deleted file mode 100644
index ee31236a3c01aa..00000000000000
--- a/clang/test/CodeGen/X86/avx512er-builtins.c
+++ /dev/null
@@ -1,347 +0,0 @@
-// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512f -target-feature +avx512er -emit-llvm -o - -Wall -Werror | FileCheck %s
-
-
-#include <immintrin.h>
-
-__m512d test_mm512_rsqrt28_round_pd(__m512d a) {
-  // CHECK-LABEL: @test_mm512_rsqrt28_round_pd
-  // CHECK: @llvm.x86.avx512.rsqrt28.pd
-  return _mm512_rsqrt28_round_pd(a, _MM_FROUND_NO_EXC);
-}
-
-__m512d test_mm512_mask_rsqrt28_round_pd(__m512d s, __mmask8 m, __m512d a) {
-  // CHECK-LABEL: @test_mm512_mask_rsqrt28_round_pd
-  // CHECK: @llvm.x86.avx512.rsqrt28.pd
-  return _mm512_mask_rsqrt28_round_pd(s, m, a, _MM_FROUND_NO_EXC);
-}
-
-__m512d test_mm512_maskz_rsqrt28_round_pd(__mmask8 m, __m512d a) {
-  // CHECK-LABEL: @test_mm512_maskz_rsqrt28_round_pd
-  // CHECK: @llvm.x86.avx512.rsqrt28.pd
-  return _mm512_maskz_rsqrt28_round_pd(m, a, _MM_FROUND_NO_EXC);
-}
-
-__m512d test_mm512_rsqrt28_pd(__m512d a) {
-  // CHECK-LABEL: @test_mm512_rsqrt28_pd
-  // CHECK: @llvm.x86.avx512.rsqrt28.pd
-  return _mm512_rsqrt28_pd(a);
-}
-
-__m512d test_mm512_mask_rsqrt28_pd(__m512d s, __mmask8 m, __m512d a) {
-  // CHECK-LABEL: @test_mm512_mask_rsqrt28_pd
-  // CHECK: @llvm.x86.avx512.rsqrt28.pd
-  return _mm512_mask_rsqrt28_pd(s, m, a);
-}
-
-__m512d test_mm512_maskz_rsqrt28_pd(__mmask8 m, __m512d a) {
-  // CHECK-LABEL: @test_mm512_maskz_rsqrt28_pd
-  // CHECK: @llvm.x86.avx512.rsqrt28.pd
-  return _mm512_maskz_rsqrt28_pd(m, a);
-}
-
-__m512 test_mm512_rsqrt28_round_ps(__m512 a) {
-  // CHECK-LABEL: @test_mm512_rsqrt28_round_ps
-  // CHECK: @llvm.x86.avx512.rsqrt28.ps
-  return _mm512_rsqrt28_round_ps(a, _MM_FROUND_NO_EXC);
-}
-
-__m512 test_mm512_mask_rsqrt28_round_ps(__m512 s, __mmask16 m, __m512 a) {
-  // CHECK-LABEL: @test_mm512_mask_rsqrt28_round_ps
-  // CHECK: @llvm.x86.avx512.rsqrt28.ps
-  return _mm512_mask_rsqrt28_round_ps(s, m, a, _MM_FROUND_NO_EXC);
-}
-
-__m512 test_mm512_maskz_rsqrt28_round_ps(__mmask16 m, __m512 a) {
-  // CHECK-LABEL: @test_mm512_maskz_rsqrt28_round_ps
-  // CHECK: @llvm.x86.avx512.rsqrt28.ps
-  return _mm512_maskz_rsqrt28_round_ps(m, a, _MM_FROUND_NO_EXC);
-}
-
-__m512 test_mm512_rsqrt28_ps(__m512 a) {
-  // CHECK-LABEL: @test_mm512_rsqrt28_ps
-  // CHECK: @llvm.x86.avx512.rsqrt28.ps
-  return _mm512_rsqrt28_ps(a);
-}
-
-__m512 test_mm512_mask_rsqrt28_ps(__m512 s, __mmask16 m, __m512 a) {
-  // CHECK-LABEL: @test_mm512_mask_rsqrt28_ps
-  // CHECK: @llvm.x86.avx512.rsqrt28.ps
-  return _mm512_mask_rsqrt28_ps(s, m, a);
-}
-
-__m512 test_mm512_maskz_rsqrt28_ps(__mmask16 m, __m512 a) {
-  // CHECK-LABEL: @test_mm512_maskz_rsqrt28_ps
-  // CHECK: @llvm.x86.avx512.rsqrt28.ps
-  return _mm512_maskz_rsqrt28_ps(m, a);
-}
-
-__m128 test_mm_rsqrt28_round_ss(__m128 a, __m128 b) {
-  // CHECK-LABEL: @test_mm_rsqrt28_round_ss
-  // CHECK: @llvm.x86.avx512.rsqrt28.ss
-  return _mm_rsqrt28_round_ss(a, b, _MM_FROUND_NO_EXC);
-}
-
-__m128 test_mm_mask_rsqrt28_round_ss(__m128 s, __mmask16 m, __m128 a, __m128 b) {
-  // CHECK-LABEL: @test_mm_mask_rsqrt28_round_ss
-  // CHECK: @llvm.x86.avx512.rsqrt28.ss
-  return _mm_mask_rsqrt28_round_ss(s, m, a, b, _MM_FROUND_NO_EXC);
-}
-
-__m128 test_mm_maskz_rsqrt28_round_ss(__mmask16 m, __m128 a, __m128 b) {
-  // CHECK-LABEL: @test_mm_maskz_rsqrt28_round_ss
-  // CHECK: @llvm.x86.avx512.rsqrt28.ss
-  return _mm_maskz_rsqrt28_round_ss(m, a, b, _MM_FROUND_NO_EXC);
-}
-
-__m128 test_mm_rsqrt28_ss(__m128 a, __m128 b) {
-  // CHECK-LABEL: @test_mm_rsqrt28_ss
-  // CHECK: @llvm.x86.avx512.rsqrt28.ss
-  return _mm_rsqrt28_ss(a, b);
-}
-
-__m128 test_mm_mask_rsqrt28_ss(__m128 s, __mmask16 m, __m128 a, __m128 b) {
-  // CHECK-LABEL: @test_mm_mask_rsqrt28_ss
-  // CHECK: @llvm.x86.avx512.rsqrt28.ss
-  return _mm_mask_rsqrt28_ss(s, m, a, b);
-}
-
-__m128 test_mm_maskz_rsqrt28_ss(__mmask16 m, __m128 a, __m128 b) {
-  // CHECK-LABEL: @test_mm_maskz_rsqrt28_ss
-  // CHECK: @llvm.x86.avx512.rsqrt28.ss
-  return _mm_maskz_rsqrt28_ss(m, a, b);
-}
-
-__m128d test_mm_rsqrt28_round_sd(__m128d a, __m128d b) {
-  // CHECK-LABEL: @test_mm_rsqrt28_round_sd
-  // CHECK: @llvm.x86.avx512.rsqrt28.sd
-  return _mm_rsqrt28_round_sd(a, b, _MM_FROUND_NO_EXC);
-}
-
-__m128d test_mm_mask_rsqrt28_round_sd(__m128d s, __mmask8 m, __m128d a, __m128d b) {
-  // CHECK-LABEL: @test_mm_mask_rsqrt28_round_sd
-  // CHECK: @llvm.x86.avx512.rsqrt28.sd
-  return _mm_mask_rsqrt28_round_sd(s, m, a, b, _MM_FROUND_NO_EXC);
-}
-
-__m128d test_mm_maskz_rsqrt28_round_sd(__mmask8 m, __m128d a, __m128d b) {
-  // CHECK-LABEL: @test_mm_maskz_rsqrt28_round_sd
-  // CHECK: @llvm.x86.avx512.rsqrt28.sd
-  return _mm_maskz_rsqrt28_round_sd(m, a, b, _MM_FROUND_NO_EXC);
-}
-
-__m512d test_mm512_rcp28_round_pd(__m512d a) {
-  // CHECK-LABEL: @test_mm512_rcp28_round_pd
-  // CHECK: @llvm.x86.avx512.rcp28.pd
-  return _mm512_rcp28_round_pd(a, _MM_FROUND_NO_EXC);
-}
-
-__m512d test_mm512_mask_rcp28_round_pd(__m512d s, __mmask8 m, __m512d a) {
-  // CHECK-LABEL: @test_mm512_mask_rcp28_round_pd
-  // CHECK: @llvm.x86.avx512.rcp28.pd
-  return _mm512_mask_rcp28_round_pd(s, m, a, _MM_FROUND_NO_EXC);
-}
-
-__m512d test_mm512_maskz_rcp28_round_pd(__mmask8 m, __m512d a) {
-  // CHECK-LABEL: @test_mm512_maskz_rcp28_round_pd
-  // CHECK: @llvm.x86.avx512.rcp28.pd
-  return _mm512_maskz_rcp28_round_pd(m, a, _MM_FROUND_NO_EXC);
-}
-
-__m512d test_mm512_rcp28_pd(__m512d a) {
-  // CHECK-LABEL: @test_mm512_rcp28_pd
-  // CHECK: @llvm.x86.avx512.rcp28.pd
-  return _mm512_rcp28_pd(a);
-}
-
-__m512d test_mm512_mask_rcp28_pd(__m512d s, __mmask8 m, __m512d a) {
-  // CHECK-LABEL: @test_mm512_mask_rcp28_pd
-  // CHECK: @llvm.x86.avx512.rcp28.pd
-  return _mm512_mask_rcp28_pd(s, m, a);
-}
-
-__m512d test_mm512_maskz_rcp28_pd(__mmask8 m, __m512d a) {
-  // CHECK-LABEL: @test_mm512_maskz_rcp28_pd
-  // CHECK: @llvm.x86.avx512.rcp28.pd
-  return _mm512_maskz_rcp28_pd(m, a);
-}
-
-__m512 test_mm512_rcp28_round_ps(__m512 a) {
-  // CHECK-LABEL: @test_mm512_rcp28_round_ps
-  // CHECK: @llvm.x86.avx512.rcp28.ps
-  return _mm512_rcp28_round_ps(a, _MM_FROUND_NO_EXC);
-}
-
-__m512 test_mm512_mask_rcp28_round_ps(__m512 s, __mmask16 m, __m512 a) {
-  // CHECK-LABEL: @test_mm512_mask_rcp28_round_ps
-  // CHECK: @llvm.x86.avx512.rcp28.ps
-  return _mm512_mask_rcp28_round_ps(s, m, a, _MM_FROUND_NO_EXC);
-}
-
-__m512 test_mm512_maskz_rcp28_round_ps(__mmask16 m, __m512 a) {
-  // CHECK-LABEL: @test_mm512_maskz_rcp28_round_ps
-  // CHECK: @llvm.x86.avx512.rcp28.ps
-  return _mm512_maskz_rcp28_round_ps(m, a, _MM_FROUND_NO_EXC);
-}
-
-__m512 test_mm512_rcp28_ps(__m512 a) {
-  // CHECK-LABEL: @test_mm512_rcp28_ps
-  // CHECK: @llvm.x86.avx512.rcp28.ps
-  return _mm512_rcp28_ps(a);
-}
-
-__m512 test_mm512_mask_rcp28_ps(__m512 s, __mmask16 m, __m512 a) {
-  // CHECK-LABEL: @test_mm512_mask_rcp28_ps
-  // CHECK: @llvm.x86.avx512.rcp28.ps
-  return _mm512_mask_rcp28_ps(s, m, a);
-}
-
-__m512 test_mm512_maskz_rcp28_ps(__mmask16 m, __m512 a) {
-  // CHECK-LABEL: @test_mm512_maskz_rcp28_ps
-  // CHECK: @llvm.x86.avx512.rcp28.ps
-  return _mm512_maskz_rcp28_ps(m, a);
-}
-
-__m128 test_mm_rcp28_round_ss(__m128 a, __m128 b) {
-  // CHECK-LABEL: @test_mm_rcp28_round_ss
-  // CHECK: @llvm.x86.avx512.rcp28.ss
-  return _mm_rcp28_round_ss(a, b, _MM_FROUND_NO_EXC);
-}
-
-__m128 test_mm_mask_rcp28_round_ss(__m128 s, __mmask16 m, __m128 a, __m128 b) {
-  // CHECK-LABEL: @test_mm_mask_rcp28_round_ss
-  // CHECK: @llvm.x86.avx512.rcp28.ss
-  return _mm_mask_rcp28_round_ss(s, m, a, b, _MM_FROUND_NO_EXC);
-}
-
-__m128 test_mm_maskz_rcp28_round_ss(__mmask16 m, __m128 a, __m128 b) {
-  // CHECK-LABEL: @test_mm_maskz_rcp28_round_ss
-  // CHECK: @llvm.x86.avx512.rcp28.ss
-  return _mm_maskz_rcp28_round_ss(m, a, b, _MM_FROUND_NO_EXC);
-}
-
-__m128 test_mm_rcp28_ss(__m128 a, __m128 b) {
-  // CHECK-LABEL: @test_mm_rcp28_ss
-  // CHECK: @llvm.x86.avx512.rcp28.ss
-  return _mm_rcp28_ss(a, b);
-}
-
-__m128 test_mm_mask_rcp28_ss(__m128 s, __mmask16 m, __m128 a, __m128 b) {
-  // CHECK-LABEL: @test_mm_mask_rcp28_ss
-  // CHECK: @llvm.x86.avx512.rcp28.ss
-  return _mm_mask_rcp28_ss(s, m, a, b);
-}
-
-__m128 test_mm_maskz_rcp28_ss(__mmask16 m, __m128 a, __m128 b) {
-  // CHECK-LABEL: @test_mm_maskz_rcp28_ss
-  // CHECK: @llvm.x86.avx512.rcp28.ss
-  return _mm_maskz_rcp28_ss(m, a, b);
-}
-
-__m128d test_mm_rcp28_round_sd(__m128d a, __m128d b) {
-  // CHECK-LABEL: @test_mm_rcp28_round_sd
-  // CHECK: @llvm.x86.avx512.rcp28.sd
-  return _mm_rcp28_round_sd(a, b, _MM_FROUND_NO_EXC);
-}
-
-__m128d test_mm_mask_rcp28_round_sd(__m128d s, __mmask8 m, __m128d a, __m128d b) {
-  // CHECK-LABEL: @test_mm_mask_rcp28_round_sd
-  // CHECK: @llvm.x86.avx512.rcp28.sd
-  return _mm_mask_rcp28_round_sd(s, m, a, b, _MM_FROUND_NO_EXC);
-}
-
-__m128d test_mm_maskz_rcp28_round_sd(__mmask8 m, __m128d a, __m128d b) {
-  // CHECK-LABEL: @test_mm_maskz_rcp28_round_sd
-  // CHECK: @llvm.x86.avx512.rcp28.sd
-  return _mm_maskz_rcp28_round_sd(m, a, b, _MM_FROUND_NO_EXC);
-}
-
-__m128d test_mm_rcp28_sd(__m128d a, __m128d b) {
-  // CHECK-LABEL: @test_mm_rcp28_sd
-  // CHECK: @llvm.x86.avx512.rcp28.sd
-  return _mm_rcp28_sd(a, b);
-}
-
-__m128d test_mm_mask_rcp28_sd(__m128d s, __mmask8 m, __m128d a, __m128d b) {
-  // CHECK-LABEL: @test_mm_mask_rcp28_sd
-  // CHECK: @llvm.x86.avx512.rcp28.sd
-  return _mm_mask_rcp28_sd(s, m, a, b);
-}
-
-__m128d test_mm_maskz_rcp28_sd(__mmask8 m, __m128d a, __m128d b) {
-  // CHECK-LABEL: @test_mm_maskz_rcp28_sd
-  // CHECK: @llvm.x86.avx512.rcp28.sd
-  return _mm_maskz_rcp28_sd(m, a, b);
-}
-
-__m512d test_mm512_exp2a23_round_pd(__m512d a) {
-  // CHECK-LABEL: @test_mm512_exp2a23_round_pd
-  // CHECK: @llvm.x86.avx512.exp2.pd
-  return _mm512_exp2a23_round_pd(a, _MM_FROUND_NO_EXC);
-}
-
-__m512d test_mm512_mask_exp2a23_round_pd(__m512d s, __mmask8 m, __m512d a) {
-  // CHECK-LABEL: @test_mm512_mask_exp2a23_round_pd
-  // CHECK: @llvm.x86.avx512.exp2.pd
-  return _mm512_mask_exp2a23_round_pd(s, m, a, _MM_FROUND_NO_EXC);
-}
-
-__m512d test_mm512_maskz_exp2a23_round_pd(__mmask8 m, __m512d a) {
-  // CHECK-LABEL: @test_mm512_maskz_exp2a23_round_pd
-  // CHECK: @llvm.x86.avx512.exp2.pd
-  return _mm512_maskz_exp2a23_round_pd(m, a, _MM_FROUND_NO_EXC);
-}
-
-__m512d test_mm512_exp2a23_pd(__m512d a) {
-  // CHECK-LABEL: @test_mm512_exp2a23_pd
-  // CHECK: @llvm.x86.avx512.exp2.pd
-  return _mm512_exp2a23_pd(a);
-}
-
-__m512d test_mm512_mask_exp2a23_pd(__m512d s, __mmask8 m, __m512d a) {
-  // CHECK-LABEL: @test_mm512_mask_exp2a23_pd
-  // CHECK: @llvm.x86.avx512.exp2.pd
-  return _mm512_mask_exp2a23_pd(s, m, a);
-}
-
-__m512d test_mm512_maskz_exp2a23_pd(__mmask8 m, __m512d a) {
-  // CHECK-LABEL: @test_mm512_maskz_exp2a23_pd
-  // CHECK: @llvm.x86.avx512.exp2.pd
-  return _mm512_maskz_exp2a23_pd(m, a);
-}
-
-__m512 test_mm512_exp2a23_round_ps(__m512 a) {
-  // CHECK-LABEL: @test_mm512_exp2a23_round_ps
-  // CHECK: @llvm.x86.avx512.exp2.ps
-  return _mm512_exp2a23_round_ps(a, _MM_FROUND_NO_EXC);
-}
-
-__m512 test_mm512_mask_exp2a23_round_ps(__m512 s, __mmask16 m, __m512 a) {
-  // CHECK-LABEL: @test_mm512_mask_exp2a23_round_ps
-  // CHECK: @llvm.x86.avx512.exp2.ps
-  return _mm512_mask_exp2a23_round_ps(s, m, a, _MM_FROUND_NO_EXC);
-}
-
-__m512 test_mm512_maskz_exp2a23_round_ps(__mmask16 m, __m512 a) {
-  // CHECK-LABEL: @test_mm512_maskz_exp2a23_round_ps
-  // CHECK: @llvm.x86.avx512.exp2.ps
-  return _mm512_maskz_exp2a23_round_ps(m, a, _MM_FROUND_NO_EXC);
-}
-
-__m512 test_mm512_exp2a23_ps(__m512 a) {
-  // CHECK-LABEL: @test_mm512_exp2a23_ps
-  // CHECK: @llvm.x86.avx512.exp2.ps
-  return _mm512_exp2a23_ps(a);
-}
-
-__m512 test_mm512_mask_exp2a23_ps(__m512 s, __mmask16 m, __m512 a) {
-  // CHECK-LABEL: @test_mm512_mask_exp2a23_ps
-  // CHECK: @llvm.x86.avx512.exp2.ps
-  return _mm512_mask_exp2a23_ps(s, m, a);
-}
-
-__m512 test_mm512_maskz_exp2a23_ps(__mmask16 m, __m512 a) {
-  // CHECK-LABEL: @test_mm512_maskz_exp2a23_ps
-  // CHECK: @llvm.x86.avx512.exp2.ps
-  return _mm512_maskz_exp2a23_ps(m, a);
-}
-
diff --git a/clang/test/CodeGen/function-target-features.c b/clang/test/CodeGen/function-target-features.c
index 0d8bfc7e4e44c4..d6a73ff8224b61 100644
--- a/clang/test/CodeGen/function-target-features.c
+++ b/clang/test/CodeGen/function-target-features.c
@@ -4,7 +4,7 @@
 
 // RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s -target-feature +avx | FileCheck %s -check-prefix=AVX-FEATURE
 // RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s -target-feature +avx | FileCheck %s -check-prefix=AVX-NO-CPU
-// RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s -target-feature +avx512f -target-feature +avx512er | FileCheck %s -check-prefix=TWO-AVX
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s -target-feature +avx512f -target-feature +avx512bw | FileCheck %s -check-prefix=TWO-AVX
 // RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s -target-cpu corei7 | FileCheck %s -check-prefix=CORE-CPU
 // RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s -target-cpu corei7 -target-feature +avx | FileCheck %s -check-prefix=CORE-CPU-AND-FEATURES
 // RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s -target-cpu x86-64 | FileCheck %s -check-prefix=X86-64-CPU
@@ -17,7 +17,7 @@ void foo(void) {}
 
 // AVX-FEATURE: "target-features"{{.*}}+avx
 // AVX-NO-CPU-NOT: target-cpu
-// TWO-AVX: "target-features"={{.*}}+avx512er{{.*}}+avx512f
+// TWO-AVX: "target-features"={{.*}}+avx512bw{{.*}}+avx512f
 // CORE-CPU: "target-cpu"="corei7"
 // CORE-CPU-AND-FEATURES: "target-cpu"="corei7" "target-features"={{.*}}+avx
 // X86-64-CPU: "target-cpu"="x86-64"
diff --git a/clang/test/CodeGen/target-builtin-noerror.c b/clang/test/CodeGen/target-builtin-noerror.c
index 505f4a3e94565d..9beea3bdef69a7 100644
--- a/clang/test/CodeGen/target-builtin-noerror.c
+++ b/clang/test/CodeGen/target-builtin-noerror.c
@@ -68,7 +68,6 @@ void verifyfeaturestrings(void) {
   (void)__builtin_cpu_supports("avx512bw");
   (void)__builtin_cpu_supports("avx512dq");
   (void)__builtin_cpu_supports("avx512cd");
-  (void)__builtin_cpu_supports("avx512er");
   (void)__builtin_cpu_supports("avx512pf");
   (void)__builtin_cpu_supports("avx512vbmi");
   (void)__builtin_cpu_supports("avx512ifma");
diff --git a/clang/test/Driver/x86-target-features.c b/clang/test/Driver/x86-target-features.c
index 6fb9df96e2b333..942df9259e6a8e 100644
--- a/clang/test/Driver/x86-target-features.c
+++ b/clang/test/Driver/x86-target-features.c
@@ -21,11 +21,6 @@
 // SSE4-AES: "-target-feature" "+sse4.2" "-target-feature" "+aes"
 // NO-SSE4-AES: "-target-feature" "-sse4.1" "-target-feature" "-aes"
 
-// RUN: %clang --target=i386 -march=i386 -mavx -mavx2 -mavx512f -mavx512cd -mavx512er -mavx512pf -mavx512dq -mavx512bw -mavx512vl -mavx512vbmi -mavx512vbmi2 -mavx512ifma %s -### 2>&1 | FileCheck -check-prefix=AVX %s
-// RUN: %clang --target=i386 -march=i386 -mno-avx -mno-avx2 -mno-avx512f -mno-avx512cd -mno-avx512er -mno-avx512pf -mno-avx512dq -mno-avx512bw -mno-avx512vl -mno-avx512vbmi -mno-avx512vbmi2 -mno-avx512ifma %s -### 2>&1 | FileCheck -check-prefix=NO-AVX %s
-// AVX: "-target-feature" "+avx" "-target-feature" "+avx2" "-target-feature" "+avx512f" "-target-feature" "+avx512cd" "-target-feature" "+avx512er" "-target-feature" "+avx512pf" "-target-feature" "+avx512dq" "-target-feature" "+avx512bw" "-target-feature" "+avx512vl" "-target-feature" "+avx512vbmi" "-target-feature" "+avx512vbmi2" "-target-feature" "+avx512ifma"
-// NO-AVX: "-target-feature" "-avx" "-target-feature" "-avx2" "-target-feature" "-avx512f" "-target-feature" "-avx512cd" "-target-feature" "-avx512er" "-target-feature" "-avx512pf" "-target-feature" "-avx512dq" "-target-feature" "-avx512bw" "-target-feature" "-avx512vl" "-target-feature" "-avx512vbmi" "-target-feature" "-avx512vbmi2" "-target-feature" "-avx512ifma"
-
 // RUN: %clang --target=i386 -march=i386 -mpclmul -mrdrnd -mfsgsbase -mbmi -mbmi2 %s -### 2>&1 | FileCheck -check-prefix=BMI %s
 // RUN: %clang --target=i386 -march=i386 -mno-pclmul -mno-rdrnd -mno-fsgsbase -mno-bmi -mno-bmi2 %s -### 2>&1 | FileCheck -check-prefix=NO-BMI %s
 // BMI: "-target-feature" "+pclmul" "-target-feature" "+rdrnd" "-target-feature" "+fsgsbase" "-target-feature" "+bmi" "-target-feature" "+bmi2"
diff --git a/clang/test/Preprocessor/x86_target_features.c b/clang/test/Preprocessor/x86_target_features.c
index 8ef565ccbc85c3..acba2a0a9bda22 100644
--- a/clang/test/Preprocessor/x86_target_features.c
+++ b/clang/test/Preprocessor/x86_target_features.c
@@ -90,22 +90,6 @@
 // AVX512CD: #define __SSE__ 1
 // AVX512CD: #define __SSSE3__ 1
 
-// RUN: %clang -target i386-unknown-unknown -march=atom -mavx512er -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=AVX512ER %s
-
-// AVX512ER: #define __AVX2__ 1
-// AVX512ER: #define __AVX512ER__ 1
-// AVX512ER: #define __AVX512F__ 1
-// AVX512ER: #define __AVX__ 1
-// AVX512ER: #define __EVEX512__ 1
-// AVX512ER: #define __SSE2_MATH__ 1
-// AVX512ER: #define __SSE2__ 1
-// AVX512ER: #define __SSE3__ 1
-// AVX512ER: #define __SSE4_1__ 1
-// AVX512ER: #define __SSE4_2__ 1
-// AVX512ER: #define __SSE_MATH__ 1
-// AVX512ER: #define __SSE__ 1
-// AVX512ER: #define __SSSE3__ 1
-
 // RUN: %clang -target i386-unknown-unknown -march=atom -mavx512pf -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=AVX512PF %s
 
 // AVX512PF: #define __AVX2__ 1
@@ -638,14 +622,12 @@
 
 // RUN: %clang -target i386-unknown-unknown -march=atom -mavx512f -mno-avx512f -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=NOEVEX512 %s
 // RUN: %clang -target i386-unknown-unknown -march=atom -mavx512cd -mno-avx512f -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=NOEVEX512 %s
-// RUN: %clang -target i386-unknown-unknown -march=atom -mavx512er -mno-avx512f -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=NOEVEX512 %s
 // NOEVEX512-NOT: #define __AVX512F__ 1
 // NOEVEX512-NOT: #define __EVEX256__ 1
 // NOEVEX512-NOT: #define __EVEX512__ 1
 
 // RUN: %clang -target i386-unknown-unknown -march=atom -mavx512f -mno-evex512 -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=AVX512NOEVEX512 %s
 // RUN: %clang -target i386-unknown-unknown -march=atom -mavx512cd -mno-evex512 -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=AVX512NOEVEX512 %s
-// RUN: %clang -target i386-unknown-unknown -march=atom -mavx512er -mno-evex512 -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=AVX512NOEVEX512 %s
 // AVX512NOEVEX512: #define __AVX512F__ 1
 // AVX512NOEVEX512-NOT: #define __EVEX256__ 1
 // AVX512NOEVEX512-NOT: #define __EVEX512__ 1
diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td
index fdc2b0fb7f80f1..0fda7e66c06a75 100644
--- a/llvm/include/llvm/IR/IntrinsicsX86.td
+++ b/llvm/include/llvm/IR/IntrinsicsX86.td
@@ -3843,58 +3843,6 @@ let TargetPrefix = "x86" in {  // All intrinsics start with "llvm.x86.".
       DefaultAttrsIntrinsic<[llvm_v16f32_ty],
                             [llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty],
                             [IntrNoMem]>;
-
-  def int_x86_avx512_rcp28_ps : ClangBuiltin<"__builtin_ia32_rcp28ps_mask">,
-      DefaultAttrsIntrinsic<[llvm_v16f32_ty],
-                            [llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty,
-                             llvm_i32_ty],
-                            [IntrNoMem, ImmArg<ArgIndex<3>>]>;
-  def int_x86_avx512_rcp28_pd : ClangBuiltin<"__builtin_ia32_rcp28pd_mask">,
-      DefaultAttrsIntrinsic<[llvm_v8f64_ty],
-                            [llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty,
-                             llvm_i32_ty],
-                            [IntrNoMem, ImmArg<ArgIndex<3>>]>;
-  def int_x86_avx512_exp2_ps : ClangBuiltin<"__builtin_ia32_exp2ps_mask">,
-      DefaultAttrsIntrinsic<[llvm_v16f32_ty],
-                            [llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty,
-                             llvm_i32_ty],
-                            [IntrNoMem, ImmArg<ArgIndex<3>>]>;
-  def int_x86_avx512_exp2_pd : ClangBuiltin<"__builtin_ia32_exp2pd_mask">,
-      DefaultAttrsIntrinsic<[llvm_v8f64_ty],
-                            [llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty,
-                             llvm_i32_ty],
-                            [IntrNoMem, ImmArg<ArgIndex<3>>]>;
-
-  def int_x86_avx512_rcp28_ss : ClangBuiltin<"__builtin_ia32_rcp28ss_round_mask">,
-      DefaultAttrsIntrinsic<[llvm_v4f32_ty],
-                            [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty,
-                             llvm_i8_ty, llvm_i32_ty],
-                            [IntrNoMem, ImmArg<ArgIndex<4>>]>;
-  def int_x86_avx512_rcp28_sd : ClangBuiltin<"__builtin_ia32_rcp28sd_round_mask">,
-      DefaultAttrsIntrinsic<[llvm_v2f64_ty],
-                            [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty,
-                             llvm_i8_ty, llvm_i32_ty],
-                            [IntrNoMem, ImmArg<ArgIndex<4>>]>;
-  def int_x86_avx512_rsqrt28_ps : ClangBuiltin<"__builtin_ia32_rsqrt28ps_mask">,
-      DefaultAttrsIntrinsic<[llvm_v16f32_ty],
-                            [llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty,
-                             llvm_i32_ty],
-                            [IntrNoMem, ImmArg<ArgIndex<3>>]>;
-  def int_x86_avx512_rsqrt28_pd : ClangBuiltin<"__builtin_ia32_rsqrt28pd_mask">,
-      DefaultAttrsIntrinsic<[llvm_v8f64_ty],
-                            [llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty,
-                             llvm_i32_ty],
-                            [IntrNoMem, ImmArg<ArgIndex<3>>]>;
-  def int_x86_avx512_rsqrt28_ss : ClangBuiltin<"__builtin_ia32_rsqrt28ss_round_mask">,
-      DefaultAttrsIntrinsic<[llvm_v4f32_ty],
-                            [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty,
-                             llvm_i8_ty, llvm_i32_ty],
-                            [IntrNoMem, ImmArg<ArgIndex<4>>]>;
-  def int_x86_avx512_rsqrt28_sd : ClangBuiltin<"__builtin_ia32_rsqrt28sd_round_mask">,
-      DefaultAttrsIntrinsic<[llvm_v2f64_ty],
-                            [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty,
-                             llvm_i8_ty, llvm_i32_ty],
-                            [IntrNoMem, ImmArg<ArgIndex<4>>]>;
   def int_x86_avx512_psad_bw_512 : ClangBuiltin<"__builtin_ia32_psadbw512">,
       DefaultAttrsIntrinsic<[llvm_v8i64_ty], [llvm_v64i8_ty, llvm_v64i8_ty],
                             [IntrNoMem, Commutative]>;
diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td
index d13dc9a271e59b..682b32e29cff55 100644
--- a/llvm/lib/Target/X86/X86.td
+++ b/llvm/lib/Target/X86/X86.td
@@ -124,9 +124,6 @@ def FeatureEVEX512  : SubtargetFeature<"evex512", "HasEVEX512", "true",
 def FeatureAVX512   : SubtargetFeature<"avx512f", "X86SSELevel", "AVX512",
                                       "Enable AVX-512 instructions",
                                       [FeatureAVX2, FeatureFMA, FeatureF16C]>;
-def FeatureERI      : SubtargetFeature<"avx512er", "HasERI", "true",
-                      "Enable AVX-512 Exponential and Reciprocal Instructions",
-                                      [FeatureAVX512]>;
 def FeatureCDI      : SubtargetFeature<"avx512cd", "HasCDI", "true",
                       "Enable AVX-512 Conflict Detection Instructions",
                                       [FeatureAVX512]>;
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 7c3c1d5fe42b3c..981cefc1b55638 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -9247,6 +9247,31 @@ multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
   }
 }
 
+multiclass avx512_fp28_s_ass<bits<8> opc, string OpcodeStr, X86VectorVTInfo _> {
+  let ExeDomain = _.ExeDomain, hasNoSchedulingInfo = 1 in {
+  defm r : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
+                           (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
+                           "$src2, $src1", "$src1, $src2",
+                           (null_frag)>, Sched<[WriteMove]>;
+  defm rb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
+                            (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
+                            "{sae}, $src2, $src1", "$src1, $src2, {sae}",
+                            (null_frag)>, Sched<[WriteMove]>, EVEX_B;
+  defm m : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
+                         (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
+                         "$src2, $src1", "$src1, $src2",
+                         (null_frag)>, Sched<[WriteMove]>;
+  }
+}
+
+multiclass avx512_eri_s_ass<bits<8> opc, string OpcodeStr> {
+  defm SSZ : avx512_fp28_s_ass<opc, OpcodeStr#"ss", f32x_info>, EVEX_CD8<32, CD8VT1>, VEX_LIG, T8PD, EVEX_4V;
+  defm SDZ : avx512_fp28_s_ass<opc, OpcodeStr#"sd", f64x_info>, EVEX_CD8<64, CD8VT1>, VEX_LIG, REX_W, T8PD, EVEX_4V;
+}
+
+defm VRCP28   : avx512_eri_s_ass<0xCB, "vrcp28">;
+defm VRSQRT28 : avx512_eri_s_ass<0xCD, "vrsqrt28">;
+
 multiclass avx512_eri_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
                         SDNode OpNodeSAE, X86FoldableSchedWrite sched> {
   defm SSZ : avx512_fp28_s<opc, OpcodeStr#"ss", f32x_info, OpNode, OpNodeSAE,
@@ -9262,13 +9287,6 @@ multiclass avx512_vgetexpsh<bits<8> opc, string OpcodeStr, SDNode OpNode,
                EVEX_CD8<16, CD8VT1>, T_MAP6, PD, EVEX, VVVV;
 }
 
-let Predicates = [HasERI] in {
-  defm VRCP28   : avx512_eri_s<0xCB, "vrcp28", X86rcp28s, X86rcp28SAEs,
-                               SchedWriteFRcp.Scl>;
-  defm VRSQRT28 : avx512_eri_s<0xCD, "vrsqrt28", X86rsqrt28s, X86rsqrt28SAEs,
-                               SchedWriteFRsqrt.Scl>;
-}
-
 defm VGETEXP   : avx512_eri_s<0x43, "vgetexp", X86fgetexps, X86fgetexpSAEs,
                               SchedWriteFRnd.Scl>,
                  avx512_vgetexpsh<0x43, "vgetexp", X86fgetexps, X86fgetexpSAEs,
@@ -9307,6 +9325,43 @@ multiclass avx512_fp28_p_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                         EVEX_B, Sched<[sched]>;
 }
 
+multiclass avx512_fp28_p_ass<bits<8> opc, string OpcodeStr, X86VectorVTInfo _> {
+  let ExeDomain = _.ExeDomain, hasNoSchedulingInfo = 1 in {
+  defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
+                         (ins _.RC:$src), OpcodeStr, "$src", "$src",
+                         (null_frag)>, Sched<[WriteMove]>;
+
+  defm m : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
+                         (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
+                         (null_frag)>, Sched<[WriteMove]>;
+
+  defm mb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
+                         (ins _.ScalarMemOp:$src), OpcodeStr,
+                         "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr,
+                         (null_frag)>, Sched<[WriteMove]>, EVEX_B;
+  }
+}
+multiclass avx512_fp28_p_sae_ass<bits<8> opc, string OpcodeStr, X86VectorVTInfo _> {
+  let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
+  defm rb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
+                        (ins _.RC:$src), OpcodeStr,
+                        "{sae}, $src", "$src, {sae}",
+                        (null_frag)>, Sched<[WriteMove]>, EVEX_B;
+}
+
+multiclass  avx512_eri_ass<bits<8> opc, string OpcodeStr> {
+   defm PSZ : avx512_fp28_p_ass<opc, OpcodeStr#"ps", v16f32_info>,
+              avx512_fp28_p_sae_ass<opc, OpcodeStr#"ps", v16f32_info>,
+              T8PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
+   defm PDZ : avx512_fp28_p_ass<opc, OpcodeStr#"pd", v8f64_info>,
+              avx512_fp28_p_sae_ass<opc, OpcodeStr#"pd", v8f64_info>,
+              T8PD, EVEX_V512, REX_W, EVEX_CD8<64, CD8VF>;
+}
+
+defm VRSQRT28 : avx512_eri_ass<0xCC, "vrsqrt28">, EVEX;
+defm VRCP28   : avx512_eri_ass<0xCA, "vrcp28">, EVEX;
+defm VEXP2    : avx512_eri_ass<0xC8, "vexp2">, EVEX;
+
 multiclass  avx512_eri<bits<8> opc, string OpcodeStr, SDNode OpNode,
                        SDNode OpNodeSAE, X86SchedWriteWidths sched> {
    defm PSZ : avx512_fp28_p<opc, OpcodeStr#"ps", v16f32_info, OpNode, sched.ZMM>,
@@ -9349,14 +9404,7 @@ multiclass  avx512_vgetexp_fp16<bits<8> opc, string OpcodeStr, SDNode OpNode,
                                      EVEX_V256, T_MAP6, PD, EVEX_CD8<16, CD8VF>;
   }
 }
-let Predicates = [HasERI] in {
- defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28, X86rsqrt28SAE,
-                            SchedWriteFRsqrt>, EVEX;
- defm VRCP28   : avx512_eri<0xCA, "vrcp28", X86rcp28, X86rcp28SAE,
-                            SchedWriteFRcp>, EVEX;
- defm VEXP2    : avx512_eri<0xC8, "vexp2", X86exp2, X86exp2SAE,
-                            SchedWriteFAdd>, EVEX;
-}
+
 defm VGETEXP   : avx512_eri<0x42, "vgetexp", X86fgetexp, X86fgetexpSAE,
                             SchedWriteFRnd>,
                  avx512_vgetexp_fp16<0x42, "vgetexp", X86fgetexp, X86fgetexpSAE,
diff --git a/llvm/lib/Target/X86/X86InstrPredicates.td b/llvm/lib/Target/X86/X86InstrPredicates.td
index df4bc38aa0b56f..6b89d2834a1da3 100644
--- a/llvm/lib/Target/X86/X86InstrPredicates.td
+++ b/llvm/lib/Target/X86/X86InstrPredicates.td
@@ -44,7 +44,6 @@ def NoAVX512     : Predicate<"!Subtarget->hasAVX512()">;
 def HasCDI       : Predicate<"Subtarget->hasCDI()">;
 def HasVPOPCNTDQ : Predicate<"Subtarget->hasVPOPCNTDQ()">;
 def HasPFI       : Predicate<"Subtarget->hasPFI()">;
-def HasERI       : Predicate<"Subtarget->hasERI()">;
 def HasDQI       : Predicate<"Subtarget->hasDQI()">;
 def NoDQI        : Predicate<"!Subtarget->hasDQI()">;
 def HasBWI       : Predicate<"Subtarget->hasBWI()">;
diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
index 3bb2f07b5f1a13..9a0a4e86570357 100644
--- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h
+++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
@@ -454,8 +454,6 @@ static const IntrinsicData  IntrinsicsWithoutChain[] = {
   X86_INTRINSIC_DATA(avx512_dbpsadbw_512, INTR_TYPE_3OP_IMM8, X86ISD::DBPSADBW, 0),
   X86_INTRINSIC_DATA(avx512_div_pd_512, INTR_TYPE_2OP, ISD::FDIV, X86ISD::FDIV_RND),
   X86_INTRINSIC_DATA(avx512_div_ps_512, INTR_TYPE_2OP, ISD::FDIV, X86ISD::FDIV_RND),
-  X86_INTRINSIC_DATA(avx512_exp2_pd, INTR_TYPE_1OP_MASK_SAE, X86ISD::EXP2, X86ISD::EXP2_SAE),
-  X86_INTRINSIC_DATA(avx512_exp2_ps, INTR_TYPE_1OP_MASK_SAE, X86ISD::EXP2, X86ISD::EXP2_SAE),
   X86_INTRINSIC_DATA(avx512_fpclass_pd_128, INTR_TYPE_2OP, X86ISD::VFPCLASS, 0),
   X86_INTRINSIC_DATA(avx512_fpclass_pd_256, INTR_TYPE_2OP, X86ISD::VFPCLASS, 0),
   X86_INTRINSIC_DATA(avx512_fpclass_pd_512, INTR_TYPE_2OP, X86ISD::VFPCLASS, 0),
@@ -908,10 +906,6 @@ static const IntrinsicData  IntrinsicsWithoutChain[] = {
   X86_INTRINSIC_DATA(avx512_rcp14_ps_512, INTR_TYPE_1OP_MASK, X86ISD::RCP14, 0),
   X86_INTRINSIC_DATA(avx512_rcp14_sd, INTR_TYPE_SCALAR_MASK, X86ISD::RCP14S, 0),
   X86_INTRINSIC_DATA(avx512_rcp14_ss, INTR_TYPE_SCALAR_MASK, X86ISD::RCP14S, 0),
-  X86_INTRINSIC_DATA(avx512_rcp28_pd, INTR_TYPE_1OP_MASK_SAE, X86ISD::RCP28, X86ISD::RCP28_SAE),
-  X86_INTRINSIC_DATA(avx512_rcp28_ps, INTR_TYPE_1OP_MASK_SAE, X86ISD::RCP28, X86ISD::RCP28_SAE),
-  X86_INTRINSIC_DATA(avx512_rcp28_sd, INTR_TYPE_SCALAR_MASK_SAE, X86ISD::RCP28S, X86ISD::RCP28S_SAE),
-  X86_INTRINSIC_DATA(avx512_rcp28_ss, INTR_TYPE_SCALAR_MASK_SAE, X86ISD::RCP28S, X86ISD::RCP28S_SAE),
   X86_INTRINSIC_DATA(avx512_rsqrt14_pd_128, INTR_TYPE_1OP_MASK, X86ISD::RSQRT14, 0),
   X86_INTRINSIC_DATA(avx512_rsqrt14_pd_256, INTR_TYPE_1OP_MASK, X86ISD::RSQRT14, 0),
   X86_INTRINSIC_DATA(avx512_rsqrt14_pd_512, INTR_TYPE_1OP_MASK, X86ISD::RSQRT14, 0),
@@ -920,10 +914,6 @@ static const IntrinsicData  IntrinsicsWithoutChain[] = {
   X86_INTRINSIC_DATA(avx512_rsqrt14_ps_512, INTR_TYPE_1OP_MASK, X86ISD::RSQRT14, 0),
   X86_INTRINSIC_DATA(avx512_rsqrt14_sd, INTR_TYPE_SCALAR_MASK, X86ISD::RSQRT14S, 0),
   X86_INTRINSIC_DATA(avx512_rsqrt14_ss, INTR_TYPE_SCALAR_MASK, X86ISD::RSQRT14S, 0),
-  X86_INTRINSIC_DATA(avx512_rsqrt28_pd, INTR_TYPE_1OP_MASK_SAE,X86ISD::RSQRT28, X86ISD::RSQRT28_SAE),
-  X86_INTRINSIC_DATA(avx512_rsqrt28_ps, INTR_TYPE_1OP_MASK_SAE,X86ISD::RSQRT28, X86ISD::RSQRT28_SAE),
-  X86_INTRINSIC_DATA(avx512_rsqrt28_sd, INTR_TYPE_SCALAR_MASK_SAE,X86ISD::RSQRT28S, X86ISD::RSQRT28S_SAE),
-  X86_INTRINSIC_DATA(avx512_rsqrt28_ss, INTR_TYPE_SCALAR_MASK_SAE,X86ISD::RSQRT28S, X86ISD::RSQRT28S_SAE),
   X86_INTRINSIC_DATA(avx512_sitofp_round, INTR_TYPE_1OP, ISD::SINT_TO_FP, X86ISD::SINT_TO_FP_RND),
   X86_INTRINSIC_DATA(avx512_sqrt_pd_512, INTR_TYPE_1OP, ISD::FSQRT, X86ISD::FSQRT_RND),
   X86_INTRINSIC_DATA(avx512_sqrt_ps_512, INTR_TYPE_1OP, ISD::FSQRT, X86ISD::FSQRT_RND),
diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp
index 11c5000acc077d..cae3ba1dffe481 100644
--- a/llvm/lib/TargetParser/Host.cpp
+++ b/llvm/lib/TargetParser/Host.cpp
@@ -975,8 +975,6 @@ getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
         CPU = "cascadelake";
       } else if (testFeature(X86::FEATURE_AVX512VL)) {
         CPU = "skylake-avx512";
-      } else if (testFeature(X86::FEATURE_AVX512ER)) {
-        CPU = "knl";
       } else if (testFeature(X86::FEATURE_CLFLUSHOPT)) {
         if (testFeature(X86::FEATURE_SHA))
           CPU = "goldmont";
@@ -1270,8 +1268,6 @@ static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
     setFeature(X86::FEATURE_CLFLUSHOPT);
   if (HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save)
     setFeature(X86::FEATURE_AVX512PF);
-  if (HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save)
-    setFeature(X86::FEATURE_AVX512ER);
   if (HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save)
     setFeature(X86::FEATURE_AVX512CD);
   if (HasLeaf7 && ((EBX >> 29) & 1))
@@ -1770,7 +1766,6 @@ bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
   Features["clflushopt"] = HasLeaf7 && ((EBX >> 23) & 1);
   Features["clwb"]       = HasLeaf7 && ((EBX >> 24) & 1);
   Features["avx512pf"]   = HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save;
-  Features["avx512er"]   = HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save;
   Features["avx512cd"]   = HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save;
   Features["sha"]        = HasLeaf7 && ((EBX >> 29) & 1);
   Features["avx512bw"]   = HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save;
diff --git a/llvm/test/CodeGen/X86/avx512er-intrinsics.ll b/llvm/test/CodeGen/X86/avx512er-intrinsics.ll
deleted file mode 100644
index fa4025f76b57db..00000000000000
--- a/llvm/test/CodeGen/X86/avx512er-intrinsics.ll
+++ /dev/null
@@ -1,306 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512er --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512er --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64
-
-define <16 x float> @test_rsqrt28_ps(<16 x float> %a0) {
-; CHECK-LABEL: test_rsqrt28_ps:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vrsqrt28ps {sae}, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x18,0xcc,0xc0]
-; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
-  %res = call <16 x float> @llvm.x86.avx512.rsqrt28.ps(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 8)
-  ret <16 x float> %res
-}
-
-define <16 x float> @test1_rsqrt28_ps(<16 x float> %a0, <16 x float> %a1) {
-; CHECK-LABEL: test1_rsqrt28_ps:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    movw $6, %ax # encoding: [0x66,0xb8,0x06,0x00]
-; CHECK-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
-; CHECK-NEXT:    vrsqrt28ps {sae}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x19,0xcc,0xc8]
-; CHECK-NEXT:    vmovaps %zmm1, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
-; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
-  %res = call <16 x float> @llvm.x86.avx512.rsqrt28.ps(<16 x float> %a0, <16 x float> %a1, i16 6, i32 8)
-  ret <16 x float> %res
-}
-
-define <16 x float> @test2_rsqrt28_ps(<16 x float> %a0) {
-; CHECK-LABEL: test2_rsqrt28_ps:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    movw $6, %ax # encoding: [0x66,0xb8,0x06,0x00]
-; CHECK-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
-; CHECK-NEXT:    vrsqrt28ps %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0xcc,0xc0]
-; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
-  %res = call <16 x float> @llvm.x86.avx512.rsqrt28.ps(<16 x float> %a0, <16 x float> undef, i16 6, i32 4)
-  ret <16 x float> %res
-}
-
-define <16 x float> @test3_rsqrt28_ps(<16 x float> %a0) {
-; CHECK-LABEL: test3_rsqrt28_ps:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    movw $6, %ax # encoding: [0x66,0xb8,0x06,0x00]
-; CHECK-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
-; CHECK-NEXT:    vrsqrt28ps %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0xcc,0xc0]
-; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
-  %res = call <16 x float> @llvm.x86.avx512.rsqrt28.ps(<16 x float> %a0, <16 x float> zeroinitializer, i16 6, i32 4)
-  ret <16 x float> %res
-}
-
-define <16 x float> @test4_rsqrt28_ps(<16 x float> %a0) {
-; CHECK-LABEL: test4_rsqrt28_ps:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    movw $6, %ax # encoding: [0x66,0xb8,0x06,0x00]
-; CHECK-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
-; CHECK-NEXT:    vrsqrt28ps {sae}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x99,0xcc,0xc0]
-; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
-  %res = call <16 x float> @llvm.x86.avx512.rsqrt28.ps(<16 x float> %a0, <16 x float> undef, i16 6, i32 8)
-  ret <16 x float> %res
-}
-
-declare <16 x float> @llvm.x86.avx512.rsqrt28.ps(<16 x float>, <16 x float>, i16, i32) nounwind readnone
-
-define <16 x float> @test_rcp28_ps_512(<16 x float> %a0) {
-; CHECK-LABEL: test_rcp28_ps_512:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vrcp28ps {sae}, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x18,0xca,0xc0]
-; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
-  %res = call <16 x float> @llvm.x86.avx512.rcp28.ps(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 8)
-  ret <16 x float> %res
-}
-declare <16 x float> @llvm.x86.avx512.rcp28.ps(<16 x float>, <16 x float>, i16, i32) nounwind readnone
-
-define <8 x double> @test_rcp28_pd_512(<8 x double> %a0) {
-; CHECK-LABEL: test_rcp28_pd_512:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vrcp28pd {sae}, %zmm0, %zmm0 # encoding: [0x62,0xf2,0xfd,0x18,0xca,0xc0]
-; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
-  %res = call <8 x double> @llvm.x86.avx512.rcp28.pd(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 8)
-  ret <8 x double> %res
-}
-declare <8 x double> @llvm.x86.avx512.rcp28.pd(<8 x double>, <8 x double>, i8, i32) nounwind readnone
-
-define <16 x float> @test_exp2_ps_512(<16 x float> %a0) {
-; CHECK-LABEL: test_exp2_ps_512:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vexp2ps {sae}, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x18,0xc8,0xc0]
-; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
-  %res = call <16 x float> @llvm.x86.avx512.exp2.ps(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 8)
-  ret <16 x float> %res
-}
-declare <16 x float> @llvm.x86.avx512.exp2.ps(<16 x float>, <16 x float>, i16, i32) nounwind readnone
-
-define <8 x double> @test_exp2_pd_512(<8 x double> %a0) {
-; CHECK-LABEL: test_exp2_pd_512:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vexp2pd {sae}, %zmm0, %zmm0 # encoding: [0x62,0xf2,0xfd,0x18,0xc8,0xc0]
-; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
-  %res = call <8 x double> @llvm.x86.avx512.exp2.pd(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 8)
-  ret <8 x double> %res
-}
-declare <8 x double> @llvm.x86.avx512.exp2.pd(<8 x double>, <8 x double>, i8, i32) nounwind readnone
-
-define <4 x float> @test_rsqrt28_ss(<4 x float> %a0) {
-; CHECK-LABEL: test_rsqrt28_ss:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vrsqrt28ss {sae}, %xmm0, %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7d,0x18,0xcd,0xc0]
-; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
-  %res = call <4 x float> @llvm.x86.avx512.rsqrt28.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 -1, i32 8) ; <<4 x float>> [#uses=1]
-  ret <4 x float> %res
-}
-declare <4 x float> @llvm.x86.avx512.rsqrt28.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32) nounwind readnone
-
-define <4 x float> @test_rcp28_ss(<4 x float> %a0) {
-; CHECK-LABEL: test_rcp28_ss:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vrcp28ss {sae}, %xmm0, %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7d,0x18,0xcb,0xc0]
-; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
-  %res = call <4 x float> @llvm.x86.avx512.rcp28.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 -1, i32 8) ; <<4 x float>> [#uses=1]
-  ret <4 x float> %res
-}
-declare <4 x float> @llvm.x86.avx512.rcp28.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32) nounwind readnone
-
-define <4 x float> @test_rcp28_ss_load(<4 x float> %a0, ptr %a1ptr) {
-; X86-LABEL: test_rcp28_ss_load:
-; X86:       # %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT:    vrcp28ss (%eax), %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0xcb,0x00]
-; X86-NEXT:    retl # encoding: [0xc3]
-;
-; X64-LABEL: test_rcp28_ss_load:
-; X64:       # %bb.0:
-; X64-NEXT:    vrcp28ss (%rdi), %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0xcb,0x07]
-; X64-NEXT:    retq # encoding: [0xc3]
-  %a1 = load <4 x float>, ptr %a1ptr
-  %res = call <4 x float> @llvm.x86.avx512.rcp28.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> undef, i8 -1, i32 4) ; <<4 x float>> [#uses=1]
-  ret <4 x float> %res
-}
-
-define <4 x float> @test_rsqrt28_ss_load(<4 x float> %a0, ptr %a1ptr) {
-; X86-LABEL: test_rsqrt28_ss_load:
-; X86:       # %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT:    vrsqrt28ss (%eax), %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0xcd,0x00]
-; X86-NEXT:    retl # encoding: [0xc3]
-;
-; X64-LABEL: test_rsqrt28_ss_load:
-; X64:       # %bb.0:
-; X64-NEXT:    vrsqrt28ss (%rdi), %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0xcd,0x07]
-; X64-NEXT:    retq # encoding: [0xc3]
-  %a1 = load <4 x float>, ptr %a1ptr
-  %res = call <4 x float> @llvm.x86.avx512.rsqrt28.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> undef, i8 -1, i32 4) ; <<4 x float>> [#uses=1]
-  ret <4 x float> %res
-}
-
-define <4 x float> @test_rsqrt28_ss_maskz(<4 x float> %a0, i8 %mask) {
-; X86-LABEL: test_rsqrt28_ss_maskz:
-; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
-; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
-; X86-NEXT:    vrsqrt28ss {sae}, %xmm0, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x99,0xcd,0xc0]
-; X86-NEXT:    retl # encoding: [0xc3]
-;
-; X64-LABEL: test_rsqrt28_ss_maskz:
-; X64:       # %bb.0:
-; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
-; X64-NEXT:    vrsqrt28ss {sae}, %xmm0, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x99,0xcd,0xc0]
-; X64-NEXT:    retq # encoding: [0xc3]
-  %res = call <4 x float> @llvm.x86.avx512.rsqrt28.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 %mask, i32 8) ;
-  ret <4 x float> %res
-}
-
-define <4 x float> @test_rsqrt28_ss_mask(<4 x float> %a0, <4 x float> %b0, <4 x float> %c0, i8 %mask) {
-; X86-LABEL: test_rsqrt28_ss_mask:
-; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
-; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
-; X86-NEXT:    vrsqrt28ss {sae}, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x19,0xcd,0xd1]
-; X86-NEXT:    vmovaps %xmm2, %xmm0 # encoding: [0xc5,0xf8,0x28,0xc2]
-; X86-NEXT:    retl # encoding: [0xc3]
-;
-; X64-LABEL: test_rsqrt28_ss_mask:
-; X64:       # %bb.0:
-; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
-; X64-NEXT:    vrsqrt28ss {sae}, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x19,0xcd,0xd1]
-; X64-NEXT:    vmovaps %xmm2, %xmm0 # encoding: [0xc5,0xf8,0x28,0xc2]
-; X64-NEXT:    retq # encoding: [0xc3]
-  %res = call <4 x float> @llvm.x86.avx512.rsqrt28.ss(<4 x float> %a0, <4 x float> %b0, <4 x float> %c0, i8 %mask, i32 8) ;
-  ret <4 x float> %res
-}
-
-define <2 x double> @test_rcp28_sd_mask_load(<2 x double> %a0, ptr %a1ptr, <2 x double> %a2, i8 %mask) {
-; X86-LABEL: test_rcp28_sd_mask_load:
-; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
-; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
-; X86-NEXT:    vrcp28sd %xmm0, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0xcb,0xc8]
-; X86-NEXT:    vmovapd %xmm1, %xmm0 # encoding: [0xc5,0xf9,0x28,0xc1]
-; X86-NEXT:    retl # encoding: [0xc3]
-;
-; X64-LABEL: test_rcp28_sd_mask_load:
-; X64:       # %bb.0:
-; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
-; X64-NEXT:    vrcp28sd %xmm0, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0xcb,0xc8]
-; X64-NEXT:    vmovapd %xmm1, %xmm0 # encoding: [0xc5,0xf9,0x28,0xc1]
-; X64-NEXT:    retq # encoding: [0xc3]
-  %a1 = load <2 x double>, ptr %a1ptr
-  %res = call <2 x double> @llvm.x86.avx512.rcp28.sd(<2 x double> %a0, <2 x double> %a0, <2 x double> %a2, i8 %mask, i32 4) ;
-  ret <2 x double> %res
-}
-declare <2 x double> @llvm.x86.avx512.rcp28.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32) nounwind readnone
-
-define <2 x double> @test_rsqrt28_sd_maskz_load(<2 x double> %a0, ptr %a1ptr, i8 %mask) {
-; X86-LABEL: test_rsqrt28_sd_maskz_load:
-; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
-; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
-; X86-NEXT:    vrsqrt28sd %xmm0, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0xcd,0xc0]
-; X86-NEXT:    retl # encoding: [0xc3]
-;
-; X64-LABEL: test_rsqrt28_sd_maskz_load:
-; X64:       # %bb.0:
-; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
-; X64-NEXT:    vrsqrt28sd %xmm0, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0xcd,0xc0]
-; X64-NEXT:    retq # encoding: [0xc3]
-  %a1 = load <2 x double>, ptr %a1ptr
-  %res = call <2 x double> @llvm.x86.avx512.rsqrt28.sd(<2 x double> %a0, <2 x double> %a0, <2 x double> zeroinitializer, i8 %mask, i32 4) ;
-  ret <2 x double> %res
-}
-
-define <2 x double> @test_rsqrt28_sd_maskz(<2 x double> %a0, i8 %mask) {
-; X86-LABEL: test_rsqrt28_sd_maskz:
-; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
-; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
-; X86-NEXT:    vrsqrt28sd {sae}, %xmm0, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x99,0xcd,0xc0]
-; X86-NEXT:    retl # encoding: [0xc3]
-;
-; X64-LABEL: test_rsqrt28_sd_maskz:
-; X64:       # %bb.0:
-; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
-; X64-NEXT:    vrsqrt28sd {sae}, %xmm0, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x99,0xcd,0xc0]
-; X64-NEXT:    retq # encoding: [0xc3]
-  %res = call <2 x double> @llvm.x86.avx512.rsqrt28.sd(<2 x double> %a0, <2 x double> %a0, <2 x double> zeroinitializer, i8 %mask, i32 8) ;
-  ret <2 x double> %res
-}
-
-define <2 x double> @test_rsqrt28_sd_mask(<2 x double> %a0, <2 x double> %b0, <2 x double> %c0, i8 %mask) {
-; X86-LABEL: test_rsqrt28_sd_mask:
-; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
-; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
-; X86-NEXT:    vrsqrt28sd {sae}, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x19,0xcd,0xd1]
-; X86-NEXT:    vmovapd %xmm2, %xmm0 # encoding: [0xc5,0xf9,0x28,0xc2]
-; X86-NEXT:    retl # encoding: [0xc3]
-;
-; X64-LABEL: test_rsqrt28_sd_mask:
-; X64:       # %bb.0:
-; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
-; X64-NEXT:    vrsqrt28sd {sae}, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x19,0xcd,0xd1]
-; X64-NEXT:    vmovapd %xmm2, %xmm0 # encoding: [0xc5,0xf9,0x28,0xc2]
-; X64-NEXT:    retq # encoding: [0xc3]
-  %res = call <2 x double> @llvm.x86.avx512.rsqrt28.sd(<2 x double> %a0, <2 x double> %b0, <2 x double> %c0, i8 %mask, i32 8) ;
-  ret <2 x double> %res
-}
-
-declare <2 x double> @llvm.x86.avx512.rsqrt28.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32) nounwind readnone
-
-define <2 x double> @test_rsqrt28_sd_maskz_mem(<2 x double> %a0, ptr %ptr, i8 %mask) {
-; X86-LABEL: test_rsqrt28_sd_maskz_mem:
-; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
-; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT:    vrsqrt28sd (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0xcd,0x00]
-; X86-NEXT:    retl # encoding: [0xc3]
-;
-; X64-LABEL: test_rsqrt28_sd_maskz_mem:
-; X64:       # %bb.0:
-; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
-; X64-NEXT:    vrsqrt28sd (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0xcd,0x07]
-; X64-NEXT:    retq # encoding: [0xc3]
-  %mem = load double , ptr %ptr, align 8
-  %mem_v = insertelement <2 x double> undef, double %mem, i32 0
-  %res = call <2 x double> @llvm.x86.avx512.rsqrt28.sd(<2 x double> %a0, <2 x double> %mem_v, <2 x double> zeroinitializer, i8 %mask, i32 4) ;
-  ret <2 x double> %res
-}
-
-define <2 x double> @test_rsqrt28_sd_maskz_mem_offset(<2 x double> %a0, ptr %ptr, i8 %mask) {
-; X86-LABEL: test_rsqrt28_sd_maskz_mem_offset:
-; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
-; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT:    vrsqrt28sd 144(%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0xcd,0x40,0x12]
-; X86-NEXT:    retl # encoding: [0xc3]
-;
-; X64-LABEL: test_rsqrt28_sd_maskz_mem_offset:
-; X64:       # %bb.0:
-; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
-; X64-NEXT:    vrsqrt28sd 144(%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0xcd,0x47,0x12]
-; X64-NEXT:    retq # encoding: [0xc3]
-  %ptr1 = getelementptr double, ptr %ptr, i32 18
-  %mem = load double , ptr %ptr1, align 8
-  %mem_v = insertelement <2 x double> undef, double %mem, i32 0
-  %res = call <2 x double> @llvm.x86.avx512.rsqrt28.sd(<2 x double> %a0, <2 x double> %mem_v, <2 x double> zeroinitializer, i8 %mask, i32 4) ;
-  ret <2 x double> %res
-}
-
diff --git a/llvm/test/CodeGen/X86/crc32-target-feature.ll b/llvm/test/CodeGen/X86/crc32-target-feature.ll
index ef4fafcae5dce0..9dfe27e653511b 100644
--- a/llvm/test/CodeGen/X86/crc32-target-feature.ll
+++ b/llvm/test/CodeGen/X86/crc32-target-feature.ll
@@ -25,5 +25,5 @@ define i32 @test3(i32 %a, i8 %b) nounwind #2 {
 declare i32 @llvm.x86.sse42.crc32.32.8(i32, i8) nounwind
 
 attributes #0 = { "target-features"="+crc32" }
-attributes #1 = { "target-features"="+cx8,+fxsr,-3dnow,-3dnowa,-aes,-avx,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxvnni,-f16c,-fma,-fma4,-gfni,-kl,-mmx,-pclmul,-sha,-sse,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-widekl,-x87,-xop,+crc32" }
-attributes #2 = { "target-features"="+crc32,+cx8,+fxsr,-3dnow,-3dnowa,-aes,-avx,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxvnni,-f16c,-fma,-fma4,-gfni,-kl,-mmx,-pclmul,-sha,-sse,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-widekl,-x87,-xop" }
+attributes #1 = { "target-features"="+cx8,+fxsr,-3dnow,-3dnowa,-aes,-avx,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxvnni,-f16c,-fma,-fma4,-gfni,-kl,-mmx,-pclmul,-sha,-sse,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-widekl,-x87,-xop,+crc32" }
+attributes #2 = { "target-features"="+crc32,+cx8,+fxsr,-3dnow,-3dnowa,-aes,-avx,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxvnni,-f16c,-fma,-fma4,-gfni,-kl,-mmx,-pclmul,-sha,-sse,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-widekl,-x87,-xop" }
diff --git a/llvm/test/CodeGen/X86/unfoldMemoryOperand.mir b/llvm/test/CodeGen/X86/unfoldMemoryOperand.mir
index 4c715b894fae8e..af57d972f22468 100644
--- a/llvm/test/CodeGen/X86/unfoldMemoryOperand.mir
+++ b/llvm/test/CodeGen/X86/unfoldMemoryOperand.mir
@@ -23,7 +23,7 @@
     br i1 %6, label %4, label %5, !llvm.loop !9
   }
 
-  attributes #0 = { nofree norecurse nosync nounwind uwtable writeonly mustprogress "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+x87,-aes,-avx,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxvnni,-f16c,-fma,-fma4,-gfni,-kl,-pclmul,-sha,-sse,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-widekl,-xop" "tune-cpu"="generic" }
+  attributes #0 = { nofree norecurse nosync nounwind uwtable writeonly mustprogress "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+x87,-aes,-avx,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxvnni,-f16c,-fma,-fma4,-gfni,-kl,-pclmul,-sha,-sse,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-widekl,-xop" "tune-cpu"="generic" }
 
   !llvm.module.flags = !{!0, !1}
   !llvm.ident = !{!2}
diff --git a/llvm/test/Transforms/LoopStrengthReduce/X86/pr40514.ll b/llvm/test/Transforms/LoopStrengthReduce/X86/pr40514.ll
index 03b1aece9e8702..a461f35d00dc98 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/X86/pr40514.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/X86/pr40514.ll
@@ -54,4 +54,4 @@ bb10:                                             ; preds = %bb10, %bb
 }
 
 
-attributes #0 = { "target-cpu"="broadwell" "target-features"="+sse2,+cx16,+sahf,-tbm,-avx512ifma,-sha,-gfni,-fma4,-vpclmulqdq,+prfchw,+bmi2,-cldemote,+fsgsbase,-ptwrite,-xsavec,+popcnt,+aes,-avx512bitalg,-movdiri,-xsaves,-avx512er,-avx512vnni,-avx512vpopcntdq,-pconfig,-clwb,-avx512f,-clzero,-pku,+mmx,-lwp,-rdpid,-xop,+rdseed,-waitpkg,-movdir64b,-sse4a,-avx512bw,-clflushopt,+xsave,-avx512vbmi2,+64bit,-avx512vl,+invpcid,-avx512cd,+avx,-vaes,+rtm,+fma,+bmi,+rdrnd,-mwaitx,+sse4.1,+sse4.2,+avx2,-wbnoinvd,+sse,+lzcnt,+pclmul,-prefetchwt1,+f16c,+ssse3,-sgx,-shstk,+cmov,-avx512vbmi,+movbe,+xsaveopt,-avx512dq,+adx,-avx512pf,+sse3" }
+attributes #0 = { "target-cpu"="broadwell" "target-features"="+sse2,+cx16,+sahf,-tbm,-avx512ifma,-sha,-gfni,-fma4,-vpclmulqdq,+prfchw,+bmi2,-cldemote,+fsgsbase,-ptwrite,-xsavec,+popcnt,+aes,-avx512bitalg,-movdiri,-xsaves,-avx512vnni,-avx512vpopcntdq,-pconfig,-clwb,-avx512f,-clzero,-pku,+mmx,-lwp,-rdpid,-xop,+rdseed,-waitpkg,-movdir64b,-sse4a,-avx512bw,-clflushopt,+xsave,-avx512vbmi2,+64bit,-avx512vl,+invpcid,-avx512cd,+avx,-vaes,+rtm,+fma,+bmi,+rdrnd,-mwaitx,+sse4.1,+sse4.2,+avx2,-wbnoinvd,+sse,+lzcnt,+pclmul,-prefetchwt1,+f16c,+ssse3,-sgx,-shstk,+cmov,-avx512vbmi,+movbe,+xsaveopt,-avx512dq,+adx,-avx512pf,+sse3" }
diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr23997.ll b/llvm/test/Transforms/LoopVectorize/X86/pr23997.ll
index 3a5db926082f09..3e3018f5060948 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/pr23997.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/pr23997.ll
@@ -88,7 +88,7 @@ loopexit:
   ret void
 }
 
-attributes #0 = { uwtable "target-cpu"="skylake" "target-features"="+sse2,+cx16,+sahf,-tbm,-avx512ifma,-sha,-gfni,-fma4,-vpclmulqdq,+prfchw,+bmi2,-cldemote,+fsgsbase,+xsavec,+popcnt,+aes,-avx512bitalg,+xsaves,-avx512er,-avx512vnni,-avx512vpopcntdq,-clwb,-avx512f,-clzero,-pku,+mmx,-lwp,-rdpid,-xop,+rdseed,-waitpkg,-sse4a,-avx512bw,+clflushopt,+xsave,-avx512vbmi2,-avx512vl,-avx512cd,+avx,-vaes,+rtm,+fma,+bmi,+rdrnd,-mwaitx,+sse4.1,+sse4.2,+avx2,-wbnoinvd,+sse,+lzcnt,+pclmul,-prefetchwt1,+f16c,+ssse3,+sgx,-shstk,+cmov,-avx512vbmi,+movbe,+xsaveopt,-avx512dq,+adx,-avx512pf,+sse3" }
+attributes #0 = { uwtable "target-cpu"="skylake" "target-features"="+sse2,+cx16,+sahf,-tbm,-avx512ifma,-sha,-gfni,-fma4,-vpclmulqdq,+prfchw,+bmi2,-cldemote,+fsgsbase,+xsavec,+popcnt,+aes,-avx512bitalg,+xsaves,-avx512vnni,-avx512vpopcntdq,-clwb,-avx512f,-clzero,-pku,+mmx,-lwp,-rdpid,-xop,+rdseed,-waitpkg,-sse4a,-avx512bw,+clflushopt,+xsave,-avx512vbmi2,-avx512vl,-avx512cd,+avx,-vaes,+rtm,+fma,+bmi,+rdrnd,-mwaitx,+sse4.1,+sse4.2,+avx2,-wbnoinvd,+sse,+lzcnt,+pclmul,-prefetchwt1,+f16c,+ssse3,+sgx,-shstk,+cmov,-avx512vbmi,+movbe,+xsaveopt,-avx512dq,+adx,-avx512pf,+sse3" }
 
 !0 = !{i32 0, i32 2147483646}
 !1 = !{}
diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr54634.ll b/llvm/test/Transforms/LoopVectorize/X86/pr54634.ll
index 5c9fe54b552120..20566005c93dfd 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/pr54634.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/pr54634.ll
@@ -118,7 +118,7 @@ L44:                                              ; preds = %L26
   ret ptr addrspace(10) null
 }
 
-attributes #0 = { "target-cpu"="skylake-avx512" "target-features"="+xsaves,+xsavec,+prfchw,+lzcnt,+sahf,+pku,+avx512vl,+avx512bw,+avx512cd,+clwb,+clflushopt,+adx,+avx512dq,+avx512f,+bmi2,+avx2,+bmi,+fsgsbase,+f16c,+avx,+xsave,+aes,+popcnt,+movbe,+sse4.2,+sse4.1,+cx16,+fma,+ssse3,+pclmul,+sse3,-rdrnd,-rtm,-rdseed,-avx512ifma,-avx512pf,-avx512er,-sha,-prefetchwt1,-avx512vbmi,-waitpkg,-avx512vbmi2,-shstk,-gfni,-vaes,-vpclmulqdq,-avx512vnni,-avx512bitalg,-avx512vpopcntdq,-rdpid,-cldemote,-movdiri,-movdir64b,-enqcmd,-avx512vp2intersect,-serialize,-tsxldtrk,-pconfig,-amx-bf16,-amx-tile,-amx-int8,-sse4a,-xop,-lwp,-fma4,-tbm,-mwaitx,-xsaveopt,-clzero,-wbnoinvd,-avx512bf16,-ptwrite,+sse2,+mmx,+fxsr,+64bit,+cx8" }
+attributes #0 = { "target-cpu"="skylake-avx512" "target-features"="+xsaves,+xsavec,+prfchw,+lzcnt,+sahf,+pku,+avx512vl,+avx512bw,+avx512cd,+clwb,+clflushopt,+adx,+avx512dq,+avx512f,+bmi2,+avx2,+bmi,+fsgsbase,+f16c,+avx,+xsave,+aes,+popcnt,+movbe,+sse4.2,+sse4.1,+cx16,+fma,+ssse3,+pclmul,+sse3,-rdrnd,-rtm,-rdseed,-avx512ifma,-avx512pf,-sha,-prefetchwt1,-avx512vbmi,-waitpkg,-avx512vbmi2,-shstk,-gfni,-vaes,-vpclmulqdq,-avx512vnni,-avx512bitalg,-avx512vpopcntdq,-rdpid,-cldemote,-movdiri,-movdir64b,-enqcmd,-avx512vp2intersect,-serialize,-tsxldtrk,-pconfig,-amx-bf16,-amx-tile,-amx-int8,-sse4a,-xop,-lwp,-fma4,-tbm,-mwaitx,-xsaveopt,-clzero,-wbnoinvd,-avx512bf16,-ptwrite,+sse2,+mmx,+fxsr,+64bit,+cx8" }
 attributes #1 = { inaccessiblemem_or_argmemonly }
 attributes #2 = { allocsize(1) }
 
diff --git a/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512er.s b/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512er.s
deleted file mode 100644
index 034fc6d83d153b..00000000000000
--- a/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512er.s
+++ /dev/null
@@ -1,373 +0,0 @@
-# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
-# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -instruction-tables < %s | FileCheck %s
-
-vexp2pd           %zmm16, %zmm19
-vexp2pd           (%rax), %zmm19
-vexp2pd           (%rax){1to8}, %zmm19
-vexp2pd           %zmm16, %zmm19 {k1}
-vexp2pd           (%rax), %zmm19 {k1}
-vexp2pd           (%rax){1to8}, %zmm19 {k1}
-vexp2pd           %zmm16, %zmm19 {z}{k1}
-vexp2pd           (%rax), %zmm19 {z}{k1}
-vexp2pd           (%rax){1to8}, %zmm19 {z}{k1}
-
-vexp2pd           {sae}, %zmm16, %zmm19
-vexp2pd           {sae}, %zmm16, %zmm19 {k1}
-vexp2pd           {sae}, %zmm16, %zmm19 {z}{k1}
-
-vexp2ps           %zmm16, %zmm19
-vexp2ps           (%rax), %zmm19
-vexp2ps           (%rax){1to16}, %zmm19
-vexp2ps           %zmm16, %zmm19 {k1}
-vexp2ps           (%rax), %zmm19 {k1}
-vexp2ps           (%rax){1to16}, %zmm19 {k1}
-vexp2ps           %zmm16, %zmm19 {z}{k1}
-vexp2ps           (%rax), %zmm19 {z}{k1}
-vexp2ps           (%rax){1to16}, %zmm19 {z}{k1}
-
-vexp2ps           {sae}, %zmm16, %zmm19
-vexp2ps           {sae}, %zmm16, %zmm19 {k1}
-vexp2ps           {sae}, %zmm16, %zmm19 {z}{k1}
-
-vrcp28pd          %zmm16, %zmm19
-vrcp28pd          (%rax), %zmm19
-vrcp28pd          (%rax){1to8}, %zmm19
-vrcp28pd          %zmm16, %zmm19 {k1}
-vrcp28pd          (%rax), %zmm19 {k1}
-vrcp28pd          (%rax){1to8}, %zmm19 {k1}
-vrcp28pd          %zmm16, %zmm19 {z}{k1}
-vrcp28pd          (%rax), %zmm19 {z}{k1}
-vrcp28pd          (%rax){1to8}, %zmm19 {z}{k1}
-
-vrcp28pd          {sae}, %zmm16, %zmm19
-vrcp28pd          {sae}, %zmm16, %zmm19 {k1}
-vrcp28pd          {sae}, %zmm16, %zmm19 {z}{k1}
-
-vrcp28ps          %zmm16, %zmm19
-vrcp28ps          (%rax), %zmm19
-vrcp28ps          (%rax){1to16}, %zmm19
-vrcp28ps          %zmm16, %zmm19 {k1}
-vrcp28ps          (%rax), %zmm19 {k1}
-vrcp28ps          (%rax){1to16}, %zmm19 {k1}
-vrcp28ps          %zmm16, %zmm19 {z}{k1}
-vrcp28ps          (%rax), %zmm19 {z}{k1}
-vrcp28ps          (%rax){1to16}, %zmm19 {z}{k1}
-
-vrcp28ps          {sae}, %zmm16, %zmm19
-vrcp28ps          {sae}, %zmm16, %zmm19 {k1}
-vrcp28ps          {sae}, %zmm16, %zmm19 {z}{k1}
-
-vrcp28sd          %xmm16, %xmm17, %xmm19
-vrcp28sd          (%rax), %xmm17, %xmm19
-vrcp28sd          %xmm16, %xmm17, %xmm19 {k1}
-vrcp28sd          (%rax), %xmm17, %xmm19 {k1}
-vrcp28sd          %xmm16, %xmm17, %xmm19 {z}{k1}
-vrcp28sd          (%rax), %xmm17, %xmm19 {z}{k1}
-
-vrcp28sd          {sae}, %xmm16, %xmm17, %xmm19
-vrcp28sd          {sae}, %xmm16, %xmm17, %xmm19 {k1}
-vrcp28sd          {sae}, %xmm16, %xmm17, %xmm19 {z}{k1}
-
-vrcp28ss          %xmm16, %xmm17, %xmm19
-vrcp28ss          (%rax), %xmm17, %xmm19
-vrcp28ss          %xmm16, %xmm17, %xmm19 {k1}
-vrcp28ss          (%rax), %xmm17, %xmm19 {k1}
-vrcp28ss          %xmm16, %xmm17, %xmm19 {z}{k1}
-vrcp28ss          (%rax), %xmm17, %xmm19 {z}{k1}
-
-vrcp28ss          {sae}, %xmm16, %xmm17, %xmm19
-vrcp28ss          {sae}, %xmm16, %xmm17, %xmm19 {k1}
-vrcp28ss          {sae}, %xmm16, %xmm17, %xmm19 {z}{k1}
-
-vrsqrt28pd        %zmm16, %zmm19
-vrsqrt28pd        (%rax), %zmm19
-vrsqrt28pd        (%rax){1to8}, %zmm19
-vrsqrt28pd        %zmm16, %zmm19 {k1}
-vrsqrt28pd        (%rax), %zmm19 {k1}
-vrsqrt28pd        (%rax){1to8}, %zmm19 {k1}
-vrsqrt28pd        %zmm16, %zmm19 {z}{k1}
-vrsqrt28pd        (%rax), %zmm19 {z}{k1}
-vrsqrt28pd        (%rax){1to8}, %zmm19 {z}{k1}
-
-vrsqrt28pd        {sae}, %zmm16, %zmm19
-vrsqrt28pd        {sae}, %zmm16, %zmm19 {k1}
-vrsqrt28pd        {sae}, %zmm16, %zmm19 {z}{k1}
-
-vrsqrt28ps        %zmm16, %zmm19
-vrsqrt28ps        (%rax), %zmm19
-vrsqrt28ps        (%rax){1to16}, %zmm19
-vrsqrt28ps        %zmm16, %zmm19 {k1}
-vrsqrt28ps        (%rax), %zmm19 {k1}
-vrsqrt28ps        (%rax){1to16}, %zmm19 {k1}
-vrsqrt28ps        %zmm16, %zmm19 {z}{k1}
-vrsqrt28ps        (%rax), %zmm19 {z}{k1}
-vrsqrt28ps        (%rax){1to16}, %zmm19 {z}{k1}
-
-vrsqrt28ps        {sae}, %zmm16, %zmm19
-vrsqrt28ps        {sae}, %zmm16, %zmm19 {k1}
-vrsqrt28ps        {sae}, %zmm16, %zmm19 {z}{k1}
-
-vrsqrt28sd        %xmm16, %xmm17, %xmm19
-vrsqrt28sd        (%rax), %xmm17, %xmm19
-vrsqrt28sd        %xmm16, %xmm17, %xmm19 {k1}
-vrsqrt28sd        (%rax), %xmm17, %xmm19 {k1}
-vrsqrt28sd        %xmm16, %xmm17, %xmm19 {z}{k1}
-vrsqrt28sd        (%rax), %xmm17, %xmm19 {z}{k1}
-
-vrsqrt28sd        {sae}, %xmm16, %xmm17, %xmm19
-vrsqrt28sd        {sae}, %xmm16, %xmm17, %xmm19 {k1}
-vrsqrt28sd        {sae}, %xmm16, %xmm17, %xmm19 {z}{k1}
-
-vrsqrt28ss        %xmm16, %xmm17, %xmm19
-vrsqrt28ss        (%rax), %xmm17, %xmm19
-vrsqrt28ss        %xmm16, %xmm17, %xmm19 {k1}
-vrsqrt28ss        (%rax), %xmm17, %xmm19 {k1}
-vrsqrt28ss        %xmm16, %xmm17, %xmm19 {z}{k1}
-vrsqrt28ss        (%rax), %xmm17, %xmm19 {z}{k1}
-
-vrsqrt28ss        {sae}, %xmm16, %xmm17, %xmm19
-vrsqrt28ss        {sae}, %xmm16, %xmm17, %xmm19 {k1}
-vrsqrt28ss        {sae}, %xmm16, %xmm17, %xmm19 {z}{k1}
-
-# CHECK:      Instruction Info:
-# CHECK-NEXT: [1]: #uOps
-# CHECK-NEXT: [2]: Latency
-# CHECK-NEXT: [3]: RThroughput
-# CHECK-NEXT: [4]: MayLoad
-# CHECK-NEXT: [5]: MayStore
-# CHECK-NEXT: [6]: HasSideEffects (U)
-
-# CHECK:      [1]    [2]    [3]    [4]    [5]    [6]    Instructions:
-# CHECK-NEXT:  1      3     1.00                        vexp2pd	%zmm16, %zmm19
-# CHECK-NEXT:  2      10    1.00    *                   vexp2pd	(%rax), %zmm19
-# CHECK-NEXT:  2      10    1.00    *                   vexp2pd	(%rax){1to8}, %zmm19
-# CHECK-NEXT:  1      3     1.00                        vexp2pd	%zmm16, %zmm19 {%k1}
-# CHECK-NEXT:  2      10    1.00    *                   vexp2pd	(%rax), %zmm19 {%k1}
-# CHECK-NEXT:  2      10    1.00    *                   vexp2pd	(%rax){1to8}, %zmm19 {%k1}
-# CHECK-NEXT:  1      3     1.00                        vexp2pd	%zmm16, %zmm19 {%k1} {z}
-# CHECK-NEXT:  2      10    1.00    *                   vexp2pd	(%rax), %zmm19 {%k1} {z}
-# CHECK-NEXT:  2      10    1.00    *                   vexp2pd	(%rax){1to8}, %zmm19 {%k1} {z}
-# CHECK-NEXT:  1      3     1.00                        vexp2pd	{sae}, %zmm16, %zmm19
-# CHECK-NEXT:  1      3     1.00                        vexp2pd	{sae}, %zmm16, %zmm19 {%k1}
-# CHECK-NEXT:  1      3     1.00                        vexp2pd	{sae}, %zmm16, %zmm19 {%k1} {z}
-# CHECK-NEXT:  1      3     1.00                        vexp2ps	%zmm16, %zmm19
-# CHECK-NEXT:  2      10    1.00    *                   vexp2ps	(%rax), %zmm19
-# CHECK-NEXT:  2      10    1.00    *                   vexp2ps	(%rax){1to16}, %zmm19
-# CHECK-NEXT:  1      3     1.00                        vexp2ps	%zmm16, %zmm19 {%k1}
-# CHECK-NEXT:  2      10    1.00    *                   vexp2ps	(%rax), %zmm19 {%k1}
-# CHECK-NEXT:  2      10    1.00    *                   vexp2ps	(%rax){1to16}, %zmm19 {%k1}
-# CHECK-NEXT:  1      3     1.00                        vexp2ps	%zmm16, %zmm19 {%k1} {z}
-# CHECK-NEXT:  2      10    1.00    *                   vexp2ps	(%rax), %zmm19 {%k1} {z}
-# CHECK-NEXT:  2      10    1.00    *                   vexp2ps	(%rax){1to16}, %zmm19 {%k1} {z}
-# CHECK-NEXT:  1      3     1.00                        vexp2ps	{sae}, %zmm16, %zmm19
-# CHECK-NEXT:  1      3     1.00                        vexp2ps	{sae}, %zmm16, %zmm19 {%k1}
-# CHECK-NEXT:  1      3     1.00                        vexp2ps	{sae}, %zmm16, %zmm19 {%k1} {z}
-# CHECK-NEXT:  3      7     2.00                        vrcp28pd	%zmm16, %zmm19
-# CHECK-NEXT:  4      14    2.00    *                   vrcp28pd	(%rax), %zmm19
-# CHECK-NEXT:  4      14    2.00    *                   vrcp28pd	(%rax){1to8}, %zmm19
-# CHECK-NEXT:  3      7     2.00                        vrcp28pd	%zmm16, %zmm19 {%k1}
-# CHECK-NEXT:  4      14    2.00    *                   vrcp28pd	(%rax), %zmm19 {%k1}
-# CHECK-NEXT:  4      14    2.00    *                   vrcp28pd	(%rax){1to8}, %zmm19 {%k1}
-# CHECK-NEXT:  3      7     2.00                        vrcp28pd	%zmm16, %zmm19 {%k1} {z}
-# CHECK-NEXT:  4      14    2.00    *                   vrcp28pd	(%rax), %zmm19 {%k1} {z}
-# CHECK-NEXT:  4      14    2.00    *                   vrcp28pd	(%rax){1to8}, %zmm19 {%k1} {z}
-# CHECK-NEXT:  3      7     2.00                        vrcp28pd	{sae}, %zmm16, %zmm19
-# CHECK-NEXT:  3      7     2.00                        vrcp28pd	{sae}, %zmm16, %zmm19 {%k1}
-# CHECK-NEXT:  3      7     2.00                        vrcp28pd	{sae}, %zmm16, %zmm19 {%k1} {z}
-# CHECK-NEXT:  3      7     2.00                        vrcp28ps	%zmm16, %zmm19
-# CHECK-NEXT:  4      14    2.00    *                   vrcp28ps	(%rax), %zmm19
-# CHECK-NEXT:  4      14    2.00    *                   vrcp28ps	(%rax){1to16}, %zmm19
-# CHECK-NEXT:  3      7     2.00                        vrcp28ps	%zmm16, %zmm19 {%k1}
-# CHECK-NEXT:  4      14    2.00    *                   vrcp28ps	(%rax), %zmm19 {%k1}
-# CHECK-NEXT:  4      14    2.00    *                   vrcp28ps	(%rax){1to16}, %zmm19 {%k1}
-# CHECK-NEXT:  3      7     2.00                        vrcp28ps	%zmm16, %zmm19 {%k1} {z}
-# CHECK-NEXT:  4      14    2.00    *                   vrcp28ps	(%rax), %zmm19 {%k1} {z}
-# CHECK-NEXT:  4      14    2.00    *                   vrcp28ps	(%rax){1to16}, %zmm19 {%k1} {z}
-# CHECK-NEXT:  3      7     2.00                        vrcp28ps	{sae}, %zmm16, %zmm19
-# CHECK-NEXT:  3      7     2.00                        vrcp28ps	{sae}, %zmm16, %zmm19 {%k1}
-# CHECK-NEXT:  3      7     2.00                        vrcp28ps	{sae}, %zmm16, %zmm19 {%k1} {z}
-# CHECK-NEXT:  1      5     1.00                        vrcp28sd	%xmm16, %xmm17, %xmm19
-# CHECK-NEXT:  2      11    1.00    *                   vrcp28sd	(%rax), %xmm17, %xmm19
-# CHECK-NEXT:  1      5     1.00                        vrcp28sd	%xmm16, %xmm17, %xmm19 {%k1}
-# CHECK-NEXT:  2      11    1.00    *                   vrcp28sd	(%rax), %xmm17, %xmm19 {%k1}
-# CHECK-NEXT:  1      5     1.00                        vrcp28sd	%xmm16, %xmm17, %xmm19 {%k1} {z}
-# CHECK-NEXT:  2      11    1.00    *                   vrcp28sd	(%rax), %xmm17, %xmm19 {%k1} {z}
-# CHECK-NEXT:  1      5     1.00                        vrcp28sd	{sae}, %xmm16, %xmm17, %xmm19
-# CHECK-NEXT:  1      5     1.00                        vrcp28sd	{sae}, %xmm16, %xmm17, %xmm19 {%k1}
-# CHECK-NEXT:  1      5     1.00                        vrcp28sd	{sae}, %xmm16, %xmm17, %xmm19 {%k1} {z}
-# CHECK-NEXT:  1      5     1.00                        vrcp28ss	%xmm16, %xmm17, %xmm19
-# CHECK-NEXT:  2      11    1.00    *                   vrcp28ss	(%rax), %xmm17, %xmm19
-# CHECK-NEXT:  1      5     1.00                        vrcp28ss	%xmm16, %xmm17, %xmm19 {%k1}
-# CHECK-NEXT:  2      11    1.00    *                   vrcp28ss	(%rax), %xmm17, %xmm19 {%k1}
-# CHECK-NEXT:  1      5     1.00                        vrcp28ss	%xmm16, %xmm17, %xmm19 {%k1} {z}
-# CHECK-NEXT:  2      11    1.00    *                   vrcp28ss	(%rax), %xmm17, %xmm19 {%k1} {z}
-# CHECK-NEXT:  1      5     1.00                        vrcp28ss	{sae}, %xmm16, %xmm17, %xmm19
-# CHECK-NEXT:  1      5     1.00                        vrcp28ss	{sae}, %xmm16, %xmm17, %xmm19 {%k1}
-# CHECK-NEXT:  1      5     1.00                        vrcp28ss	{sae}, %xmm16, %xmm17, %xmm19 {%k1} {z}
-# CHECK-NEXT:  3      7     2.00                        vrsqrt28pd	%zmm16, %zmm19
-# CHECK-NEXT:  4      14    2.00    *                   vrsqrt28pd	(%rax), %zmm19
-# CHECK-NEXT:  4      14    2.00    *                   vrsqrt28pd	(%rax){1to8}, %zmm19
-# CHECK-NEXT:  3      7     2.00                        vrsqrt28pd	%zmm16, %zmm19 {%k1}
-# CHECK-NEXT:  4      14    2.00    *                   vrsqrt28pd	(%rax), %zmm19 {%k1}
-# CHECK-NEXT:  4      14    2.00    *                   vrsqrt28pd	(%rax){1to8}, %zmm19 {%k1}
-# CHECK-NEXT:  3      7     2.00                        vrsqrt28pd	%zmm16, %zmm19 {%k1} {z}
-# CHECK-NEXT:  4      14    2.00    *                   vrsqrt28pd	(%rax), %zmm19 {%k1} {z}
-# CHECK-NEXT:  4      14    2.00    *                   vrsqrt28pd	(%rax){1to8}, %zmm19 {%k1} {z}
-# CHECK-NEXT:  3      7     2.00                        vrsqrt28pd	{sae}, %zmm16, %zmm19
-# CHECK-NEXT:  3      7     2.00                        vrsqrt28pd	{sae}, %zmm16, %zmm19 {%k1}
-# CHECK-NEXT:  3      7     2.00                        vrsqrt28pd	{sae}, %zmm16, %zmm19 {%k1} {z}
-# CHECK-NEXT:  3      7     2.00                        vrsqrt28ps	%zmm16, %zmm19
-# CHECK-NEXT:  4      14    2.00    *                   vrsqrt28ps	(%rax), %zmm19
-# CHECK-NEXT:  4      14    2.00    *                   vrsqrt28ps	(%rax){1to16}, %zmm19
-# CHECK-NEXT:  3      7     2.00                        vrsqrt28ps	%zmm16, %zmm19 {%k1}
-# CHECK-NEXT:  4      14    2.00    *                   vrsqrt28ps	(%rax), %zmm19 {%k1}
-# CHECK-NEXT:  4      14    2.00    *                   vrsqrt28ps	(%rax){1to16}, %zmm19 {%k1}
-# CHECK-NEXT:  3      7     2.00                        vrsqrt28ps	%zmm16, %zmm19 {%k1} {z}
-# CHECK-NEXT:  4      14    2.00    *                   vrsqrt28ps	(%rax), %zmm19 {%k1} {z}
-# CHECK-NEXT:  4      14    2.00    *                   vrsqrt28ps	(%rax){1to16}, %zmm19 {%k1} {z}
-# CHECK-NEXT:  3      7     2.00                        vrsqrt28ps	{sae}, %zmm16, %zmm19
-# CHECK-NEXT:  3      7     2.00                        vrsqrt28ps	{sae}, %zmm16, %zmm19 {%k1}
-# CHECK-NEXT:  3      7     2.00                        vrsqrt28ps	{sae}, %zmm16, %zmm19 {%k1} {z}
-# CHECK-NEXT:  1      5     1.00                        vrsqrt28sd	%xmm16, %xmm17, %xmm19
-# CHECK-NEXT:  2      11    1.00    *                   vrsqrt28sd	(%rax), %xmm17, %xmm19
-# CHECK-NEXT:  1      5     1.00                        vrsqrt28sd	%xmm16, %xmm17, %xmm19 {%k1}
-# CHECK-NEXT:  2      11    1.00    *                   vrsqrt28sd	(%rax), %xmm17, %xmm19 {%k1}
-# CHECK-NEXT:  1      5     1.00                        vrsqrt28sd	%xmm16, %xmm17, %xmm19 {%k1} {z}
-# CHECK-NEXT:  2      11    1.00    *                   vrsqrt28sd	(%rax), %xmm17, %xmm19 {%k1} {z}
-# CHECK-NEXT:  1      5     1.00                        vrsqrt28sd	{sae}, %xmm16, %xmm17, %xmm19
-# CHECK-NEXT:  1      5     1.00                        vrsqrt28sd	{sae}, %xmm16, %xmm17, %xmm19 {%k1}
-# CHECK-NEXT:  1      5     1.00                        vrsqrt28sd	{sae}, %xmm16, %xmm17, %xmm19 {%k1} {z}
-# CHECK-NEXT:  1      5     1.00                        vrsqrt28ss	%xmm16, %xmm17, %xmm19
-# CHECK-NEXT:  2      11    1.00    *                   vrsqrt28ss	(%rax), %xmm17, %xmm19
-# CHECK-NEXT:  1      5     1.00                        vrsqrt28ss	%xmm16, %xmm17, %xmm19 {%k1}
-# CHECK-NEXT:  2      11    1.00    *                   vrsqrt28ss	(%rax), %xmm17, %xmm19 {%k1}
-# CHECK-NEXT:  1      5     1.00                        vrsqrt28ss	%xmm16, %xmm17, %xmm19 {%k1} {z}
-# CHECK-NEXT:  2      11    1.00    *                   vrsqrt28ss	(%rax), %xmm17, %xmm19 {%k1} {z}
-# CHECK-NEXT:  1      5     1.00                        vrsqrt28ss	{sae}, %xmm16, %xmm17, %xmm19
-# CHECK-NEXT:  1      5     1.00                        vrsqrt28ss	{sae}, %xmm16, %xmm17, %xmm19 {%k1}
-# CHECK-NEXT:  1      5     1.00                        vrsqrt28ss	{sae}, %xmm16, %xmm17, %xmm19 {%k1} {z}
-
-# CHECK:      Resources:
-# CHECK-NEXT: [0]   - SBDivider
-# CHECK-NEXT: [1]   - SBFPDivider
-# CHECK-NEXT: [2]   - SBPort0
-# CHECK-NEXT: [3]   - SBPort1
-# CHECK-NEXT: [4]   - SBPort4
-# CHECK-NEXT: [5]   - SBPort5
-# CHECK-NEXT: [6.0] - SBPort23
-# CHECK-NEXT: [6.1] - SBPort23
-
-# CHECK:      Resource pressure per iteration:
-# CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6.0]  [6.1]
-# CHECK-NEXT:  -      -     156.00 24.00   -     24.00  24.00  24.00
-
-# CHECK:      Resource pressure by instruction:
-# CHECK-NEXT: [0]    [1]    [2]    [3]    [4]    [5]    [6.0]  [6.1]  Instructions:
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -     vexp2pd	%zmm16, %zmm19
-# CHECK-NEXT:  -      -      -     1.00    -      -     0.50   0.50   vexp2pd	(%rax), %zmm19
-# CHECK-NEXT:  -      -      -     1.00    -      -     0.50   0.50   vexp2pd	(%rax){1to8}, %zmm19
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -     vexp2pd	%zmm16, %zmm19 {%k1}
-# CHECK-NEXT:  -      -      -     1.00    -      -     0.50   0.50   vexp2pd	(%rax), %zmm19 {%k1}
-# CHECK-NEXT:  -      -      -     1.00    -      -     0.50   0.50   vexp2pd	(%rax){1to8}, %zmm19 {%k1}
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -     vexp2pd	%zmm16, %zmm19 {%k1} {z}
-# CHECK-NEXT:  -      -      -     1.00    -      -     0.50   0.50   vexp2pd	(%rax), %zmm19 {%k1} {z}
-# CHECK-NEXT:  -      -      -     1.00    -      -     0.50   0.50   vexp2pd	(%rax){1to8}, %zmm19 {%k1} {z}
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -     vexp2pd	{sae}, %zmm16, %zmm19
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -     vexp2pd	{sae}, %zmm16, %zmm19 {%k1}
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -     vexp2pd	{sae}, %zmm16, %zmm19 {%k1} {z}
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -     vexp2ps	%zmm16, %zmm19
-# CHECK-NEXT:  -      -      -     1.00    -      -     0.50   0.50   vexp2ps	(%rax), %zmm19
-# CHECK-NEXT:  -      -      -     1.00    -      -     0.50   0.50   vexp2ps	(%rax){1to16}, %zmm19
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -     vexp2ps	%zmm16, %zmm19 {%k1}
-# CHECK-NEXT:  -      -      -     1.00    -      -     0.50   0.50   vexp2ps	(%rax), %zmm19 {%k1}
-# CHECK-NEXT:  -      -      -     1.00    -      -     0.50   0.50   vexp2ps	(%rax){1to16}, %zmm19 {%k1}
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -     vexp2ps	%zmm16, %zmm19 {%k1} {z}
-# CHECK-NEXT:  -      -      -     1.00    -      -     0.50   0.50   vexp2ps	(%rax), %zmm19 {%k1} {z}
-# CHECK-NEXT:  -      -      -     1.00    -      -     0.50   0.50   vexp2ps	(%rax){1to16}, %zmm19 {%k1} {z}
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -     vexp2ps	{sae}, %zmm16, %zmm19
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -     vexp2ps	{sae}, %zmm16, %zmm19 {%k1}
-# CHECK-NEXT:  -      -      -     1.00    -      -      -      -     vexp2ps	{sae}, %zmm16, %zmm19 {%k1} {z}
-# CHECK-NEXT:  -      -     2.50    -      -     0.50    -      -     vrcp28pd	%zmm16, %zmm19
-# CHECK-NEXT:  -      -     2.50    -      -     0.50   0.50   0.50   vrcp28pd	(%rax), %zmm19
-# CHECK-NEXT:  -      -     2.50    -      -     0.50   0.50   0.50   vrcp28pd	(%rax){1to8}, %zmm19
-# CHECK-NEXT:  -      -     2.50    -      -     0.50    -      -     vrcp28pd	%zmm16, %zmm19 {%k1}
-# CHECK-NEXT:  -      -     2.50    -      -     0.50   0.50   0.50   vrcp28pd	(%rax), %zmm19 {%k1}
-# CHECK-NEXT:  -      -     2.50    -      -     0.50   0.50   0.50   vrcp28pd	(%rax){1to8}, %zmm19 {%k1}
-# CHECK-NEXT:  -      -     2.50    -      -     0.50    -      -     vrcp28pd	%zmm16, %zmm19 {%k1} {z}
-# CHECK-NEXT:  -      -     2.50    -      -     0.50   0.50   0.50   vrcp28pd	(%rax), %zmm19 {%k1} {z}
-# CHECK-NEXT:  -      -     2.50    -      -     0.50   0.50   0.50   vrcp28pd	(%rax){1to8}, %zmm19 {%k1} {z}
-# CHECK-NEXT:  -      -     2.50    -      -     0.50    -      -     vrcp28pd	{sae}, %zmm16, %zmm19
-# CHECK-NEXT:  -      -     2.50    -      -     0.50    -      -     vrcp28pd	{sae}, %zmm16, %zmm19 {%k1}
-# CHECK-NEXT:  -      -     2.50    -      -     0.50    -      -     vrcp28pd	{sae}, %zmm16, %zmm19 {%k1} {z}
-# CHECK-NEXT:  -      -     2.50    -      -     0.50    -      -     vrcp28ps	%zmm16, %zmm19
-# CHECK-NEXT:  -      -     2.50    -      -     0.50   0.50   0.50   vrcp28ps	(%rax), %zmm19
-# CHECK-NEXT:  -      -     2.50    -      -     0.50   0.50   0.50   vrcp28ps	(%rax){1to16}, %zmm19
-# CHECK-NEXT:  -      -     2.50    -      -     0.50    -      -     vrcp28ps	%zmm16, %zmm19 {%k1}
-# CHECK-NEXT:  -      -     2.50    -      -     0.50   0.50   0.50   vrcp28ps	(%rax), %zmm19 {%k1}
-# CHECK-NEXT:  -      -     2.50    -      -     0.50   0.50   0.50   vrcp28ps	(%rax){1to16}, %zmm19 {%k1}
-# CHECK-NEXT:  -      -     2.50    -      -     0.50    -      -     vrcp28ps	%zmm16, %zmm19 {%k1} {z}
-# CHECK-NEXT:  -      -     2.50    -      -     0.50   0.50   0.50   vrcp28ps	(%rax), %zmm19 {%k1} {z}
-# CHECK-NEXT:  -      -     2.50    -      -     0.50   0.50   0.50   vrcp28ps	(%rax){1to16}, %zmm19 {%k1} {z}
-# CHECK-NEXT:  -      -     2.50    -      -     0.50    -      -     vrcp28ps	{sae}, %zmm16, %zmm19
-# CHECK-NEXT:  -      -     2.50    -      -     0.50    -      -     vrcp28ps	{sae}, %zmm16, %zmm19 {%k1}
-# CHECK-NEXT:  -      -     2.50    -      -     0.50    -      -     vrcp28ps	{sae}, %zmm16, %zmm19 {%k1} {z}
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vrcp28sd	%xmm16, %xmm17, %xmm19
-# CHECK-NEXT:  -      -     1.00    -      -      -     0.50   0.50   vrcp28sd	(%rax), %xmm17, %xmm19
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vrcp28sd	%xmm16, %xmm17, %xmm19 {%k1}
-# CHECK-NEXT:  -      -     1.00    -      -      -     0.50   0.50   vrcp28sd	(%rax), %xmm17, %xmm19 {%k1}
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vrcp28sd	%xmm16, %xmm17, %xmm19 {%k1} {z}
-# CHECK-NEXT:  -      -     1.00    -      -      -     0.50   0.50   vrcp28sd	(%rax), %xmm17, %xmm19 {%k1} {z}
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vrcp28sd	{sae}, %xmm16, %xmm17, %xmm19
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vrcp28sd	{sae}, %xmm16, %xmm17, %xmm19 {%k1}
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vrcp28sd	{sae}, %xmm16, %xmm17, %xmm19 {%k1} {z}
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vrcp28ss	%xmm16, %xmm17, %xmm19
-# CHECK-NEXT:  -      -     1.00    -      -      -     0.50   0.50   vrcp28ss	(%rax), %xmm17, %xmm19
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vrcp28ss	%xmm16, %xmm17, %xmm19 {%k1}
-# CHECK-NEXT:  -      -     1.00    -      -      -     0.50   0.50   vrcp28ss	(%rax), %xmm17, %xmm19 {%k1}
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vrcp28ss	%xmm16, %xmm17, %xmm19 {%k1} {z}
-# CHECK-NEXT:  -      -     1.00    -      -      -     0.50   0.50   vrcp28ss	(%rax), %xmm17, %xmm19 {%k1} {z}
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vrcp28ss	{sae}, %xmm16, %xmm17, %xmm19
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vrcp28ss	{sae}, %xmm16, %xmm17, %xmm19 {%k1}
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vrcp28ss	{sae}, %xmm16, %xmm17, %xmm19 {%k1} {z}
-# CHECK-NEXT:  -      -     2.50    -      -     0.50    -      -     vrsqrt28pd	%zmm16, %zmm19
-# CHECK-NEXT:  -      -     2.50    -      -     0.50   0.50   0.50   vrsqrt28pd	(%rax), %zmm19
-# CHECK-NEXT:  -      -     2.50    -      -     0.50   0.50   0.50   vrsqrt28pd	(%rax){1to8}, %zmm19
-# CHECK-NEXT:  -      -     2.50    -      -     0.50    -      -     vrsqrt28pd	%zmm16, %zmm19 {%k1}
-# CHECK-NEXT:  -      -     2.50    -      -     0.50   0.50   0.50   vrsqrt28pd	(%rax), %zmm19 {%k1}
-# CHECK-NEXT:  -      -     2.50    -      -     0.50   0.50   0.50   vrsqrt28pd	(%rax){1to8}, %zmm19 {%k1}
-# CHECK-NEXT:  -      -     2.50    -      -     0.50    -      -     vrsqrt28pd	%zmm16, %zmm19 {%k1} {z}
-# CHECK-NEXT:  -      -     2.50    -      -     0.50   0.50   0.50   vrsqrt28pd	(%rax), %zmm19 {%k1} {z}
-# CHECK-NEXT:  -      -     2.50    -      -     0.50   0.50   0.50   vrsqrt28pd	(%rax){1to8}, %zmm19 {%k1} {z}
-# CHECK-NEXT:  -      -     2.50    -      -     0.50    -      -     vrsqrt28pd	{sae}, %zmm16, %zmm19
-# CHECK-NEXT:  -      -     2.50    -      -     0.50    -      -     vrsqrt28pd	{sae}, %zmm16, %zmm19 {%k1}
-# CHECK-NEXT:  -      -     2.50    -      -     0.50    -      -     vrsqrt28pd	{sae}, %zmm16, %zmm19 {%k1} {z}
-# CHECK-NEXT:  -      -     2.50    -      -     0.50    -      -     vrsqrt28ps	%zmm16, %zmm19
-# CHECK-NEXT:  -      -     2.50    -      -     0.50   0.50   0.50   vrsqrt28ps	(%rax), %zmm19
-# CHECK-NEXT:  -      -     2.50    -      -     0.50   0.50   0.50   vrsqrt28ps	(%rax){1to16}, %zmm19
-# CHECK-NEXT:  -      -     2.50    -      -     0.50    -      -     vrsqrt28ps	%zmm16, %zmm19 {%k1}
-# CHECK-NEXT:  -      -     2.50    -      -     0.50   0.50   0.50   vrsqrt28ps	(%rax), %zmm19 {%k1}
-# CHECK-NEXT:  -      -     2.50    -      -     0.50   0.50   0.50   vrsqrt28ps	(%rax){1to16}, %zmm19 {%k1}
-# CHECK-NEXT:  -      -     2.50    -      -     0.50    -      -     vrsqrt28ps	%zmm16, %zmm19 {%k1} {z}
-# CHECK-NEXT:  -      -     2.50    -      -     0.50   0.50   0.50   vrsqrt28ps	(%rax), %zmm19 {%k1} {z}
-# CHECK-NEXT:  -      -     2.50    -      -     0.50   0.50   0.50   vrsqrt28ps	(%rax){1to16}, %zmm19 {%k1} {z}
-# CHECK-NEXT:  -      -     2.50    -      -     0.50    -      -     vrsqrt28ps	{sae}, %zmm16, %zmm19
-# CHECK-NEXT:  -      -     2.50    -      -     0.50    -      -     vrsqrt28ps	{sae}, %zmm16, %zmm19 {%k1}
-# CHECK-NEXT:  -      -     2.50    -      -     0.50    -      -     vrsqrt28ps	{sae}, %zmm16, %zmm19 {%k1} {z}
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vrsqrt28sd	%xmm16, %xmm17, %xmm19
-# CHECK-NEXT:  -      -     1.00    -      -      -     0.50   0.50   vrsqrt28sd	(%rax), %xmm17, %xmm19
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vrsqrt28sd	%xmm16, %xmm17, %xmm19 {%k1}
-# CHECK-NEXT:  -      -     1.00    -      -      -     0.50   0.50   vrsqrt28sd	(%rax), %xmm17, %xmm19 {%k1}
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vrsqrt28sd	%xmm16, %xmm17, %xmm19 {%k1} {z}
-# CHECK-NEXT:  -      -     1.00    -      -      -     0.50   0.50   vrsqrt28sd	(%rax), %xmm17, %xmm19 {%k1} {z}
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vrsqrt28sd	{sae}, %xmm16, %xmm17, %xmm19
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vrsqrt28sd	{sae}, %xmm16, %xmm17, %xmm19 {%k1}
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vrsqrt28sd	{sae}, %xmm16, %xmm17, %xmm19 {%k1} {z}
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vrsqrt28ss	%xmm16, %xmm17, %xmm19
-# CHECK-NEXT:  -      -     1.00    -      -      -     0.50   0.50   vrsqrt28ss	(%rax), %xmm17, %xmm19
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vrsqrt28ss	%xmm16, %xmm17, %xmm19 {%k1}
-# CHECK-NEXT:  -      -     1.00    -      -      -     0.50   0.50   vrsqrt28ss	(%rax), %xmm17, %xmm19 {%k1}
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vrsqrt28ss	%xmm16, %xmm17, %xmm19 {%k1} {z}
-# CHECK-NEXT:  -      -     1.00    -      -      -     0.50   0.50   vrsqrt28ss	(%rax), %xmm17, %xmm19 {%k1} {z}
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vrsqrt28ss	{sae}, %xmm16, %xmm17, %xmm19
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vrsqrt28ss	{sae}, %xmm16, %xmm17, %xmm19 {%k1}
-# CHECK-NEXT:  -      -     1.00    -      -      -      -      -     vrsqrt28ss	{sae}, %xmm16, %xmm17, %xmm19 {%k1} {z}
diff --git a/llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn b/llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn
index 6059074dfa27b6..dca476e658b4f2 100644
--- a/llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn
+++ b/llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn
@@ -138,7 +138,6 @@ copy("Headers") {
     "avx512bwintrin.h",
     "avx512cdintrin.h",
     "avx512dqintrin.h",
-    "avx512erintrin.h",
     "avx512fintrin.h",
     "avx512fp16intrin.h",
     "avx512ifmaintrin.h",

>From 0d14f817b3349ad0d320ccc77a11bfe703259240 Mon Sep 17 00:00:00 2001
From: Freddy Ye <freddy.ye at intel.com>
Date: Tue, 26 Dec 2023 10:47:09 +0800
Subject: [PATCH 6/7] Remove AVX512PF intrinsic supports.

---
 clang/include/clang/Basic/BuiltinsX86.def     |   9 --
 clang/include/clang/Driver/Options.td         |   2 -
 clang/lib/Basic/Targets/X86.cpp               |   6 --
 clang/lib/Basic/Targets/X86.h                 |   1 -
 clang/lib/Headers/CMakeLists.txt              |   1 -
 clang/lib/Headers/avx512pfintrin.h            |  92 ----------------
 clang/lib/Headers/cpuid.h                     |   1 -
 clang/lib/Headers/immintrin.h                 |   5 -
 clang/lib/Sema/SemaChecking.cpp               |  20 ----
 clang/test/CodeGen/X86/avx512pf-builtins.c    | 100 ------------------
 clang/test/CodeGen/target-builtin-noerror.c   |   1 -
 clang/test/Preprocessor/x86_target_features.c |  32 ------
 clang/test/Sema/builtins-x86.c                |   8 --
 llvm/include/llvm/IR/IntrinsicsX86.td         |  32 ------
 llvm/lib/Target/X86/X86.td                    |   3 -
 llvm/lib/Target/X86/X86InstrAVX512.td         |   2 +-
 llvm/lib/Target/X86/X86InstrPredicates.td     |   1 -
 llvm/lib/Target/X86/X86IntrinsicsInfo.h       |  17 ---
 llvm/lib/TargetParser/Host.cpp                |   3 -
 ...avx512-gather-scatter-intrin-deprecated.ll |  24 -----
 .../X86/avx512-gather-scatter-intrin.ll       |  24 -----
 .../X86/insert-prefetch-invalid-instr.ll      |   5 -
 .../X86/speculative-load-hardening-gather.ll  |  22 ----
 23 files changed, 1 insertion(+), 410 deletions(-)
 delete mode 100644 clang/lib/Headers/avx512pfintrin.h
 delete mode 100644 clang/test/CodeGen/X86/avx512pf-builtins.c

diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def
index 00a69b16a51283..4bca2a5c465ec8 100644
--- a/clang/include/clang/Basic/BuiltinsX86.def
+++ b/clang/include/clang/Basic/BuiltinsX86.def
@@ -955,15 +955,6 @@ TARGET_BUILTIN(__builtin_ia32_scattersiv16si, "vv*UsV16iV16iIi", "nV:512:", "avx
 TARGET_BUILTIN(__builtin_ia32_scatterdiv8di,  "vv*UcV8OiV8OiIi", "nV:512:", "avx512f,evex512")
 TARGET_BUILTIN(__builtin_ia32_scatterdiv16si, "vv*UcV8OiV8iIi", "nV:512:", "avx512f,evex512")
 
-TARGET_BUILTIN(__builtin_ia32_gatherpfdpd,  "vUcV8ivC*IiIi", "nV:512:", "avx512pf,evex512")
-TARGET_BUILTIN(__builtin_ia32_gatherpfdps,  "vUsV16ivC*IiIi", "nV:512:", "avx512pf,evex512")
-TARGET_BUILTIN(__builtin_ia32_gatherpfqpd,  "vUcV8OivC*IiIi", "nV:512:", "avx512pf,evex512")
-TARGET_BUILTIN(__builtin_ia32_gatherpfqps,  "vUcV8OivC*IiIi", "nV:512:", "avx512pf,evex512")
-TARGET_BUILTIN(__builtin_ia32_scatterpfdpd, "vUcV8iv*IiIi", "nV:512:", "avx512pf,evex512")
-TARGET_BUILTIN(__builtin_ia32_scatterpfdps, "vUsV16iv*IiIi", "nV:512:", "avx512pf,evex512")
-TARGET_BUILTIN(__builtin_ia32_scatterpfqpd, "vUcV8Oiv*IiIi", "nV:512:", "avx512pf,evex512")
-TARGET_BUILTIN(__builtin_ia32_scatterpfqps, "vUcV8Oiv*IiIi", "nV:512:", "avx512pf,evex512")
-
 TARGET_BUILTIN(__builtin_ia32_knotqi, "UcUc", "nc", "avx512dq")
 TARGET_BUILTIN(__builtin_ia32_knothi, "UsUs", "nc", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_knotsi, "UiUi", "nc", "avx512bw")
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index a2f26b9ca4c356..3f17fcaf36b908 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -5869,8 +5869,6 @@ def mavx512fp16 : Flag<["-"], "mavx512fp16">, Group<m_x86_Features_Group>;
 def mno_avx512fp16 : Flag<["-"], "mno-avx512fp16">, Group<m_x86_Features_Group>;
 def mavx512ifma : Flag<["-"], "mavx512ifma">, Group<m_x86_Features_Group>;
 def mno_avx512ifma : Flag<["-"], "mno-avx512ifma">, Group<m_x86_Features_Group>;
-def mavx512pf : Flag<["-"], "mavx512pf">, Group<m_x86_Features_Group>;
-def mno_avx512pf : Flag<["-"], "mno-avx512pf">, Group<m_x86_Features_Group>;
 def mavx512vbmi : Flag<["-"], "mavx512vbmi">, Group<m_x86_Features_Group>;
 def mno_avx512vbmi : Flag<["-"], "mno-avx512vbmi">, Group<m_x86_Features_Group>;
 def mavx512vbmi2 : Flag<["-"], "mavx512vbmi2">, Group<m_x86_Features_Group>;
diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp
index 8b0e021a488424..4e8613ce7cfc34 100644
--- a/clang/lib/Basic/Targets/X86.cpp
+++ b/clang/lib/Basic/Targets/X86.cpp
@@ -296,8 +296,6 @@ bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
     } else if (Feature == "+avx512fp16") {
       HasAVX512FP16 = true;
       HasLegalHalfType = true;
-    } else if (Feature == "+avx512pf") {
-      HasAVX512PF = true;
     } else if (Feature == "+avx512dq") {
       HasAVX512DQ = true;
     } else if (Feature == "+avx512bitalg") {
@@ -813,8 +811,6 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts,
     Builder.defineMacro("__AVX512BF16__");
   if (HasAVX512FP16)
     Builder.defineMacro("__AVX512FP16__");
-  if (HasAVX512PF)
-    Builder.defineMacro("__AVX512PF__");
   if (HasAVX512DQ)
     Builder.defineMacro("__AVX512DQ__");
   if (HasAVX512BITALG)
@@ -1049,7 +1045,6 @@ bool X86TargetInfo::isValidFeatureName(StringRef Name) const {
       .Case("avx512vnni", true)
       .Case("avx512bf16", true)
       .Case("avx512fp16", true)
-      .Case("avx512pf", true)
       .Case("avx512dq", true)
       .Case("avx512bitalg", true)
       .Case("avx512bw", true)
@@ -1164,7 +1159,6 @@ bool X86TargetInfo::hasFeature(StringRef Feature) const {
       .Case("avx512vnni", HasAVX512VNNI)
       .Case("avx512bf16", HasAVX512BF16)
       .Case("avx512fp16", HasAVX512FP16)
-      .Case("avx512pf", HasAVX512PF)
       .Case("avx512dq", HasAVX512DQ)
       .Case("avx512bitalg", HasAVX512BITALG)
       .Case("avx512bw", HasAVX512BW)
diff --git a/clang/lib/Basic/Targets/X86.h b/clang/lib/Basic/Targets/X86.h
index a890348073e889..8e5d7d56a17b1f 100644
--- a/clang/lib/Basic/Targets/X86.h
+++ b/clang/lib/Basic/Targets/X86.h
@@ -103,7 +103,6 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo {
   bool HasAVX512VNNI = false;
   bool HasAVX512FP16 = false;
   bool HasAVX512BF16 = false;
-  bool HasAVX512PF = false;
   bool HasAVX512DQ = false;
   bool HasAVX512BITALG = false;
   bool HasAVX512BW = false;
diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt
index 09c62d14085df2..65e483f3598408 100644
--- a/clang/lib/Headers/CMakeLists.txt
+++ b/clang/lib/Headers/CMakeLists.txt
@@ -155,7 +155,6 @@ set(x86_files
   avx512fp16intrin.h
   avx512ifmaintrin.h
   avx512ifmavlintrin.h
-  avx512pfintrin.h
   avx512vbmi2intrin.h
   avx512vbmiintrin.h
   avx512vbmivlintrin.h
diff --git a/clang/lib/Headers/avx512pfintrin.h b/clang/lib/Headers/avx512pfintrin.h
deleted file mode 100644
index f853be021a2dd3..00000000000000
--- a/clang/lib/Headers/avx512pfintrin.h
+++ /dev/null
@@ -1,92 +0,0 @@
-/*===------------- avx512pfintrin.h - PF intrinsics ------------------------===
- *
- *
- * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
- * See https://llvm.org/LICENSE.txt for license information.
- * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- *
- *===-----------------------------------------------------------------------===
- */
-#ifndef __IMMINTRIN_H
-#error "Never use <avx512pfintrin.h> directly; include <immintrin.h> instead."
-#endif
-
-#ifndef __AVX512PFINTRIN_H
-#define __AVX512PFINTRIN_H
-
-#define _mm512_mask_prefetch_i32gather_pd(index, mask, addr, scale, hint) \
-  __builtin_ia32_gatherpfdpd((__mmask8)(mask), (__v8si)(__m256i)(index), \
-                             (void const *)(addr), (int)(scale), \
-                             (int)(hint))
-
-#define _mm512_prefetch_i32gather_pd(index, addr, scale, hint) \
-  __builtin_ia32_gatherpfdpd((__mmask8) -1, (__v8si)(__m256i)(index), \
-                             (void const *)(addr), (int)(scale), \
-                             (int)(hint))
-
-#define _mm512_mask_prefetch_i32gather_ps(index, mask, addr, scale, hint) \
-  __builtin_ia32_gatherpfdps((__mmask16)(mask), \
-                             (__v16si)(__m512i)(index), (void const *)(addr), \
-                             (int)(scale), (int)(hint))
-
-#define _mm512_prefetch_i32gather_ps(index, addr, scale, hint) \
-  __builtin_ia32_gatherpfdps((__mmask16) -1, \
-                             (__v16si)(__m512i)(index), (void const *)(addr), \
-                             (int)(scale), (int)(hint))
-
-#define _mm512_mask_prefetch_i64gather_pd(index, mask, addr, scale, hint) \
-  __builtin_ia32_gatherpfqpd((__mmask8)(mask), (__v8di)(__m512i)(index), \
-                             (void const *)(addr), (int)(scale), \
-                             (int)(hint))
-
-#define _mm512_prefetch_i64gather_pd(index, addr, scale, hint) \
-  __builtin_ia32_gatherpfqpd((__mmask8) -1, (__v8di)(__m512i)(index), \
-                             (void const *)(addr), (int)(scale), \
-                             (int)(hint))
-
-#define _mm512_mask_prefetch_i64gather_ps(index, mask, addr, scale, hint) \
-  __builtin_ia32_gatherpfqps((__mmask8)(mask), (__v8di)(__m512i)(index), \
-                             (void const *)(addr), (int)(scale), (int)(hint))
-
-#define _mm512_prefetch_i64gather_ps(index, addr, scale, hint) \
-  __builtin_ia32_gatherpfqps((__mmask8) -1, (__v8di)(__m512i)(index), \
-                             (void const *)(addr), (int)(scale), (int)(hint))
-
-#define _mm512_prefetch_i32scatter_pd(addr, index, scale, hint) \
-  __builtin_ia32_scatterpfdpd((__mmask8)-1, (__v8si)(__m256i)(index), \
-                              (void *)(addr), (int)(scale), \
-                              (int)(hint))
-
-#define _mm512_mask_prefetch_i32scatter_pd(addr, mask, index, scale, hint) \
-  __builtin_ia32_scatterpfdpd((__mmask8)(mask), (__v8si)(__m256i)(index), \
-                              (void *)(addr), (int)(scale), \
-                              (int)(hint))
-
-#define _mm512_prefetch_i32scatter_ps(addr, index, scale, hint) \
-  __builtin_ia32_scatterpfdps((__mmask16)-1, (__v16si)(__m512i)(index), \
-                              (void *)(addr), (int)(scale), (int)(hint))
-
-#define _mm512_mask_prefetch_i32scatter_ps(addr, mask, index, scale, hint) \
-  __builtin_ia32_scatterpfdps((__mmask16)(mask), \
-                              (__v16si)(__m512i)(index), (void *)(addr), \
-                              (int)(scale), (int)(hint))
-
-#define _mm512_prefetch_i64scatter_pd(addr, index, scale, hint) \
-  __builtin_ia32_scatterpfqpd((__mmask8)-1, (__v8di)(__m512i)(index), \
-                              (void *)(addr), (int)(scale), \
-                              (int)(hint))
-
-#define _mm512_mask_prefetch_i64scatter_pd(addr, mask, index, scale, hint) \
-  __builtin_ia32_scatterpfqpd((__mmask8)(mask), (__v8di)(__m512i)(index), \
-                              (void *)(addr), (int)(scale), \
-                              (int)(hint))
-
-#define _mm512_prefetch_i64scatter_ps(addr, index, scale, hint) \
-  __builtin_ia32_scatterpfqps((__mmask8)-1, (__v8di)(__m512i)(index), \
-                              (void *)(addr), (int)(scale), (int)(hint))
-
-#define _mm512_mask_prefetch_i64scatter_ps(addr, mask, index, scale, hint) \
-  __builtin_ia32_scatterpfqps((__mmask8)(mask), (__v8di)(__m512i)(index), \
-                              (void *)(addr), (int)(scale), (int)(hint))
-
-#endif
diff --git a/clang/lib/Headers/cpuid.h b/clang/lib/Headers/cpuid.h
index b09ca3585d6067..0902734cd73af3 100644
--- a/clang/lib/Headers/cpuid.h
+++ b/clang/lib/Headers/cpuid.h
@@ -159,7 +159,6 @@
 #define bit_AVX512IFMA  0x00200000
 #define bit_CLFLUSHOPT  0x00800000
 #define bit_CLWB        0x01000000
-#define bit_AVX512PF    0x04000000
 #define bit_AVX512CD    0x10000000
 #define bit_SHA         0x20000000
 #define bit_AVX512BW    0x40000000
diff --git a/clang/lib/Headers/immintrin.h b/clang/lib/Headers/immintrin.h
index 876392e9a5daf0..d2aff11e0abfad 100644
--- a/clang/lib/Headers/immintrin.h
+++ b/clang/lib/Headers/immintrin.h
@@ -209,11 +209,6 @@
 #include <avx512vlvbmi2intrin.h>
 #endif
 
-#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
-    defined(__AVX512PF__)
-#include <avx512pfintrin.h>
-#endif
-
 #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) ||      \
     defined(__AVX512FP16__)
 #include <avx512fp16intrin.h>
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index e14bb9b1287b12..abffc2f9b760fe 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -6545,16 +6545,6 @@ bool Sema::CheckX86BuiltinGatherScatterScale(unsigned BuiltinID,
   switch (BuiltinID) {
   default:
     return false;
-  case X86::BI__builtin_ia32_gatherpfdpd:
-  case X86::BI__builtin_ia32_gatherpfdps:
-  case X86::BI__builtin_ia32_gatherpfqpd:
-  case X86::BI__builtin_ia32_gatherpfqps:
-  case X86::BI__builtin_ia32_scatterpfdpd:
-  case X86::BI__builtin_ia32_scatterpfdps:
-  case X86::BI__builtin_ia32_scatterpfqpd:
-  case X86::BI__builtin_ia32_scatterpfqps:
-    ArgNum = 3;
-    break;
   case X86::BI__builtin_ia32_gatherd_pd:
   case X86::BI__builtin_ia32_gatherd_pd256:
   case X86::BI__builtin_ia32_gatherq_pd:
@@ -7067,16 +7057,6 @@ bool Sema::CheckX86BuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID,
   case X86::BI__builtin_ia32_vsm3rnds2:
     i = 3; l = 0; u = 255;
     break;
-  case X86::BI__builtin_ia32_gatherpfdpd:
-  case X86::BI__builtin_ia32_gatherpfdps:
-  case X86::BI__builtin_ia32_gatherpfqpd:
-  case X86::BI__builtin_ia32_gatherpfqps:
-  case X86::BI__builtin_ia32_scatterpfdpd:
-  case X86::BI__builtin_ia32_scatterpfdps:
-  case X86::BI__builtin_ia32_scatterpfqpd:
-  case X86::BI__builtin_ia32_scatterpfqps:
-    i = 4; l = 2; u = 3;
-    break;
   case X86::BI__builtin_ia32_reducesd_mask:
   case X86::BI__builtin_ia32_reducess_mask:
   case X86::BI__builtin_ia32_rndscalesd_round_mask:
diff --git a/clang/test/CodeGen/X86/avx512pf-builtins.c b/clang/test/CodeGen/X86/avx512pf-builtins.c
deleted file mode 100644
index 4ca70f5787968b..00000000000000
--- a/clang/test/CodeGen/X86/avx512pf-builtins.c
+++ /dev/null
@@ -1,100 +0,0 @@
-// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512pf -emit-llvm -o - -Wall -Werror | FileCheck %s
-
-
-#include <immintrin.h>
-
-void test_mm512_mask_prefetch_i32gather_pd(__m256i index, __mmask8 mask, void const *addr) {
-  // CHECK-LABEL: @test_mm512_mask_prefetch_i32gather_pd
-  // CHECK: @llvm.x86.avx512.gatherpf.dpd
-  return _mm512_mask_prefetch_i32gather_pd(index, mask, addr, 2, _MM_HINT_T0); 
-}
-
-void test_mm512_prefetch_i32gather_pd(__m256i index, void const *addr) {
-  // CHECK-LABEL: @test_mm512_prefetch_i32gather_pd
-  // CHECK: @llvm.x86.avx512.gatherpf.dpd
-  return _mm512_prefetch_i32gather_pd(index, addr, 2, _MM_HINT_T0); 
-}
-
-void test_mm512_mask_prefetch_i32gather_ps(__m512i index, __mmask16 mask, void const *addr) {
-  // CHECK-LABEL: @test_mm512_mask_prefetch_i32gather_ps
-  // CHECK: @llvm.x86.avx512.gatherpf.dps
-  return _mm512_mask_prefetch_i32gather_ps(index, mask, addr, 2, _MM_HINT_T0); 
-}
-
-void test_mm512_prefetch_i32gather_ps(__m512i index,  void const *addr) {
-  // CHECK-LABEL: @test_mm512_prefetch_i32gather_ps
-  // CHECK: @llvm.x86.avx512.gatherpf.dps
-  return _mm512_prefetch_i32gather_ps(index, addr, 2, _MM_HINT_T0); 
-}
-
-void test_mm512_mask_prefetch_i64gather_pd(__m512i index, __mmask8 mask, void const *addr) {
-  // CHECK-LABEL: @test_mm512_mask_prefetch_i64gather_pd
-  // CHECK: @llvm.x86.avx512.gatherpf.qpd
-  return _mm512_mask_prefetch_i64gather_pd(index, mask, addr, 2, _MM_HINT_T0); 
-}
-
-void test_mm512_prefetch_i64gather_pd(__m512i index, void const *addr) {
-  // CHECK-LABEL: @test_mm512_prefetch_i64gather_pd
-  // CHECK: @llvm.x86.avx512.gatherpf.qpd
-  return _mm512_prefetch_i64gather_pd(index, addr, 2, _MM_HINT_T0); 
-}
-
-void test_mm512_mask_prefetch_i64gather_ps(__m512i index, __mmask8 mask, void const *addr) {
-  // CHECK-LABEL: @test_mm512_mask_prefetch_i64gather_ps
-  // CHECK: @llvm.x86.avx512.gatherpf.qps
-  return _mm512_mask_prefetch_i64gather_ps(index, mask, addr, 2, _MM_HINT_T0); 
-}
-
-void test_mm512_prefetch_i64gather_ps(__m512i index, void const *addr) {
-  // CHECK-LABEL: @test_mm512_prefetch_i64gather_ps
-  // CHECK: @llvm.x86.avx512.gatherpf.qps
-  return _mm512_prefetch_i64gather_ps(index, addr, 2, _MM_HINT_T0); 
-}
-
-void test_mm512_prefetch_i32scatter_pd(void *addr, __m256i index) {
-  // CHECK-LABEL: @test_mm512_prefetch_i32scatter_pd
-  // CHECK: @llvm.x86.avx512.scatterpf.dpd.512
-  return _mm512_prefetch_i32scatter_pd(addr, index, 1, _MM_HINT_T1); 
-}
-
-void test_mm512_mask_prefetch_i32scatter_pd(void *addr, __mmask8 mask, __m256i index) {
-  // CHECK-LABEL: @test_mm512_mask_prefetch_i32scatter_pd
-  // CHECK: @llvm.x86.avx512.scatterpf.dpd.512
-  return _mm512_mask_prefetch_i32scatter_pd(addr, mask, index, 1, _MM_HINT_T1); 
-}
-
-void test_mm512_prefetch_i32scatter_ps(void *addr, __m512i index) {
-  // CHECK-LABEL: @test_mm512_prefetch_i32scatter_ps
-  // CHECK: @llvm.x86.avx512.scatterpf.dps.512
-  return _mm512_prefetch_i32scatter_ps(addr, index, 1, _MM_HINT_T1); 
-}
-
-void test_mm512_mask_prefetch_i32scatter_ps(void *addr, __mmask16 mask, __m512i index) {
-  // CHECK-LABEL: @test_mm512_mask_prefetch_i32scatter_ps
-  // CHECK: @llvm.x86.avx512.scatterpf.dps.512
-  return _mm512_mask_prefetch_i32scatter_ps(addr, mask, index, 1, _MM_HINT_T1); 
-}
-
-void test_mm512_prefetch_i64scatter_pd(void *addr, __m512i index) {
-  // CHECK-LABEL: @test_mm512_prefetch_i64scatter_pd
-  // CHECK: @llvm.x86.avx512.scatterpf.qpd.512
-  return _mm512_prefetch_i64scatter_pd(addr, index, 1, _MM_HINT_T1); 
-}
-
-void test_mm512_mask_prefetch_i64scatter_pd(void *addr, __mmask16 mask, __m512i index) {
-  // CHECK-LABEL: @test_mm512_mask_prefetch_i64scatter_pd
-  // CHECK: @llvm.x86.avx512.scatterpf.qpd.512
-  return _mm512_mask_prefetch_i64scatter_pd(addr, mask, index, 1, _MM_HINT_T1); 
-}
-
-void test_mm512_prefetch_i64scatter_ps(void *addr, __m512i index) {
-  // CHECK-LABEL: @test_mm512_prefetch_i64scatter_ps
-  // CHECK: @llvm.x86.avx512.scatterpf.qps.512
-  return _mm512_prefetch_i64scatter_ps(addr, index, 1, _MM_HINT_T1); 
-}
-
-void test_mm512_mask_prefetch_i64scatter_ps(void *addr, __mmask16 mask, __m512i index) {
-  // CHECK-LABEL: @test_mm512_mask_prefetch_i64scatter_ps
-  // CHECK: @llvm.x86.avx512.scatterpf.qps.512
-  return _mm512_mask_prefetch_i64scatter_ps(addr, mask, index, 1, _MM_HINT_T1); 
-}
diff --git a/clang/test/CodeGen/target-builtin-noerror.c b/clang/test/CodeGen/target-builtin-noerror.c
index 9beea3bdef69a7..43ee8d5740c3b6 100644
--- a/clang/test/CodeGen/target-builtin-noerror.c
+++ b/clang/test/CodeGen/target-builtin-noerror.c
@@ -68,7 +68,6 @@ void verifyfeaturestrings(void) {
   (void)__builtin_cpu_supports("avx512bw");
   (void)__builtin_cpu_supports("avx512dq");
   (void)__builtin_cpu_supports("avx512cd");
-  (void)__builtin_cpu_supports("avx512pf");
   (void)__builtin_cpu_supports("avx512vbmi");
   (void)__builtin_cpu_supports("avx512ifma");
   (void)__builtin_cpu_supports("avx5124vnniw");
diff --git a/clang/test/Preprocessor/x86_target_features.c b/clang/test/Preprocessor/x86_target_features.c
index acba2a0a9bda22..e667a693522311 100644
--- a/clang/test/Preprocessor/x86_target_features.c
+++ b/clang/test/Preprocessor/x86_target_features.c
@@ -90,22 +90,6 @@
 // AVX512CD: #define __SSE__ 1
 // AVX512CD: #define __SSSE3__ 1
 
-// RUN: %clang -target i386-unknown-unknown -march=atom -mavx512pf -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=AVX512PF %s
-
-// AVX512PF: #define __AVX2__ 1
-// AVX512PF: #define __AVX512F__ 1
-// AVX512PF: #define __AVX512PF__ 1
-// AVX512PF: #define __AVX__ 1
-// AVX512PF: #define __EVEX512__ 1
-// AVX512PF: #define __SSE2_MATH__ 1
-// AVX512PF: #define __SSE2__ 1
-// AVX512PF: #define __SSE3__ 1
-// AVX512PF: #define __SSE4_1__ 1
-// AVX512PF: #define __SSE4_2__ 1
-// AVX512PF: #define __SSE_MATH__ 1
-// AVX512PF: #define __SSE__ 1
-// AVX512PF: #define __SSSE3__ 1
-
 // RUN: %clang -target i386-unknown-unknown -march=atom -mavx512dq -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=AVX512DQ %s
 
 // AVX512DQ: #define __AVX2__ 1
@@ -155,22 +139,6 @@
 // AVX512VL: #define __SSE__ 1
 // AVX512VL: #define __SSSE3__ 1
 
-// RUN: %clang -target i386-unknown-unknown -march=atom -mavx512pf -mno-avx512f -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=AVX512F2 %s
-
-// AVX512F2: #define __AVX2__ 1
-// AVX512F2-NOT: #define __AVX512F__ 1
-// AVX512F2-NOT: #define __AVX512PF__ 1
-// AVX512F2-NOT: #define __EVEX512__ 1
-// AVX512F2: #define __AVX__ 1
-// AVX512F2: #define __SSE2_MATH__ 1
-// AVX512F2: #define __SSE2__ 1
-// AVX512F2: #define __SSE3__ 1
-// AVX512F2: #define __SSE4_1__ 1
-// AVX512F2: #define __SSE4_2__ 1
-// AVX512F2: #define __SSE_MATH__ 1
-// AVX512F2: #define __SSE__ 1
-// AVX512F2: #define __SSSE3__ 1
-
 // RUN: %clang -target i386-unknown-unknown -march=atom -mavx512ifma -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=AVX512IFMA %s
 
 // AVX512IFMA: #define __AVX2__ 1
diff --git a/clang/test/Sema/builtins-x86.c b/clang/test/Sema/builtins-x86.c
index cbaf7bcde871e1..7d9cdce3d78948 100644
--- a/clang/test/Sema/builtins-x86.c
+++ b/clang/test/Sema/builtins-x86.c
@@ -106,14 +106,6 @@ __m128i test_mm_mask_i32gather_epi32(__m128i a, int const *b, __m128i c, __m128i
   return __builtin_ia32_gatherd_d(a, b, c, mask, 5); // expected-error {{scale argument must be 1, 2, 4, or 8}}
 }
 
-void _mm512_mask_prefetch_i32gather_ps(__m512i index, __mmask16 mask, int const *addr) {
-  __builtin_ia32_gatherpfdps(mask, index, addr, 5, 1); // expected-error {{scale argument must be 1, 2, 4, or 8}}
-}
-
-void _mm512_mask_prefetch_i32gather_ps_2(__m512i index, __mmask16 mask, int const *addr) {
-  __builtin_ia32_gatherpfdps(mask, index, addr, 1, 1); // expected-error {{argument value 1 is outside the valid range [2, 3]}}
-}
-
 __m512i test_mm512_shldi_epi64(__m512i __A, __m512i __B) {
   return __builtin_ia32_vpshldq512(__A, __B, 1024); // expected-error {{argument value 1024 is outside the valid range [0, 255]}}
 }
diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td
index 0fda7e66c06a75..aee804047e1b06 100644
--- a/llvm/include/llvm/IR/IntrinsicsX86.td
+++ b/llvm/include/llvm/IR/IntrinsicsX86.td
@@ -4125,38 +4125,6 @@ let TargetPrefix = "x86" in {
           Intrinsic<[],
           [llvm_ptr_ty, llvm_i8_ty, llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty],
           [ImmArg<ArgIndex<4>>]>;
-
-  // gather prefetch
-  // NOTE: These can't be ArgMemOnly because you can put the address completely
-  // in the index register.
-  def int_x86_avx512_gatherpf_dpd_512  : ClangBuiltin<"__builtin_ia32_gatherpfdpd">,
-          Intrinsic<[], [llvm_i8_ty, llvm_v8i32_ty, llvm_ptr_ty,
-                     llvm_i32_ty, llvm_i32_ty], [ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>]>;
-  def int_x86_avx512_gatherpf_dps_512  : ClangBuiltin<"__builtin_ia32_gatherpfdps">,
-          Intrinsic<[], [llvm_i16_ty, llvm_v16i32_ty, llvm_ptr_ty,
-                     llvm_i32_ty, llvm_i32_ty], [ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>]>;
-  def int_x86_avx512_gatherpf_qpd_512  : ClangBuiltin<"__builtin_ia32_gatherpfqpd">,
-          Intrinsic<[], [llvm_i8_ty, llvm_v8i64_ty, llvm_ptr_ty,
-                     llvm_i32_ty, llvm_i32_ty], [ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>]>;
-  def int_x86_avx512_gatherpf_qps_512  : ClangBuiltin<"__builtin_ia32_gatherpfqps">,
-          Intrinsic<[], [llvm_i8_ty, llvm_v8i64_ty, llvm_ptr_ty,
-                     llvm_i32_ty, llvm_i32_ty], [ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>]>;
-
-  // scatter prefetch
-  // NOTE: These can't be ArgMemOnly because you can put the address completely
-  // in the index register.
-  def int_x86_avx512_scatterpf_dpd_512  : ClangBuiltin<"__builtin_ia32_scatterpfdpd">,
-          Intrinsic<[], [llvm_i8_ty, llvm_v8i32_ty, llvm_ptr_ty,
-                     llvm_i32_ty, llvm_i32_ty], [ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>]>;
-  def int_x86_avx512_scatterpf_dps_512  : ClangBuiltin<"__builtin_ia32_scatterpfdps">,
-          Intrinsic<[], [llvm_i16_ty, llvm_v16i32_ty, llvm_ptr_ty,
-                     llvm_i32_ty, llvm_i32_ty], [ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>]>;
-  def int_x86_avx512_scatterpf_qpd_512  : ClangBuiltin<"__builtin_ia32_scatterpfqpd">,
-          Intrinsic<[], [llvm_i8_ty, llvm_v8i64_ty, llvm_ptr_ty,
-                     llvm_i32_ty, llvm_i32_ty], [ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>]>;
-  def int_x86_avx512_scatterpf_qps_512  : ClangBuiltin<"__builtin_ia32_scatterpfqps">,
-          Intrinsic<[], [llvm_i8_ty, llvm_v8i64_ty, llvm_ptr_ty,
-                     llvm_i32_ty, llvm_i32_ty], [ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>]>;
 }
 
 // AVX512 gather/scatter intrinsics that use vXi1 masks.
diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td
index 682b32e29cff55..50b7fb677f3aaf 100644
--- a/llvm/lib/Target/X86/X86.td
+++ b/llvm/lib/Target/X86/X86.td
@@ -130,9 +130,6 @@ def FeatureCDI      : SubtargetFeature<"avx512cd", "HasCDI", "true",
 def FeatureVPOPCNTDQ : SubtargetFeature<"avx512vpopcntdq", "HasVPOPCNTDQ",
                        "true", "Enable AVX-512 Population Count Instructions",
                                       [FeatureAVX512]>;
-def FeaturePFI      : SubtargetFeature<"avx512pf", "HasPFI", "true",
-                      "Enable AVX-512 PreFetch Instructions",
-                                      [FeatureAVX512]>;
 def FeaturePREFETCHI  : SubtargetFeature<"prefetchi", "HasPREFETCHI",
                                    "true",
                                    "Prefetch instruction with T0 or T1 Hint">;
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 981cefc1b55638..9b89b298f743f5 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -10338,7 +10338,7 @@ defm VPSCATTER : avx512_scatter_q_pd<0xA0, 0xA1, avx512vl_i64_info, "vpscatter",
 // prefetch
 multiclass avx512_gather_scatter_prefetch<bits<8> opc, Format F, string OpcodeStr,
                        RegisterClass KRC, X86MemOperand memop> {
-  let Predicates = [HasPFI], mayLoad = 1, mayStore = 1 in
+  let mayLoad = 1, mayStore = 1 in
   def m  : AVX5128I<opc, F, (outs), (ins KRC:$mask, memop:$src),
             !strconcat(OpcodeStr, "\t{$src {${mask}}|{${mask}}, $src}"), []>,
             EVEX, EVEX_K, Sched<[WriteLoad]>;
diff --git a/llvm/lib/Target/X86/X86InstrPredicates.td b/llvm/lib/Target/X86/X86InstrPredicates.td
index 6b89d2834a1da3..0ea0bcbfe2ec6b 100644
--- a/llvm/lib/Target/X86/X86InstrPredicates.td
+++ b/llvm/lib/Target/X86/X86InstrPredicates.td
@@ -43,7 +43,6 @@ def UseAVX2      : Predicate<"Subtarget->hasAVX2() && !Subtarget->hasAVX512()">;
 def NoAVX512     : Predicate<"!Subtarget->hasAVX512()">;
 def HasCDI       : Predicate<"Subtarget->hasCDI()">;
 def HasVPOPCNTDQ : Predicate<"Subtarget->hasVPOPCNTDQ()">;
-def HasPFI       : Predicate<"Subtarget->hasPFI()">;
 def HasDQI       : Predicate<"Subtarget->hasDQI()">;
 def NoDQI        : Predicate<"!Subtarget->hasDQI()">;
 def HasBWI       : Predicate<"Subtarget->hasBWI()">;
diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
index 9a0a4e86570357..e3961e0094d3ac 100644
--- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h
+++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
@@ -108,15 +108,6 @@ static const IntrinsicData IntrinsicsWithChain[] = {
   X86_INTRINSIC_DATA(avx512_gather3siv8_sf, GATHER, 0, 0),
   X86_INTRINSIC_DATA(avx512_gather3siv8_si, GATHER, 0, 0),
 
-  X86_INTRINSIC_DATA(avx512_gatherpf_dpd_512, PREFETCH,
-                     X86::VGATHERPF0DPDm, X86::VGATHERPF1DPDm),
-  X86_INTRINSIC_DATA(avx512_gatherpf_dps_512, PREFETCH,
-                     X86::VGATHERPF0DPSm, X86::VGATHERPF1DPSm),
-  X86_INTRINSIC_DATA(avx512_gatherpf_qpd_512, PREFETCH,
-                     X86::VGATHERPF0QPDm, X86::VGATHERPF1QPDm),
-  X86_INTRINSIC_DATA(avx512_gatherpf_qps_512, PREFETCH,
-                     X86::VGATHERPF0QPSm, X86::VGATHERPF1QPSm),
-
   X86_INTRINSIC_DATA(avx512_mask_gather_dpd_512, GATHER, 0, 0),
   X86_INTRINSIC_DATA(avx512_mask_gather_dpi_512, GATHER, 0, 0),
   X86_INTRINSIC_DATA(avx512_mask_gather_dpq_512, GATHER, 0, 0),
@@ -292,14 +283,6 @@ static const IntrinsicData IntrinsicsWithChain[] = {
   X86_INTRINSIC_DATA(avx512_scatterdiv4_si, SCATTER, 0, 0),
   X86_INTRINSIC_DATA(avx512_scatterdiv8_sf, SCATTER, 0, 0),
   X86_INTRINSIC_DATA(avx512_scatterdiv8_si, SCATTER, 0, 0),
-  X86_INTRINSIC_DATA(avx512_scatterpf_dpd_512, PREFETCH, X86::VSCATTERPF0DPDm,
-                     X86::VSCATTERPF1DPDm),
-  X86_INTRINSIC_DATA(avx512_scatterpf_dps_512, PREFETCH, X86::VSCATTERPF0DPSm,
-                     X86::VSCATTERPF1DPSm),
-  X86_INTRINSIC_DATA(avx512_scatterpf_qpd_512, PREFETCH, X86::VSCATTERPF0QPDm,
-                     X86::VSCATTERPF1QPDm),
-  X86_INTRINSIC_DATA(avx512_scatterpf_qps_512, PREFETCH, X86::VSCATTERPF0QPSm,
-                     X86::VSCATTERPF1QPSm),
   X86_INTRINSIC_DATA(avx512_scattersiv2_df, SCATTER, 0, 0),
   X86_INTRINSIC_DATA(avx512_scattersiv2_di, SCATTER, 0, 0),
   X86_INTRINSIC_DATA(avx512_scattersiv4_df, SCATTER, 0, 0),
diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp
index cae3ba1dffe481..ab7c28bb5db7a8 100644
--- a/llvm/lib/TargetParser/Host.cpp
+++ b/llvm/lib/TargetParser/Host.cpp
@@ -1266,8 +1266,6 @@ static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
     setFeature(X86::FEATURE_AVX512IFMA);
   if (HasLeaf7 && ((EBX >> 23) & 1))
     setFeature(X86::FEATURE_CLFLUSHOPT);
-  if (HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save)
-    setFeature(X86::FEATURE_AVX512PF);
   if (HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save)
     setFeature(X86::FEATURE_AVX512CD);
   if (HasLeaf7 && ((EBX >> 29) & 1))
@@ -1765,7 +1763,6 @@ bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
   Features["avx512ifma"] = HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save;
   Features["clflushopt"] = HasLeaf7 && ((EBX >> 23) & 1);
   Features["clwb"]       = HasLeaf7 && ((EBX >> 24) & 1);
-  Features["avx512pf"]   = HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save;
   Features["avx512cd"]   = HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save;
   Features["sha"]        = HasLeaf7 && ((EBX >> 29) & 1);
   Features["avx512bw"]   = HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save;
diff --git a/llvm/test/CodeGen/X86/avx512-gather-scatter-intrin-deprecated.ll b/llvm/test/CodeGen/X86/avx512-gather-scatter-intrin-deprecated.ll
index 8d09497cefb1b8..77053e2c1bc984 100644
--- a/llvm/test/CodeGen/X86/avx512-gather-scatter-intrin-deprecated.ll
+++ b/llvm/test/CodeGen/X86/avx512-gather-scatter-intrin-deprecated.ll
@@ -268,30 +268,6 @@ define void @gather_qps(<8 x i64> %ind, <8 x float> %src, ptr %base, ptr %stbuf)
   ret void
 }
 
-declare  void @llvm.x86.avx512.gatherpf.qps.512(i8, <8 x i64>, ptr , i32, i32);
-declare  void @llvm.x86.avx512.scatterpf.qps.512(i8, <8 x i64>, ptr , i32, i32);
-define void @prefetch(<8 x i64> %ind, ptr %base) {
-; CHECK-LABEL: prefetch:
-; CHECK:       ## %bb.0:
-; CHECK-NEXT:    kxnorw %k0, %k0, %k1
-; CHECK-NEXT:    vgatherpf0qps (%rdi,%zmm0,4) {%k1}
-; CHECK-NEXT:    kxorw %k0, %k0, %k1
-; CHECK-NEXT:    vgatherpf1qps (%rdi,%zmm0,4) {%k1}
-; CHECK-NEXT:    movb $1, %al
-; CHECK-NEXT:    kmovd %eax, %k1
-; CHECK-NEXT:    vscatterpf0qps (%rdi,%zmm0,2) {%k1}
-; CHECK-NEXT:    movb $120, %al
-; CHECK-NEXT:    kmovd %eax, %k1
-; CHECK-NEXT:    vscatterpf1qps (%rdi,%zmm0,2) {%k1}
-; CHECK-NEXT:    vzeroupper
-; CHECK-NEXT:    retq
-  call void @llvm.x86.avx512.gatherpf.qps.512(i8 -1, <8 x i64> %ind, ptr %base, i32 4, i32 3)
-  call void @llvm.x86.avx512.gatherpf.qps.512(i8 0, <8 x i64> %ind, ptr %base, i32 4, i32 2)
-  call void @llvm.x86.avx512.scatterpf.qps.512(i8 1, <8 x i64> %ind, ptr %base, i32 2, i32 3)
-  call void @llvm.x86.avx512.scatterpf.qps.512(i8 120, <8 x i64> %ind, ptr %base, i32 2, i32 2)
-  ret void
-}
-
 declare <2 x double> @llvm.x86.avx512.gather3div2.df(<2 x double>, ptr, <2 x i64>, i8, i32)
 
 define <2 x double>@test_int_x86_avx512_gather3div2_df(<2 x double> %x0, ptr %x1, <2 x i64> %x2, i8 %x3) {
diff --git a/llvm/test/CodeGen/X86/avx512-gather-scatter-intrin.ll b/llvm/test/CodeGen/X86/avx512-gather-scatter-intrin.ll
index acbf4387255c5a..df71e3c3afa5ec 100644
--- a/llvm/test/CodeGen/X86/avx512-gather-scatter-intrin.ll
+++ b/llvm/test/CodeGen/X86/avx512-gather-scatter-intrin.ll
@@ -265,30 +265,6 @@ define dso_local void @gather_qps(<8 x i64> %ind, <8 x float> %src, ptr %base, p
   ret void
 }
 
-declare  void @llvm.x86.avx512.gatherpf.qps.512(i8, <8 x i64>, ptr , i32, i32);
-declare  void @llvm.x86.avx512.scatterpf.qps.512(i8, <8 x i64>, ptr , i32, i32);
-define dso_local void @prefetch(<8 x i64> %ind, ptr %base) {
-; CHECK-LABEL: prefetch:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    kxnorw %k0, %k0, %k1
-; CHECK-NEXT:    vgatherpf0qps (%rdi,%zmm0,4) {%k1}
-; CHECK-NEXT:    kxorw %k0, %k0, %k1
-; CHECK-NEXT:    vgatherpf1qps (%rdi,%zmm0,4) {%k1}
-; CHECK-NEXT:    movb $1, %al
-; CHECK-NEXT:    kmovd %eax, %k1
-; CHECK-NEXT:    vscatterpf0qps (%rdi,%zmm0,2) {%k1}
-; CHECK-NEXT:    movb $120, %al
-; CHECK-NEXT:    kmovd %eax, %k1
-; CHECK-NEXT:    vscatterpf1qps (%rdi,%zmm0,2) {%k1}
-; CHECK-NEXT:    vzeroupper
-; CHECK-NEXT:    retq
-  call void @llvm.x86.avx512.gatherpf.qps.512(i8 -1, <8 x i64> %ind, ptr %base, i32 4, i32 3)
-  call void @llvm.x86.avx512.gatherpf.qps.512(i8 0, <8 x i64> %ind, ptr %base, i32 4, i32 2)
-  call void @llvm.x86.avx512.scatterpf.qps.512(i8 1, <8 x i64> %ind, ptr %base, i32 2, i32 3)
-  call void @llvm.x86.avx512.scatterpf.qps.512(i8 120, <8 x i64> %ind, ptr %base, i32 2, i32 2)
-  ret void
-}
-
 define <2 x double> @test_int_x86_avx512_mask_gather3div2_df(<2 x double> %x0, ptr %x1, <2 x i64> %x2, i8 %x3) {
 ; CHECK-LABEL: test_int_x86_avx512_mask_gather3div2_df:
 ; CHECK:       # %bb.0:
diff --git a/llvm/test/CodeGen/X86/insert-prefetch-invalid-instr.ll b/llvm/test/CodeGen/X86/insert-prefetch-invalid-instr.ll
index 2f5a36865d4ae3..7bdbb19d1714d3 100644
--- a/llvm/test/CodeGen/X86/insert-prefetch-invalid-instr.ll
+++ b/llvm/test/CodeGen/X86/insert-prefetch-invalid-instr.ll
@@ -8,16 +8,12 @@ target triple = "x86_64-unknown-linux-gnu"
 define dso_local i32 @main() local_unnamed_addr #0 !dbg !7 {
 entry:
   tail call void @llvm.prefetch(ptr inttoptr (i64 291 to ptr), i32 0, i32 0, i32 1), !dbg !9
-  tail call void @llvm.x86.avx512.gatherpf.dpd.512(i8 97, <8 x i32> undef, ptr null, i32 1, i32 2), !dbg !10
   ret i32 291, !dbg !11
 }
 
 ; Function Attrs: inaccessiblemem_or_argmemonly nounwind
 declare void @llvm.prefetch(ptr nocapture readonly, i32, i32, i32) #1
 
-; Function Attrs: argmemonly nounwind
-declare void @llvm.x86.avx512.gatherpf.dpd.512(i8, <8 x i32>, ptr, i32, i32) #2
-
 attributes #0 = {"target-cpu"="x86-64" "target-features"="+avx512pf,+sse4.2,+ssse3"}
 attributes #1 = { inaccessiblemem_or_argmemonly nounwind }
 attributes #2 = { argmemonly nounwind }
@@ -43,4 +39,3 @@ attributes #2 = { argmemonly nounwind }
 ;CHECK:       # %bb.0:
 ;CHECK:       prefetchnta 291
 ;CHECK-NOT:   prefetchnta 42(%rax,%ymm0)
-;CHECK:       vgatherpf1dpd (%rax,%ymm0) {%k1}
diff --git a/llvm/test/CodeGen/X86/speculative-load-hardening-gather.ll b/llvm/test/CodeGen/X86/speculative-load-hardening-gather.ll
index 6e89445bead637..7b3667420ec6d4 100644
--- a/llvm/test/CodeGen/X86/speculative-load-hardening-gather.ll
+++ b/llvm/test/CodeGen/X86/speculative-load-hardening-gather.ll
@@ -558,28 +558,6 @@ entry:
   ret <8 x i64> %v
 }
 
-declare void @llvm.x86.avx512.gatherpf.qps.512(i8, <8 x i64>, ptr, i32, i32);
-
-define void @test_llvm_x86_avx512_gatherpf_qps_512(<8 x i64> %iv, ptr %b) #1 {
-; CHECK-LABEL: test_llvm_x86_avx512_gatherpf_qps_512:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    movq %rsp, %rax
-; CHECK-NEXT:    movq $-1, %rcx
-; CHECK-NEXT:    sarq $63, %rax
-; CHECK-NEXT:    kxnorw %k0, %k0, %k1
-; CHECK-NEXT:    orq %rax, %rdi
-; CHECK-NEXT:    vpbroadcastq %rax, %zmm1
-; CHECK-NEXT:    vporq %zmm0, %zmm1, %zmm0
-; CHECK-NEXT:    vgatherpf0qps (%rdi,%zmm0,4) {%k1}
-; CHECK-NEXT:    shlq $47, %rax
-; CHECK-NEXT:    orq %rax, %rsp
-; CHECK-NEXT:    vzeroupper
-; CHECK-NEXT:    retq
-entry:
-  call void @llvm.x86.avx512.gatherpf.qps.512(i8 -1, <8 x i64> %iv, ptr %b, i32 4, i32 3)
-  ret void
-}
-
 declare <4 x float> @llvm.x86.avx512.gather3siv4.sf(<4 x float>, ptr, <4 x i32>, i8, i32)
 
 define <4 x float> @test_llvm_x86_avx512_gather3siv4_sf(ptr %b, <4 x i32> %iv) #2 {

>From 87d15d7ad6141d7492e9579928839408d60e8152 Mon Sep 17 00:00:00 2001
From: Freddy Ye <freddy.ye at intel.com>
Date: Tue, 26 Dec 2023 13:33:16 +0800
Subject: [PATCH 7/7] Remove PREFETCHWT1 intrinsic support.

---
 clang/include/clang/Driver/Options.td           |  2 --
 clang/lib/Basic/Targets/X86.cpp                 |  6 ------
 clang/lib/Basic/Targets/X86.h                   |  1 -
 clang/test/Driver/x86-target-features.c         |  5 -----
 .../llvm/TargetParser/X86TargetParser.def       |  1 -
 llvm/lib/Target/X86/X86.td                      |  3 ---
 llvm/lib/Target/X86/X86Instr3DNow.td            |  3 +--
 llvm/lib/Target/X86/X86InstrFragments.td        |  8 +-------
 llvm/lib/Target/X86/X86InstrPredicates.td       |  1 -
 llvm/lib/Target/X86/X86Subtarget.h              |  7 +++----
 llvm/lib/TargetParser/Host.cpp                  |  1 -
 llvm/lib/TargetParser/X86TargetParser.cpp       |  1 -
 .../test/CodeGen/X86/avx512-cmp-kor-sequence.ll |  2 +-
 llvm/test/CodeGen/X86/prefetch.ll               | 17 -----------------
 .../LoopStrengthReduce/X86/pr40514.ll           |  2 +-
 .../Transforms/LoopVectorize/X86/pr23997.ll     |  2 +-
 .../Transforms/LoopVectorize/X86/pr54634.ll     |  2 +-
 .../LoopVectorize/X86/scatter_crash.ll          |  2 +-
 .../Transforms/SLPVectorizer/X86/vector_gep.ll  |  2 +-
 .../pattern-matching-based-opts-after-delicm.ll |  2 +-
 20 files changed, 12 insertions(+), 58 deletions(-)

diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 3f17fcaf36b908..f4a731cc699bc2 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -5959,8 +5959,6 @@ def mpopcnt : Flag<["-"], "mpopcnt">, Group<m_x86_Features_Group>;
 def mno_popcnt : Flag<["-"], "mno-popcnt">, Group<m_x86_Features_Group>;
 def mprefetchi : Flag<["-"], "mprefetchi">, Group<m_x86_Features_Group>;
 def mno_prefetchi : Flag<["-"], "mno-prefetchi">, Group<m_x86_Features_Group>;
-def mprefetchwt1 : Flag<["-"], "mprefetchwt1">, Group<m_x86_Features_Group>;
-def mno_prefetchwt1 : Flag<["-"], "mno-prefetchwt1">, Group<m_x86_Features_Group>;
 def mprfchw : Flag<["-"], "mprfchw">, Group<m_x86_Features_Group>;
 def mno_prfchw : Flag<["-"], "mno-prfchw">, Group<m_x86_Features_Group>;
 def mptwrite : Flag<["-"], "mptwrite">, Group<m_x86_Features_Group>;
diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp
index 4e8613ce7cfc34..516c98adfff7ed 100644
--- a/clang/lib/Basic/Targets/X86.cpp
+++ b/clang/lib/Basic/Targets/X86.cpp
@@ -352,8 +352,6 @@ bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
       HasWBNOINVD = true;
     } else if (Feature == "+prefetchi") {
       HasPREFETCHI = true;
-    } else if (Feature == "+prefetchwt1") {
-      HasPREFETCHWT1 = true;
     } else if (Feature == "+clzero") {
       HasCLZERO = true;
     } else if (Feature == "+cldemote") {
@@ -862,8 +860,6 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts,
     Builder.defineMacro("__SM4__");
   if (HasPREFETCHI)
     Builder.defineMacro("__PREFETCHI__");
-  if (HasPREFETCHWT1)
-    Builder.defineMacro("__PREFETCHWT1__");
   if (HasCLZERO)
     Builder.defineMacro("__CLZERO__");
   if (HasKL)
@@ -1092,7 +1088,6 @@ bool X86TargetInfo::isValidFeatureName(StringRef Name) const {
       .Case("pku", true)
       .Case("popcnt", true)
       .Case("prefetchi", true)
-      .Case("prefetchwt1", true)
       .Case("prfchw", true)
       .Case("ptwrite", true)
       .Case("raoint", true)
@@ -1208,7 +1203,6 @@ bool X86TargetInfo::hasFeature(StringRef Feature) const {
       .Case("pku", HasPKU)
       .Case("popcnt", HasPOPCNT)
       .Case("prefetchi", HasPREFETCHI)
-      .Case("prefetchwt1", HasPREFETCHWT1)
       .Case("prfchw", HasPRFCHW)
       .Case("ptwrite", HasPTWRITE)
       .Case("raoint", HasRAOINT)
diff --git a/clang/lib/Basic/Targets/X86.h b/clang/lib/Basic/Targets/X86.h
index 8e5d7d56a17b1f..36980c42cfead9 100644
--- a/clang/lib/Basic/Targets/X86.h
+++ b/clang/lib/Basic/Targets/X86.h
@@ -134,7 +134,6 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo {
   bool HasCLWB = false;
   bool HasMOVBE = false;
   bool HasPREFETCHI = false;
-  bool HasPREFETCHWT1 = false;
   bool HasRDPID = false;
   bool HasRDPRU = false;
   bool HasRetpolineExternalThunk = false;
diff --git a/clang/test/Driver/x86-target-features.c b/clang/test/Driver/x86-target-features.c
index 942df9259e6a8e..856b19af96ffea 100644
--- a/clang/test/Driver/x86-target-features.c
+++ b/clang/test/Driver/x86-target-features.c
@@ -81,11 +81,6 @@
 // SGX: "-target-feature" "+sgx"
 // NO-SGX: "-target-feature" "-sgx"
 
-// RUN: %clang --target=i386 -march=i386 -mprefetchwt1 %s -### 2>&1 | FileCheck -check-prefix=PREFETCHWT1 %s
-// RUN: %clang --target=i386 -march=i386 -mno-prefetchwt1 %s -### 2>&1 | FileCheck -check-prefix=NO-PREFETCHWT1 %s
-// PREFETCHWT1: "-target-feature" "+prefetchwt1"
-// NO-PREFETCHWT1: "-target-feature" "-prefetchwt1"
-
 // RUN: %clang --target=i386 -march=i386 -mprefetchi %s -### -o %t.o 2>&1 | FileCheck -check-prefix=PREFETCHI %s
 // RUN: %clang --target=i386 -march=i386 -mno-prefetchi %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-PREFETCHI %s
 // PREFETCHI: "-target-feature" "+prefetchi"
diff --git a/llvm/include/llvm/TargetParser/X86TargetParser.def b/llvm/include/llvm/TargetParser/X86TargetParser.def
index b58feafe4e8c24..2a204042452968 100644
--- a/llvm/include/llvm/TargetParser/X86TargetParser.def
+++ b/llvm/include/llvm/TargetParser/X86TargetParser.def
@@ -204,7 +204,6 @@ X86_FEATURE       (MWAITX,          "mwaitx")
 X86_FEATURE       (PCONFIG,         "pconfig")
 X86_FEATURE       (PKU,             "pku")
 X86_FEATURE       (PREFETCHI,       "prefetchi")
-X86_FEATURE       (PREFETCHWT1,     "prefetchwt1")
 X86_FEATURE       (PRFCHW,          "prfchw")
 X86_FEATURE       (PTWRITE,         "ptwrite")
 X86_FEATURE       (RDPID,           "rdpid")
diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td
index 50b7fb677f3aaf..a60615c4d78c37 100644
--- a/llvm/lib/Target/X86/X86.td
+++ b/llvm/lib/Target/X86/X86.td
@@ -133,9 +133,6 @@ def FeatureVPOPCNTDQ : SubtargetFeature<"avx512vpopcntdq", "HasVPOPCNTDQ",
 def FeaturePREFETCHI  : SubtargetFeature<"prefetchi", "HasPREFETCHI",
                                    "true",
                                    "Prefetch instruction with T0 or T1 Hint">;
-def FeaturePREFETCHWT1  : SubtargetFeature<"prefetchwt1", "HasPREFETCHWT1",
-                                   "true",
-                                   "Prefetch with Intent to Write and T1 Hint">;
 def FeatureDQI     : SubtargetFeature<"avx512dq", "HasDQI", "true",
                       "Enable AVX-512 Doubleword and Quadword Instructions",
                                       [FeatureAVX512]>;
diff --git a/llvm/lib/Target/X86/X86Instr3DNow.td b/llvm/lib/Target/X86/X86Instr3DNow.td
index 3be03ab0f4332b..03612de0fad942 100644
--- a/llvm/lib/Target/X86/X86Instr3DNow.td
+++ b/llvm/lib/Target/X86/X86Instr3DNow.td
@@ -90,8 +90,7 @@ def PREFETCHW : I<0x0D, MRM1m, (outs), (ins i8mem:$addr), "prefetchw\t$addr",
                   TB, Requires<[HasPrefetchW]>;
 
 def PREFETCHWT1 : I<0x0D, MRM2m, (outs), (ins i8mem:$addr), "prefetchwt1\t$addr",
-                    [(prefetch addr:$addr, (i32 1), (i32 PrefetchWT1Level), (i32 1))]>,
-                    TB, Requires<[HasPREFETCHWT1]>;
+                    []>, TB;
 }
 
 // "3DNowA" instructions
diff --git a/llvm/lib/Target/X86/X86InstrFragments.td b/llvm/lib/Target/X86/X86InstrFragments.td
index adf527d72f5b43..b6b623aa1b78a7 100644
--- a/llvm/lib/Target/X86/X86InstrFragments.td
+++ b/llvm/lib/Target/X86/X86InstrFragments.td
@@ -604,14 +604,8 @@ def X86any_fcmp : PatFrags<(ops node:$lhs, node:$rhs),
                           [(X86strict_fcmp node:$lhs, node:$rhs),
                            (X86fcmp node:$lhs, node:$rhs)]>;
 
-// PREFETCHWT1 is supported we want to use it for everything but T0.
 def PrefetchWLevel : PatFrag<(ops), (i32 timm), [{
-  return N->getSExtValue() == 3 || !Subtarget->hasPREFETCHWT1();
-}]>;
-
-// Use PREFETCHWT1 for NTA, T2, T1.
-def PrefetchWT1Level : TImmLeaf<i32, [{
-  return Imm < 3;
+  return N->getSExtValue() <= 3;
 }]>;
 
 def X86lock_add_nocf : PatFrag<(ops node:$lhs, node:$rhs),
diff --git a/llvm/lib/Target/X86/X86InstrPredicates.td b/llvm/lib/Target/X86/X86InstrPredicates.td
index 0ea0bcbfe2ec6b..0f2f7429e1affa 100644
--- a/llvm/lib/Target/X86/X86InstrPredicates.td
+++ b/llvm/lib/Target/X86/X86InstrPredicates.td
@@ -108,7 +108,6 @@ def NoSSEPrefetch : Predicate<"!Subtarget->hasSSEPrefetch()">;
 def HasPRFCHW    : Predicate<"Subtarget->hasPRFCHW()">;
 def HasPREFETCHI : Predicate<"Subtarget->hasPREFETCHI()">;
 def HasPrefetchW : Predicate<"Subtarget->hasPrefetchW()">;
-def HasPREFETCHWT1 : Predicate<"Subtarget->hasPREFETCHWT1()">;
 def HasLAHFSAHF  : Predicate<"Subtarget->hasLAHFSAHF()">;
 def HasLAHFSAHF64 : Predicate<"Subtarget->hasLAHFSAHF64()">;
 def HasMWAITX    : Predicate<"Subtarget->hasMWAITX()">;
diff --git a/llvm/lib/Target/X86/X86Subtarget.h b/llvm/lib/Target/X86/X86Subtarget.h
index a458b5f9ec8fbb..b4480819f09bea 100644
--- a/llvm/lib/Target/X86/X86Subtarget.h
+++ b/llvm/lib/Target/X86/X86Subtarget.h
@@ -213,16 +213,15 @@ class X86Subtarget final : public X86GenSubtargetInfo {
   bool hasAnyFMA() const { return hasFMA() || hasFMA4(); }
   bool hasPrefetchW() const {
     // The PREFETCHW instruction was added with 3DNow but later CPUs gave it
-    // its own CPUID bit as part of deprecating 3DNow. Intel eventually added
-    // it and KNL has another that prefetches to L2 cache. We assume the
+    // its own CPUID bit as part of deprecating 3DNow. We assume the
     // L1 version exists if the L2 version does.
-    return hasThreeDNow() || hasPRFCHW() || hasPREFETCHWT1();
+    return hasThreeDNow() || hasPRFCHW();
   }
   bool hasSSEPrefetch() const {
     // We implicitly enable these when we have a write prefix supporting cache
     // level OR if we have prfchw, but don't already have a read prefetch from
     // 3dnow.
-    return hasSSE1() || (hasPRFCHW() && !hasThreeDNow()) || hasPREFETCHWT1() ||
+    return hasSSE1() || (hasPRFCHW() && !hasThreeDNow()) ||
            hasPREFETCHI();
   }
   bool canUseLAHFSAHF() const { return hasLAHFSAHF64() || !is64Bit(); }
diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp
index ab7c28bb5db7a8..aaf5b760dfa0f9 100644
--- a/llvm/lib/TargetParser/Host.cpp
+++ b/llvm/lib/TargetParser/Host.cpp
@@ -1768,7 +1768,6 @@ bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
   Features["avx512bw"]   = HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save;
   Features["avx512vl"]   = HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save;
 
-  Features["prefetchwt1"]     = HasLeaf7 && ((ECX >>  0) & 1);
   Features["avx512vbmi"]      = HasLeaf7 && ((ECX >>  1) & 1) && HasAVX512Save;
   Features["pku"]             = HasLeaf7 && ((ECX >>  4) & 1);
   Features["waitpkg"]         = HasLeaf7 && ((ECX >>  5) & 1);
diff --git a/llvm/lib/TargetParser/X86TargetParser.cpp b/llvm/lib/TargetParser/X86TargetParser.cpp
index df900aed145ff9..53cbc22840eb64 100644
--- a/llvm/lib/TargetParser/X86TargetParser.cpp
+++ b/llvm/lib/TargetParser/X86TargetParser.cpp
@@ -490,7 +490,6 @@ constexpr FeatureBitset ImpliedFeaturesMOVDIRI = {};
 constexpr FeatureBitset ImpliedFeaturesPCONFIG = {};
 constexpr FeatureBitset ImpliedFeaturesPOPCNT = {};
 constexpr FeatureBitset ImpliedFeaturesPKU = {};
-constexpr FeatureBitset ImpliedFeaturesPREFETCHWT1 = {};
 constexpr FeatureBitset ImpliedFeaturesPRFCHW = {};
 constexpr FeatureBitset ImpliedFeaturesPTWRITE = {};
 constexpr FeatureBitset ImpliedFeaturesRDPID = {};
diff --git a/llvm/test/CodeGen/X86/avx512-cmp-kor-sequence.ll b/llvm/test/CodeGen/X86/avx512-cmp-kor-sequence.ll
index 7676a65b735e06..b4ba23934d54df 100644
--- a/llvm/test/CodeGen/X86/avx512-cmp-kor-sequence.ll
+++ b/llvm/test/CodeGen/X86/avx512-cmp-kor-sequence.ll
@@ -48,5 +48,5 @@ entry:
 ; Function Attrs: nounwind readnone
 declare <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float>, <16 x float>, i32, <16 x i1>, i32) #1
 
-attributes #0 = { nounwind readnone uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="broadwell" "target-features"="+adx,+aes,+avx,+avx2,+avx512cd,+avx512f,+bmi,+bmi2,+cx16,+f16c,+fma,+fsgsbase,+fxsr,+evex512,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+prefetchwt1,+rdrnd,+rdseed,+rtm,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt,-vzeroupper" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind readnone uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="broadwell" "target-features"="+adx,+aes,+avx,+avx2,+avx512cd,+avx512f,+bmi,+bmi2,+cx16,+f16c,+fma,+fsgsbase,+fxsr,+evex512,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+rdrnd,+rdseed,+rtm,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt,-vzeroupper" "unsafe-fp-math"="false" "use-soft-float"="false" }
 attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/X86/prefetch.ll b/llvm/test/CodeGen/X86/prefetch.ll
index 3cfa0e3efcb1e1..4de0cb3948f0c4 100644
--- a/llvm/test/CodeGen/X86/prefetch.ll
+++ b/llvm/test/CodeGen/X86/prefetch.ll
@@ -6,9 +6,6 @@
 ; RUN: llc < %s -mtriple=i686-- -mcpu=slm | FileCheck %s -check-prefix=PRFCHWSSE
 ; RUN: llc < %s -mtriple=i686-- -mcpu=btver2 | FileCheck %s -check-prefix=PRFCHWSSE
 ; RUN: llc < %s -mtriple=i686-- -mcpu=btver2 -mattr=-prfchw | FileCheck %s -check-prefix=SSE
-; RUN: llc < %s -mtriple=i686-- -mattr=+sse,+prefetchwt1 | FileCheck %s -check-prefix=PREFETCHWT1
-; RUN: llc < %s -mtriple=i686-- -mattr=-sse,+prefetchwt1 | FileCheck %s -check-prefix=PREFETCHWT1
-; RUN: llc < %s -mtriple=i686-- -mattr=-sse,+3dnow,+prefetchwt1 | FileCheck %s -check-prefix=PREFETCHWT1
 ; RUN: llc < %s -mtriple=i686-- -mattr=+3dnow | FileCheck %s -check-prefix=3DNOW
 ; RUN: llc < %s -mtriple=i686-- -mattr=+3dnow,+prfchw | FileCheck %s -check-prefix=3DNOW
 
@@ -16,7 +13,6 @@
 ; 3dnow by itself get you just the single prefetch instruction with no hints
 ; sse provides prefetch0/1/2/nta
 ; supporting prefetchw, but not 3dnow implicitly provides prefetcht0/1/2/nta regardless of sse setting as we need something to fall back to for the non-write hint.
-; supporting prefetchwt1 implies prefetcht0/1/2/nta and prefetchw regardless of other settings. this allows levels for non-write and gives us an instruction for write+T0
 ; 3dnow prefetch instruction will only get used if you have no other prefetch instructions enabled
 
 ; rdar://10538297
@@ -48,19 +44,6 @@ define void @t(ptr %ptr) nounwind  {
 ; PRFCHWSSE-NEXT:    prefetchw (%eax)
 ; PRFCHWSSE-NEXT:    retl
 ;
-; PREFETCHWT1-LABEL: t:
-; PREFETCHWT1:       # %bb.0: # %entry
-; PREFETCHWT1-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; PREFETCHWT1-NEXT:    prefetcht2 (%eax)
-; PREFETCHWT1-NEXT:    prefetcht1 (%eax)
-; PREFETCHWT1-NEXT:    prefetcht0 (%eax)
-; PREFETCHWT1-NEXT:    prefetchnta (%eax)
-; PREFETCHWT1-NEXT:    prefetchwt1 (%eax)
-; PREFETCHWT1-NEXT:    prefetchwt1 (%eax)
-; PREFETCHWT1-NEXT:    prefetchw (%eax)
-; PREFETCHWT1-NEXT:    prefetchwt1 (%eax)
-; PREFETCHWT1-NEXT:    retl
-;
 ; 3DNOW-LABEL: t:
 ; 3DNOW:       # %bb.0: # %entry
 ; 3DNOW-NEXT:    movl {{[0-9]+}}(%esp), %eax
diff --git a/llvm/test/Transforms/LoopStrengthReduce/X86/pr40514.ll b/llvm/test/Transforms/LoopStrengthReduce/X86/pr40514.ll
index a461f35d00dc98..a6bff63dfc7158 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/X86/pr40514.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/X86/pr40514.ll
@@ -54,4 +54,4 @@ bb10:                                             ; preds = %bb10, %bb
 }
 
 
-attributes #0 = { "target-cpu"="broadwell" "target-features"="+sse2,+cx16,+sahf,-tbm,-avx512ifma,-sha,-gfni,-fma4,-vpclmulqdq,+prfchw,+bmi2,-cldemote,+fsgsbase,-ptwrite,-xsavec,+popcnt,+aes,-avx512bitalg,-movdiri,-xsaves,-avx512vnni,-avx512vpopcntdq,-pconfig,-clwb,-avx512f,-clzero,-pku,+mmx,-lwp,-rdpid,-xop,+rdseed,-waitpkg,-movdir64b,-sse4a,-avx512bw,-clflushopt,+xsave,-avx512vbmi2,+64bit,-avx512vl,+invpcid,-avx512cd,+avx,-vaes,+rtm,+fma,+bmi,+rdrnd,-mwaitx,+sse4.1,+sse4.2,+avx2,-wbnoinvd,+sse,+lzcnt,+pclmul,-prefetchwt1,+f16c,+ssse3,-sgx,-shstk,+cmov,-avx512vbmi,+movbe,+xsaveopt,-avx512dq,+adx,-avx512pf,+sse3" }
+attributes #0 = { "target-cpu"="broadwell" "target-features"="+sse2,+cx16,+sahf,-tbm,-avx512ifma,-sha,-gfni,-fma4,-vpclmulqdq,+prfchw,+bmi2,-cldemote,+fsgsbase,-ptwrite,-xsavec,+popcnt,+aes,-avx512bitalg,-movdiri,-xsaves,-avx512vnni,-avx512vpopcntdq,-pconfig,-clwb,-avx512f,-clzero,-pku,+mmx,-lwp,-rdpid,-xop,+rdseed,-waitpkg,-movdir64b,-sse4a,-avx512bw,-clflushopt,+xsave,-avx512vbmi2,+64bit,-avx512vl,+invpcid,-avx512cd,+avx,-vaes,+rtm,+fma,+bmi,+rdrnd,-mwaitx,+sse4.1,+sse4.2,+avx2,-wbnoinvd,+sse,+lzcnt,+pclmul,+f16c,+ssse3,-sgx,-shstk,+cmov,-avx512vbmi,+movbe,+xsaveopt,-avx512dq,+adx,-avx512pf,+sse3" }
diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr23997.ll b/llvm/test/Transforms/LoopVectorize/X86/pr23997.ll
index 3e3018f5060948..b94ebf109163e4 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/pr23997.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/pr23997.ll
@@ -88,7 +88,7 @@ loopexit:
   ret void
 }
 
-attributes #0 = { uwtable "target-cpu"="skylake" "target-features"="+sse2,+cx16,+sahf,-tbm,-avx512ifma,-sha,-gfni,-fma4,-vpclmulqdq,+prfchw,+bmi2,-cldemote,+fsgsbase,+xsavec,+popcnt,+aes,-avx512bitalg,+xsaves,-avx512vnni,-avx512vpopcntdq,-clwb,-avx512f,-clzero,-pku,+mmx,-lwp,-rdpid,-xop,+rdseed,-waitpkg,-sse4a,-avx512bw,+clflushopt,+xsave,-avx512vbmi2,-avx512vl,-avx512cd,+avx,-vaes,+rtm,+fma,+bmi,+rdrnd,-mwaitx,+sse4.1,+sse4.2,+avx2,-wbnoinvd,+sse,+lzcnt,+pclmul,-prefetchwt1,+f16c,+ssse3,+sgx,-shstk,+cmov,-avx512vbmi,+movbe,+xsaveopt,-avx512dq,+adx,-avx512pf,+sse3" }
+attributes #0 = { uwtable "target-cpu"="skylake" "target-features"="+sse2,+cx16,+sahf,-tbm,-avx512ifma,-sha,-gfni,-fma4,-vpclmulqdq,+prfchw,+bmi2,-cldemote,+fsgsbase,+xsavec,+popcnt,+aes,-avx512bitalg,+xsaves,-avx512vnni,-avx512vpopcntdq,-clwb,-avx512f,-clzero,-pku,+mmx,-lwp,-rdpid,-xop,+rdseed,-waitpkg,-sse4a,-avx512bw,+clflushopt,+xsave,-avx512vbmi2,-avx512vl,-avx512cd,+avx,-vaes,+rtm,+fma,+bmi,+rdrnd,-mwaitx,+sse4.1,+sse4.2,+avx2,-wbnoinvd,+sse,+lzcnt,+pclmul,+f16c,+ssse3,+sgx,-shstk,+cmov,-avx512vbmi,+movbe,+xsaveopt,-avx512dq,+adx,-avx512pf,+sse3" }
 
 !0 = !{i32 0, i32 2147483646}
 !1 = !{}
diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr54634.ll b/llvm/test/Transforms/LoopVectorize/X86/pr54634.ll
index 20566005c93dfd..743ca20f92b49b 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/pr54634.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/pr54634.ll
@@ -118,7 +118,7 @@ L44:                                              ; preds = %L26
   ret ptr addrspace(10) null
 }
 
-attributes #0 = { "target-cpu"="skylake-avx512" "target-features"="+xsaves,+xsavec,+prfchw,+lzcnt,+sahf,+pku,+avx512vl,+avx512bw,+avx512cd,+clwb,+clflushopt,+adx,+avx512dq,+avx512f,+bmi2,+avx2,+bmi,+fsgsbase,+f16c,+avx,+xsave,+aes,+popcnt,+movbe,+sse4.2,+sse4.1,+cx16,+fma,+ssse3,+pclmul,+sse3,-rdrnd,-rtm,-rdseed,-avx512ifma,-avx512pf,-sha,-prefetchwt1,-avx512vbmi,-waitpkg,-avx512vbmi2,-shstk,-gfni,-vaes,-vpclmulqdq,-avx512vnni,-avx512bitalg,-avx512vpopcntdq,-rdpid,-cldemote,-movdiri,-movdir64b,-enqcmd,-avx512vp2intersect,-serialize,-tsxldtrk,-pconfig,-amx-bf16,-amx-tile,-amx-int8,-sse4a,-xop,-lwp,-fma4,-tbm,-mwaitx,-xsaveopt,-clzero,-wbnoinvd,-avx512bf16,-ptwrite,+sse2,+mmx,+fxsr,+64bit,+cx8" }
+attributes #0 = { "target-cpu"="skylake-avx512" "target-features"="+xsaves,+xsavec,+prfchw,+lzcnt,+sahf,+pku,+avx512vl,+avx512bw,+avx512cd,+clwb,+clflushopt,+adx,+avx512dq,+avx512f,+bmi2,+avx2,+bmi,+fsgsbase,+f16c,+avx,+xsave,+aes,+popcnt,+movbe,+sse4.2,+sse4.1,+cx16,+fma,+ssse3,+pclmul,+sse3,-rdrnd,-rtm,-rdseed,-avx512ifma,-avx512pf,-sha,-avx512vbmi,-waitpkg,-avx512vbmi2,-shstk,-gfni,-vaes,-vpclmulqdq,-avx512vnni,-avx512bitalg,-avx512vpopcntdq,-rdpid,-cldemote,-movdiri,-movdir64b,-enqcmd,-avx512vp2intersect,-serialize,-tsxldtrk,-pconfig,-amx-bf16,-amx-tile,-amx-int8,-sse4a,-xop,-lwp,-fma4,-tbm,-mwaitx,-xsaveopt,-clzero,-wbnoinvd,-avx512bf16,-ptwrite,+sse2,+mmx,+fxsr,+64bit,+cx8" }
 attributes #1 = { inaccessiblemem_or_argmemonly }
 attributes #2 = { allocsize(1) }
 
diff --git a/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll b/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll
index 4839e3edf7b4df..ce460f4fe35425 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll
@@ -111,4 +111,4 @@ for.body:                                         ; preds = %for.body.preheader,
   br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit99
 }
 
-attributes #0 = { norecurse nounwind ssp uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="broadwell" "target-features"="+adx,+aes,+avx,+avx2,+avx512cd,+avx512f,+bmi,+bmi2,+cx16,+f16c,+fma,+fsgsbase,+fxsr,+evex512,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+prefetchwt1,+rdrnd,+rdseed,+rtm,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt,-vzeroupper" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { norecurse nounwind ssp uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="broadwell" "target-features"="+adx,+aes,+avx,+avx2,+avx512cd,+avx512f,+bmi,+bmi2,+cx16,+f16c,+fma,+fsgsbase,+fxsr,+evex512,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+rdrnd,+rdseed,+rtm,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt,-vzeroupper" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vector_gep.ll b/llvm/test/Transforms/SLPVectorizer/X86/vector_gep.ll
index 02bf77a5e103dd..9e8cdc62c729ac 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/vector_gep.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/vector_gep.ll
@@ -26,5 +26,5 @@ entry:
   unreachable
 }
 
-attributes #0 = { noreturn readonly uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="broadwell" "target-features"="+adx,+aes,+avx,+avx2,+avx512cd,+avx512f,+bmi,+bmi2,+cx16,+f16c,+fma,+fsgsbase,+fxsr,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+prefetchwt1,+rdrnd,+rdseed,+rtm,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { noreturn readonly uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="broadwell" "target-features"="+adx,+aes,+avx,+avx2,+avx512cd,+avx512f,+bmi,+bmi2,+cx16,+f16c,+fma,+fsgsbase,+fxsr,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+rdrnd,+rdseed,+rtm,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt" "unsafe-fp-math"="false" "use-soft-float"="false" }
 
diff --git a/polly/test/ScheduleOptimizer/pattern-matching-based-opts-after-delicm.ll b/polly/test/ScheduleOptimizer/pattern-matching-based-opts-after-delicm.ll
index 66011168fcc137..060140da0babe7 100644
--- a/polly/test/ScheduleOptimizer/pattern-matching-based-opts-after-delicm.ll
+++ b/polly/test/ScheduleOptimizer/pattern-matching-based-opts-after-delicm.ll
@@ -93,7 +93,7 @@ for.end27:                                        ; preds = %for.inc25
   ret void
 }
 
-attributes #0 = { norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="haswell" "target-features"="+aes,+avx,+avx2,+bmi,+bmi2,+cmov,+cx16,+f16c,+fma,+fsgsbase,+fxsr,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+rdrnd,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt,-adx,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vl,-avx512vpopcntdq,-clflushopt,-clwb,-clzero,-fma4,-lwp,-mwaitx,-pku,-prefetchwt1,-prfchw,-rdseed,-rtm,-sgx,-sha,-sse4a,-tbm,-xop,-xsavec,-xsaves" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="haswell" "target-features"="+aes,+avx,+avx2,+bmi,+bmi2,+cmov,+cx16,+f16c,+fma,+fsgsbase,+fxsr,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+rdrnd,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt,-adx,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vl,-avx512vpopcntdq,-clflushopt,-clwb,-clzero,-fma4,-lwp,-mwaitx,-pku,-prfchw,-rdseed,-rtm,-sgx,-sha,-sse4a,-tbm,-xop,-xsavec,-xsaves" "unsafe-fp-math"="false" "use-soft-float"="false" }
 
 !llvm.module.flags = !{!0}
 !llvm.ident = !{!1}



More information about the cfe-commits mailing list