[clang] [llvm] [polly] [X86] Remove knl/knm specific ISAs supports (PR #92883)
Freddy Ye via cfe-commits
cfe-commits at lists.llvm.org
Tue May 21 23:10:48 PDT 2024
https://github.com/FreddyLeaf updated https://github.com/llvm/llvm-project/pull/92883
>From d67274e0fa28d7288515a97327d2f19391d15ed6 Mon Sep 17 00:00:00 2001
From: Freddy Ye <freddy.ye at intel.com>
Date: Tue, 26 Dec 2023 09:51:43 +0800
Subject: [PATCH 1/6] Remove AVX512ER intrinsic supports.
---
clang/include/clang/Basic/BuiltinsX86.def | 12 -
clang/include/clang/Driver/Options.td | 2 -
clang/lib/Basic/Targets/X86.cpp | 7 -
clang/lib/Basic/Targets/X86.h | 1 -
clang/lib/Headers/CMakeLists.txt | 1 -
clang/lib/Headers/avx512erintrin.h | 271 -------------
clang/lib/Headers/cpuid.h | 1 -
clang/lib/Headers/immintrin.h | 4 -
clang/lib/Sema/SemaChecking.cpp | 10 -
clang/test/CodeGen/X86/avx512er-builtins.c | 347 ----------------
clang/test/CodeGen/function-target-features.c | 4 +-
clang/test/CodeGen/target-builtin-noerror.c | 1 -
clang/test/Driver/cl-x86-flags.c | 2 -
clang/test/Driver/x86-target-features.c | 8 +-
clang/test/Frontend/x86-target-cpu.c | 2 -
.../Preprocessor/predefined-arch-macros.c | 4 -
clang/test/Preprocessor/x86_target_features.c | 18 -
llvm/include/llvm/IR/IntrinsicsX86.td | 52 ---
llvm/lib/Target/X86/X86.td | 4 -
llvm/lib/Target/X86/X86InstrAVX512.td | 79 +++-
llvm/lib/Target/X86/X86InstrPredicates.td | 1 -
llvm/lib/Target/X86/X86IntrinsicsInfo.h | 10 -
llvm/lib/TargetParser/Host.cpp | 5 -
llvm/lib/TargetParser/X86TargetParser.cpp | 2 +-
llvm/test/CodeGen/X86/avx512er-intrinsics.ll | 306 --------------
llvm/test/CodeGen/X86/crc32-target-feature.ll | 4 +-
llvm/test/CodeGen/X86/unfoldMemoryOperand.mir | 2 +-
.../LoopStrengthReduce/X86/pr40514.ll | 2 +-
.../Transforms/LoopVectorize/X86/pr23997.ll | 2 +-
.../Transforms/LoopVectorize/X86/pr54634.ll | 2 +-
.../llvm-mca/X86/Generic/resources-avx512er.s | 373 ------------------
.../gn/secondary/clang/lib/Headers/BUILD.gn | 1 -
32 files changed, 77 insertions(+), 1463 deletions(-)
delete mode 100644 clang/lib/Headers/avx512erintrin.h
delete mode 100644 clang/test/CodeGen/X86/avx512er-builtins.c
delete mode 100644 llvm/test/CodeGen/X86/avx512er-intrinsics.ll
delete mode 100644 llvm/test/tools/llvm-mca/X86/Generic/resources-avx512er.s
diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def
index eafcc219c1096..7118ee0206ce1 100644
--- a/clang/include/clang/Basic/BuiltinsX86.def
+++ b/clang/include/clang/Basic/BuiltinsX86.def
@@ -832,23 +832,11 @@ TARGET_BUILTIN(__builtin_ia32_rsqrt14ss_mask, "V4fV4fV4fV4fUc", "ncV:128:", "avx
TARGET_BUILTIN(__builtin_ia32_rsqrt14pd512_mask, "V8dV8dV8dUc", "ncV:512:", "avx512f,evex512")
TARGET_BUILTIN(__builtin_ia32_rsqrt14ps512_mask, "V16fV16fV16fUs", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_rsqrt28sd_round_mask, "V2dV2dV2dV2dUcIi", "ncV:128:", "avx512er")
-TARGET_BUILTIN(__builtin_ia32_rsqrt28ss_round_mask, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512er")
-TARGET_BUILTIN(__builtin_ia32_rsqrt28pd_mask, "V8dV8dV8dUcIi", "ncV:512:", "avx512er,evex512")
-TARGET_BUILTIN(__builtin_ia32_rsqrt28ps_mask, "V16fV16fV16fUsIi", "ncV:512:", "avx512er,evex512")
-
TARGET_BUILTIN(__builtin_ia32_rcp14sd_mask, "V2dV2dV2dV2dUc", "ncV:128:", "avx512f")
TARGET_BUILTIN(__builtin_ia32_rcp14ss_mask, "V4fV4fV4fV4fUc", "ncV:128:", "avx512f")
TARGET_BUILTIN(__builtin_ia32_rcp14pd512_mask, "V8dV8dV8dUc", "ncV:512:", "avx512f,evex512")
TARGET_BUILTIN(__builtin_ia32_rcp14ps512_mask, "V16fV16fV16fUs", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_rcp28sd_round_mask, "V2dV2dV2dV2dUcIi", "ncV:128:", "avx512er")
-TARGET_BUILTIN(__builtin_ia32_rcp28ss_round_mask, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512er")
-TARGET_BUILTIN(__builtin_ia32_rcp28pd_mask, "V8dV8dV8dUcIi", "ncV:512:", "avx512er,evex512")
-TARGET_BUILTIN(__builtin_ia32_rcp28ps_mask, "V16fV16fV16fUsIi", "ncV:512:", "avx512er,evex512")
-TARGET_BUILTIN(__builtin_ia32_exp2pd_mask, "V8dV8dV8dUcIi", "ncV:512:", "avx512er,evex512")
-TARGET_BUILTIN(__builtin_ia32_exp2ps_mask, "V16fV16fV16fUsIi", "ncV:512:", "avx512er,evex512")
-
TARGET_BUILTIN(__builtin_ia32_cvttps2dq512_mask, "V16iV16fV16iUsIi", "ncV:512:", "avx512f,evex512")
TARGET_BUILTIN(__builtin_ia32_cvttps2udq512_mask, "V16iV16fV16iUsIi", "ncV:512:", "avx512f,evex512")
TARGET_BUILTIN(__builtin_ia32_cvttpd2dq512_mask, "V8iV8dV8iUcIi", "ncV:512:", "avx512f,evex512")
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 7bb781667e926..5b681382f2b07 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -6111,8 +6111,6 @@ def mavx512cd : Flag<["-"], "mavx512cd">, Group<m_x86_Features_Group>;
def mno_avx512cd : Flag<["-"], "mno-avx512cd">, Group<m_x86_Features_Group>;
def mavx512dq : Flag<["-"], "mavx512dq">, Group<m_x86_Features_Group>;
def mno_avx512dq : Flag<["-"], "mno-avx512dq">, Group<m_x86_Features_Group>;
-def mavx512er : Flag<["-"], "mavx512er">, Group<m_x86_Features_Group>;
-def mno_avx512er : Flag<["-"], "mno-avx512er">, Group<m_x86_Features_Group>;
def mavx512fp16 : Flag<["-"], "mavx512fp16">, Group<m_x86_Features_Group>;
def mno_avx512fp16 : Flag<["-"], "mno-avx512fp16">, Group<m_x86_Features_Group>;
def mavx512ifma : Flag<["-"], "mavx512ifma">, Group<m_x86_Features_Group>;
diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp
index b823eaf6ce336..a263c26904cd6 100644
--- a/clang/lib/Basic/Targets/X86.cpp
+++ b/clang/lib/Basic/Targets/X86.cpp
@@ -310,9 +310,6 @@ bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
HasAVX512VNNI = true;
} else if (Feature == "+avx512bf16") {
HasAVX512BF16 = true;
- } else if (Feature == "+avx512er") {
- HasAVX512ER = true;
- Diags.Report(diag::warn_knl_knm_isa_support_removed);
} else if (Feature == "+avx512fp16") {
HasAVX512FP16 = true;
HasLegalHalfType = true;
@@ -840,8 +837,6 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts,
Builder.defineMacro("__AVX512VNNI__");
if (HasAVX512BF16)
Builder.defineMacro("__AVX512BF16__");
- if (HasAVX512ER)
- Builder.defineMacro("__AVX512ER__");
if (HasAVX512FP16)
Builder.defineMacro("__AVX512FP16__");
if (HasAVX512PF)
@@ -1084,7 +1079,6 @@ bool X86TargetInfo::isValidFeatureName(StringRef Name) const {
.Case("avx512vpopcntdq", true)
.Case("avx512vnni", true)
.Case("avx512bf16", true)
- .Case("avx512er", true)
.Case("avx512fp16", true)
.Case("avx512pf", true)
.Case("avx512dq", true)
@@ -1201,7 +1195,6 @@ bool X86TargetInfo::hasFeature(StringRef Feature) const {
.Case("avx512vpopcntdq", HasAVX512VPOPCNTDQ)
.Case("avx512vnni", HasAVX512VNNI)
.Case("avx512bf16", HasAVX512BF16)
- .Case("avx512er", HasAVX512ER)
.Case("avx512fp16", HasAVX512FP16)
.Case("avx512pf", HasAVX512PF)
.Case("avx512dq", HasAVX512DQ)
diff --git a/clang/lib/Basic/Targets/X86.h b/clang/lib/Basic/Targets/X86.h
index 6a0a6cb84203d..cfb163d14b8d5 100644
--- a/clang/lib/Basic/Targets/X86.h
+++ b/clang/lib/Basic/Targets/X86.h
@@ -103,7 +103,6 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo {
bool HasAVX512VNNI = false;
bool HasAVX512FP16 = false;
bool HasAVX512BF16 = false;
- bool HasAVX512ER = false;
bool HasAVX512PF = false;
bool HasAVX512DQ = false;
bool HasAVX512BITALG = false;
diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt
index 5f02c71f6ca51..95e0d444d4344 100644
--- a/clang/lib/Headers/CMakeLists.txt
+++ b/clang/lib/Headers/CMakeLists.txt
@@ -153,7 +153,6 @@ set(x86_files
avx512bwintrin.h
avx512cdintrin.h
avx512dqintrin.h
- avx512erintrin.h
avx512fintrin.h
avx512fp16intrin.h
avx512ifmaintrin.h
diff --git a/clang/lib/Headers/avx512erintrin.h b/clang/lib/Headers/avx512erintrin.h
deleted file mode 100644
index 1c5a2d2d208ff..0000000000000
--- a/clang/lib/Headers/avx512erintrin.h
+++ /dev/null
@@ -1,271 +0,0 @@
-/*===---- avx512erintrin.h - AVX512ER intrinsics ---------------------------===
- *
- * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
- * See https://llvm.org/LICENSE.txt for license information.
- * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- *
- *===-----------------------------------------------------------------------===
- */
-#ifndef __IMMINTRIN_H
-#error "Never use <avx512erintrin.h> directly; include <immintrin.h> instead."
-#endif
-
-#ifndef __AVX512ERINTRIN_H
-#define __AVX512ERINTRIN_H
-
-/* exp2a23 */
-#define _mm512_exp2a23_round_pd(A, R) \
- ((__m512d)__builtin_ia32_exp2pd_mask((__v8df)(__m512d)(A), \
- (__v8df)_mm512_setzero_pd(), \
- (__mmask8)-1, (int)(R)))
-
-#define _mm512_mask_exp2a23_round_pd(S, M, A, R) \
- ((__m512d)__builtin_ia32_exp2pd_mask((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(S), (__mmask8)(M), \
- (int)(R)))
-
-#define _mm512_maskz_exp2a23_round_pd(M, A, R) \
- ((__m512d)__builtin_ia32_exp2pd_mask((__v8df)(__m512d)(A), \
- (__v8df)_mm512_setzero_pd(), \
- (__mmask8)(M), (int)(R)))
-
-#define _mm512_exp2a23_pd(A) \
- _mm512_exp2a23_round_pd((A), _MM_FROUND_CUR_DIRECTION)
-
-#define _mm512_mask_exp2a23_pd(S, M, A) \
- _mm512_mask_exp2a23_round_pd((S), (M), (A), _MM_FROUND_CUR_DIRECTION)
-
-#define _mm512_maskz_exp2a23_pd(M, A) \
- _mm512_maskz_exp2a23_round_pd((M), (A), _MM_FROUND_CUR_DIRECTION)
-
-#define _mm512_exp2a23_round_ps(A, R) \
- ((__m512)__builtin_ia32_exp2ps_mask((__v16sf)(__m512)(A), \
- (__v16sf)_mm512_setzero_ps(), \
- (__mmask16)-1, (int)(R)))
-
-#define _mm512_mask_exp2a23_round_ps(S, M, A, R) \
- ((__m512)__builtin_ia32_exp2ps_mask((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(S), (__mmask16)(M), \
- (int)(R)))
-
-#define _mm512_maskz_exp2a23_round_ps(M, A, R) \
- ((__m512)__builtin_ia32_exp2ps_mask((__v16sf)(__m512)(A), \
- (__v16sf)_mm512_setzero_ps(), \
- (__mmask16)(M), (int)(R)))
-
-#define _mm512_exp2a23_ps(A) \
- _mm512_exp2a23_round_ps((A), _MM_FROUND_CUR_DIRECTION)
-
-#define _mm512_mask_exp2a23_ps(S, M, A) \
- _mm512_mask_exp2a23_round_ps((S), (M), (A), _MM_FROUND_CUR_DIRECTION)
-
-#define _mm512_maskz_exp2a23_ps(M, A) \
- _mm512_maskz_exp2a23_round_ps((M), (A), _MM_FROUND_CUR_DIRECTION)
-
-/* rsqrt28 */
-#define _mm512_rsqrt28_round_pd(A, R) \
- ((__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)(__m512d)(A), \
- (__v8df)_mm512_setzero_pd(), \
- (__mmask8)-1, (int)(R)))
-
-#define _mm512_mask_rsqrt28_round_pd(S, M, A, R) \
- ((__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(S), (__mmask8)(M), \
- (int)(R)))
-
-#define _mm512_maskz_rsqrt28_round_pd(M, A, R) \
- ((__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)(__m512d)(A), \
- (__v8df)_mm512_setzero_pd(), \
- (__mmask8)(M), (int)(R)))
-
-#define _mm512_rsqrt28_pd(A) \
- _mm512_rsqrt28_round_pd((A), _MM_FROUND_CUR_DIRECTION)
-
-#define _mm512_mask_rsqrt28_pd(S, M, A) \
- _mm512_mask_rsqrt28_round_pd((S), (M), (A), _MM_FROUND_CUR_DIRECTION)
-
-#define _mm512_maskz_rsqrt28_pd(M, A) \
- _mm512_maskz_rsqrt28_round_pd((M), (A), _MM_FROUND_CUR_DIRECTION)
-
-#define _mm512_rsqrt28_round_ps(A, R) \
- ((__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)(__m512)(A), \
- (__v16sf)_mm512_setzero_ps(), \
- (__mmask16)-1, (int)(R)))
-
-#define _mm512_mask_rsqrt28_round_ps(S, M, A, R) \
- ((__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(S), (__mmask16)(M), \
- (int)(R)))
-
-#define _mm512_maskz_rsqrt28_round_ps(M, A, R) \
- ((__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)(__m512)(A), \
- (__v16sf)_mm512_setzero_ps(), \
- (__mmask16)(M), (int)(R)))
-
-#define _mm512_rsqrt28_ps(A) \
- _mm512_rsqrt28_round_ps((A), _MM_FROUND_CUR_DIRECTION)
-
-#define _mm512_mask_rsqrt28_ps(S, M, A) \
- _mm512_mask_rsqrt28_round_ps((S), (M), A, _MM_FROUND_CUR_DIRECTION)
-
-#define _mm512_maskz_rsqrt28_ps(M, A) \
- _mm512_maskz_rsqrt28_round_ps((M), (A), _MM_FROUND_CUR_DIRECTION)
-
-#define _mm_rsqrt28_round_ss(A, B, R) \
- ((__m128)__builtin_ia32_rsqrt28ss_round_mask((__v4sf)(__m128)(A), \
- (__v4sf)(__m128)(B), \
- (__v4sf)_mm_setzero_ps(), \
- (__mmask8)-1, (int)(R)))
-
-#define _mm_mask_rsqrt28_round_ss(S, M, A, B, R) \
- ((__m128)__builtin_ia32_rsqrt28ss_round_mask((__v4sf)(__m128)(A), \
- (__v4sf)(__m128)(B), \
- (__v4sf)(__m128)(S), \
- (__mmask8)(M), (int)(R)))
-
-#define _mm_maskz_rsqrt28_round_ss(M, A, B, R) \
- ((__m128)__builtin_ia32_rsqrt28ss_round_mask((__v4sf)(__m128)(A), \
- (__v4sf)(__m128)(B), \
- (__v4sf)_mm_setzero_ps(), \
- (__mmask8)(M), (int)(R)))
-
-#define _mm_rsqrt28_ss(A, B) \
- _mm_rsqrt28_round_ss((A), (B), _MM_FROUND_CUR_DIRECTION)
-
-#define _mm_mask_rsqrt28_ss(S, M, A, B) \
- _mm_mask_rsqrt28_round_ss((S), (M), (A), (B), _MM_FROUND_CUR_DIRECTION)
-
-#define _mm_maskz_rsqrt28_ss(M, A, B) \
- _mm_maskz_rsqrt28_round_ss((M), (A), (B), _MM_FROUND_CUR_DIRECTION)
-
-#define _mm_rsqrt28_round_sd(A, B, R) \
- ((__m128d)__builtin_ia32_rsqrt28sd_round_mask((__v2df)(__m128d)(A), \
- (__v2df)(__m128d)(B), \
- (__v2df)_mm_setzero_pd(), \
- (__mmask8)-1, (int)(R)))
-
-#define _mm_mask_rsqrt28_round_sd(S, M, A, B, R) \
- ((__m128d)__builtin_ia32_rsqrt28sd_round_mask((__v2df)(__m128d)(A), \
- (__v2df)(__m128d)(B), \
- (__v2df)(__m128d)(S), \
- (__mmask8)(M), (int)(R)))
-
-#define _mm_maskz_rsqrt28_round_sd(M, A, B, R) \
- ((__m128d)__builtin_ia32_rsqrt28sd_round_mask((__v2df)(__m128d)(A), \
- (__v2df)(__m128d)(B), \
- (__v2df)_mm_setzero_pd(), \
- (__mmask8)(M), (int)(R)))
-
-#define _mm_rsqrt28_sd(A, B) \
- _mm_rsqrt28_round_sd((A), (B), _MM_FROUND_CUR_DIRECTION)
-
-#define _mm_mask_rsqrt28_sd(S, M, A, B) \
- _mm_mask_rsqrt28_round_sd((S), (M), (A), (B), _MM_FROUND_CUR_DIRECTION)
-
-#define _mm_maskz_rsqrt28_sd(M, A, B) \
- _mm_maskz_rsqrt28_round_sd((M), (A), (B), _MM_FROUND_CUR_DIRECTION)
-
-/* rcp28 */
-#define _mm512_rcp28_round_pd(A, R) \
- ((__m512d)__builtin_ia32_rcp28pd_mask((__v8df)(__m512d)(A), \
- (__v8df)_mm512_setzero_pd(), \
- (__mmask8)-1, (int)(R)))
-
-#define _mm512_mask_rcp28_round_pd(S, M, A, R) \
- ((__m512d)__builtin_ia32_rcp28pd_mask((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(S), (__mmask8)(M), \
- (int)(R)))
-
-#define _mm512_maskz_rcp28_round_pd(M, A, R) \
- ((__m512d)__builtin_ia32_rcp28pd_mask((__v8df)(__m512d)(A), \
- (__v8df)_mm512_setzero_pd(), \
- (__mmask8)(M), (int)(R)))
-
-#define _mm512_rcp28_pd(A) \
- _mm512_rcp28_round_pd((A), _MM_FROUND_CUR_DIRECTION)
-
-#define _mm512_mask_rcp28_pd(S, M, A) \
- _mm512_mask_rcp28_round_pd((S), (M), (A), _MM_FROUND_CUR_DIRECTION)
-
-#define _mm512_maskz_rcp28_pd(M, A) \
- _mm512_maskz_rcp28_round_pd((M), (A), _MM_FROUND_CUR_DIRECTION)
-
-#define _mm512_rcp28_round_ps(A, R) \
- ((__m512)__builtin_ia32_rcp28ps_mask((__v16sf)(__m512)(A), \
- (__v16sf)_mm512_setzero_ps(), \
- (__mmask16)-1, (int)(R)))
-
-#define _mm512_mask_rcp28_round_ps(S, M, A, R) \
- ((__m512)__builtin_ia32_rcp28ps_mask((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(S), (__mmask16)(M), \
- (int)(R)))
-
-#define _mm512_maskz_rcp28_round_ps(M, A, R) \
- ((__m512)__builtin_ia32_rcp28ps_mask((__v16sf)(__m512)(A), \
- (__v16sf)_mm512_setzero_ps(), \
- (__mmask16)(M), (int)(R)))
-
-#define _mm512_rcp28_ps(A) \
- _mm512_rcp28_round_ps((A), _MM_FROUND_CUR_DIRECTION)
-
-#define _mm512_mask_rcp28_ps(S, M, A) \
- _mm512_mask_rcp28_round_ps((S), (M), (A), _MM_FROUND_CUR_DIRECTION)
-
-#define _mm512_maskz_rcp28_ps(M, A) \
- _mm512_maskz_rcp28_round_ps((M), (A), _MM_FROUND_CUR_DIRECTION)
-
-#define _mm_rcp28_round_ss(A, B, R) \
- ((__m128)__builtin_ia32_rcp28ss_round_mask((__v4sf)(__m128)(A), \
- (__v4sf)(__m128)(B), \
- (__v4sf)_mm_setzero_ps(), \
- (__mmask8)-1, (int)(R)))
-
-#define _mm_mask_rcp28_round_ss(S, M, A, B, R) \
- ((__m128)__builtin_ia32_rcp28ss_round_mask((__v4sf)(__m128)(A), \
- (__v4sf)(__m128)(B), \
- (__v4sf)(__m128)(S), \
- (__mmask8)(M), (int)(R)))
-
-#define _mm_maskz_rcp28_round_ss(M, A, B, R) \
- ((__m128)__builtin_ia32_rcp28ss_round_mask((__v4sf)(__m128)(A), \
- (__v4sf)(__m128)(B), \
- (__v4sf)_mm_setzero_ps(), \
- (__mmask8)(M), (int)(R)))
-
-#define _mm_rcp28_ss(A, B) \
- _mm_rcp28_round_ss((A), (B), _MM_FROUND_CUR_DIRECTION)
-
-#define _mm_mask_rcp28_ss(S, M, A, B) \
- _mm_mask_rcp28_round_ss((S), (M), (A), (B), _MM_FROUND_CUR_DIRECTION)
-
-#define _mm_maskz_rcp28_ss(M, A, B) \
- _mm_maskz_rcp28_round_ss((M), (A), (B), _MM_FROUND_CUR_DIRECTION)
-
-#define _mm_rcp28_round_sd(A, B, R) \
- ((__m128d)__builtin_ia32_rcp28sd_round_mask((__v2df)(__m128d)(A), \
- (__v2df)(__m128d)(B), \
- (__v2df)_mm_setzero_pd(), \
- (__mmask8)-1, (int)(R)))
-
-#define _mm_mask_rcp28_round_sd(S, M, A, B, R) \
- ((__m128d)__builtin_ia32_rcp28sd_round_mask((__v2df)(__m128d)(A), \
- (__v2df)(__m128d)(B), \
- (__v2df)(__m128d)(S), \
- (__mmask8)(M), (int)(R)))
-
-#define _mm_maskz_rcp28_round_sd(M, A, B, R) \
- ((__m128d)__builtin_ia32_rcp28sd_round_mask((__v2df)(__m128d)(A), \
- (__v2df)(__m128d)(B), \
- (__v2df)_mm_setzero_pd(), \
- (__mmask8)(M), (int)(R)))
-
-#define _mm_rcp28_sd(A, B) \
- _mm_rcp28_round_sd((A), (B), _MM_FROUND_CUR_DIRECTION)
-
-#define _mm_mask_rcp28_sd(S, M, A, B) \
- _mm_mask_rcp28_round_sd((S), (M), (A), (B), _MM_FROUND_CUR_DIRECTION)
-
-#define _mm_maskz_rcp28_sd(M, A, B) \
- _mm_maskz_rcp28_round_sd((M), (A), (B), _MM_FROUND_CUR_DIRECTION)
-
-#endif /* __AVX512ERINTRIN_H */
diff --git a/clang/lib/Headers/cpuid.h b/clang/lib/Headers/cpuid.h
index bb7692efb78ff..3bb7fb0fb8a6b 100644
--- a/clang/lib/Headers/cpuid.h
+++ b/clang/lib/Headers/cpuid.h
@@ -160,7 +160,6 @@
#define bit_CLFLUSHOPT 0x00800000
#define bit_CLWB 0x01000000
#define bit_AVX512PF 0x04000000
-#define bit_AVX512ER 0x08000000
#define bit_AVX512CD 0x10000000
#define bit_SHA 0x20000000
#define bit_AVX512BW 0x40000000
diff --git a/clang/lib/Headers/immintrin.h b/clang/lib/Headers/immintrin.h
index 508696d3725b9..649be82ce8c1e 100644
--- a/clang/lib/Headers/immintrin.h
+++ b/clang/lib/Headers/immintrin.h
@@ -151,10 +151,6 @@
#include <avx512vldqintrin.h>
#endif
-#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512ER__)
-#include <avx512erintrin.h>
-#endif
-
#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512IFMA__)
#include <avx512ifmaintrin.h>
#endif
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index f2dc8e9dd0050..32a8ff9e44d2f 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -6802,15 +6802,9 @@ bool Sema::CheckX86BuiltinRoundingOrSAE(unsigned BuiltinID, CallExpr *TheCall) {
case X86::BI__builtin_ia32_vcvttph2udq512_mask:
case X86::BI__builtin_ia32_vcvttph2qq512_mask:
case X86::BI__builtin_ia32_vcvttph2uqq512_mask:
- case X86::BI__builtin_ia32_exp2pd_mask:
- case X86::BI__builtin_ia32_exp2ps_mask:
case X86::BI__builtin_ia32_getexppd512_mask:
case X86::BI__builtin_ia32_getexpps512_mask:
case X86::BI__builtin_ia32_getexpph512_mask:
- case X86::BI__builtin_ia32_rcp28pd_mask:
- case X86::BI__builtin_ia32_rcp28ps_mask:
- case X86::BI__builtin_ia32_rsqrt28pd_mask:
- case X86::BI__builtin_ia32_rsqrt28ps_mask:
case X86::BI__builtin_ia32_vcomisd:
case X86::BI__builtin_ia32_vcomiss:
case X86::BI__builtin_ia32_vcomish:
@@ -6837,16 +6831,12 @@ bool Sema::CheckX86BuiltinRoundingOrSAE(unsigned BuiltinID, CallExpr *TheCall) {
case X86::BI__builtin_ia32_minsd_round_mask:
case X86::BI__builtin_ia32_minss_round_mask:
case X86::BI__builtin_ia32_minsh_round_mask:
- case X86::BI__builtin_ia32_rcp28sd_round_mask:
- case X86::BI__builtin_ia32_rcp28ss_round_mask:
case X86::BI__builtin_ia32_reducepd512_mask:
case X86::BI__builtin_ia32_reduceps512_mask:
case X86::BI__builtin_ia32_reduceph512_mask:
case X86::BI__builtin_ia32_rndscalepd_mask:
case X86::BI__builtin_ia32_rndscaleps_mask:
case X86::BI__builtin_ia32_rndscaleph_mask:
- case X86::BI__builtin_ia32_rsqrt28sd_round_mask:
- case X86::BI__builtin_ia32_rsqrt28ss_round_mask:
ArgNum = 4;
break;
case X86::BI__builtin_ia32_fixupimmpd512_mask:
diff --git a/clang/test/CodeGen/X86/avx512er-builtins.c b/clang/test/CodeGen/X86/avx512er-builtins.c
deleted file mode 100644
index 11ec6aabec1e3..0000000000000
--- a/clang/test/CodeGen/X86/avx512er-builtins.c
+++ /dev/null
@@ -1,347 +0,0 @@
-// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512f -target-feature +avx512er -emit-llvm -o - -Wall | FileCheck %s
-
-
-#include <immintrin.h>
-
-__m512d test_mm512_rsqrt28_round_pd(__m512d a) {
- // CHECK-LABEL: @test_mm512_rsqrt28_round_pd
- // CHECK: @llvm.x86.avx512.rsqrt28.pd
- return _mm512_rsqrt28_round_pd(a, _MM_FROUND_NO_EXC);
-}
-
-__m512d test_mm512_mask_rsqrt28_round_pd(__m512d s, __mmask8 m, __m512d a) {
- // CHECK-LABEL: @test_mm512_mask_rsqrt28_round_pd
- // CHECK: @llvm.x86.avx512.rsqrt28.pd
- return _mm512_mask_rsqrt28_round_pd(s, m, a, _MM_FROUND_NO_EXC);
-}
-
-__m512d test_mm512_maskz_rsqrt28_round_pd(__mmask8 m, __m512d a) {
- // CHECK-LABEL: @test_mm512_maskz_rsqrt28_round_pd
- // CHECK: @llvm.x86.avx512.rsqrt28.pd
- return _mm512_maskz_rsqrt28_round_pd(m, a, _MM_FROUND_NO_EXC);
-}
-
-__m512d test_mm512_rsqrt28_pd(__m512d a) {
- // CHECK-LABEL: @test_mm512_rsqrt28_pd
- // CHECK: @llvm.x86.avx512.rsqrt28.pd
- return _mm512_rsqrt28_pd(a);
-}
-
-__m512d test_mm512_mask_rsqrt28_pd(__m512d s, __mmask8 m, __m512d a) {
- // CHECK-LABEL: @test_mm512_mask_rsqrt28_pd
- // CHECK: @llvm.x86.avx512.rsqrt28.pd
- return _mm512_mask_rsqrt28_pd(s, m, a);
-}
-
-__m512d test_mm512_maskz_rsqrt28_pd(__mmask8 m, __m512d a) {
- // CHECK-LABEL: @test_mm512_maskz_rsqrt28_pd
- // CHECK: @llvm.x86.avx512.rsqrt28.pd
- return _mm512_maskz_rsqrt28_pd(m, a);
-}
-
-__m512 test_mm512_rsqrt28_round_ps(__m512 a) {
- // CHECK-LABEL: @test_mm512_rsqrt28_round_ps
- // CHECK: @llvm.x86.avx512.rsqrt28.ps
- return _mm512_rsqrt28_round_ps(a, _MM_FROUND_NO_EXC);
-}
-
-__m512 test_mm512_mask_rsqrt28_round_ps(__m512 s, __mmask16 m, __m512 a) {
- // CHECK-LABEL: @test_mm512_mask_rsqrt28_round_ps
- // CHECK: @llvm.x86.avx512.rsqrt28.ps
- return _mm512_mask_rsqrt28_round_ps(s, m, a, _MM_FROUND_NO_EXC);
-}
-
-__m512 test_mm512_maskz_rsqrt28_round_ps(__mmask16 m, __m512 a) {
- // CHECK-LABEL: @test_mm512_maskz_rsqrt28_round_ps
- // CHECK: @llvm.x86.avx512.rsqrt28.ps
- return _mm512_maskz_rsqrt28_round_ps(m, a, _MM_FROUND_NO_EXC);
-}
-
-__m512 test_mm512_rsqrt28_ps(__m512 a) {
- // CHECK-LABEL: @test_mm512_rsqrt28_ps
- // CHECK: @llvm.x86.avx512.rsqrt28.ps
- return _mm512_rsqrt28_ps(a);
-}
-
-__m512 test_mm512_mask_rsqrt28_ps(__m512 s, __mmask16 m, __m512 a) {
- // CHECK-LABEL: @test_mm512_mask_rsqrt28_ps
- // CHECK: @llvm.x86.avx512.rsqrt28.ps
- return _mm512_mask_rsqrt28_ps(s, m, a);
-}
-
-__m512 test_mm512_maskz_rsqrt28_ps(__mmask16 m, __m512 a) {
- // CHECK-LABEL: @test_mm512_maskz_rsqrt28_ps
- // CHECK: @llvm.x86.avx512.rsqrt28.ps
- return _mm512_maskz_rsqrt28_ps(m, a);
-}
-
-__m128 test_mm_rsqrt28_round_ss(__m128 a, __m128 b) {
- // CHECK-LABEL: @test_mm_rsqrt28_round_ss
- // CHECK: @llvm.x86.avx512.rsqrt28.ss
- return _mm_rsqrt28_round_ss(a, b, _MM_FROUND_NO_EXC);
-}
-
-__m128 test_mm_mask_rsqrt28_round_ss(__m128 s, __mmask16 m, __m128 a, __m128 b) {
- // CHECK-LABEL: @test_mm_mask_rsqrt28_round_ss
- // CHECK: @llvm.x86.avx512.rsqrt28.ss
- return _mm_mask_rsqrt28_round_ss(s, m, a, b, _MM_FROUND_NO_EXC);
-}
-
-__m128 test_mm_maskz_rsqrt28_round_ss(__mmask16 m, __m128 a, __m128 b) {
- // CHECK-LABEL: @test_mm_maskz_rsqrt28_round_ss
- // CHECK: @llvm.x86.avx512.rsqrt28.ss
- return _mm_maskz_rsqrt28_round_ss(m, a, b, _MM_FROUND_NO_EXC);
-}
-
-__m128 test_mm_rsqrt28_ss(__m128 a, __m128 b) {
- // CHECK-LABEL: @test_mm_rsqrt28_ss
- // CHECK: @llvm.x86.avx512.rsqrt28.ss
- return _mm_rsqrt28_ss(a, b);
-}
-
-__m128 test_mm_mask_rsqrt28_ss(__m128 s, __mmask16 m, __m128 a, __m128 b) {
- // CHECK-LABEL: @test_mm_mask_rsqrt28_ss
- // CHECK: @llvm.x86.avx512.rsqrt28.ss
- return _mm_mask_rsqrt28_ss(s, m, a, b);
-}
-
-__m128 test_mm_maskz_rsqrt28_ss(__mmask16 m, __m128 a, __m128 b) {
- // CHECK-LABEL: @test_mm_maskz_rsqrt28_ss
- // CHECK: @llvm.x86.avx512.rsqrt28.ss
- return _mm_maskz_rsqrt28_ss(m, a, b);
-}
-
-__m128d test_mm_rsqrt28_round_sd(__m128d a, __m128d b) {
- // CHECK-LABEL: @test_mm_rsqrt28_round_sd
- // CHECK: @llvm.x86.avx512.rsqrt28.sd
- return _mm_rsqrt28_round_sd(a, b, _MM_FROUND_NO_EXC);
-}
-
-__m128d test_mm_mask_rsqrt28_round_sd(__m128d s, __mmask8 m, __m128d a, __m128d b) {
- // CHECK-LABEL: @test_mm_mask_rsqrt28_round_sd
- // CHECK: @llvm.x86.avx512.rsqrt28.sd
- return _mm_mask_rsqrt28_round_sd(s, m, a, b, _MM_FROUND_NO_EXC);
-}
-
-__m128d test_mm_maskz_rsqrt28_round_sd(__mmask8 m, __m128d a, __m128d b) {
- // CHECK-LABEL: @test_mm_maskz_rsqrt28_round_sd
- // CHECK: @llvm.x86.avx512.rsqrt28.sd
- return _mm_maskz_rsqrt28_round_sd(m, a, b, _MM_FROUND_NO_EXC);
-}
-
-__m512d test_mm512_rcp28_round_pd(__m512d a) {
- // CHECK-LABEL: @test_mm512_rcp28_round_pd
- // CHECK: @llvm.x86.avx512.rcp28.pd
- return _mm512_rcp28_round_pd(a, _MM_FROUND_NO_EXC);
-}
-
-__m512d test_mm512_mask_rcp28_round_pd(__m512d s, __mmask8 m, __m512d a) {
- // CHECK-LABEL: @test_mm512_mask_rcp28_round_pd
- // CHECK: @llvm.x86.avx512.rcp28.pd
- return _mm512_mask_rcp28_round_pd(s, m, a, _MM_FROUND_NO_EXC);
-}
-
-__m512d test_mm512_maskz_rcp28_round_pd(__mmask8 m, __m512d a) {
- // CHECK-LABEL: @test_mm512_maskz_rcp28_round_pd
- // CHECK: @llvm.x86.avx512.rcp28.pd
- return _mm512_maskz_rcp28_round_pd(m, a, _MM_FROUND_NO_EXC);
-}
-
-__m512d test_mm512_rcp28_pd(__m512d a) {
- // CHECK-LABEL: @test_mm512_rcp28_pd
- // CHECK: @llvm.x86.avx512.rcp28.pd
- return _mm512_rcp28_pd(a);
-}
-
-__m512d test_mm512_mask_rcp28_pd(__m512d s, __mmask8 m, __m512d a) {
- // CHECK-LABEL: @test_mm512_mask_rcp28_pd
- // CHECK: @llvm.x86.avx512.rcp28.pd
- return _mm512_mask_rcp28_pd(s, m, a);
-}
-
-__m512d test_mm512_maskz_rcp28_pd(__mmask8 m, __m512d a) {
- // CHECK-LABEL: @test_mm512_maskz_rcp28_pd
- // CHECK: @llvm.x86.avx512.rcp28.pd
- return _mm512_maskz_rcp28_pd(m, a);
-}
-
-__m512 test_mm512_rcp28_round_ps(__m512 a) {
- // CHECK-LABEL: @test_mm512_rcp28_round_ps
- // CHECK: @llvm.x86.avx512.rcp28.ps
- return _mm512_rcp28_round_ps(a, _MM_FROUND_NO_EXC);
-}
-
-__m512 test_mm512_mask_rcp28_round_ps(__m512 s, __mmask16 m, __m512 a) {
- // CHECK-LABEL: @test_mm512_mask_rcp28_round_ps
- // CHECK: @llvm.x86.avx512.rcp28.ps
- return _mm512_mask_rcp28_round_ps(s, m, a, _MM_FROUND_NO_EXC);
-}
-
-__m512 test_mm512_maskz_rcp28_round_ps(__mmask16 m, __m512 a) {
- // CHECK-LABEL: @test_mm512_maskz_rcp28_round_ps
- // CHECK: @llvm.x86.avx512.rcp28.ps
- return _mm512_maskz_rcp28_round_ps(m, a, _MM_FROUND_NO_EXC);
-}
-
-__m512 test_mm512_rcp28_ps(__m512 a) {
- // CHECK-LABEL: @test_mm512_rcp28_ps
- // CHECK: @llvm.x86.avx512.rcp28.ps
- return _mm512_rcp28_ps(a);
-}
-
-__m512 test_mm512_mask_rcp28_ps(__m512 s, __mmask16 m, __m512 a) {
- // CHECK-LABEL: @test_mm512_mask_rcp28_ps
- // CHECK: @llvm.x86.avx512.rcp28.ps
- return _mm512_mask_rcp28_ps(s, m, a);
-}
-
-__m512 test_mm512_maskz_rcp28_ps(__mmask16 m, __m512 a) {
- // CHECK-LABEL: @test_mm512_maskz_rcp28_ps
- // CHECK: @llvm.x86.avx512.rcp28.ps
- return _mm512_maskz_rcp28_ps(m, a);
-}
-
-__m128 test_mm_rcp28_round_ss(__m128 a, __m128 b) {
- // CHECK-LABEL: @test_mm_rcp28_round_ss
- // CHECK: @llvm.x86.avx512.rcp28.ss
- return _mm_rcp28_round_ss(a, b, _MM_FROUND_NO_EXC);
-}
-
-__m128 test_mm_mask_rcp28_round_ss(__m128 s, __mmask16 m, __m128 a, __m128 b) {
- // CHECK-LABEL: @test_mm_mask_rcp28_round_ss
- // CHECK: @llvm.x86.avx512.rcp28.ss
- return _mm_mask_rcp28_round_ss(s, m, a, b, _MM_FROUND_NO_EXC);
-}
-
-__m128 test_mm_maskz_rcp28_round_ss(__mmask16 m, __m128 a, __m128 b) {
- // CHECK-LABEL: @test_mm_maskz_rcp28_round_ss
- // CHECK: @llvm.x86.avx512.rcp28.ss
- return _mm_maskz_rcp28_round_ss(m, a, b, _MM_FROUND_NO_EXC);
-}
-
-__m128 test_mm_rcp28_ss(__m128 a, __m128 b) {
- // CHECK-LABEL: @test_mm_rcp28_ss
- // CHECK: @llvm.x86.avx512.rcp28.ss
- return _mm_rcp28_ss(a, b);
-}
-
-__m128 test_mm_mask_rcp28_ss(__m128 s, __mmask16 m, __m128 a, __m128 b) {
- // CHECK-LABEL: @test_mm_mask_rcp28_ss
- // CHECK: @llvm.x86.avx512.rcp28.ss
- return _mm_mask_rcp28_ss(s, m, a, b);
-}
-
-__m128 test_mm_maskz_rcp28_ss(__mmask16 m, __m128 a, __m128 b) {
- // CHECK-LABEL: @test_mm_maskz_rcp28_ss
- // CHECK: @llvm.x86.avx512.rcp28.ss
- return _mm_maskz_rcp28_ss(m, a, b);
-}
-
-__m128d test_mm_rcp28_round_sd(__m128d a, __m128d b) {
- // CHECK-LABEL: @test_mm_rcp28_round_sd
- // CHECK: @llvm.x86.avx512.rcp28.sd
- return _mm_rcp28_round_sd(a, b, _MM_FROUND_NO_EXC);
-}
-
-__m128d test_mm_mask_rcp28_round_sd(__m128d s, __mmask8 m, __m128d a, __m128d b) {
- // CHECK-LABEL: @test_mm_mask_rcp28_round_sd
- // CHECK: @llvm.x86.avx512.rcp28.sd
- return _mm_mask_rcp28_round_sd(s, m, a, b, _MM_FROUND_NO_EXC);
-}
-
-__m128d test_mm_maskz_rcp28_round_sd(__mmask8 m, __m128d a, __m128d b) {
- // CHECK-LABEL: @test_mm_maskz_rcp28_round_sd
- // CHECK: @llvm.x86.avx512.rcp28.sd
- return _mm_maskz_rcp28_round_sd(m, a, b, _MM_FROUND_NO_EXC);
-}
-
-__m128d test_mm_rcp28_sd(__m128d a, __m128d b) {
- // CHECK-LABEL: @test_mm_rcp28_sd
- // CHECK: @llvm.x86.avx512.rcp28.sd
- return _mm_rcp28_sd(a, b);
-}
-
-__m128d test_mm_mask_rcp28_sd(__m128d s, __mmask8 m, __m128d a, __m128d b) {
- // CHECK-LABEL: @test_mm_mask_rcp28_sd
- // CHECK: @llvm.x86.avx512.rcp28.sd
- return _mm_mask_rcp28_sd(s, m, a, b);
-}
-
-__m128d test_mm_maskz_rcp28_sd(__mmask8 m, __m128d a, __m128d b) {
- // CHECK-LABEL: @test_mm_maskz_rcp28_sd
- // CHECK: @llvm.x86.avx512.rcp28.sd
- return _mm_maskz_rcp28_sd(m, a, b);
-}
-
-__m512d test_mm512_exp2a23_round_pd(__m512d a) {
- // CHECK-LABEL: @test_mm512_exp2a23_round_pd
- // CHECK: @llvm.x86.avx512.exp2.pd
- return _mm512_exp2a23_round_pd(a, _MM_FROUND_NO_EXC);
-}
-
-__m512d test_mm512_mask_exp2a23_round_pd(__m512d s, __mmask8 m, __m512d a) {
- // CHECK-LABEL: @test_mm512_mask_exp2a23_round_pd
- // CHECK: @llvm.x86.avx512.exp2.pd
- return _mm512_mask_exp2a23_round_pd(s, m, a, _MM_FROUND_NO_EXC);
-}
-
-__m512d test_mm512_maskz_exp2a23_round_pd(__mmask8 m, __m512d a) {
- // CHECK-LABEL: @test_mm512_maskz_exp2a23_round_pd
- // CHECK: @llvm.x86.avx512.exp2.pd
- return _mm512_maskz_exp2a23_round_pd(m, a, _MM_FROUND_NO_EXC);
-}
-
-__m512d test_mm512_exp2a23_pd(__m512d a) {
- // CHECK-LABEL: @test_mm512_exp2a23_pd
- // CHECK: @llvm.x86.avx512.exp2.pd
- return _mm512_exp2a23_pd(a);
-}
-
-__m512d test_mm512_mask_exp2a23_pd(__m512d s, __mmask8 m, __m512d a) {
- // CHECK-LABEL: @test_mm512_mask_exp2a23_pd
- // CHECK: @llvm.x86.avx512.exp2.pd
- return _mm512_mask_exp2a23_pd(s, m, a);
-}
-
-__m512d test_mm512_maskz_exp2a23_pd(__mmask8 m, __m512d a) {
- // CHECK-LABEL: @test_mm512_maskz_exp2a23_pd
- // CHECK: @llvm.x86.avx512.exp2.pd
- return _mm512_maskz_exp2a23_pd(m, a);
-}
-
-__m512 test_mm512_exp2a23_round_ps(__m512 a) {
- // CHECK-LABEL: @test_mm512_exp2a23_round_ps
- // CHECK: @llvm.x86.avx512.exp2.ps
- return _mm512_exp2a23_round_ps(a, _MM_FROUND_NO_EXC);
-}
-
-__m512 test_mm512_mask_exp2a23_round_ps(__m512 s, __mmask16 m, __m512 a) {
- // CHECK-LABEL: @test_mm512_mask_exp2a23_round_ps
- // CHECK: @llvm.x86.avx512.exp2.ps
- return _mm512_mask_exp2a23_round_ps(s, m, a, _MM_FROUND_NO_EXC);
-}
-
-__m512 test_mm512_maskz_exp2a23_round_ps(__mmask16 m, __m512 a) {
- // CHECK-LABEL: @test_mm512_maskz_exp2a23_round_ps
- // CHECK: @llvm.x86.avx512.exp2.ps
- return _mm512_maskz_exp2a23_round_ps(m, a, _MM_FROUND_NO_EXC);
-}
-
-__m512 test_mm512_exp2a23_ps(__m512 a) {
- // CHECK-LABEL: @test_mm512_exp2a23_ps
- // CHECK: @llvm.x86.avx512.exp2.ps
- return _mm512_exp2a23_ps(a);
-}
-
-__m512 test_mm512_mask_exp2a23_ps(__m512 s, __mmask16 m, __m512 a) {
- // CHECK-LABEL: @test_mm512_mask_exp2a23_ps
- // CHECK: @llvm.x86.avx512.exp2.ps
- return _mm512_mask_exp2a23_ps(s, m, a);
-}
-
-__m512 test_mm512_maskz_exp2a23_ps(__mmask16 m, __m512 a) {
- // CHECK-LABEL: @test_mm512_maskz_exp2a23_ps
- // CHECK: @llvm.x86.avx512.exp2.ps
- return _mm512_maskz_exp2a23_ps(m, a);
-}
-
diff --git a/clang/test/CodeGen/function-target-features.c b/clang/test/CodeGen/function-target-features.c
index 0d8bfc7e4e44c..d6a73ff8224b6 100644
--- a/clang/test/CodeGen/function-target-features.c
+++ b/clang/test/CodeGen/function-target-features.c
@@ -4,7 +4,7 @@
// RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s -target-feature +avx | FileCheck %s -check-prefix=AVX-FEATURE
// RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s -target-feature +avx | FileCheck %s -check-prefix=AVX-NO-CPU
-// RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s -target-feature +avx512f -target-feature +avx512er | FileCheck %s -check-prefix=TWO-AVX
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s -target-feature +avx512f -target-feature +avx512bw | FileCheck %s -check-prefix=TWO-AVX
// RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s -target-cpu corei7 | FileCheck %s -check-prefix=CORE-CPU
// RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s -target-cpu corei7 -target-feature +avx | FileCheck %s -check-prefix=CORE-CPU-AND-FEATURES
// RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s -target-cpu x86-64 | FileCheck %s -check-prefix=X86-64-CPU
@@ -17,7 +17,7 @@ void foo(void) {}
// AVX-FEATURE: "target-features"{{.*}}+avx
// AVX-NO-CPU-NOT: target-cpu
-// TWO-AVX: "target-features"={{.*}}+avx512er{{.*}}+avx512f
+// TWO-AVX: "target-features"={{.*}}+avx512bw{{.*}}+avx512f
// CORE-CPU: "target-cpu"="corei7"
// CORE-CPU-AND-FEATURES: "target-cpu"="corei7" "target-features"={{.*}}+avx
// X86-64-CPU: "target-cpu"="x86-64"
diff --git a/clang/test/CodeGen/target-builtin-noerror.c b/clang/test/CodeGen/target-builtin-noerror.c
index b438e50848a4b..79085d3c0c722 100644
--- a/clang/test/CodeGen/target-builtin-noerror.c
+++ b/clang/test/CodeGen/target-builtin-noerror.c
@@ -68,7 +68,6 @@ void verifyfeaturestrings(void) {
(void)__builtin_cpu_supports("avx512bw");
(void)__builtin_cpu_supports("avx512dq");
(void)__builtin_cpu_supports("avx512cd");
- (void)__builtin_cpu_supports("avx512er");
(void)__builtin_cpu_supports("avx512pf");
(void)__builtin_cpu_supports("avx512vbmi");
(void)__builtin_cpu_supports("avx512ifma");
diff --git a/clang/test/Driver/cl-x86-flags.c b/clang/test/Driver/cl-x86-flags.c
index 716b02f02a15e..9a3a65faaba9a 100644
--- a/clang/test/Driver/cl-x86-flags.c
+++ b/clang/test/Driver/cl-x86-flags.c
@@ -72,7 +72,6 @@
// RUN: %clang_cl -m32 -arch:AVX512F --target=i386-pc-windows /c /Fo%t.obj -Xclang -verify=KNL1 -DTEST_32_ARCH_AVX512F -- %s
// KNL1-warning@*:* {{KNL, KNM related Intel Xeon Phi CPU's specific ISA's supports will be removed in LLVM 19.}}
// KNL1-warning@*:* {{KNL, KNM related Intel Xeon Phi CPU's specific ISA's supports will be removed in LLVM 19.}}
-// KNL1-warning@*:* {{KNL, KNM related Intel Xeon Phi CPU's specific ISA's supports will be removed in LLVM 19.}}
#if defined(TEST_32_ARCH_AVX512F)
#if _M_IX86_FP != 2 || !__AVX__ || !__AVX2__ || !__AVX512F__ || __AVX512BW__
#error fail
@@ -115,7 +114,6 @@
// RUN: %clang_cl -m64 -arch:AVX512F --target=i386-pc-windows /c /Fo%t.obj -Xclang -verify=KNL2 -DTEST_64_ARCH_AVX512F -- %s
// KNL2-warning@*:* {{KNL, KNM related Intel Xeon Phi CPU's specific ISA's supports will be removed in LLVM 19.}}
// KNL2-warning@*:* {{KNL, KNM related Intel Xeon Phi CPU's specific ISA's supports will be removed in LLVM 19.}}
-// KNL2-warning@*:* {{KNL, KNM related Intel Xeon Phi CPU's specific ISA's supports will be removed in LLVM 19.}}
#if defined(TEST_64_ARCH_AVX512F)
#if _M_IX86_FP || !__AVX__ || !__AVX2__ || !__AVX512F__ || __AVX512BW__
#error fail
diff --git a/clang/test/Driver/x86-target-features.c b/clang/test/Driver/x86-target-features.c
index 25f8f66bc3213..0afe8f6da45bb 100644
--- a/clang/test/Driver/x86-target-features.c
+++ b/clang/test/Driver/x86-target-features.c
@@ -21,10 +21,10 @@
// SSE4-AES: "-target-feature" "+sse4.2" "-target-feature" "+aes"
// NO-SSE4-AES: "-target-feature" "-sse4.1" "-target-feature" "-aes"
-// RUN: %clang --target=i386 -march=i386 -mavx -mavx2 -mavx512f -mavx512cd -mavx512er -mavx512pf -mavx512dq -mavx512bw -mavx512vl -mavx512vbmi -mavx512vbmi2 -mavx512ifma %s -### 2>&1 | FileCheck -check-prefix=AVX %s
-// RUN: %clang --target=i386 -march=i386 -mno-avx -mno-avx2 -mno-avx512f -mno-avx512cd -mno-avx512er -mno-avx512pf -mno-avx512dq -mno-avx512bw -mno-avx512vl -mno-avx512vbmi -mno-avx512vbmi2 -mno-avx512ifma %s -### 2>&1 | FileCheck -check-prefix=NO-AVX %s
-// AVX: "-target-feature" "+avx" "-target-feature" "+avx2" "-target-feature" "+avx512f" "-target-feature" "+avx512cd" "-target-feature" "+avx512er" "-target-feature" "+avx512pf" "-target-feature" "+avx512dq" "-target-feature" "+avx512bw" "-target-feature" "+avx512vl" "-target-feature" "+avx512vbmi" "-target-feature" "+avx512vbmi2" "-target-feature" "+avx512ifma"
-// NO-AVX: "-target-feature" "-avx" "-target-feature" "-avx2" "-target-feature" "-avx512f" "-target-feature" "-avx512cd" "-target-feature" "-avx512er" "-target-feature" "-avx512pf" "-target-feature" "-avx512dq" "-target-feature" "-avx512bw" "-target-feature" "-avx512vl" "-target-feature" "-avx512vbmi" "-target-feature" "-avx512vbmi2" "-target-feature" "-avx512ifma"
+// RUN: %clang --target=i386 -march=i386 -mavx -mavx2 -mavx512f -mavx512cd -mavx512pf -mavx512dq -mavx512bw -mavx512vl -mavx512vbmi -mavx512vbmi2 -mavx512ifma %s -### 2>&1 | FileCheck -check-prefix=AVX %s
+// RUN: %clang --target=i386 -march=i386 -mno-avx -mno-avx2 -mno-avx512f -mno-avx512cd -mno-avx512pf -mno-avx512dq -mno-avx512bw -mno-avx512vl -mno-avx512vbmi -mno-avx512vbmi2 -mno-avx512ifma %s -### 2>&1 | FileCheck -check-prefix=NO-AVX %s
+// AVX: "-target-feature" "+avx" "-target-feature" "+avx2" "-target-feature" "+avx512f" "-target-feature" "+avx512cd" "-target-feature" "+avx512pf" "-target-feature" "+avx512dq" "-target-feature" "+avx512bw" "-target-feature" "+avx512vl" "-target-feature" "+avx512vbmi" "-target-feature" "+avx512vbmi2" "-target-feature" "+avx512ifma"
+// NO-AVX: "-target-feature" "-avx" "-target-feature" "-avx2" "-target-feature" "-avx512f" "-target-feature" "-avx512cd" "-target-feature" "-avx512pf" "-target-feature" "-avx512dq" "-target-feature" "-avx512bw" "-target-feature" "-avx512vl" "-target-feature" "-avx512vbmi" "-target-feature" "-avx512vbmi2" "-target-feature" "-avx512ifma"
// RUN: %clang --target=i386 -march=i386 -mpclmul -mrdrnd -mfsgsbase -mbmi -mbmi2 %s -### 2>&1 | FileCheck -check-prefix=BMI %s
// RUN: %clang --target=i386 -march=i386 -mno-pclmul -mno-rdrnd -mno-fsgsbase -mno-bmi -mno-bmi2 %s -### 2>&1 | FileCheck -check-prefix=NO-BMI %s
diff --git a/clang/test/Frontend/x86-target-cpu.c b/clang/test/Frontend/x86-target-cpu.c
index 6b99b2c8574ae..8c701cc364390 100644
--- a/clang/test/Frontend/x86-target-cpu.c
+++ b/clang/test/Frontend/x86-target-cpu.c
@@ -18,11 +18,9 @@
// RUN: %clang_cc1 -triple x86_64-unknown-unknown -target-cpu knl -verify=knl %s
// knl-warning@*:* {{KNL, KNM related Intel Xeon Phi CPU's specific ISA's supports will be removed in LLVM 19.}}
// knl-warning@*:* {{KNL, KNM related Intel Xeon Phi CPU's specific ISA's supports will be removed in LLVM 19.}}
-// knl-warning@*:* {{KNL, KNM related Intel Xeon Phi CPU's specific ISA's supports will be removed in LLVM 19.}}
// RUN: %clang_cc1 -triple x86_64-unknown-unknown -target-cpu knm -verify=knm %s
// knm-warning@*:* {{KNL, KNM related Intel Xeon Phi CPU's specific ISA's supports will be removed in LLVM 19.}}
// knm-warning@*:* {{KNL, KNM related Intel Xeon Phi CPU's specific ISA's supports will be removed in LLVM 19.}}
-// knm-warning@*:* {{KNL, KNM related Intel Xeon Phi CPU's specific ISA's supports will be removed in LLVM 19.}}
// RUN: %clang_cc1 -triple x86_64-unknown-unknown -target-cpu bonnell -verify %s
// RUN: %clang_cc1 -triple x86_64-unknown-unknown -target-cpu silvermont -verify %s
// RUN: %clang_cc1 -triple x86_64-unknown-unknown -target-cpu k8 -verify %s
diff --git a/clang/test/Preprocessor/predefined-arch-macros.c b/clang/test/Preprocessor/predefined-arch-macros.c
index ca51f2fc22c51..e14101f4d9961 100644
--- a/clang/test/Preprocessor/predefined-arch-macros.c
+++ b/clang/test/Preprocessor/predefined-arch-macros.c
@@ -793,7 +793,6 @@
// CHECK_KNL_M32: #define __AES__ 1
// CHECK_KNL_M32: #define __AVX2__ 1
// CHECK_KNL_M32: #define __AVX512CD__ 1
-// CHECK_KNL_M32: #define __AVX512ER__ 1
// CHECK_KNL_M32: #define __AVX512F__ 1
// CHECK_KNL_M32: #define __AVX512PF__ 1
// CHECK_KNL_M32: #define __AVX__ 1
@@ -832,7 +831,6 @@
// CHECK_KNL_M64: #define __AES__ 1
// CHECK_KNL_M64: #define __AVX2__ 1
// CHECK_KNL_M64: #define __AVX512CD__ 1
-// CHECK_KNL_M64: #define __AVX512ER__ 1
// CHECK_KNL_M64: #define __AVX512F__ 1
// CHECK_KNL_M64: #define __AVX512PF__ 1
// CHECK_KNL_M64: #define __AVX__ 1
@@ -874,7 +872,6 @@
// CHECK_KNM_M32: #define __AES__ 1
// CHECK_KNM_M32: #define __AVX2__ 1
// CHECK_KNM_M32: #define __AVX512CD__ 1
-// CHECK_KNM_M32: #define __AVX512ER__ 1
// CHECK_KNM_M32: #define __AVX512F__ 1
// CHECK_KNM_M32: #define __AVX512PF__ 1
// CHECK_KNM_M32: #define __AVX512VPOPCNTDQ__ 1
@@ -911,7 +908,6 @@
// CHECK_KNM_M64: #define __AES__ 1
// CHECK_KNM_M64: #define __AVX2__ 1
// CHECK_KNM_M64: #define __AVX512CD__ 1
-// CHECK_KNM_M64: #define __AVX512ER__ 1
// CHECK_KNM_M64: #define __AVX512F__ 1
// CHECK_KNM_M64: #define __AVX512PF__ 1
// CHECK_KNM_M64: #define __AVX512VPOPCNTDQ__ 1
diff --git a/clang/test/Preprocessor/x86_target_features.c b/clang/test/Preprocessor/x86_target_features.c
index 57104c9e7a500..cd8cf85622fa9 100644
--- a/clang/test/Preprocessor/x86_target_features.c
+++ b/clang/test/Preprocessor/x86_target_features.c
@@ -90,22 +90,6 @@
// AVX512CD: #define __SSE__ 1
// AVX512CD: #define __SSSE3__ 1
-// RUN: %clang -target i386-unknown-unknown -march=atom -mavx512er -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=AVX512ER %s
-
-// AVX512ER: #define __AVX2__ 1
-// AVX512ER: #define __AVX512ER__ 1
-// AVX512ER: #define __AVX512F__ 1
-// AVX512ER: #define __AVX__ 1
-// AVX512ER: #define __EVEX512__ 1
-// AVX512ER: #define __SSE2_MATH__ 1
-// AVX512ER: #define __SSE2__ 1
-// AVX512ER: #define __SSE3__ 1
-// AVX512ER: #define __SSE4_1__ 1
-// AVX512ER: #define __SSE4_2__ 1
-// AVX512ER: #define __SSE_MATH__ 1
-// AVX512ER: #define __SSE__ 1
-// AVX512ER: #define __SSSE3__ 1
-
// RUN: %clang -target i386-unknown-unknown -march=atom -mavx512pf -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=AVX512PF %s
// AVX512PF: #define __AVX2__ 1
@@ -640,14 +624,12 @@
// RUN: %clang -target i386-unknown-unknown -march=atom -mavx512f -mno-avx512f -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=NOEVEX512 %s
// RUN: %clang -target i386-unknown-unknown -march=atom -mavx512cd -mno-avx512f -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=NOEVEX512 %s
-// RUN: %clang -target i386-unknown-unknown -march=atom -mavx512er -mno-avx512f -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=NOEVEX512 %s
// NOEVEX512-NOT: #define __AVX512F__ 1
// NOEVEX512-NOT: #define __EVEX256__ 1
// NOEVEX512-NOT: #define __EVEX512__ 1
// RUN: %clang -target i386-unknown-unknown -march=atom -mavx512f -mno-evex512 -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=AVX512NOEVEX512 %s
// RUN: %clang -target i386-unknown-unknown -march=atom -mavx512cd -mno-evex512 -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=AVX512NOEVEX512 %s
-// RUN: %clang -target i386-unknown-unknown -march=atom -mavx512er -mno-evex512 -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=AVX512NOEVEX512 %s
// AVX512NOEVEX512: #define __AVX512F__ 1
// AVX512NOEVEX512-NOT: #define __EVEX256__ 1
// AVX512NOEVEX512-NOT: #define __EVEX512__ 1
diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td
index fdc2b0fb7f80f..0fda7e66c06a7 100644
--- a/llvm/include/llvm/IR/IntrinsicsX86.td
+++ b/llvm/include/llvm/IR/IntrinsicsX86.td
@@ -3843,58 +3843,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
DefaultAttrsIntrinsic<[llvm_v16f32_ty],
[llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty],
[IntrNoMem]>;
-
- def int_x86_avx512_rcp28_ps : ClangBuiltin<"__builtin_ia32_rcp28ps_mask">,
- DefaultAttrsIntrinsic<[llvm_v16f32_ty],
- [llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty,
- llvm_i32_ty],
- [IntrNoMem, ImmArg<ArgIndex<3>>]>;
- def int_x86_avx512_rcp28_pd : ClangBuiltin<"__builtin_ia32_rcp28pd_mask">,
- DefaultAttrsIntrinsic<[llvm_v8f64_ty],
- [llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty,
- llvm_i32_ty],
- [IntrNoMem, ImmArg<ArgIndex<3>>]>;
- def int_x86_avx512_exp2_ps : ClangBuiltin<"__builtin_ia32_exp2ps_mask">,
- DefaultAttrsIntrinsic<[llvm_v16f32_ty],
- [llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty,
- llvm_i32_ty],
- [IntrNoMem, ImmArg<ArgIndex<3>>]>;
- def int_x86_avx512_exp2_pd : ClangBuiltin<"__builtin_ia32_exp2pd_mask">,
- DefaultAttrsIntrinsic<[llvm_v8f64_ty],
- [llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty,
- llvm_i32_ty],
- [IntrNoMem, ImmArg<ArgIndex<3>>]>;
-
- def int_x86_avx512_rcp28_ss : ClangBuiltin<"__builtin_ia32_rcp28ss_round_mask">,
- DefaultAttrsIntrinsic<[llvm_v4f32_ty],
- [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty,
- llvm_i8_ty, llvm_i32_ty],
- [IntrNoMem, ImmArg<ArgIndex<4>>]>;
- def int_x86_avx512_rcp28_sd : ClangBuiltin<"__builtin_ia32_rcp28sd_round_mask">,
- DefaultAttrsIntrinsic<[llvm_v2f64_ty],
- [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty,
- llvm_i8_ty, llvm_i32_ty],
- [IntrNoMem, ImmArg<ArgIndex<4>>]>;
- def int_x86_avx512_rsqrt28_ps : ClangBuiltin<"__builtin_ia32_rsqrt28ps_mask">,
- DefaultAttrsIntrinsic<[llvm_v16f32_ty],
- [llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty,
- llvm_i32_ty],
- [IntrNoMem, ImmArg<ArgIndex<3>>]>;
- def int_x86_avx512_rsqrt28_pd : ClangBuiltin<"__builtin_ia32_rsqrt28pd_mask">,
- DefaultAttrsIntrinsic<[llvm_v8f64_ty],
- [llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty,
- llvm_i32_ty],
- [IntrNoMem, ImmArg<ArgIndex<3>>]>;
- def int_x86_avx512_rsqrt28_ss : ClangBuiltin<"__builtin_ia32_rsqrt28ss_round_mask">,
- DefaultAttrsIntrinsic<[llvm_v4f32_ty],
- [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty,
- llvm_i8_ty, llvm_i32_ty],
- [IntrNoMem, ImmArg<ArgIndex<4>>]>;
- def int_x86_avx512_rsqrt28_sd : ClangBuiltin<"__builtin_ia32_rsqrt28sd_round_mask">,
- DefaultAttrsIntrinsic<[llvm_v2f64_ty],
- [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty,
- llvm_i8_ty, llvm_i32_ty],
- [IntrNoMem, ImmArg<ArgIndex<4>>]>;
def int_x86_avx512_psad_bw_512 : ClangBuiltin<"__builtin_ia32_psadbw512">,
DefaultAttrsIntrinsic<[llvm_v8i64_ty], [llvm_v64i8_ty, llvm_v64i8_ty],
[IntrNoMem, Commutative]>;
diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td
index 54642ecde18c0..686237ab6c6f8 100644
--- a/llvm/lib/Target/X86/X86.td
+++ b/llvm/lib/Target/X86/X86.td
@@ -124,9 +124,6 @@ def FeatureEVEX512 : SubtargetFeature<"evex512", "HasEVEX512", "true",
def FeatureAVX512 : SubtargetFeature<"avx512f", "X86SSELevel", "AVX512",
"Enable AVX-512 instructions",
[FeatureAVX2, FeatureFMA, FeatureF16C]>;
-def FeatureERI : SubtargetFeature<"avx512er", "HasERI", "true",
- "Enable AVX-512 Exponential and Reciprocal Instructions",
- [FeatureAVX512]>;
def FeatureCDI : SubtargetFeature<"avx512cd", "HasCDI", "true",
"Enable AVX-512 Conflict Detection Instructions",
[FeatureAVX512]>;
@@ -1312,7 +1309,6 @@ def ProcessorFeatures {
FeatureFSGSBase,
FeatureAVX512,
FeatureEVEX512,
- FeatureERI,
FeatureCDI,
FeaturePFI,
FeaturePREFETCHWT1,
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 0723328d40e3e..c77146b37df2e 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -9265,6 +9265,33 @@ multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
}
}
+multiclass avx512_fp28_s_ass<bits<8> opc, string OpcodeStr, X86VectorVTInfo _> {
+ let ExeDomain = _.ExeDomain, hasNoSchedulingInfo = 1 in {
+ defm r : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
+ "$src2, $src1", "$src1, $src2",
+ (null_frag)>, Sched<[WriteMove]>;
+ defm rb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.RC:$src2), OpcodeStr,
+ "{sae}, $src2, $src1", "$src1, $src2, {sae}",
+ (null_frag)>, Sched<[WriteMove]>, EVEX_B;
+ defm m : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
+ "$src2, $src1", "$src1, $src2",
+ (null_frag)>, Sched<[WriteMove]>;
+ }
+}
+
+multiclass avx512_eri_s_ass<bits<8> opc, string OpcodeStr> {
+ defm SSZ : avx512_fp28_s_ass<opc, OpcodeStr#"ss", f32x_info>,
+ EVEX_CD8<32, CD8VT1>, VEX_LIG, T8, PD, EVEX, VVVV;
+ defm SDZ : avx512_fp28_s_ass<opc, OpcodeStr#"sd", f64x_info>,
+ EVEX_CD8<64, CD8VT1>, VEX_LIG, REX_W, T8, PD, EVEX, VVVV;
+}
+
+defm VRCP28 : avx512_eri_s_ass<0xCB, "vrcp28">;
+defm VRSQRT28 : avx512_eri_s_ass<0xCD, "vrsqrt28">;
+
multiclass avx512_eri_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
SDNode OpNodeSAE, X86FoldableSchedWrite sched> {
defm SSZ : avx512_fp28_s<opc, OpcodeStr#"ss", f32x_info, OpNode, OpNodeSAE,
@@ -9280,13 +9307,6 @@ multiclass avx512_vgetexpsh<bits<8> opc, string OpcodeStr, SDNode OpNode,
EVEX_CD8<16, CD8VT1>, T_MAP6, PD, EVEX, VVVV;
}
-let Predicates = [HasERI] in {
- defm VRCP28 : avx512_eri_s<0xCB, "vrcp28", X86rcp28s, X86rcp28SAEs,
- SchedWriteFRcp.Scl>;
- defm VRSQRT28 : avx512_eri_s<0xCD, "vrsqrt28", X86rsqrt28s, X86rsqrt28SAEs,
- SchedWriteFRsqrt.Scl>;
-}
-
defm VGETEXP : avx512_eri_s<0x43, "vgetexp", X86fgetexps, X86fgetexpSAEs,
SchedWriteFRnd.Scl>,
avx512_vgetexpsh<0x43, "vgetexp", X86fgetexps, X86fgetexpSAEs,
@@ -9325,6 +9345,43 @@ multiclass avx512_fp28_p_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
EVEX_B, Sched<[sched]>;
}
+multiclass avx512_fp28_p_ass<bits<8> opc, string OpcodeStr, X86VectorVTInfo _> {
+ let ExeDomain = _.ExeDomain, hasNoSchedulingInfo = 1 in {
+ defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src), OpcodeStr, "$src", "$src",
+ (null_frag)>, Sched<[WriteMove]>;
+
+ defm m : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.MemOp:$src), OpcodeStr, "$src", "$src",
+ (null_frag)>, Sched<[WriteMove]>;
+
+ defm mb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.ScalarMemOp:$src), OpcodeStr,
+ "${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr,
+ (null_frag)>, Sched<[WriteMove]>, EVEX_B;
+ }
+}
+multiclass avx512_fp28_p_sae_ass<bits<8> opc, string OpcodeStr, X86VectorVTInfo _> {
+ let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
+ defm rb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
+ (ins _.RC:$src), OpcodeStr,
+ "{sae}, $src", "$src, {sae}",
+ (null_frag)>, Sched<[WriteMove]>, EVEX_B;
+}
+
+multiclass avx512_eri_ass<bits<8> opc, string OpcodeStr> {
+ defm PSZ : avx512_fp28_p_ass<opc, OpcodeStr#"ps", v16f32_info>,
+ avx512_fp28_p_sae_ass<opc, OpcodeStr#"ps", v16f32_info>,
+ T8, PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
+ defm PDZ : avx512_fp28_p_ass<opc, OpcodeStr#"pd", v8f64_info>,
+ avx512_fp28_p_sae_ass<opc, OpcodeStr#"pd", v8f64_info>,
+ T8, PD, EVEX_V512, REX_W, EVEX_CD8<64, CD8VF>;
+}
+
+defm VRSQRT28 : avx512_eri_ass<0xCC, "vrsqrt28">, EVEX;
+defm VRCP28 : avx512_eri_ass<0xCA, "vrcp28">, EVEX;
+defm VEXP2 : avx512_eri_ass<0xC8, "vexp2">, EVEX;
+
multiclass avx512_eri<bits<8> opc, string OpcodeStr, SDNode OpNode,
SDNode OpNodeSAE, X86SchedWriteWidths sched> {
defm PSZ : avx512_fp28_p<opc, OpcodeStr#"ps", v16f32_info, OpNode, sched.ZMM>,
@@ -9367,14 +9424,6 @@ multiclass avx512_vgetexp_fp16<bits<8> opc, string OpcodeStr, SDNode OpNode,
EVEX_V256, T_MAP6, PD, EVEX_CD8<16, CD8VF>;
}
}
-let Predicates = [HasERI] in {
- defm VRSQRT28 : avx512_eri<0xCC, "vrsqrt28", X86rsqrt28, X86rsqrt28SAE,
- SchedWriteFRsqrt>, EVEX;
- defm VRCP28 : avx512_eri<0xCA, "vrcp28", X86rcp28, X86rcp28SAE,
- SchedWriteFRcp>, EVEX;
- defm VEXP2 : avx512_eri<0xC8, "vexp2", X86exp2, X86exp2SAE,
- SchedWriteFAdd>, EVEX;
-}
defm VGETEXP : avx512_eri<0x42, "vgetexp", X86fgetexp, X86fgetexpSAE,
SchedWriteFRnd>,
avx512_vgetexp_fp16<0x42, "vgetexp", X86fgetexp, X86fgetexpSAE,
diff --git a/llvm/lib/Target/X86/X86InstrPredicates.td b/llvm/lib/Target/X86/X86InstrPredicates.td
index 9f2709d6b1a20..ef6a2d7eff638 100644
--- a/llvm/lib/Target/X86/X86InstrPredicates.td
+++ b/llvm/lib/Target/X86/X86InstrPredicates.td
@@ -80,7 +80,6 @@ def NoAVX512 : Predicate<"!Subtarget->hasAVX512()">;
def HasCDI : Predicate<"Subtarget->hasCDI()">;
def HasVPOPCNTDQ : Predicate<"Subtarget->hasVPOPCNTDQ()">;
def HasPFI : Predicate<"Subtarget->hasPFI()">;
-def HasERI : Predicate<"Subtarget->hasERI()">;
def HasDQI : Predicate<"Subtarget->hasDQI()">;
def NoDQI : Predicate<"!Subtarget->hasDQI()">;
def HasBWI : Predicate<"Subtarget->hasBWI()">;
diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
index 3bb2f07b5f1a1..9a0a4e8657035 100644
--- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h
+++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
@@ -454,8 +454,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_dbpsadbw_512, INTR_TYPE_3OP_IMM8, X86ISD::DBPSADBW, 0),
X86_INTRINSIC_DATA(avx512_div_pd_512, INTR_TYPE_2OP, ISD::FDIV, X86ISD::FDIV_RND),
X86_INTRINSIC_DATA(avx512_div_ps_512, INTR_TYPE_2OP, ISD::FDIV, X86ISD::FDIV_RND),
- X86_INTRINSIC_DATA(avx512_exp2_pd, INTR_TYPE_1OP_MASK_SAE, X86ISD::EXP2, X86ISD::EXP2_SAE),
- X86_INTRINSIC_DATA(avx512_exp2_ps, INTR_TYPE_1OP_MASK_SAE, X86ISD::EXP2, X86ISD::EXP2_SAE),
X86_INTRINSIC_DATA(avx512_fpclass_pd_128, INTR_TYPE_2OP, X86ISD::VFPCLASS, 0),
X86_INTRINSIC_DATA(avx512_fpclass_pd_256, INTR_TYPE_2OP, X86ISD::VFPCLASS, 0),
X86_INTRINSIC_DATA(avx512_fpclass_pd_512, INTR_TYPE_2OP, X86ISD::VFPCLASS, 0),
@@ -908,10 +906,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_rcp14_ps_512, INTR_TYPE_1OP_MASK, X86ISD::RCP14, 0),
X86_INTRINSIC_DATA(avx512_rcp14_sd, INTR_TYPE_SCALAR_MASK, X86ISD::RCP14S, 0),
X86_INTRINSIC_DATA(avx512_rcp14_ss, INTR_TYPE_SCALAR_MASK, X86ISD::RCP14S, 0),
- X86_INTRINSIC_DATA(avx512_rcp28_pd, INTR_TYPE_1OP_MASK_SAE, X86ISD::RCP28, X86ISD::RCP28_SAE),
- X86_INTRINSIC_DATA(avx512_rcp28_ps, INTR_TYPE_1OP_MASK_SAE, X86ISD::RCP28, X86ISD::RCP28_SAE),
- X86_INTRINSIC_DATA(avx512_rcp28_sd, INTR_TYPE_SCALAR_MASK_SAE, X86ISD::RCP28S, X86ISD::RCP28S_SAE),
- X86_INTRINSIC_DATA(avx512_rcp28_ss, INTR_TYPE_SCALAR_MASK_SAE, X86ISD::RCP28S, X86ISD::RCP28S_SAE),
X86_INTRINSIC_DATA(avx512_rsqrt14_pd_128, INTR_TYPE_1OP_MASK, X86ISD::RSQRT14, 0),
X86_INTRINSIC_DATA(avx512_rsqrt14_pd_256, INTR_TYPE_1OP_MASK, X86ISD::RSQRT14, 0),
X86_INTRINSIC_DATA(avx512_rsqrt14_pd_512, INTR_TYPE_1OP_MASK, X86ISD::RSQRT14, 0),
@@ -920,10 +914,6 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_rsqrt14_ps_512, INTR_TYPE_1OP_MASK, X86ISD::RSQRT14, 0),
X86_INTRINSIC_DATA(avx512_rsqrt14_sd, INTR_TYPE_SCALAR_MASK, X86ISD::RSQRT14S, 0),
X86_INTRINSIC_DATA(avx512_rsqrt14_ss, INTR_TYPE_SCALAR_MASK, X86ISD::RSQRT14S, 0),
- X86_INTRINSIC_DATA(avx512_rsqrt28_pd, INTR_TYPE_1OP_MASK_SAE,X86ISD::RSQRT28, X86ISD::RSQRT28_SAE),
- X86_INTRINSIC_DATA(avx512_rsqrt28_ps, INTR_TYPE_1OP_MASK_SAE,X86ISD::RSQRT28, X86ISD::RSQRT28_SAE),
- X86_INTRINSIC_DATA(avx512_rsqrt28_sd, INTR_TYPE_SCALAR_MASK_SAE,X86ISD::RSQRT28S, X86ISD::RSQRT28S_SAE),
- X86_INTRINSIC_DATA(avx512_rsqrt28_ss, INTR_TYPE_SCALAR_MASK_SAE,X86ISD::RSQRT28S, X86ISD::RSQRT28S_SAE),
X86_INTRINSIC_DATA(avx512_sitofp_round, INTR_TYPE_1OP, ISD::SINT_TO_FP, X86ISD::SINT_TO_FP_RND),
X86_INTRINSIC_DATA(avx512_sqrt_pd_512, INTR_TYPE_1OP, ISD::FSQRT, X86ISD::FSQRT_RND),
X86_INTRINSIC_DATA(avx512_sqrt_ps_512, INTR_TYPE_1OP, ISD::FSQRT, X86ISD::FSQRT_RND),
diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp
index c5156c6cb802c..2fcdc1cedb9a2 100644
--- a/llvm/lib/TargetParser/Host.cpp
+++ b/llvm/lib/TargetParser/Host.cpp
@@ -1005,8 +1005,6 @@ getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model,
CPU = "cascadelake";
} else if (testFeature(X86::FEATURE_AVX512VL)) {
CPU = "skylake-avx512";
- } else if (testFeature(X86::FEATURE_AVX512ER)) {
- CPU = "knl";
} else if (testFeature(X86::FEATURE_CLFLUSHOPT)) {
if (testFeature(X86::FEATURE_SHA))
CPU = "goldmont";
@@ -1302,8 +1300,6 @@ static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
setFeature(X86::FEATURE_CLFLUSHOPT);
if (HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save)
setFeature(X86::FEATURE_AVX512PF);
- if (HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save)
- setFeature(X86::FEATURE_AVX512ER);
if (HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save)
setFeature(X86::FEATURE_AVX512CD);
if (HasLeaf7 && ((EBX >> 29) & 1))
@@ -1811,7 +1807,6 @@ bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
Features["clflushopt"] = HasLeaf7 && ((EBX >> 23) & 1);
Features["clwb"] = HasLeaf7 && ((EBX >> 24) & 1);
Features["avx512pf"] = HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save;
- Features["avx512er"] = HasLeaf7 && ((EBX >> 27) & 1) && HasAVX512Save;
Features["avx512cd"] = HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save;
Features["sha"] = HasLeaf7 && ((EBX >> 29) & 1);
Features["avx512bw"] = HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save;
diff --git a/llvm/lib/TargetParser/X86TargetParser.cpp b/llvm/lib/TargetParser/X86TargetParser.cpp
index efe392b945452..ebdb4b09d79ce 100644
--- a/llvm/lib/TargetParser/X86TargetParser.cpp
+++ b/llvm/lib/TargetParser/X86TargetParser.cpp
@@ -97,7 +97,7 @@ constexpr FeatureBitset FeaturesBroadwell =
// Knights Landing has feature parity with Broadwell.
constexpr FeatureBitset FeaturesKNL =
FeaturesBroadwell | FeatureAES | FeatureAVX512F | FeatureEVEX512 |
- FeatureAVX512CD | FeatureAVX512ER | FeatureAVX512PF | FeaturePREFETCHWT1;
+ FeatureAVX512CD | FeatureAVX512PF | FeaturePREFETCHWT1;
constexpr FeatureBitset FeaturesKNM = FeaturesKNL | FeatureAVX512VPOPCNTDQ;
// Intel Skylake processors.
diff --git a/llvm/test/CodeGen/X86/avx512er-intrinsics.ll b/llvm/test/CodeGen/X86/avx512er-intrinsics.ll
deleted file mode 100644
index fa4025f76b57d..0000000000000
--- a/llvm/test/CodeGen/X86/avx512er-intrinsics.ll
+++ /dev/null
@@ -1,306 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512er --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512er --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64
-
-define <16 x float> @test_rsqrt28_ps(<16 x float> %a0) {
-; CHECK-LABEL: test_rsqrt28_ps:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vrsqrt28ps {sae}, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x18,0xcc,0xc0]
-; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
- %res = call <16 x float> @llvm.x86.avx512.rsqrt28.ps(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 8)
- ret <16 x float> %res
-}
-
-define <16 x float> @test1_rsqrt28_ps(<16 x float> %a0, <16 x float> %a1) {
-; CHECK-LABEL: test1_rsqrt28_ps:
-; CHECK: # %bb.0:
-; CHECK-NEXT: movw $6, %ax # encoding: [0x66,0xb8,0x06,0x00]
-; CHECK-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
-; CHECK-NEXT: vrsqrt28ps {sae}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x19,0xcc,0xc8]
-; CHECK-NEXT: vmovaps %zmm1, %zmm0 # encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
-; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
- %res = call <16 x float> @llvm.x86.avx512.rsqrt28.ps(<16 x float> %a0, <16 x float> %a1, i16 6, i32 8)
- ret <16 x float> %res
-}
-
-define <16 x float> @test2_rsqrt28_ps(<16 x float> %a0) {
-; CHECK-LABEL: test2_rsqrt28_ps:
-; CHECK: # %bb.0:
-; CHECK-NEXT: movw $6, %ax # encoding: [0x66,0xb8,0x06,0x00]
-; CHECK-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
-; CHECK-NEXT: vrsqrt28ps %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0xcc,0xc0]
-; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
- %res = call <16 x float> @llvm.x86.avx512.rsqrt28.ps(<16 x float> %a0, <16 x float> undef, i16 6, i32 4)
- ret <16 x float> %res
-}
-
-define <16 x float> @test3_rsqrt28_ps(<16 x float> %a0) {
-; CHECK-LABEL: test3_rsqrt28_ps:
-; CHECK: # %bb.0:
-; CHECK-NEXT: movw $6, %ax # encoding: [0x66,0xb8,0x06,0x00]
-; CHECK-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
-; CHECK-NEXT: vrsqrt28ps %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0xcc,0xc0]
-; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
- %res = call <16 x float> @llvm.x86.avx512.rsqrt28.ps(<16 x float> %a0, <16 x float> zeroinitializer, i16 6, i32 4)
- ret <16 x float> %res
-}
-
-define <16 x float> @test4_rsqrt28_ps(<16 x float> %a0) {
-; CHECK-LABEL: test4_rsqrt28_ps:
-; CHECK: # %bb.0:
-; CHECK-NEXT: movw $6, %ax # encoding: [0x66,0xb8,0x06,0x00]
-; CHECK-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
-; CHECK-NEXT: vrsqrt28ps {sae}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x99,0xcc,0xc0]
-; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
- %res = call <16 x float> @llvm.x86.avx512.rsqrt28.ps(<16 x float> %a0, <16 x float> undef, i16 6, i32 8)
- ret <16 x float> %res
-}
-
-declare <16 x float> @llvm.x86.avx512.rsqrt28.ps(<16 x float>, <16 x float>, i16, i32) nounwind readnone
-
-define <16 x float> @test_rcp28_ps_512(<16 x float> %a0) {
-; CHECK-LABEL: test_rcp28_ps_512:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vrcp28ps {sae}, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x18,0xca,0xc0]
-; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
- %res = call <16 x float> @llvm.x86.avx512.rcp28.ps(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 8)
- ret <16 x float> %res
-}
-declare <16 x float> @llvm.x86.avx512.rcp28.ps(<16 x float>, <16 x float>, i16, i32) nounwind readnone
-
-define <8 x double> @test_rcp28_pd_512(<8 x double> %a0) {
-; CHECK-LABEL: test_rcp28_pd_512:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vrcp28pd {sae}, %zmm0, %zmm0 # encoding: [0x62,0xf2,0xfd,0x18,0xca,0xc0]
-; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
- %res = call <8 x double> @llvm.x86.avx512.rcp28.pd(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 8)
- ret <8 x double> %res
-}
-declare <8 x double> @llvm.x86.avx512.rcp28.pd(<8 x double>, <8 x double>, i8, i32) nounwind readnone
-
-define <16 x float> @test_exp2_ps_512(<16 x float> %a0) {
-; CHECK-LABEL: test_exp2_ps_512:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vexp2ps {sae}, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x18,0xc8,0xc0]
-; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
- %res = call <16 x float> @llvm.x86.avx512.exp2.ps(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 8)
- ret <16 x float> %res
-}
-declare <16 x float> @llvm.x86.avx512.exp2.ps(<16 x float>, <16 x float>, i16, i32) nounwind readnone
-
-define <8 x double> @test_exp2_pd_512(<8 x double> %a0) {
-; CHECK-LABEL: test_exp2_pd_512:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vexp2pd {sae}, %zmm0, %zmm0 # encoding: [0x62,0xf2,0xfd,0x18,0xc8,0xc0]
-; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
- %res = call <8 x double> @llvm.x86.avx512.exp2.pd(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 8)
- ret <8 x double> %res
-}
-declare <8 x double> @llvm.x86.avx512.exp2.pd(<8 x double>, <8 x double>, i8, i32) nounwind readnone
-
-define <4 x float> @test_rsqrt28_ss(<4 x float> %a0) {
-; CHECK-LABEL: test_rsqrt28_ss:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vrsqrt28ss {sae}, %xmm0, %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7d,0x18,0xcd,0xc0]
-; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
- %res = call <4 x float> @llvm.x86.avx512.rsqrt28.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 -1, i32 8) ; <<4 x float>> [#uses=1]
- ret <4 x float> %res
-}
-declare <4 x float> @llvm.x86.avx512.rsqrt28.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32) nounwind readnone
-
-define <4 x float> @test_rcp28_ss(<4 x float> %a0) {
-; CHECK-LABEL: test_rcp28_ss:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vrcp28ss {sae}, %xmm0, %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7d,0x18,0xcb,0xc0]
-; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
- %res = call <4 x float> @llvm.x86.avx512.rcp28.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 -1, i32 8) ; <<4 x float>> [#uses=1]
- ret <4 x float> %res
-}
-declare <4 x float> @llvm.x86.avx512.rcp28.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32) nounwind readnone
-
-define <4 x float> @test_rcp28_ss_load(<4 x float> %a0, ptr %a1ptr) {
-; X86-LABEL: test_rcp28_ss_load:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT: vrcp28ss (%eax), %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0xcb,0x00]
-; X86-NEXT: retl # encoding: [0xc3]
-;
-; X64-LABEL: test_rcp28_ss_load:
-; X64: # %bb.0:
-; X64-NEXT: vrcp28ss (%rdi), %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0xcb,0x07]
-; X64-NEXT: retq # encoding: [0xc3]
- %a1 = load <4 x float>, ptr %a1ptr
- %res = call <4 x float> @llvm.x86.avx512.rcp28.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> undef, i8 -1, i32 4) ; <<4 x float>> [#uses=1]
- ret <4 x float> %res
-}
-
-define <4 x float> @test_rsqrt28_ss_load(<4 x float> %a0, ptr %a1ptr) {
-; X86-LABEL: test_rsqrt28_ss_load:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT: vrsqrt28ss (%eax), %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0xcd,0x00]
-; X86-NEXT: retl # encoding: [0xc3]
-;
-; X64-LABEL: test_rsqrt28_ss_load:
-; X64: # %bb.0:
-; X64-NEXT: vrsqrt28ss (%rdi), %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0xcd,0x07]
-; X64-NEXT: retq # encoding: [0xc3]
- %a1 = load <4 x float>, ptr %a1ptr
- %res = call <4 x float> @llvm.x86.avx512.rsqrt28.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> undef, i8 -1, i32 4) ; <<4 x float>> [#uses=1]
- ret <4 x float> %res
-}
-
-define <4 x float> @test_rsqrt28_ss_maskz(<4 x float> %a0, i8 %mask) {
-; X86-LABEL: test_rsqrt28_ss_maskz:
-; X86: # %bb.0:
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
-; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
-; X86-NEXT: vrsqrt28ss {sae}, %xmm0, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x99,0xcd,0xc0]
-; X86-NEXT: retl # encoding: [0xc3]
-;
-; X64-LABEL: test_rsqrt28_ss_maskz:
-; X64: # %bb.0:
-; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
-; X64-NEXT: vrsqrt28ss {sae}, %xmm0, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x99,0xcd,0xc0]
-; X64-NEXT: retq # encoding: [0xc3]
- %res = call <4 x float> @llvm.x86.avx512.rsqrt28.ss(<4 x float> %a0, <4 x float> %a0, <4 x float> zeroinitializer, i8 %mask, i32 8) ;
- ret <4 x float> %res
-}
-
-define <4 x float> @test_rsqrt28_ss_mask(<4 x float> %a0, <4 x float> %b0, <4 x float> %c0, i8 %mask) {
-; X86-LABEL: test_rsqrt28_ss_mask:
-; X86: # %bb.0:
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
-; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
-; X86-NEXT: vrsqrt28ss {sae}, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x19,0xcd,0xd1]
-; X86-NEXT: vmovaps %xmm2, %xmm0 # encoding: [0xc5,0xf8,0x28,0xc2]
-; X86-NEXT: retl # encoding: [0xc3]
-;
-; X64-LABEL: test_rsqrt28_ss_mask:
-; X64: # %bb.0:
-; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
-; X64-NEXT: vrsqrt28ss {sae}, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x19,0xcd,0xd1]
-; X64-NEXT: vmovaps %xmm2, %xmm0 # encoding: [0xc5,0xf8,0x28,0xc2]
-; X64-NEXT: retq # encoding: [0xc3]
- %res = call <4 x float> @llvm.x86.avx512.rsqrt28.ss(<4 x float> %a0, <4 x float> %b0, <4 x float> %c0, i8 %mask, i32 8) ;
- ret <4 x float> %res
-}
-
-define <2 x double> @test_rcp28_sd_mask_load(<2 x double> %a0, ptr %a1ptr, <2 x double> %a2, i8 %mask) {
-; X86-LABEL: test_rcp28_sd_mask_load:
-; X86: # %bb.0:
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
-; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
-; X86-NEXT: vrcp28sd %xmm0, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0xcb,0xc8]
-; X86-NEXT: vmovapd %xmm1, %xmm0 # encoding: [0xc5,0xf9,0x28,0xc1]
-; X86-NEXT: retl # encoding: [0xc3]
-;
-; X64-LABEL: test_rcp28_sd_mask_load:
-; X64: # %bb.0:
-; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
-; X64-NEXT: vrcp28sd %xmm0, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0xcb,0xc8]
-; X64-NEXT: vmovapd %xmm1, %xmm0 # encoding: [0xc5,0xf9,0x28,0xc1]
-; X64-NEXT: retq # encoding: [0xc3]
- %a1 = load <2 x double>, ptr %a1ptr
- %res = call <2 x double> @llvm.x86.avx512.rcp28.sd(<2 x double> %a0, <2 x double> %a0, <2 x double> %a2, i8 %mask, i32 4) ;
- ret <2 x double> %res
-}
-declare <2 x double> @llvm.x86.avx512.rcp28.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32) nounwind readnone
-
-define <2 x double> @test_rsqrt28_sd_maskz_load(<2 x double> %a0, ptr %a1ptr, i8 %mask) {
-; X86-LABEL: test_rsqrt28_sd_maskz_load:
-; X86: # %bb.0:
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
-; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
-; X86-NEXT: vrsqrt28sd %xmm0, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0xcd,0xc0]
-; X86-NEXT: retl # encoding: [0xc3]
-;
-; X64-LABEL: test_rsqrt28_sd_maskz_load:
-; X64: # %bb.0:
-; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
-; X64-NEXT: vrsqrt28sd %xmm0, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0xcd,0xc0]
-; X64-NEXT: retq # encoding: [0xc3]
- %a1 = load <2 x double>, ptr %a1ptr
- %res = call <2 x double> @llvm.x86.avx512.rsqrt28.sd(<2 x double> %a0, <2 x double> %a0, <2 x double> zeroinitializer, i8 %mask, i32 4) ;
- ret <2 x double> %res
-}
-
-define <2 x double> @test_rsqrt28_sd_maskz(<2 x double> %a0, i8 %mask) {
-; X86-LABEL: test_rsqrt28_sd_maskz:
-; X86: # %bb.0:
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
-; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
-; X86-NEXT: vrsqrt28sd {sae}, %xmm0, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x99,0xcd,0xc0]
-; X86-NEXT: retl # encoding: [0xc3]
-;
-; X64-LABEL: test_rsqrt28_sd_maskz:
-; X64: # %bb.0:
-; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
-; X64-NEXT: vrsqrt28sd {sae}, %xmm0, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x99,0xcd,0xc0]
-; X64-NEXT: retq # encoding: [0xc3]
- %res = call <2 x double> @llvm.x86.avx512.rsqrt28.sd(<2 x double> %a0, <2 x double> %a0, <2 x double> zeroinitializer, i8 %mask, i32 8) ;
- ret <2 x double> %res
-}
-
-define <2 x double> @test_rsqrt28_sd_mask(<2 x double> %a0, <2 x double> %b0, <2 x double> %c0, i8 %mask) {
-; X86-LABEL: test_rsqrt28_sd_mask:
-; X86: # %bb.0:
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
-; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
-; X86-NEXT: vrsqrt28sd {sae}, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x19,0xcd,0xd1]
-; X86-NEXT: vmovapd %xmm2, %xmm0 # encoding: [0xc5,0xf9,0x28,0xc2]
-; X86-NEXT: retl # encoding: [0xc3]
-;
-; X64-LABEL: test_rsqrt28_sd_mask:
-; X64: # %bb.0:
-; X64-NEXT: kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
-; X64-NEXT: vrsqrt28sd {sae}, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x19,0xcd,0xd1]
-; X64-NEXT: vmovapd %xmm2, %xmm0 # encoding: [0xc5,0xf9,0x28,0xc2]
-; X64-NEXT: retq # encoding: [0xc3]
- %res = call <2 x double> @llvm.x86.avx512.rsqrt28.sd(<2 x double> %a0, <2 x double> %b0, <2 x double> %c0, i8 %mask, i32 8) ;
- ret <2 x double> %res
-}
-
-declare <2 x double> @llvm.x86.avx512.rsqrt28.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32) nounwind readnone
-
-define <2 x double> @test_rsqrt28_sd_maskz_mem(<2 x double> %a0, ptr %ptr, i8 %mask) {
-; X86-LABEL: test_rsqrt28_sd_maskz_mem:
-; X86: # %bb.0:
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
-; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT: vrsqrt28sd (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0xcd,0x00]
-; X86-NEXT: retl # encoding: [0xc3]
-;
-; X64-LABEL: test_rsqrt28_sd_maskz_mem:
-; X64: # %bb.0:
-; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
-; X64-NEXT: vrsqrt28sd (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0xcd,0x07]
-; X64-NEXT: retq # encoding: [0xc3]
- %mem = load double , ptr %ptr, align 8
- %mem_v = insertelement <2 x double> undef, double %mem, i32 0
- %res = call <2 x double> @llvm.x86.avx512.rsqrt28.sd(<2 x double> %a0, <2 x double> %mem_v, <2 x double> zeroinitializer, i8 %mask, i32 4) ;
- ret <2 x double> %res
-}
-
-define <2 x double> @test_rsqrt28_sd_maskz_mem_offset(<2 x double> %a0, ptr %ptr, i8 %mask) {
-; X86-LABEL: test_rsqrt28_sd_maskz_mem_offset:
-; X86: # %bb.0:
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
-; X86-NEXT: kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT: vrsqrt28sd 144(%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0xcd,0x40,0x12]
-; X86-NEXT: retl # encoding: [0xc3]
-;
-; X64-LABEL: test_rsqrt28_sd_maskz_mem_offset:
-; X64: # %bb.0:
-; X64-NEXT: kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
-; X64-NEXT: vrsqrt28sd 144(%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0xcd,0x47,0x12]
-; X64-NEXT: retq # encoding: [0xc3]
- %ptr1 = getelementptr double, ptr %ptr, i32 18
- %mem = load double , ptr %ptr1, align 8
- %mem_v = insertelement <2 x double> undef, double %mem, i32 0
- %res = call <2 x double> @llvm.x86.avx512.rsqrt28.sd(<2 x double> %a0, <2 x double> %mem_v, <2 x double> zeroinitializer, i8 %mask, i32 4) ;
- ret <2 x double> %res
-}
-
diff --git a/llvm/test/CodeGen/X86/crc32-target-feature.ll b/llvm/test/CodeGen/X86/crc32-target-feature.ll
index ef4fafcae5dce..9dfe27e653511 100644
--- a/llvm/test/CodeGen/X86/crc32-target-feature.ll
+++ b/llvm/test/CodeGen/X86/crc32-target-feature.ll
@@ -25,5 +25,5 @@ define i32 @test3(i32 %a, i8 %b) nounwind #2 {
declare i32 @llvm.x86.sse42.crc32.32.8(i32, i8) nounwind
attributes #0 = { "target-features"="+crc32" }
-attributes #1 = { "target-features"="+cx8,+fxsr,-3dnow,-3dnowa,-aes,-avx,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxvnni,-f16c,-fma,-fma4,-gfni,-kl,-mmx,-pclmul,-sha,-sse,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-widekl,-x87,-xop,+crc32" }
-attributes #2 = { "target-features"="+crc32,+cx8,+fxsr,-3dnow,-3dnowa,-aes,-avx,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxvnni,-f16c,-fma,-fma4,-gfni,-kl,-mmx,-pclmul,-sha,-sse,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-widekl,-x87,-xop" }
+attributes #1 = { "target-features"="+cx8,+fxsr,-3dnow,-3dnowa,-aes,-avx,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxvnni,-f16c,-fma,-fma4,-gfni,-kl,-mmx,-pclmul,-sha,-sse,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-widekl,-x87,-xop,+crc32" }
+attributes #2 = { "target-features"="+crc32,+cx8,+fxsr,-3dnow,-3dnowa,-aes,-avx,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxvnni,-f16c,-fma,-fma4,-gfni,-kl,-mmx,-pclmul,-sha,-sse,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-widekl,-x87,-xop" }
diff --git a/llvm/test/CodeGen/X86/unfoldMemoryOperand.mir b/llvm/test/CodeGen/X86/unfoldMemoryOperand.mir
index 4c715b894fae8..af57d972f2246 100644
--- a/llvm/test/CodeGen/X86/unfoldMemoryOperand.mir
+++ b/llvm/test/CodeGen/X86/unfoldMemoryOperand.mir
@@ -23,7 +23,7 @@
br i1 %6, label %4, label %5, !llvm.loop !9
}
- attributes #0 = { nofree norecurse nosync nounwind uwtable writeonly mustprogress "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+x87,-aes,-avx,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxvnni,-f16c,-fma,-fma4,-gfni,-kl,-pclmul,-sha,-sse,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-widekl,-xop" "tune-cpu"="generic" }
+ attributes #0 = { nofree norecurse nosync nounwind uwtable writeonly mustprogress "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+x87,-aes,-avx,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxvnni,-f16c,-fma,-fma4,-gfni,-kl,-pclmul,-sha,-sse,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-widekl,-xop" "tune-cpu"="generic" }
!llvm.module.flags = !{!0, !1}
!llvm.ident = !{!2}
diff --git a/llvm/test/Transforms/LoopStrengthReduce/X86/pr40514.ll b/llvm/test/Transforms/LoopStrengthReduce/X86/pr40514.ll
index 03b1aece9e870..a461f35d00dc9 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/X86/pr40514.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/X86/pr40514.ll
@@ -54,4 +54,4 @@ bb10: ; preds = %bb10, %bb
}
-attributes #0 = { "target-cpu"="broadwell" "target-features"="+sse2,+cx16,+sahf,-tbm,-avx512ifma,-sha,-gfni,-fma4,-vpclmulqdq,+prfchw,+bmi2,-cldemote,+fsgsbase,-ptwrite,-xsavec,+popcnt,+aes,-avx512bitalg,-movdiri,-xsaves,-avx512er,-avx512vnni,-avx512vpopcntdq,-pconfig,-clwb,-avx512f,-clzero,-pku,+mmx,-lwp,-rdpid,-xop,+rdseed,-waitpkg,-movdir64b,-sse4a,-avx512bw,-clflushopt,+xsave,-avx512vbmi2,+64bit,-avx512vl,+invpcid,-avx512cd,+avx,-vaes,+rtm,+fma,+bmi,+rdrnd,-mwaitx,+sse4.1,+sse4.2,+avx2,-wbnoinvd,+sse,+lzcnt,+pclmul,-prefetchwt1,+f16c,+ssse3,-sgx,-shstk,+cmov,-avx512vbmi,+movbe,+xsaveopt,-avx512dq,+adx,-avx512pf,+sse3" }
+attributes #0 = { "target-cpu"="broadwell" "target-features"="+sse2,+cx16,+sahf,-tbm,-avx512ifma,-sha,-gfni,-fma4,-vpclmulqdq,+prfchw,+bmi2,-cldemote,+fsgsbase,-ptwrite,-xsavec,+popcnt,+aes,-avx512bitalg,-movdiri,-xsaves,-avx512vnni,-avx512vpopcntdq,-pconfig,-clwb,-avx512f,-clzero,-pku,+mmx,-lwp,-rdpid,-xop,+rdseed,-waitpkg,-movdir64b,-sse4a,-avx512bw,-clflushopt,+xsave,-avx512vbmi2,+64bit,-avx512vl,+invpcid,-avx512cd,+avx,-vaes,+rtm,+fma,+bmi,+rdrnd,-mwaitx,+sse4.1,+sse4.2,+avx2,-wbnoinvd,+sse,+lzcnt,+pclmul,-prefetchwt1,+f16c,+ssse3,-sgx,-shstk,+cmov,-avx512vbmi,+movbe,+xsaveopt,-avx512dq,+adx,-avx512pf,+sse3" }
diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr23997.ll b/llvm/test/Transforms/LoopVectorize/X86/pr23997.ll
index 0b16d80a4adbc..2cba2452440be 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/pr23997.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/pr23997.ll
@@ -88,7 +88,7 @@ loopexit:
ret void
}
-attributes #0 = { uwtable "target-cpu"="skylake" "target-features"="+sse2,+cx16,+sahf,-tbm,-avx512ifma,-sha,-gfni,-fma4,-vpclmulqdq,+prfchw,+bmi2,-cldemote,+fsgsbase,+xsavec,+popcnt,+aes,-avx512bitalg,+xsaves,-avx512er,-avx512vnni,-avx512vpopcntdq,-clwb,-avx512f,-clzero,-pku,+mmx,-lwp,-rdpid,-xop,+rdseed,-waitpkg,-sse4a,-avx512bw,+clflushopt,+xsave,-avx512vbmi2,-avx512vl,-avx512cd,+avx,-vaes,+rtm,+fma,+bmi,+rdrnd,-mwaitx,+sse4.1,+sse4.2,+avx2,-wbnoinvd,+sse,+lzcnt,+pclmul,-prefetchwt1,+f16c,+ssse3,+sgx,-shstk,+cmov,-avx512vbmi,+movbe,+xsaveopt,-avx512dq,+adx,-avx512pf,+sse3" }
+attributes #0 = { uwtable "target-cpu"="skylake" "target-features"="+sse2,+cx16,+sahf,-tbm,-avx512ifma,-sha,-gfni,-fma4,-vpclmulqdq,+prfchw,+bmi2,-cldemote,+fsgsbase,+xsavec,+popcnt,+aes,-avx512bitalg,+xsaves,-avx512vnni,-avx512vpopcntdq,-clwb,-avx512f,-clzero,-pku,+mmx,-lwp,-rdpid,-xop,+rdseed,-waitpkg,-sse4a,-avx512bw,+clflushopt,+xsave,-avx512vbmi2,-avx512vl,-avx512cd,+avx,-vaes,+rtm,+fma,+bmi,+rdrnd,-mwaitx,+sse4.1,+sse4.2,+avx2,-wbnoinvd,+sse,+lzcnt,+pclmul,-prefetchwt1,+f16c,+ssse3,+sgx,-shstk,+cmov,-avx512vbmi,+movbe,+xsaveopt,-avx512dq,+adx,-avx512pf,+sse3" }
!0 = !{i32 0, i32 2147483646}
!1 = !{}
diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr54634.ll b/llvm/test/Transforms/LoopVectorize/X86/pr54634.ll
index 5c9fe54b55212..20566005c93df 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/pr54634.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/pr54634.ll
@@ -118,7 +118,7 @@ L44: ; preds = %L26
ret ptr addrspace(10) null
}
-attributes #0 = { "target-cpu"="skylake-avx512" "target-features"="+xsaves,+xsavec,+prfchw,+lzcnt,+sahf,+pku,+avx512vl,+avx512bw,+avx512cd,+clwb,+clflushopt,+adx,+avx512dq,+avx512f,+bmi2,+avx2,+bmi,+fsgsbase,+f16c,+avx,+xsave,+aes,+popcnt,+movbe,+sse4.2,+sse4.1,+cx16,+fma,+ssse3,+pclmul,+sse3,-rdrnd,-rtm,-rdseed,-avx512ifma,-avx512pf,-avx512er,-sha,-prefetchwt1,-avx512vbmi,-waitpkg,-avx512vbmi2,-shstk,-gfni,-vaes,-vpclmulqdq,-avx512vnni,-avx512bitalg,-avx512vpopcntdq,-rdpid,-cldemote,-movdiri,-movdir64b,-enqcmd,-avx512vp2intersect,-serialize,-tsxldtrk,-pconfig,-amx-bf16,-amx-tile,-amx-int8,-sse4a,-xop,-lwp,-fma4,-tbm,-mwaitx,-xsaveopt,-clzero,-wbnoinvd,-avx512bf16,-ptwrite,+sse2,+mmx,+fxsr,+64bit,+cx8" }
+attributes #0 = { "target-cpu"="skylake-avx512" "target-features"="+xsaves,+xsavec,+prfchw,+lzcnt,+sahf,+pku,+avx512vl,+avx512bw,+avx512cd,+clwb,+clflushopt,+adx,+avx512dq,+avx512f,+bmi2,+avx2,+bmi,+fsgsbase,+f16c,+avx,+xsave,+aes,+popcnt,+movbe,+sse4.2,+sse4.1,+cx16,+fma,+ssse3,+pclmul,+sse3,-rdrnd,-rtm,-rdseed,-avx512ifma,-avx512pf,-sha,-prefetchwt1,-avx512vbmi,-waitpkg,-avx512vbmi2,-shstk,-gfni,-vaes,-vpclmulqdq,-avx512vnni,-avx512bitalg,-avx512vpopcntdq,-rdpid,-cldemote,-movdiri,-movdir64b,-enqcmd,-avx512vp2intersect,-serialize,-tsxldtrk,-pconfig,-amx-bf16,-amx-tile,-amx-int8,-sse4a,-xop,-lwp,-fma4,-tbm,-mwaitx,-xsaveopt,-clzero,-wbnoinvd,-avx512bf16,-ptwrite,+sse2,+mmx,+fxsr,+64bit,+cx8" }
attributes #1 = { inaccessiblemem_or_argmemonly }
attributes #2 = { allocsize(1) }
diff --git a/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512er.s b/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512er.s
deleted file mode 100644
index 034fc6d83d153..0000000000000
--- a/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512er.s
+++ /dev/null
@@ -1,373 +0,0 @@
-# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
-# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -instruction-tables < %s | FileCheck %s
-
-vexp2pd %zmm16, %zmm19
-vexp2pd (%rax), %zmm19
-vexp2pd (%rax){1to8}, %zmm19
-vexp2pd %zmm16, %zmm19 {k1}
-vexp2pd (%rax), %zmm19 {k1}
-vexp2pd (%rax){1to8}, %zmm19 {k1}
-vexp2pd %zmm16, %zmm19 {z}{k1}
-vexp2pd (%rax), %zmm19 {z}{k1}
-vexp2pd (%rax){1to8}, %zmm19 {z}{k1}
-
-vexp2pd {sae}, %zmm16, %zmm19
-vexp2pd {sae}, %zmm16, %zmm19 {k1}
-vexp2pd {sae}, %zmm16, %zmm19 {z}{k1}
-
-vexp2ps %zmm16, %zmm19
-vexp2ps (%rax), %zmm19
-vexp2ps (%rax){1to16}, %zmm19
-vexp2ps %zmm16, %zmm19 {k1}
-vexp2ps (%rax), %zmm19 {k1}
-vexp2ps (%rax){1to16}, %zmm19 {k1}
-vexp2ps %zmm16, %zmm19 {z}{k1}
-vexp2ps (%rax), %zmm19 {z}{k1}
-vexp2ps (%rax){1to16}, %zmm19 {z}{k1}
-
-vexp2ps {sae}, %zmm16, %zmm19
-vexp2ps {sae}, %zmm16, %zmm19 {k1}
-vexp2ps {sae}, %zmm16, %zmm19 {z}{k1}
-
-vrcp28pd %zmm16, %zmm19
-vrcp28pd (%rax), %zmm19
-vrcp28pd (%rax){1to8}, %zmm19
-vrcp28pd %zmm16, %zmm19 {k1}
-vrcp28pd (%rax), %zmm19 {k1}
-vrcp28pd (%rax){1to8}, %zmm19 {k1}
-vrcp28pd %zmm16, %zmm19 {z}{k1}
-vrcp28pd (%rax), %zmm19 {z}{k1}
-vrcp28pd (%rax){1to8}, %zmm19 {z}{k1}
-
-vrcp28pd {sae}, %zmm16, %zmm19
-vrcp28pd {sae}, %zmm16, %zmm19 {k1}
-vrcp28pd {sae}, %zmm16, %zmm19 {z}{k1}
-
-vrcp28ps %zmm16, %zmm19
-vrcp28ps (%rax), %zmm19
-vrcp28ps (%rax){1to16}, %zmm19
-vrcp28ps %zmm16, %zmm19 {k1}
-vrcp28ps (%rax), %zmm19 {k1}
-vrcp28ps (%rax){1to16}, %zmm19 {k1}
-vrcp28ps %zmm16, %zmm19 {z}{k1}
-vrcp28ps (%rax), %zmm19 {z}{k1}
-vrcp28ps (%rax){1to16}, %zmm19 {z}{k1}
-
-vrcp28ps {sae}, %zmm16, %zmm19
-vrcp28ps {sae}, %zmm16, %zmm19 {k1}
-vrcp28ps {sae}, %zmm16, %zmm19 {z}{k1}
-
-vrcp28sd %xmm16, %xmm17, %xmm19
-vrcp28sd (%rax), %xmm17, %xmm19
-vrcp28sd %xmm16, %xmm17, %xmm19 {k1}
-vrcp28sd (%rax), %xmm17, %xmm19 {k1}
-vrcp28sd %xmm16, %xmm17, %xmm19 {z}{k1}
-vrcp28sd (%rax), %xmm17, %xmm19 {z}{k1}
-
-vrcp28sd {sae}, %xmm16, %xmm17, %xmm19
-vrcp28sd {sae}, %xmm16, %xmm17, %xmm19 {k1}
-vrcp28sd {sae}, %xmm16, %xmm17, %xmm19 {z}{k1}
-
-vrcp28ss %xmm16, %xmm17, %xmm19
-vrcp28ss (%rax), %xmm17, %xmm19
-vrcp28ss %xmm16, %xmm17, %xmm19 {k1}
-vrcp28ss (%rax), %xmm17, %xmm19 {k1}
-vrcp28ss %xmm16, %xmm17, %xmm19 {z}{k1}
-vrcp28ss (%rax), %xmm17, %xmm19 {z}{k1}
-
-vrcp28ss {sae}, %xmm16, %xmm17, %xmm19
-vrcp28ss {sae}, %xmm16, %xmm17, %xmm19 {k1}
-vrcp28ss {sae}, %xmm16, %xmm17, %xmm19 {z}{k1}
-
-vrsqrt28pd %zmm16, %zmm19
-vrsqrt28pd (%rax), %zmm19
-vrsqrt28pd (%rax){1to8}, %zmm19
-vrsqrt28pd %zmm16, %zmm19 {k1}
-vrsqrt28pd (%rax), %zmm19 {k1}
-vrsqrt28pd (%rax){1to8}, %zmm19 {k1}
-vrsqrt28pd %zmm16, %zmm19 {z}{k1}
-vrsqrt28pd (%rax), %zmm19 {z}{k1}
-vrsqrt28pd (%rax){1to8}, %zmm19 {z}{k1}
-
-vrsqrt28pd {sae}, %zmm16, %zmm19
-vrsqrt28pd {sae}, %zmm16, %zmm19 {k1}
-vrsqrt28pd {sae}, %zmm16, %zmm19 {z}{k1}
-
-vrsqrt28ps %zmm16, %zmm19
-vrsqrt28ps (%rax), %zmm19
-vrsqrt28ps (%rax){1to16}, %zmm19
-vrsqrt28ps %zmm16, %zmm19 {k1}
-vrsqrt28ps (%rax), %zmm19 {k1}
-vrsqrt28ps (%rax){1to16}, %zmm19 {k1}
-vrsqrt28ps %zmm16, %zmm19 {z}{k1}
-vrsqrt28ps (%rax), %zmm19 {z}{k1}
-vrsqrt28ps (%rax){1to16}, %zmm19 {z}{k1}
-
-vrsqrt28ps {sae}, %zmm16, %zmm19
-vrsqrt28ps {sae}, %zmm16, %zmm19 {k1}
-vrsqrt28ps {sae}, %zmm16, %zmm19 {z}{k1}
-
-vrsqrt28sd %xmm16, %xmm17, %xmm19
-vrsqrt28sd (%rax), %xmm17, %xmm19
-vrsqrt28sd %xmm16, %xmm17, %xmm19 {k1}
-vrsqrt28sd (%rax), %xmm17, %xmm19 {k1}
-vrsqrt28sd %xmm16, %xmm17, %xmm19 {z}{k1}
-vrsqrt28sd (%rax), %xmm17, %xmm19 {z}{k1}
-
-vrsqrt28sd {sae}, %xmm16, %xmm17, %xmm19
-vrsqrt28sd {sae}, %xmm16, %xmm17, %xmm19 {k1}
-vrsqrt28sd {sae}, %xmm16, %xmm17, %xmm19 {z}{k1}
-
-vrsqrt28ss %xmm16, %xmm17, %xmm19
-vrsqrt28ss (%rax), %xmm17, %xmm19
-vrsqrt28ss %xmm16, %xmm17, %xmm19 {k1}
-vrsqrt28ss (%rax), %xmm17, %xmm19 {k1}
-vrsqrt28ss %xmm16, %xmm17, %xmm19 {z}{k1}
-vrsqrt28ss (%rax), %xmm17, %xmm19 {z}{k1}
-
-vrsqrt28ss {sae}, %xmm16, %xmm17, %xmm19
-vrsqrt28ss {sae}, %xmm16, %xmm17, %xmm19 {k1}
-vrsqrt28ss {sae}, %xmm16, %xmm17, %xmm19 {z}{k1}
-
-# CHECK: Instruction Info:
-# CHECK-NEXT: [1]: #uOps
-# CHECK-NEXT: [2]: Latency
-# CHECK-NEXT: [3]: RThroughput
-# CHECK-NEXT: [4]: MayLoad
-# CHECK-NEXT: [5]: MayStore
-# CHECK-NEXT: [6]: HasSideEffects (U)
-
-# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
-# CHECK-NEXT: 1 3 1.00 vexp2pd %zmm16, %zmm19
-# CHECK-NEXT: 2 10 1.00 * vexp2pd (%rax), %zmm19
-# CHECK-NEXT: 2 10 1.00 * vexp2pd (%rax){1to8}, %zmm19
-# CHECK-NEXT: 1 3 1.00 vexp2pd %zmm16, %zmm19 {%k1}
-# CHECK-NEXT: 2 10 1.00 * vexp2pd (%rax), %zmm19 {%k1}
-# CHECK-NEXT: 2 10 1.00 * vexp2pd (%rax){1to8}, %zmm19 {%k1}
-# CHECK-NEXT: 1 3 1.00 vexp2pd %zmm16, %zmm19 {%k1} {z}
-# CHECK-NEXT: 2 10 1.00 * vexp2pd (%rax), %zmm19 {%k1} {z}
-# CHECK-NEXT: 2 10 1.00 * vexp2pd (%rax){1to8}, %zmm19 {%k1} {z}
-# CHECK-NEXT: 1 3 1.00 vexp2pd {sae}, %zmm16, %zmm19
-# CHECK-NEXT: 1 3 1.00 vexp2pd {sae}, %zmm16, %zmm19 {%k1}
-# CHECK-NEXT: 1 3 1.00 vexp2pd {sae}, %zmm16, %zmm19 {%k1} {z}
-# CHECK-NEXT: 1 3 1.00 vexp2ps %zmm16, %zmm19
-# CHECK-NEXT: 2 10 1.00 * vexp2ps (%rax), %zmm19
-# CHECK-NEXT: 2 10 1.00 * vexp2ps (%rax){1to16}, %zmm19
-# CHECK-NEXT: 1 3 1.00 vexp2ps %zmm16, %zmm19 {%k1}
-# CHECK-NEXT: 2 10 1.00 * vexp2ps (%rax), %zmm19 {%k1}
-# CHECK-NEXT: 2 10 1.00 * vexp2ps (%rax){1to16}, %zmm19 {%k1}
-# CHECK-NEXT: 1 3 1.00 vexp2ps %zmm16, %zmm19 {%k1} {z}
-# CHECK-NEXT: 2 10 1.00 * vexp2ps (%rax), %zmm19 {%k1} {z}
-# CHECK-NEXT: 2 10 1.00 * vexp2ps (%rax){1to16}, %zmm19 {%k1} {z}
-# CHECK-NEXT: 1 3 1.00 vexp2ps {sae}, %zmm16, %zmm19
-# CHECK-NEXT: 1 3 1.00 vexp2ps {sae}, %zmm16, %zmm19 {%k1}
-# CHECK-NEXT: 1 3 1.00 vexp2ps {sae}, %zmm16, %zmm19 {%k1} {z}
-# CHECK-NEXT: 3 7 2.00 vrcp28pd %zmm16, %zmm19
-# CHECK-NEXT: 4 14 2.00 * vrcp28pd (%rax), %zmm19
-# CHECK-NEXT: 4 14 2.00 * vrcp28pd (%rax){1to8}, %zmm19
-# CHECK-NEXT: 3 7 2.00 vrcp28pd %zmm16, %zmm19 {%k1}
-# CHECK-NEXT: 4 14 2.00 * vrcp28pd (%rax), %zmm19 {%k1}
-# CHECK-NEXT: 4 14 2.00 * vrcp28pd (%rax){1to8}, %zmm19 {%k1}
-# CHECK-NEXT: 3 7 2.00 vrcp28pd %zmm16, %zmm19 {%k1} {z}
-# CHECK-NEXT: 4 14 2.00 * vrcp28pd (%rax), %zmm19 {%k1} {z}
-# CHECK-NEXT: 4 14 2.00 * vrcp28pd (%rax){1to8}, %zmm19 {%k1} {z}
-# CHECK-NEXT: 3 7 2.00 vrcp28pd {sae}, %zmm16, %zmm19
-# CHECK-NEXT: 3 7 2.00 vrcp28pd {sae}, %zmm16, %zmm19 {%k1}
-# CHECK-NEXT: 3 7 2.00 vrcp28pd {sae}, %zmm16, %zmm19 {%k1} {z}
-# CHECK-NEXT: 3 7 2.00 vrcp28ps %zmm16, %zmm19
-# CHECK-NEXT: 4 14 2.00 * vrcp28ps (%rax), %zmm19
-# CHECK-NEXT: 4 14 2.00 * vrcp28ps (%rax){1to16}, %zmm19
-# CHECK-NEXT: 3 7 2.00 vrcp28ps %zmm16, %zmm19 {%k1}
-# CHECK-NEXT: 4 14 2.00 * vrcp28ps (%rax), %zmm19 {%k1}
-# CHECK-NEXT: 4 14 2.00 * vrcp28ps (%rax){1to16}, %zmm19 {%k1}
-# CHECK-NEXT: 3 7 2.00 vrcp28ps %zmm16, %zmm19 {%k1} {z}
-# CHECK-NEXT: 4 14 2.00 * vrcp28ps (%rax), %zmm19 {%k1} {z}
-# CHECK-NEXT: 4 14 2.00 * vrcp28ps (%rax){1to16}, %zmm19 {%k1} {z}
-# CHECK-NEXT: 3 7 2.00 vrcp28ps {sae}, %zmm16, %zmm19
-# CHECK-NEXT: 3 7 2.00 vrcp28ps {sae}, %zmm16, %zmm19 {%k1}
-# CHECK-NEXT: 3 7 2.00 vrcp28ps {sae}, %zmm16, %zmm19 {%k1} {z}
-# CHECK-NEXT: 1 5 1.00 vrcp28sd %xmm16, %xmm17, %xmm19
-# CHECK-NEXT: 2 11 1.00 * vrcp28sd (%rax), %xmm17, %xmm19
-# CHECK-NEXT: 1 5 1.00 vrcp28sd %xmm16, %xmm17, %xmm19 {%k1}
-# CHECK-NEXT: 2 11 1.00 * vrcp28sd (%rax), %xmm17, %xmm19 {%k1}
-# CHECK-NEXT: 1 5 1.00 vrcp28sd %xmm16, %xmm17, %xmm19 {%k1} {z}
-# CHECK-NEXT: 2 11 1.00 * vrcp28sd (%rax), %xmm17, %xmm19 {%k1} {z}
-# CHECK-NEXT: 1 5 1.00 vrcp28sd {sae}, %xmm16, %xmm17, %xmm19
-# CHECK-NEXT: 1 5 1.00 vrcp28sd {sae}, %xmm16, %xmm17, %xmm19 {%k1}
-# CHECK-NEXT: 1 5 1.00 vrcp28sd {sae}, %xmm16, %xmm17, %xmm19 {%k1} {z}
-# CHECK-NEXT: 1 5 1.00 vrcp28ss %xmm16, %xmm17, %xmm19
-# CHECK-NEXT: 2 11 1.00 * vrcp28ss (%rax), %xmm17, %xmm19
-# CHECK-NEXT: 1 5 1.00 vrcp28ss %xmm16, %xmm17, %xmm19 {%k1}
-# CHECK-NEXT: 2 11 1.00 * vrcp28ss (%rax), %xmm17, %xmm19 {%k1}
-# CHECK-NEXT: 1 5 1.00 vrcp28ss %xmm16, %xmm17, %xmm19 {%k1} {z}
-# CHECK-NEXT: 2 11 1.00 * vrcp28ss (%rax), %xmm17, %xmm19 {%k1} {z}
-# CHECK-NEXT: 1 5 1.00 vrcp28ss {sae}, %xmm16, %xmm17, %xmm19
-# CHECK-NEXT: 1 5 1.00 vrcp28ss {sae}, %xmm16, %xmm17, %xmm19 {%k1}
-# CHECK-NEXT: 1 5 1.00 vrcp28ss {sae}, %xmm16, %xmm17, %xmm19 {%k1} {z}
-# CHECK-NEXT: 3 7 2.00 vrsqrt28pd %zmm16, %zmm19
-# CHECK-NEXT: 4 14 2.00 * vrsqrt28pd (%rax), %zmm19
-# CHECK-NEXT: 4 14 2.00 * vrsqrt28pd (%rax){1to8}, %zmm19
-# CHECK-NEXT: 3 7 2.00 vrsqrt28pd %zmm16, %zmm19 {%k1}
-# CHECK-NEXT: 4 14 2.00 * vrsqrt28pd (%rax), %zmm19 {%k1}
-# CHECK-NEXT: 4 14 2.00 * vrsqrt28pd (%rax){1to8}, %zmm19 {%k1}
-# CHECK-NEXT: 3 7 2.00 vrsqrt28pd %zmm16, %zmm19 {%k1} {z}
-# CHECK-NEXT: 4 14 2.00 * vrsqrt28pd (%rax), %zmm19 {%k1} {z}
-# CHECK-NEXT: 4 14 2.00 * vrsqrt28pd (%rax){1to8}, %zmm19 {%k1} {z}
-# CHECK-NEXT: 3 7 2.00 vrsqrt28pd {sae}, %zmm16, %zmm19
-# CHECK-NEXT: 3 7 2.00 vrsqrt28pd {sae}, %zmm16, %zmm19 {%k1}
-# CHECK-NEXT: 3 7 2.00 vrsqrt28pd {sae}, %zmm16, %zmm19 {%k1} {z}
-# CHECK-NEXT: 3 7 2.00 vrsqrt28ps %zmm16, %zmm19
-# CHECK-NEXT: 4 14 2.00 * vrsqrt28ps (%rax), %zmm19
-# CHECK-NEXT: 4 14 2.00 * vrsqrt28ps (%rax){1to16}, %zmm19
-# CHECK-NEXT: 3 7 2.00 vrsqrt28ps %zmm16, %zmm19 {%k1}
-# CHECK-NEXT: 4 14 2.00 * vrsqrt28ps (%rax), %zmm19 {%k1}
-# CHECK-NEXT: 4 14 2.00 * vrsqrt28ps (%rax){1to16}, %zmm19 {%k1}
-# CHECK-NEXT: 3 7 2.00 vrsqrt28ps %zmm16, %zmm19 {%k1} {z}
-# CHECK-NEXT: 4 14 2.00 * vrsqrt28ps (%rax), %zmm19 {%k1} {z}
-# CHECK-NEXT: 4 14 2.00 * vrsqrt28ps (%rax){1to16}, %zmm19 {%k1} {z}
-# CHECK-NEXT: 3 7 2.00 vrsqrt28ps {sae}, %zmm16, %zmm19
-# CHECK-NEXT: 3 7 2.00 vrsqrt28ps {sae}, %zmm16, %zmm19 {%k1}
-# CHECK-NEXT: 3 7 2.00 vrsqrt28ps {sae}, %zmm16, %zmm19 {%k1} {z}
-# CHECK-NEXT: 1 5 1.00 vrsqrt28sd %xmm16, %xmm17, %xmm19
-# CHECK-NEXT: 2 11 1.00 * vrsqrt28sd (%rax), %xmm17, %xmm19
-# CHECK-NEXT: 1 5 1.00 vrsqrt28sd %xmm16, %xmm17, %xmm19 {%k1}
-# CHECK-NEXT: 2 11 1.00 * vrsqrt28sd (%rax), %xmm17, %xmm19 {%k1}
-# CHECK-NEXT: 1 5 1.00 vrsqrt28sd %xmm16, %xmm17, %xmm19 {%k1} {z}
-# CHECK-NEXT: 2 11 1.00 * vrsqrt28sd (%rax), %xmm17, %xmm19 {%k1} {z}
-# CHECK-NEXT: 1 5 1.00 vrsqrt28sd {sae}, %xmm16, %xmm17, %xmm19
-# CHECK-NEXT: 1 5 1.00 vrsqrt28sd {sae}, %xmm16, %xmm17, %xmm19 {%k1}
-# CHECK-NEXT: 1 5 1.00 vrsqrt28sd {sae}, %xmm16, %xmm17, %xmm19 {%k1} {z}
-# CHECK-NEXT: 1 5 1.00 vrsqrt28ss %xmm16, %xmm17, %xmm19
-# CHECK-NEXT: 2 11 1.00 * vrsqrt28ss (%rax), %xmm17, %xmm19
-# CHECK-NEXT: 1 5 1.00 vrsqrt28ss %xmm16, %xmm17, %xmm19 {%k1}
-# CHECK-NEXT: 2 11 1.00 * vrsqrt28ss (%rax), %xmm17, %xmm19 {%k1}
-# CHECK-NEXT: 1 5 1.00 vrsqrt28ss %xmm16, %xmm17, %xmm19 {%k1} {z}
-# CHECK-NEXT: 2 11 1.00 * vrsqrt28ss (%rax), %xmm17, %xmm19 {%k1} {z}
-# CHECK-NEXT: 1 5 1.00 vrsqrt28ss {sae}, %xmm16, %xmm17, %xmm19
-# CHECK-NEXT: 1 5 1.00 vrsqrt28ss {sae}, %xmm16, %xmm17, %xmm19 {%k1}
-# CHECK-NEXT: 1 5 1.00 vrsqrt28ss {sae}, %xmm16, %xmm17, %xmm19 {%k1} {z}
-
-# CHECK: Resources:
-# CHECK-NEXT: [0] - SBDivider
-# CHECK-NEXT: [1] - SBFPDivider
-# CHECK-NEXT: [2] - SBPort0
-# CHECK-NEXT: [3] - SBPort1
-# CHECK-NEXT: [4] - SBPort4
-# CHECK-NEXT: [5] - SBPort5
-# CHECK-NEXT: [6.0] - SBPort23
-# CHECK-NEXT: [6.1] - SBPort23
-
-# CHECK: Resource pressure per iteration:
-# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
-# CHECK-NEXT: - - 156.00 24.00 - 24.00 24.00 24.00
-
-# CHECK: Resource pressure by instruction:
-# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
-# CHECK-NEXT: - - - 1.00 - - - - vexp2pd %zmm16, %zmm19
-# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vexp2pd (%rax), %zmm19
-# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vexp2pd (%rax){1to8}, %zmm19
-# CHECK-NEXT: - - - 1.00 - - - - vexp2pd %zmm16, %zmm19 {%k1}
-# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vexp2pd (%rax), %zmm19 {%k1}
-# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vexp2pd (%rax){1to8}, %zmm19 {%k1}
-# CHECK-NEXT: - - - 1.00 - - - - vexp2pd %zmm16, %zmm19 {%k1} {z}
-# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vexp2pd (%rax), %zmm19 {%k1} {z}
-# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vexp2pd (%rax){1to8}, %zmm19 {%k1} {z}
-# CHECK-NEXT: - - - 1.00 - - - - vexp2pd {sae}, %zmm16, %zmm19
-# CHECK-NEXT: - - - 1.00 - - - - vexp2pd {sae}, %zmm16, %zmm19 {%k1}
-# CHECK-NEXT: - - - 1.00 - - - - vexp2pd {sae}, %zmm16, %zmm19 {%k1} {z}
-# CHECK-NEXT: - - - 1.00 - - - - vexp2ps %zmm16, %zmm19
-# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vexp2ps (%rax), %zmm19
-# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vexp2ps (%rax){1to16}, %zmm19
-# CHECK-NEXT: - - - 1.00 - - - - vexp2ps %zmm16, %zmm19 {%k1}
-# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vexp2ps (%rax), %zmm19 {%k1}
-# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vexp2ps (%rax){1to16}, %zmm19 {%k1}
-# CHECK-NEXT: - - - 1.00 - - - - vexp2ps %zmm16, %zmm19 {%k1} {z}
-# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vexp2ps (%rax), %zmm19 {%k1} {z}
-# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vexp2ps (%rax){1to16}, %zmm19 {%k1} {z}
-# CHECK-NEXT: - - - 1.00 - - - - vexp2ps {sae}, %zmm16, %zmm19
-# CHECK-NEXT: - - - 1.00 - - - - vexp2ps {sae}, %zmm16, %zmm19 {%k1}
-# CHECK-NEXT: - - - 1.00 - - - - vexp2ps {sae}, %zmm16, %zmm19 {%k1} {z}
-# CHECK-NEXT: - - 2.50 - - 0.50 - - vrcp28pd %zmm16, %zmm19
-# CHECK-NEXT: - - 2.50 - - 0.50 0.50 0.50 vrcp28pd (%rax), %zmm19
-# CHECK-NEXT: - - 2.50 - - 0.50 0.50 0.50 vrcp28pd (%rax){1to8}, %zmm19
-# CHECK-NEXT: - - 2.50 - - 0.50 - - vrcp28pd %zmm16, %zmm19 {%k1}
-# CHECK-NEXT: - - 2.50 - - 0.50 0.50 0.50 vrcp28pd (%rax), %zmm19 {%k1}
-# CHECK-NEXT: - - 2.50 - - 0.50 0.50 0.50 vrcp28pd (%rax){1to8}, %zmm19 {%k1}
-# CHECK-NEXT: - - 2.50 - - 0.50 - - vrcp28pd %zmm16, %zmm19 {%k1} {z}
-# CHECK-NEXT: - - 2.50 - - 0.50 0.50 0.50 vrcp28pd (%rax), %zmm19 {%k1} {z}
-# CHECK-NEXT: - - 2.50 - - 0.50 0.50 0.50 vrcp28pd (%rax){1to8}, %zmm19 {%k1} {z}
-# CHECK-NEXT: - - 2.50 - - 0.50 - - vrcp28pd {sae}, %zmm16, %zmm19
-# CHECK-NEXT: - - 2.50 - - 0.50 - - vrcp28pd {sae}, %zmm16, %zmm19 {%k1}
-# CHECK-NEXT: - - 2.50 - - 0.50 - - vrcp28pd {sae}, %zmm16, %zmm19 {%k1} {z}
-# CHECK-NEXT: - - 2.50 - - 0.50 - - vrcp28ps %zmm16, %zmm19
-# CHECK-NEXT: - - 2.50 - - 0.50 0.50 0.50 vrcp28ps (%rax), %zmm19
-# CHECK-NEXT: - - 2.50 - - 0.50 0.50 0.50 vrcp28ps (%rax){1to16}, %zmm19
-# CHECK-NEXT: - - 2.50 - - 0.50 - - vrcp28ps %zmm16, %zmm19 {%k1}
-# CHECK-NEXT: - - 2.50 - - 0.50 0.50 0.50 vrcp28ps (%rax), %zmm19 {%k1}
-# CHECK-NEXT: - - 2.50 - - 0.50 0.50 0.50 vrcp28ps (%rax){1to16}, %zmm19 {%k1}
-# CHECK-NEXT: - - 2.50 - - 0.50 - - vrcp28ps %zmm16, %zmm19 {%k1} {z}
-# CHECK-NEXT: - - 2.50 - - 0.50 0.50 0.50 vrcp28ps (%rax), %zmm19 {%k1} {z}
-# CHECK-NEXT: - - 2.50 - - 0.50 0.50 0.50 vrcp28ps (%rax){1to16}, %zmm19 {%k1} {z}
-# CHECK-NEXT: - - 2.50 - - 0.50 - - vrcp28ps {sae}, %zmm16, %zmm19
-# CHECK-NEXT: - - 2.50 - - 0.50 - - vrcp28ps {sae}, %zmm16, %zmm19 {%k1}
-# CHECK-NEXT: - - 2.50 - - 0.50 - - vrcp28ps {sae}, %zmm16, %zmm19 {%k1} {z}
-# CHECK-NEXT: - - 1.00 - - - - - vrcp28sd %xmm16, %xmm17, %xmm19
-# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vrcp28sd (%rax), %xmm17, %xmm19
-# CHECK-NEXT: - - 1.00 - - - - - vrcp28sd %xmm16, %xmm17, %xmm19 {%k1}
-# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vrcp28sd (%rax), %xmm17, %xmm19 {%k1}
-# CHECK-NEXT: - - 1.00 - - - - - vrcp28sd %xmm16, %xmm17, %xmm19 {%k1} {z}
-# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vrcp28sd (%rax), %xmm17, %xmm19 {%k1} {z}
-# CHECK-NEXT: - - 1.00 - - - - - vrcp28sd {sae}, %xmm16, %xmm17, %xmm19
-# CHECK-NEXT: - - 1.00 - - - - - vrcp28sd {sae}, %xmm16, %xmm17, %xmm19 {%k1}
-# CHECK-NEXT: - - 1.00 - - - - - vrcp28sd {sae}, %xmm16, %xmm17, %xmm19 {%k1} {z}
-# CHECK-NEXT: - - 1.00 - - - - - vrcp28ss %xmm16, %xmm17, %xmm19
-# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vrcp28ss (%rax), %xmm17, %xmm19
-# CHECK-NEXT: - - 1.00 - - - - - vrcp28ss %xmm16, %xmm17, %xmm19 {%k1}
-# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vrcp28ss (%rax), %xmm17, %xmm19 {%k1}
-# CHECK-NEXT: - - 1.00 - - - - - vrcp28ss %xmm16, %xmm17, %xmm19 {%k1} {z}
-# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vrcp28ss (%rax), %xmm17, %xmm19 {%k1} {z}
-# CHECK-NEXT: - - 1.00 - - - - - vrcp28ss {sae}, %xmm16, %xmm17, %xmm19
-# CHECK-NEXT: - - 1.00 - - - - - vrcp28ss {sae}, %xmm16, %xmm17, %xmm19 {%k1}
-# CHECK-NEXT: - - 1.00 - - - - - vrcp28ss {sae}, %xmm16, %xmm17, %xmm19 {%k1} {z}
-# CHECK-NEXT: - - 2.50 - - 0.50 - - vrsqrt28pd %zmm16, %zmm19
-# CHECK-NEXT: - - 2.50 - - 0.50 0.50 0.50 vrsqrt28pd (%rax), %zmm19
-# CHECK-NEXT: - - 2.50 - - 0.50 0.50 0.50 vrsqrt28pd (%rax){1to8}, %zmm19
-# CHECK-NEXT: - - 2.50 - - 0.50 - - vrsqrt28pd %zmm16, %zmm19 {%k1}
-# CHECK-NEXT: - - 2.50 - - 0.50 0.50 0.50 vrsqrt28pd (%rax), %zmm19 {%k1}
-# CHECK-NEXT: - - 2.50 - - 0.50 0.50 0.50 vrsqrt28pd (%rax){1to8}, %zmm19 {%k1}
-# CHECK-NEXT: - - 2.50 - - 0.50 - - vrsqrt28pd %zmm16, %zmm19 {%k1} {z}
-# CHECK-NEXT: - - 2.50 - - 0.50 0.50 0.50 vrsqrt28pd (%rax), %zmm19 {%k1} {z}
-# CHECK-NEXT: - - 2.50 - - 0.50 0.50 0.50 vrsqrt28pd (%rax){1to8}, %zmm19 {%k1} {z}
-# CHECK-NEXT: - - 2.50 - - 0.50 - - vrsqrt28pd {sae}, %zmm16, %zmm19
-# CHECK-NEXT: - - 2.50 - - 0.50 - - vrsqrt28pd {sae}, %zmm16, %zmm19 {%k1}
-# CHECK-NEXT: - - 2.50 - - 0.50 - - vrsqrt28pd {sae}, %zmm16, %zmm19 {%k1} {z}
-# CHECK-NEXT: - - 2.50 - - 0.50 - - vrsqrt28ps %zmm16, %zmm19
-# CHECK-NEXT: - - 2.50 - - 0.50 0.50 0.50 vrsqrt28ps (%rax), %zmm19
-# CHECK-NEXT: - - 2.50 - - 0.50 0.50 0.50 vrsqrt28ps (%rax){1to16}, %zmm19
-# CHECK-NEXT: - - 2.50 - - 0.50 - - vrsqrt28ps %zmm16, %zmm19 {%k1}
-# CHECK-NEXT: - - 2.50 - - 0.50 0.50 0.50 vrsqrt28ps (%rax), %zmm19 {%k1}
-# CHECK-NEXT: - - 2.50 - - 0.50 0.50 0.50 vrsqrt28ps (%rax){1to16}, %zmm19 {%k1}
-# CHECK-NEXT: - - 2.50 - - 0.50 - - vrsqrt28ps %zmm16, %zmm19 {%k1} {z}
-# CHECK-NEXT: - - 2.50 - - 0.50 0.50 0.50 vrsqrt28ps (%rax), %zmm19 {%k1} {z}
-# CHECK-NEXT: - - 2.50 - - 0.50 0.50 0.50 vrsqrt28ps (%rax){1to16}, %zmm19 {%k1} {z}
-# CHECK-NEXT: - - 2.50 - - 0.50 - - vrsqrt28ps {sae}, %zmm16, %zmm19
-# CHECK-NEXT: - - 2.50 - - 0.50 - - vrsqrt28ps {sae}, %zmm16, %zmm19 {%k1}
-# CHECK-NEXT: - - 2.50 - - 0.50 - - vrsqrt28ps {sae}, %zmm16, %zmm19 {%k1} {z}
-# CHECK-NEXT: - - 1.00 - - - - - vrsqrt28sd %xmm16, %xmm17, %xmm19
-# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vrsqrt28sd (%rax), %xmm17, %xmm19
-# CHECK-NEXT: - - 1.00 - - - - - vrsqrt28sd %xmm16, %xmm17, %xmm19 {%k1}
-# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vrsqrt28sd (%rax), %xmm17, %xmm19 {%k1}
-# CHECK-NEXT: - - 1.00 - - - - - vrsqrt28sd %xmm16, %xmm17, %xmm19 {%k1} {z}
-# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vrsqrt28sd (%rax), %xmm17, %xmm19 {%k1} {z}
-# CHECK-NEXT: - - 1.00 - - - - - vrsqrt28sd {sae}, %xmm16, %xmm17, %xmm19
-# CHECK-NEXT: - - 1.00 - - - - - vrsqrt28sd {sae}, %xmm16, %xmm17, %xmm19 {%k1}
-# CHECK-NEXT: - - 1.00 - - - - - vrsqrt28sd {sae}, %xmm16, %xmm17, %xmm19 {%k1} {z}
-# CHECK-NEXT: - - 1.00 - - - - - vrsqrt28ss %xmm16, %xmm17, %xmm19
-# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vrsqrt28ss (%rax), %xmm17, %xmm19
-# CHECK-NEXT: - - 1.00 - - - - - vrsqrt28ss %xmm16, %xmm17, %xmm19 {%k1}
-# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vrsqrt28ss (%rax), %xmm17, %xmm19 {%k1}
-# CHECK-NEXT: - - 1.00 - - - - - vrsqrt28ss %xmm16, %xmm17, %xmm19 {%k1} {z}
-# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vrsqrt28ss (%rax), %xmm17, %xmm19 {%k1} {z}
-# CHECK-NEXT: - - 1.00 - - - - - vrsqrt28ss {sae}, %xmm16, %xmm17, %xmm19
-# CHECK-NEXT: - - 1.00 - - - - - vrsqrt28ss {sae}, %xmm16, %xmm17, %xmm19 {%k1}
-# CHECK-NEXT: - - 1.00 - - - - - vrsqrt28ss {sae}, %xmm16, %xmm17, %xmm19 {%k1} {z}
diff --git a/llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn b/llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn
index 0a7cc3854056c..bfebd944d7a88 100644
--- a/llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn
+++ b/llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn
@@ -140,7 +140,6 @@ copy("Headers") {
"avx512bwintrin.h",
"avx512cdintrin.h",
"avx512dqintrin.h",
- "avx512erintrin.h",
"avx512fintrin.h",
"avx512fp16intrin.h",
"avx512ifmaintrin.h",
>From 2d78a527ce435368a014f9ef398d80a5844339a9 Mon Sep 17 00:00:00 2001
From: Freddy Ye <freddy.ye at intel.com>
Date: Tue, 26 Dec 2023 10:47:09 +0800
Subject: [PATCH 2/6] Remove AVX512PF intrinsic supports.
---
clang/include/clang/Basic/BuiltinsX86.def | 9 --
clang/include/clang/Driver/Options.td | 2 -
clang/lib/Basic/Targets/X86.cpp | 7 --
clang/lib/Basic/Targets/X86.h | 1 -
clang/lib/Headers/CMakeLists.txt | 1 -
clang/lib/Headers/avx512pfintrin.h | 92 ----------------
clang/lib/Headers/cpuid.h | 1 -
clang/lib/Headers/immintrin.h | 4 -
clang/lib/Sema/SemaChecking.cpp | 20 ----
clang/test/CodeGen/X86/avx512pf-builtins.c | 100 ------------------
clang/test/CodeGen/target-builtin-noerror.c | 1 -
clang/test/Driver/cl-x86-flags.c | 2 -
clang/test/Driver/x86-target-features.c | 8 +-
clang/test/Frontend/x86-target-cpu.c | 2 -
.../Preprocessor/predefined-arch-macros.c | 4 -
clang/test/Preprocessor/x86_target_features.c | 32 ------
clang/test/Sema/builtins-x86.c | 8 --
llvm/include/llvm/IR/IntrinsicsX86.td | 32 ------
llvm/lib/Target/X86/X86.td | 4 -
llvm/lib/Target/X86/X86InstrAVX512.td | 2 +-
llvm/lib/Target/X86/X86InstrPredicates.td | 1 -
llvm/lib/Target/X86/X86IntrinsicsInfo.h | 17 ---
llvm/lib/TargetParser/Host.cpp | 3 -
...avx512-gather-scatter-intrin-deprecated.ll | 24 -----
.../X86/avx512-gather-scatter-intrin.ll | 24 -----
.../X86/insert-prefetch-invalid-instr.ll | 5 -
.../X86/speculative-load-hardening-gather.ll | 22 ----
27 files changed, 5 insertions(+), 423 deletions(-)
delete mode 100644 clang/lib/Headers/avx512pfintrin.h
delete mode 100644 clang/test/CodeGen/X86/avx512pf-builtins.c
diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def
index 7118ee0206ce1..7074479786b97 100644
--- a/clang/include/clang/Basic/BuiltinsX86.def
+++ b/clang/include/clang/Basic/BuiltinsX86.def
@@ -948,15 +948,6 @@ TARGET_BUILTIN(__builtin_ia32_scattersiv16si, "vv*UsV16iV16iIi", "nV:512:", "avx
TARGET_BUILTIN(__builtin_ia32_scatterdiv8di, "vv*UcV8OiV8OiIi", "nV:512:", "avx512f,evex512")
TARGET_BUILTIN(__builtin_ia32_scatterdiv16si, "vv*UcV8OiV8iIi", "nV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_gatherpfdpd, "vUcV8ivC*IiIi", "nV:512:", "avx512pf,evex512")
-TARGET_BUILTIN(__builtin_ia32_gatherpfdps, "vUsV16ivC*IiIi", "nV:512:", "avx512pf,evex512")
-TARGET_BUILTIN(__builtin_ia32_gatherpfqpd, "vUcV8OivC*IiIi", "nV:512:", "avx512pf,evex512")
-TARGET_BUILTIN(__builtin_ia32_gatherpfqps, "vUcV8OivC*IiIi", "nV:512:", "avx512pf,evex512")
-TARGET_BUILTIN(__builtin_ia32_scatterpfdpd, "vUcV8iv*IiIi", "nV:512:", "avx512pf,evex512")
-TARGET_BUILTIN(__builtin_ia32_scatterpfdps, "vUsV16iv*IiIi", "nV:512:", "avx512pf,evex512")
-TARGET_BUILTIN(__builtin_ia32_scatterpfqpd, "vUcV8Oiv*IiIi", "nV:512:", "avx512pf,evex512")
-TARGET_BUILTIN(__builtin_ia32_scatterpfqps, "vUcV8Oiv*IiIi", "nV:512:", "avx512pf,evex512")
-
TARGET_BUILTIN(__builtin_ia32_knotqi, "UcUc", "nc", "avx512dq")
TARGET_BUILTIN(__builtin_ia32_knothi, "UsUs", "nc", "avx512f")
TARGET_BUILTIN(__builtin_ia32_knotsi, "UiUi", "nc", "avx512bw")
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 5b681382f2b07..dfd0cac68ea95 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -6115,8 +6115,6 @@ def mavx512fp16 : Flag<["-"], "mavx512fp16">, Group<m_x86_Features_Group>;
def mno_avx512fp16 : Flag<["-"], "mno-avx512fp16">, Group<m_x86_Features_Group>;
def mavx512ifma : Flag<["-"], "mavx512ifma">, Group<m_x86_Features_Group>;
def mno_avx512ifma : Flag<["-"], "mno-avx512ifma">, Group<m_x86_Features_Group>;
-def mavx512pf : Flag<["-"], "mavx512pf">, Group<m_x86_Features_Group>;
-def mno_avx512pf : Flag<["-"], "mno-avx512pf">, Group<m_x86_Features_Group>;
def mavx512vbmi : Flag<["-"], "mavx512vbmi">, Group<m_x86_Features_Group>;
def mno_avx512vbmi : Flag<["-"], "mno-avx512vbmi">, Group<m_x86_Features_Group>;
def mavx512vbmi2 : Flag<["-"], "mavx512vbmi2">, Group<m_x86_Features_Group>;
diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp
index a263c26904cd6..edded5759ad6d 100644
--- a/clang/lib/Basic/Targets/X86.cpp
+++ b/clang/lib/Basic/Targets/X86.cpp
@@ -313,9 +313,6 @@ bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
} else if (Feature == "+avx512fp16") {
HasAVX512FP16 = true;
HasLegalHalfType = true;
- } else if (Feature == "+avx512pf") {
- HasAVX512PF = true;
- Diags.Report(diag::warn_knl_knm_isa_support_removed);
} else if (Feature == "+avx512dq") {
HasAVX512DQ = true;
} else if (Feature == "+avx512bitalg") {
@@ -839,8 +836,6 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts,
Builder.defineMacro("__AVX512BF16__");
if (HasAVX512FP16)
Builder.defineMacro("__AVX512FP16__");
- if (HasAVX512PF)
- Builder.defineMacro("__AVX512PF__");
if (HasAVX512DQ)
Builder.defineMacro("__AVX512DQ__");
if (HasAVX512BITALG)
@@ -1080,7 +1075,6 @@ bool X86TargetInfo::isValidFeatureName(StringRef Name) const {
.Case("avx512vnni", true)
.Case("avx512bf16", true)
.Case("avx512fp16", true)
- .Case("avx512pf", true)
.Case("avx512dq", true)
.Case("avx512bitalg", true)
.Case("avx512bw", true)
@@ -1196,7 +1190,6 @@ bool X86TargetInfo::hasFeature(StringRef Feature) const {
.Case("avx512vnni", HasAVX512VNNI)
.Case("avx512bf16", HasAVX512BF16)
.Case("avx512fp16", HasAVX512FP16)
- .Case("avx512pf", HasAVX512PF)
.Case("avx512dq", HasAVX512DQ)
.Case("avx512bitalg", HasAVX512BITALG)
.Case("avx512bw", HasAVX512BW)
diff --git a/clang/lib/Basic/Targets/X86.h b/clang/lib/Basic/Targets/X86.h
index cfb163d14b8d5..281c5ce949ce6 100644
--- a/clang/lib/Basic/Targets/X86.h
+++ b/clang/lib/Basic/Targets/X86.h
@@ -103,7 +103,6 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo {
bool HasAVX512VNNI = false;
bool HasAVX512FP16 = false;
bool HasAVX512BF16 = false;
- bool HasAVX512PF = false;
bool HasAVX512DQ = false;
bool HasAVX512BITALG = false;
bool HasAVX512BW = false;
diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt
index 95e0d444d4344..dbff92b4e59b4 100644
--- a/clang/lib/Headers/CMakeLists.txt
+++ b/clang/lib/Headers/CMakeLists.txt
@@ -157,7 +157,6 @@ set(x86_files
avx512fp16intrin.h
avx512ifmaintrin.h
avx512ifmavlintrin.h
- avx512pfintrin.h
avx512vbmi2intrin.h
avx512vbmiintrin.h
avx512vbmivlintrin.h
diff --git a/clang/lib/Headers/avx512pfintrin.h b/clang/lib/Headers/avx512pfintrin.h
deleted file mode 100644
index f853be021a2dd..0000000000000
--- a/clang/lib/Headers/avx512pfintrin.h
+++ /dev/null
@@ -1,92 +0,0 @@
-/*===------------- avx512pfintrin.h - PF intrinsics ------------------------===
- *
- *
- * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
- * See https://llvm.org/LICENSE.txt for license information.
- * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- *
- *===-----------------------------------------------------------------------===
- */
-#ifndef __IMMINTRIN_H
-#error "Never use <avx512pfintrin.h> directly; include <immintrin.h> instead."
-#endif
-
-#ifndef __AVX512PFINTRIN_H
-#define __AVX512PFINTRIN_H
-
-#define _mm512_mask_prefetch_i32gather_pd(index, mask, addr, scale, hint) \
- __builtin_ia32_gatherpfdpd((__mmask8)(mask), (__v8si)(__m256i)(index), \
- (void const *)(addr), (int)(scale), \
- (int)(hint))
-
-#define _mm512_prefetch_i32gather_pd(index, addr, scale, hint) \
- __builtin_ia32_gatherpfdpd((__mmask8) -1, (__v8si)(__m256i)(index), \
- (void const *)(addr), (int)(scale), \
- (int)(hint))
-
-#define _mm512_mask_prefetch_i32gather_ps(index, mask, addr, scale, hint) \
- __builtin_ia32_gatherpfdps((__mmask16)(mask), \
- (__v16si)(__m512i)(index), (void const *)(addr), \
- (int)(scale), (int)(hint))
-
-#define _mm512_prefetch_i32gather_ps(index, addr, scale, hint) \
- __builtin_ia32_gatherpfdps((__mmask16) -1, \
- (__v16si)(__m512i)(index), (void const *)(addr), \
- (int)(scale), (int)(hint))
-
-#define _mm512_mask_prefetch_i64gather_pd(index, mask, addr, scale, hint) \
- __builtin_ia32_gatherpfqpd((__mmask8)(mask), (__v8di)(__m512i)(index), \
- (void const *)(addr), (int)(scale), \
- (int)(hint))
-
-#define _mm512_prefetch_i64gather_pd(index, addr, scale, hint) \
- __builtin_ia32_gatherpfqpd((__mmask8) -1, (__v8di)(__m512i)(index), \
- (void const *)(addr), (int)(scale), \
- (int)(hint))
-
-#define _mm512_mask_prefetch_i64gather_ps(index, mask, addr, scale, hint) \
- __builtin_ia32_gatherpfqps((__mmask8)(mask), (__v8di)(__m512i)(index), \
- (void const *)(addr), (int)(scale), (int)(hint))
-
-#define _mm512_prefetch_i64gather_ps(index, addr, scale, hint) \
- __builtin_ia32_gatherpfqps((__mmask8) -1, (__v8di)(__m512i)(index), \
- (void const *)(addr), (int)(scale), (int)(hint))
-
-#define _mm512_prefetch_i32scatter_pd(addr, index, scale, hint) \
- __builtin_ia32_scatterpfdpd((__mmask8)-1, (__v8si)(__m256i)(index), \
- (void *)(addr), (int)(scale), \
- (int)(hint))
-
-#define _mm512_mask_prefetch_i32scatter_pd(addr, mask, index, scale, hint) \
- __builtin_ia32_scatterpfdpd((__mmask8)(mask), (__v8si)(__m256i)(index), \
- (void *)(addr), (int)(scale), \
- (int)(hint))
-
-#define _mm512_prefetch_i32scatter_ps(addr, index, scale, hint) \
- __builtin_ia32_scatterpfdps((__mmask16)-1, (__v16si)(__m512i)(index), \
- (void *)(addr), (int)(scale), (int)(hint))
-
-#define _mm512_mask_prefetch_i32scatter_ps(addr, mask, index, scale, hint) \
- __builtin_ia32_scatterpfdps((__mmask16)(mask), \
- (__v16si)(__m512i)(index), (void *)(addr), \
- (int)(scale), (int)(hint))
-
-#define _mm512_prefetch_i64scatter_pd(addr, index, scale, hint) \
- __builtin_ia32_scatterpfqpd((__mmask8)-1, (__v8di)(__m512i)(index), \
- (void *)(addr), (int)(scale), \
- (int)(hint))
-
-#define _mm512_mask_prefetch_i64scatter_pd(addr, mask, index, scale, hint) \
- __builtin_ia32_scatterpfqpd((__mmask8)(mask), (__v8di)(__m512i)(index), \
- (void *)(addr), (int)(scale), \
- (int)(hint))
-
-#define _mm512_prefetch_i64scatter_ps(addr, index, scale, hint) \
- __builtin_ia32_scatterpfqps((__mmask8)-1, (__v8di)(__m512i)(index), \
- (void *)(addr), (int)(scale), (int)(hint))
-
-#define _mm512_mask_prefetch_i64scatter_ps(addr, mask, index, scale, hint) \
- __builtin_ia32_scatterpfqps((__mmask8)(mask), (__v8di)(__m512i)(index), \
- (void *)(addr), (int)(scale), (int)(hint))
-
-#endif
diff --git a/clang/lib/Headers/cpuid.h b/clang/lib/Headers/cpuid.h
index 3bb7fb0fb8a6b..378d1a9a8bdaa 100644
--- a/clang/lib/Headers/cpuid.h
+++ b/clang/lib/Headers/cpuid.h
@@ -159,7 +159,6 @@
#define bit_AVX512IFMA 0x00200000
#define bit_CLFLUSHOPT 0x00800000
#define bit_CLWB 0x01000000
-#define bit_AVX512PF 0x04000000
#define bit_AVX512CD 0x10000000
#define bit_SHA 0x20000000
#define bit_AVX512BW 0x40000000
diff --git a/clang/lib/Headers/immintrin.h b/clang/lib/Headers/immintrin.h
index 649be82ce8c1e..cd6cf09b90cad 100644
--- a/clang/lib/Headers/immintrin.h
+++ b/clang/lib/Headers/immintrin.h
@@ -182,10 +182,6 @@
#include <avx512vlvbmi2intrin.h>
#endif
-#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512PF__)
-#include <avx512pfintrin.h>
-#endif
-
#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512FP16__)
#include <avx512fp16intrin.h>
#endif
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 32a8ff9e44d2f..14ad16bfcbe9b 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -7047,16 +7047,6 @@ bool Sema::CheckX86BuiltinGatherScatterScale(unsigned BuiltinID,
switch (BuiltinID) {
default:
return false;
- case X86::BI__builtin_ia32_gatherpfdpd:
- case X86::BI__builtin_ia32_gatherpfdps:
- case X86::BI__builtin_ia32_gatherpfqpd:
- case X86::BI__builtin_ia32_gatherpfqps:
- case X86::BI__builtin_ia32_scatterpfdpd:
- case X86::BI__builtin_ia32_scatterpfdps:
- case X86::BI__builtin_ia32_scatterpfqpd:
- case X86::BI__builtin_ia32_scatterpfqps:
- ArgNum = 3;
- break;
case X86::BI__builtin_ia32_gatherd_pd:
case X86::BI__builtin_ia32_gatherd_pd256:
case X86::BI__builtin_ia32_gatherq_pd:
@@ -7563,16 +7553,6 @@ bool Sema::CheckX86BuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID,
case X86::BI__builtin_ia32_vsm3rnds2:
i = 3; l = 0; u = 255;
break;
- case X86::BI__builtin_ia32_gatherpfdpd:
- case X86::BI__builtin_ia32_gatherpfdps:
- case X86::BI__builtin_ia32_gatherpfqpd:
- case X86::BI__builtin_ia32_gatherpfqps:
- case X86::BI__builtin_ia32_scatterpfdpd:
- case X86::BI__builtin_ia32_scatterpfdps:
- case X86::BI__builtin_ia32_scatterpfqpd:
- case X86::BI__builtin_ia32_scatterpfqps:
- i = 4; l = 2; u = 3;
- break;
case X86::BI__builtin_ia32_reducesd_mask:
case X86::BI__builtin_ia32_reducess_mask:
case X86::BI__builtin_ia32_rndscalesd_round_mask:
diff --git a/clang/test/CodeGen/X86/avx512pf-builtins.c b/clang/test/CodeGen/X86/avx512pf-builtins.c
deleted file mode 100644
index 3a117ed6a9460..0000000000000
--- a/clang/test/CodeGen/X86/avx512pf-builtins.c
+++ /dev/null
@@ -1,100 +0,0 @@
-// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512pf -emit-llvm -o - -Wall | FileCheck %s
-
-
-#include <immintrin.h>
-
-void test_mm512_mask_prefetch_i32gather_pd(__m256i index, __mmask8 mask, void const *addr) {
- // CHECK-LABEL: @test_mm512_mask_prefetch_i32gather_pd
- // CHECK: @llvm.x86.avx512.gatherpf.dpd
- return _mm512_mask_prefetch_i32gather_pd(index, mask, addr, 2, _MM_HINT_T0);
-}
-
-void test_mm512_prefetch_i32gather_pd(__m256i index, void const *addr) {
- // CHECK-LABEL: @test_mm512_prefetch_i32gather_pd
- // CHECK: @llvm.x86.avx512.gatherpf.dpd
- return _mm512_prefetch_i32gather_pd(index, addr, 2, _MM_HINT_T0);
-}
-
-void test_mm512_mask_prefetch_i32gather_ps(__m512i index, __mmask16 mask, void const *addr) {
- // CHECK-LABEL: @test_mm512_mask_prefetch_i32gather_ps
- // CHECK: @llvm.x86.avx512.gatherpf.dps
- return _mm512_mask_prefetch_i32gather_ps(index, mask, addr, 2, _MM_HINT_T0);
-}
-
-void test_mm512_prefetch_i32gather_ps(__m512i index, void const *addr) {
- // CHECK-LABEL: @test_mm512_prefetch_i32gather_ps
- // CHECK: @llvm.x86.avx512.gatherpf.dps
- return _mm512_prefetch_i32gather_ps(index, addr, 2, _MM_HINT_T0);
-}
-
-void test_mm512_mask_prefetch_i64gather_pd(__m512i index, __mmask8 mask, void const *addr) {
- // CHECK-LABEL: @test_mm512_mask_prefetch_i64gather_pd
- // CHECK: @llvm.x86.avx512.gatherpf.qpd
- return _mm512_mask_prefetch_i64gather_pd(index, mask, addr, 2, _MM_HINT_T0);
-}
-
-void test_mm512_prefetch_i64gather_pd(__m512i index, void const *addr) {
- // CHECK-LABEL: @test_mm512_prefetch_i64gather_pd
- // CHECK: @llvm.x86.avx512.gatherpf.qpd
- return _mm512_prefetch_i64gather_pd(index, addr, 2, _MM_HINT_T0);
-}
-
-void test_mm512_mask_prefetch_i64gather_ps(__m512i index, __mmask8 mask, void const *addr) {
- // CHECK-LABEL: @test_mm512_mask_prefetch_i64gather_ps
- // CHECK: @llvm.x86.avx512.gatherpf.qps
- return _mm512_mask_prefetch_i64gather_ps(index, mask, addr, 2, _MM_HINT_T0);
-}
-
-void test_mm512_prefetch_i64gather_ps(__m512i index, void const *addr) {
- // CHECK-LABEL: @test_mm512_prefetch_i64gather_ps
- // CHECK: @llvm.x86.avx512.gatherpf.qps
- return _mm512_prefetch_i64gather_ps(index, addr, 2, _MM_HINT_T0);
-}
-
-void test_mm512_prefetch_i32scatter_pd(void *addr, __m256i index) {
- // CHECK-LABEL: @test_mm512_prefetch_i32scatter_pd
- // CHECK: @llvm.x86.avx512.scatterpf.dpd.512
- return _mm512_prefetch_i32scatter_pd(addr, index, 1, _MM_HINT_T1);
-}
-
-void test_mm512_mask_prefetch_i32scatter_pd(void *addr, __mmask8 mask, __m256i index) {
- // CHECK-LABEL: @test_mm512_mask_prefetch_i32scatter_pd
- // CHECK: @llvm.x86.avx512.scatterpf.dpd.512
- return _mm512_mask_prefetch_i32scatter_pd(addr, mask, index, 1, _MM_HINT_T1);
-}
-
-void test_mm512_prefetch_i32scatter_ps(void *addr, __m512i index) {
- // CHECK-LABEL: @test_mm512_prefetch_i32scatter_ps
- // CHECK: @llvm.x86.avx512.scatterpf.dps.512
- return _mm512_prefetch_i32scatter_ps(addr, index, 1, _MM_HINT_T1);
-}
-
-void test_mm512_mask_prefetch_i32scatter_ps(void *addr, __mmask16 mask, __m512i index) {
- // CHECK-LABEL: @test_mm512_mask_prefetch_i32scatter_ps
- // CHECK: @llvm.x86.avx512.scatterpf.dps.512
- return _mm512_mask_prefetch_i32scatter_ps(addr, mask, index, 1, _MM_HINT_T1);
-}
-
-void test_mm512_prefetch_i64scatter_pd(void *addr, __m512i index) {
- // CHECK-LABEL: @test_mm512_prefetch_i64scatter_pd
- // CHECK: @llvm.x86.avx512.scatterpf.qpd.512
- return _mm512_prefetch_i64scatter_pd(addr, index, 1, _MM_HINT_T1);
-}
-
-void test_mm512_mask_prefetch_i64scatter_pd(void *addr, __mmask16 mask, __m512i index) {
- // CHECK-LABEL: @test_mm512_mask_prefetch_i64scatter_pd
- // CHECK: @llvm.x86.avx512.scatterpf.qpd.512
- return _mm512_mask_prefetch_i64scatter_pd(addr, mask, index, 1, _MM_HINT_T1);
-}
-
-void test_mm512_prefetch_i64scatter_ps(void *addr, __m512i index) {
- // CHECK-LABEL: @test_mm512_prefetch_i64scatter_ps
- // CHECK: @llvm.x86.avx512.scatterpf.qps.512
- return _mm512_prefetch_i64scatter_ps(addr, index, 1, _MM_HINT_T1);
-}
-
-void test_mm512_mask_prefetch_i64scatter_ps(void *addr, __mmask16 mask, __m512i index) {
- // CHECK-LABEL: @test_mm512_mask_prefetch_i64scatter_ps
- // CHECK: @llvm.x86.avx512.scatterpf.qps.512
- return _mm512_mask_prefetch_i64scatter_ps(addr, mask, index, 1, _MM_HINT_T1);
-}
diff --git a/clang/test/CodeGen/target-builtin-noerror.c b/clang/test/CodeGen/target-builtin-noerror.c
index 79085d3c0c722..2e16fd8b9fe4d 100644
--- a/clang/test/CodeGen/target-builtin-noerror.c
+++ b/clang/test/CodeGen/target-builtin-noerror.c
@@ -68,7 +68,6 @@ void verifyfeaturestrings(void) {
(void)__builtin_cpu_supports("avx512bw");
(void)__builtin_cpu_supports("avx512dq");
(void)__builtin_cpu_supports("avx512cd");
- (void)__builtin_cpu_supports("avx512pf");
(void)__builtin_cpu_supports("avx512vbmi");
(void)__builtin_cpu_supports("avx512ifma");
(void)__builtin_cpu_supports("avx5124vnniw");
diff --git a/clang/test/Driver/cl-x86-flags.c b/clang/test/Driver/cl-x86-flags.c
index 9a3a65faaba9a..ffa4602fdbeec 100644
--- a/clang/test/Driver/cl-x86-flags.c
+++ b/clang/test/Driver/cl-x86-flags.c
@@ -71,7 +71,6 @@
// RUN: %clang_cl -m32 -arch:AVX512F --target=i386-pc-windows /c /Fo%t.obj -Xclang -verify=KNL1 -DTEST_32_ARCH_AVX512F -- %s
// KNL1-warning@*:* {{KNL, KNM related Intel Xeon Phi CPU's specific ISA's supports will be removed in LLVM 19.}}
-// KNL1-warning@*:* {{KNL, KNM related Intel Xeon Phi CPU's specific ISA's supports will be removed in LLVM 19.}}
#if defined(TEST_32_ARCH_AVX512F)
#if _M_IX86_FP != 2 || !__AVX__ || !__AVX2__ || !__AVX512F__ || __AVX512BW__
#error fail
@@ -113,7 +112,6 @@
// RUN: %clang_cl -m64 -arch:AVX512F --target=i386-pc-windows /c /Fo%t.obj -Xclang -verify=KNL2 -DTEST_64_ARCH_AVX512F -- %s
// KNL2-warning@*:* {{KNL, KNM related Intel Xeon Phi CPU's specific ISA's supports will be removed in LLVM 19.}}
-// KNL2-warning@*:* {{KNL, KNM related Intel Xeon Phi CPU's specific ISA's supports will be removed in LLVM 19.}}
#if defined(TEST_64_ARCH_AVX512F)
#if _M_IX86_FP || !__AVX__ || !__AVX2__ || !__AVX512F__ || __AVX512BW__
#error fail
diff --git a/clang/test/Driver/x86-target-features.c b/clang/test/Driver/x86-target-features.c
index 0afe8f6da45bb..08674b6e7172e 100644
--- a/clang/test/Driver/x86-target-features.c
+++ b/clang/test/Driver/x86-target-features.c
@@ -21,10 +21,10 @@
// SSE4-AES: "-target-feature" "+sse4.2" "-target-feature" "+aes"
// NO-SSE4-AES: "-target-feature" "-sse4.1" "-target-feature" "-aes"
-// RUN: %clang --target=i386 -march=i386 -mavx -mavx2 -mavx512f -mavx512cd -mavx512pf -mavx512dq -mavx512bw -mavx512vl -mavx512vbmi -mavx512vbmi2 -mavx512ifma %s -### 2>&1 | FileCheck -check-prefix=AVX %s
-// RUN: %clang --target=i386 -march=i386 -mno-avx -mno-avx2 -mno-avx512f -mno-avx512cd -mno-avx512pf -mno-avx512dq -mno-avx512bw -mno-avx512vl -mno-avx512vbmi -mno-avx512vbmi2 -mno-avx512ifma %s -### 2>&1 | FileCheck -check-prefix=NO-AVX %s
-// AVX: "-target-feature" "+avx" "-target-feature" "+avx2" "-target-feature" "+avx512f" "-target-feature" "+avx512cd" "-target-feature" "+avx512pf" "-target-feature" "+avx512dq" "-target-feature" "+avx512bw" "-target-feature" "+avx512vl" "-target-feature" "+avx512vbmi" "-target-feature" "+avx512vbmi2" "-target-feature" "+avx512ifma"
-// NO-AVX: "-target-feature" "-avx" "-target-feature" "-avx2" "-target-feature" "-avx512f" "-target-feature" "-avx512cd" "-target-feature" "-avx512pf" "-target-feature" "-avx512dq" "-target-feature" "-avx512bw" "-target-feature" "-avx512vl" "-target-feature" "-avx512vbmi" "-target-feature" "-avx512vbmi2" "-target-feature" "-avx512ifma"
+// RUN: %clang --target=i386 -march=i386 -mavx -mavx2 -mavx512f -mavx512cd -mavx512dq -mavx512bw -mavx512vl -mavx512vbmi -mavx512vbmi2 -mavx512ifma %s -### 2>&1 | FileCheck -check-prefix=AVX %s
+// RUN: %clang --target=i386 -march=i386 -mno-avx -mno-avx2 -mno-avx512f -mno-avx512cd -mno-avx512dq -mno-avx512bw -mno-avx512vl -mno-avx512vbmi -mno-avx512vbmi2 -mno-avx512ifma %s -### 2>&1 | FileCheck -check-prefix=NO-AVX %s
+// AVX: "-target-feature" "+avx" "-target-feature" "+avx2" "-target-feature" "+avx512f" "-target-feature" "+avx512cd" "-target-feature" "+avx512dq" "-target-feature" "+avx512bw" "-target-feature" "+avx512vl" "-target-feature" "+avx512vbmi" "-target-feature" "+avx512vbmi2" "-target-feature" "+avx512ifma"
+// NO-AVX: "-target-feature" "-avx" "-target-feature" "-avx2" "-target-feature" "-avx512f" "-target-feature" "-avx512cd" "-target-feature" "-avx512dq" "-target-feature" "-avx512bw" "-target-feature" "-avx512vl" "-target-feature" "-avx512vbmi" "-target-feature" "-avx512vbmi2" "-target-feature" "-avx512ifma"
// RUN: %clang --target=i386 -march=i386 -mpclmul -mrdrnd -mfsgsbase -mbmi -mbmi2 %s -### 2>&1 | FileCheck -check-prefix=BMI %s
// RUN: %clang --target=i386 -march=i386 -mno-pclmul -mno-rdrnd -mno-fsgsbase -mno-bmi -mno-bmi2 %s -### 2>&1 | FileCheck -check-prefix=NO-BMI %s
diff --git a/clang/test/Frontend/x86-target-cpu.c b/clang/test/Frontend/x86-target-cpu.c
index 8c701cc364390..d2184538594e1 100644
--- a/clang/test/Frontend/x86-target-cpu.c
+++ b/clang/test/Frontend/x86-target-cpu.c
@@ -17,10 +17,8 @@
// RUN: %clang_cc1 -triple x86_64-unknown-unknown -target-cpu icelake-server -verify %s
// RUN: %clang_cc1 -triple x86_64-unknown-unknown -target-cpu knl -verify=knl %s
// knl-warning@*:* {{KNL, KNM related Intel Xeon Phi CPU's specific ISA's supports will be removed in LLVM 19.}}
-// knl-warning@*:* {{KNL, KNM related Intel Xeon Phi CPU's specific ISA's supports will be removed in LLVM 19.}}
// RUN: %clang_cc1 -triple x86_64-unknown-unknown -target-cpu knm -verify=knm %s
// knm-warning@*:* {{KNL, KNM related Intel Xeon Phi CPU's specific ISA's supports will be removed in LLVM 19.}}
-// knm-warning@*:* {{KNL, KNM related Intel Xeon Phi CPU's specific ISA's supports will be removed in LLVM 19.}}
// RUN: %clang_cc1 -triple x86_64-unknown-unknown -target-cpu bonnell -verify %s
// RUN: %clang_cc1 -triple x86_64-unknown-unknown -target-cpu silvermont -verify %s
// RUN: %clang_cc1 -triple x86_64-unknown-unknown -target-cpu k8 -verify %s
diff --git a/clang/test/Preprocessor/predefined-arch-macros.c b/clang/test/Preprocessor/predefined-arch-macros.c
index e14101f4d9961..0152682ba5728 100644
--- a/clang/test/Preprocessor/predefined-arch-macros.c
+++ b/clang/test/Preprocessor/predefined-arch-macros.c
@@ -794,7 +794,6 @@
// CHECK_KNL_M32: #define __AVX2__ 1
// CHECK_KNL_M32: #define __AVX512CD__ 1
// CHECK_KNL_M32: #define __AVX512F__ 1
-// CHECK_KNL_M32: #define __AVX512PF__ 1
// CHECK_KNL_M32: #define __AVX__ 1
// CHECK_KNL_M32: #define __BMI2__ 1
// CHECK_KNL_M32: #define __BMI__ 1
@@ -832,7 +831,6 @@
// CHECK_KNL_M64: #define __AVX2__ 1
// CHECK_KNL_M64: #define __AVX512CD__ 1
// CHECK_KNL_M64: #define __AVX512F__ 1
-// CHECK_KNL_M64: #define __AVX512PF__ 1
// CHECK_KNL_M64: #define __AVX__ 1
// CHECK_KNL_M64: #define __BMI2__ 1
// CHECK_KNL_M64: #define __BMI__ 1
@@ -873,7 +871,6 @@
// CHECK_KNM_M32: #define __AVX2__ 1
// CHECK_KNM_M32: #define __AVX512CD__ 1
// CHECK_KNM_M32: #define __AVX512F__ 1
-// CHECK_KNM_M32: #define __AVX512PF__ 1
// CHECK_KNM_M32: #define __AVX512VPOPCNTDQ__ 1
// CHECK_KNM_M32: #define __AVX__ 1
// CHECK_KNM_M32: #define __BMI2__ 1
@@ -909,7 +906,6 @@
// CHECK_KNM_M64: #define __AVX2__ 1
// CHECK_KNM_M64: #define __AVX512CD__ 1
// CHECK_KNM_M64: #define __AVX512F__ 1
-// CHECK_KNM_M64: #define __AVX512PF__ 1
// CHECK_KNM_M64: #define __AVX512VPOPCNTDQ__ 1
// CHECK_KNM_M64: #define __AVX__ 1
// CHECK_KNM_M64: #define __BMI2__ 1
diff --git a/clang/test/Preprocessor/x86_target_features.c b/clang/test/Preprocessor/x86_target_features.c
index cd8cf85622fa9..7567267be26b4 100644
--- a/clang/test/Preprocessor/x86_target_features.c
+++ b/clang/test/Preprocessor/x86_target_features.c
@@ -90,22 +90,6 @@
// AVX512CD: #define __SSE__ 1
// AVX512CD: #define __SSSE3__ 1
-// RUN: %clang -target i386-unknown-unknown -march=atom -mavx512pf -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=AVX512PF %s
-
-// AVX512PF: #define __AVX2__ 1
-// AVX512PF: #define __AVX512F__ 1
-// AVX512PF: #define __AVX512PF__ 1
-// AVX512PF: #define __AVX__ 1
-// AVX512PF: #define __EVEX512__ 1
-// AVX512PF: #define __SSE2_MATH__ 1
-// AVX512PF: #define __SSE2__ 1
-// AVX512PF: #define __SSE3__ 1
-// AVX512PF: #define __SSE4_1__ 1
-// AVX512PF: #define __SSE4_2__ 1
-// AVX512PF: #define __SSE_MATH__ 1
-// AVX512PF: #define __SSE__ 1
-// AVX512PF: #define __SSSE3__ 1
-
// RUN: %clang -target i386-unknown-unknown -march=atom -mavx512dq -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=AVX512DQ %s
// AVX512DQ: #define __AVX2__ 1
@@ -155,22 +139,6 @@
// AVX512VL: #define __SSE__ 1
// AVX512VL: #define __SSSE3__ 1
-// RUN: %clang -target i386-unknown-unknown -march=atom -mavx512pf -mno-avx512f -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=AVX512F2 %s
-
-// AVX512F2: #define __AVX2__ 1
-// AVX512F2-NOT: #define __AVX512F__ 1
-// AVX512F2-NOT: #define __AVX512PF__ 1
-// AVX512F2-NOT: #define __EVEX512__ 1
-// AVX512F2: #define __AVX__ 1
-// AVX512F2: #define __SSE2_MATH__ 1
-// AVX512F2: #define __SSE2__ 1
-// AVX512F2: #define __SSE3__ 1
-// AVX512F2: #define __SSE4_1__ 1
-// AVX512F2: #define __SSE4_2__ 1
-// AVX512F2: #define __SSE_MATH__ 1
-// AVX512F2: #define __SSE__ 1
-// AVX512F2: #define __SSSE3__ 1
-
// RUN: %clang -target i386-unknown-unknown -march=atom -mavx512ifma -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=AVX512IFMA %s
// AVX512IFMA: #define __AVX2__ 1
diff --git a/clang/test/Sema/builtins-x86.c b/clang/test/Sema/builtins-x86.c
index cbaf7bcde871e..7d9cdce3d7894 100644
--- a/clang/test/Sema/builtins-x86.c
+++ b/clang/test/Sema/builtins-x86.c
@@ -106,14 +106,6 @@ __m128i test_mm_mask_i32gather_epi32(__m128i a, int const *b, __m128i c, __m128i
return __builtin_ia32_gatherd_d(a, b, c, mask, 5); // expected-error {{scale argument must be 1, 2, 4, or 8}}
}
-void _mm512_mask_prefetch_i32gather_ps(__m512i index, __mmask16 mask, int const *addr) {
- __builtin_ia32_gatherpfdps(mask, index, addr, 5, 1); // expected-error {{scale argument must be 1, 2, 4, or 8}}
-}
-
-void _mm512_mask_prefetch_i32gather_ps_2(__m512i index, __mmask16 mask, int const *addr) {
- __builtin_ia32_gatherpfdps(mask, index, addr, 1, 1); // expected-error {{argument value 1 is outside the valid range [2, 3]}}
-}
-
__m512i test_mm512_shldi_epi64(__m512i __A, __m512i __B) {
return __builtin_ia32_vpshldq512(__A, __B, 1024); // expected-error {{argument value 1024 is outside the valid range [0, 255]}}
}
diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td
index 0fda7e66c06a7..aee804047e1b0 100644
--- a/llvm/include/llvm/IR/IntrinsicsX86.td
+++ b/llvm/include/llvm/IR/IntrinsicsX86.td
@@ -4125,38 +4125,6 @@ let TargetPrefix = "x86" in {
Intrinsic<[],
[llvm_ptr_ty, llvm_i8_ty, llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty],
[ImmArg<ArgIndex<4>>]>;
-
- // gather prefetch
- // NOTE: These can't be ArgMemOnly because you can put the address completely
- // in the index register.
- def int_x86_avx512_gatherpf_dpd_512 : ClangBuiltin<"__builtin_ia32_gatherpfdpd">,
- Intrinsic<[], [llvm_i8_ty, llvm_v8i32_ty, llvm_ptr_ty,
- llvm_i32_ty, llvm_i32_ty], [ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>]>;
- def int_x86_avx512_gatherpf_dps_512 : ClangBuiltin<"__builtin_ia32_gatherpfdps">,
- Intrinsic<[], [llvm_i16_ty, llvm_v16i32_ty, llvm_ptr_ty,
- llvm_i32_ty, llvm_i32_ty], [ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>]>;
- def int_x86_avx512_gatherpf_qpd_512 : ClangBuiltin<"__builtin_ia32_gatherpfqpd">,
- Intrinsic<[], [llvm_i8_ty, llvm_v8i64_ty, llvm_ptr_ty,
- llvm_i32_ty, llvm_i32_ty], [ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>]>;
- def int_x86_avx512_gatherpf_qps_512 : ClangBuiltin<"__builtin_ia32_gatherpfqps">,
- Intrinsic<[], [llvm_i8_ty, llvm_v8i64_ty, llvm_ptr_ty,
- llvm_i32_ty, llvm_i32_ty], [ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>]>;
-
- // scatter prefetch
- // NOTE: These can't be ArgMemOnly because you can put the address completely
- // in the index register.
- def int_x86_avx512_scatterpf_dpd_512 : ClangBuiltin<"__builtin_ia32_scatterpfdpd">,
- Intrinsic<[], [llvm_i8_ty, llvm_v8i32_ty, llvm_ptr_ty,
- llvm_i32_ty, llvm_i32_ty], [ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>]>;
- def int_x86_avx512_scatterpf_dps_512 : ClangBuiltin<"__builtin_ia32_scatterpfdps">,
- Intrinsic<[], [llvm_i16_ty, llvm_v16i32_ty, llvm_ptr_ty,
- llvm_i32_ty, llvm_i32_ty], [ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>]>;
- def int_x86_avx512_scatterpf_qpd_512 : ClangBuiltin<"__builtin_ia32_scatterpfqpd">,
- Intrinsic<[], [llvm_i8_ty, llvm_v8i64_ty, llvm_ptr_ty,
- llvm_i32_ty, llvm_i32_ty], [ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>]>;
- def int_x86_avx512_scatterpf_qps_512 : ClangBuiltin<"__builtin_ia32_scatterpfqps">,
- Intrinsic<[], [llvm_i8_ty, llvm_v8i64_ty, llvm_ptr_ty,
- llvm_i32_ty, llvm_i32_ty], [ImmArg<ArgIndex<3>>, ImmArg<ArgIndex<4>>]>;
}
// AVX512 gather/scatter intrinsics that use vXi1 masks.
diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td
index 686237ab6c6f8..32de52f3aea51 100644
--- a/llvm/lib/Target/X86/X86.td
+++ b/llvm/lib/Target/X86/X86.td
@@ -130,9 +130,6 @@ def FeatureCDI : SubtargetFeature<"avx512cd", "HasCDI", "true",
def FeatureVPOPCNTDQ : SubtargetFeature<"avx512vpopcntdq", "HasVPOPCNTDQ",
"true", "Enable AVX-512 Population Count Instructions",
[FeatureAVX512]>;
-def FeaturePFI : SubtargetFeature<"avx512pf", "HasPFI", "true",
- "Enable AVX-512 PreFetch Instructions",
- [FeatureAVX512]>;
def FeaturePREFETCHI : SubtargetFeature<"prefetchi", "HasPREFETCHI",
"true",
"Prefetch instruction with T0 or T1 Hint">;
@@ -1310,7 +1307,6 @@ def ProcessorFeatures {
FeatureAVX512,
FeatureEVEX512,
FeatureCDI,
- FeaturePFI,
FeaturePREFETCHWT1,
FeatureADX,
FeatureRDSEED,
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index c77146b37df2e..8e122f775a8a7 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -10357,7 +10357,7 @@ defm VPSCATTER : avx512_scatter_q_pd<0xA0, 0xA1, avx512vl_i64_info, "vpscatter",
// prefetch
multiclass avx512_gather_scatter_prefetch<bits<8> opc, Format F, string OpcodeStr,
RegisterClass KRC, X86MemOperand memop> {
- let Predicates = [HasPFI], mayLoad = 1, mayStore = 1 in
+ let mayLoad = 1, mayStore = 1 in
def m : AVX5128I<opc, F, (outs), (ins KRC:$mask, memop:$src),
!strconcat(OpcodeStr, "\t{$src {${mask}}|{${mask}}, $src}"), []>,
EVEX, EVEX_K, Sched<[WriteLoad]>;
diff --git a/llvm/lib/Target/X86/X86InstrPredicates.td b/llvm/lib/Target/X86/X86InstrPredicates.td
index ef6a2d7eff638..570097bf8dcb4 100644
--- a/llvm/lib/Target/X86/X86InstrPredicates.td
+++ b/llvm/lib/Target/X86/X86InstrPredicates.td
@@ -79,7 +79,6 @@ def UseAVX2 : Predicate<"Subtarget->hasAVX2() && !Subtarget->hasAVX512()">;
def NoAVX512 : Predicate<"!Subtarget->hasAVX512()">;
def HasCDI : Predicate<"Subtarget->hasCDI()">;
def HasVPOPCNTDQ : Predicate<"Subtarget->hasVPOPCNTDQ()">;
-def HasPFI : Predicate<"Subtarget->hasPFI()">;
def HasDQI : Predicate<"Subtarget->hasDQI()">;
def NoDQI : Predicate<"!Subtarget->hasDQI()">;
def HasBWI : Predicate<"Subtarget->hasBWI()">;
diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
index 9a0a4e8657035..e3961e0094d3a 100644
--- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h
+++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h
@@ -108,15 +108,6 @@ static const IntrinsicData IntrinsicsWithChain[] = {
X86_INTRINSIC_DATA(avx512_gather3siv8_sf, GATHER, 0, 0),
X86_INTRINSIC_DATA(avx512_gather3siv8_si, GATHER, 0, 0),
- X86_INTRINSIC_DATA(avx512_gatherpf_dpd_512, PREFETCH,
- X86::VGATHERPF0DPDm, X86::VGATHERPF1DPDm),
- X86_INTRINSIC_DATA(avx512_gatherpf_dps_512, PREFETCH,
- X86::VGATHERPF0DPSm, X86::VGATHERPF1DPSm),
- X86_INTRINSIC_DATA(avx512_gatherpf_qpd_512, PREFETCH,
- X86::VGATHERPF0QPDm, X86::VGATHERPF1QPDm),
- X86_INTRINSIC_DATA(avx512_gatherpf_qps_512, PREFETCH,
- X86::VGATHERPF0QPSm, X86::VGATHERPF1QPSm),
-
X86_INTRINSIC_DATA(avx512_mask_gather_dpd_512, GATHER, 0, 0),
X86_INTRINSIC_DATA(avx512_mask_gather_dpi_512, GATHER, 0, 0),
X86_INTRINSIC_DATA(avx512_mask_gather_dpq_512, GATHER, 0, 0),
@@ -292,14 +283,6 @@ static const IntrinsicData IntrinsicsWithChain[] = {
X86_INTRINSIC_DATA(avx512_scatterdiv4_si, SCATTER, 0, 0),
X86_INTRINSIC_DATA(avx512_scatterdiv8_sf, SCATTER, 0, 0),
X86_INTRINSIC_DATA(avx512_scatterdiv8_si, SCATTER, 0, 0),
- X86_INTRINSIC_DATA(avx512_scatterpf_dpd_512, PREFETCH, X86::VSCATTERPF0DPDm,
- X86::VSCATTERPF1DPDm),
- X86_INTRINSIC_DATA(avx512_scatterpf_dps_512, PREFETCH, X86::VSCATTERPF0DPSm,
- X86::VSCATTERPF1DPSm),
- X86_INTRINSIC_DATA(avx512_scatterpf_qpd_512, PREFETCH, X86::VSCATTERPF0QPDm,
- X86::VSCATTERPF1QPDm),
- X86_INTRINSIC_DATA(avx512_scatterpf_qps_512, PREFETCH, X86::VSCATTERPF0QPSm,
- X86::VSCATTERPF1QPSm),
X86_INTRINSIC_DATA(avx512_scattersiv2_df, SCATTER, 0, 0),
X86_INTRINSIC_DATA(avx512_scattersiv2_di, SCATTER, 0, 0),
X86_INTRINSIC_DATA(avx512_scattersiv4_df, SCATTER, 0, 0),
diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp
index 2fcdc1cedb9a2..db465fdc6b44c 100644
--- a/llvm/lib/TargetParser/Host.cpp
+++ b/llvm/lib/TargetParser/Host.cpp
@@ -1298,8 +1298,6 @@ static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
setFeature(X86::FEATURE_AVX512IFMA);
if (HasLeaf7 && ((EBX >> 23) & 1))
setFeature(X86::FEATURE_CLFLUSHOPT);
- if (HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save)
- setFeature(X86::FEATURE_AVX512PF);
if (HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save)
setFeature(X86::FEATURE_AVX512CD);
if (HasLeaf7 && ((EBX >> 29) & 1))
@@ -1806,7 +1804,6 @@ bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
Features["avx512ifma"] = HasLeaf7 && ((EBX >> 21) & 1) && HasAVX512Save;
Features["clflushopt"] = HasLeaf7 && ((EBX >> 23) & 1);
Features["clwb"] = HasLeaf7 && ((EBX >> 24) & 1);
- Features["avx512pf"] = HasLeaf7 && ((EBX >> 26) & 1) && HasAVX512Save;
Features["avx512cd"] = HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save;
Features["sha"] = HasLeaf7 && ((EBX >> 29) & 1);
Features["avx512bw"] = HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save;
diff --git a/llvm/test/CodeGen/X86/avx512-gather-scatter-intrin-deprecated.ll b/llvm/test/CodeGen/X86/avx512-gather-scatter-intrin-deprecated.ll
index 8d09497cefb1b..77053e2c1bc98 100644
--- a/llvm/test/CodeGen/X86/avx512-gather-scatter-intrin-deprecated.ll
+++ b/llvm/test/CodeGen/X86/avx512-gather-scatter-intrin-deprecated.ll
@@ -268,30 +268,6 @@ define void @gather_qps(<8 x i64> %ind, <8 x float> %src, ptr %base, ptr %stbuf)
ret void
}
-declare void @llvm.x86.avx512.gatherpf.qps.512(i8, <8 x i64>, ptr , i32, i32);
-declare void @llvm.x86.avx512.scatterpf.qps.512(i8, <8 x i64>, ptr , i32, i32);
-define void @prefetch(<8 x i64> %ind, ptr %base) {
-; CHECK-LABEL: prefetch:
-; CHECK: ## %bb.0:
-; CHECK-NEXT: kxnorw %k0, %k0, %k1
-; CHECK-NEXT: vgatherpf0qps (%rdi,%zmm0,4) {%k1}
-; CHECK-NEXT: kxorw %k0, %k0, %k1
-; CHECK-NEXT: vgatherpf1qps (%rdi,%zmm0,4) {%k1}
-; CHECK-NEXT: movb $1, %al
-; CHECK-NEXT: kmovd %eax, %k1
-; CHECK-NEXT: vscatterpf0qps (%rdi,%zmm0,2) {%k1}
-; CHECK-NEXT: movb $120, %al
-; CHECK-NEXT: kmovd %eax, %k1
-; CHECK-NEXT: vscatterpf1qps (%rdi,%zmm0,2) {%k1}
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
- call void @llvm.x86.avx512.gatherpf.qps.512(i8 -1, <8 x i64> %ind, ptr %base, i32 4, i32 3)
- call void @llvm.x86.avx512.gatherpf.qps.512(i8 0, <8 x i64> %ind, ptr %base, i32 4, i32 2)
- call void @llvm.x86.avx512.scatterpf.qps.512(i8 1, <8 x i64> %ind, ptr %base, i32 2, i32 3)
- call void @llvm.x86.avx512.scatterpf.qps.512(i8 120, <8 x i64> %ind, ptr %base, i32 2, i32 2)
- ret void
-}
-
declare <2 x double> @llvm.x86.avx512.gather3div2.df(<2 x double>, ptr, <2 x i64>, i8, i32)
define <2 x double>@test_int_x86_avx512_gather3div2_df(<2 x double> %x0, ptr %x1, <2 x i64> %x2, i8 %x3) {
diff --git a/llvm/test/CodeGen/X86/avx512-gather-scatter-intrin.ll b/llvm/test/CodeGen/X86/avx512-gather-scatter-intrin.ll
index acbf4387255c5..df71e3c3afa5e 100644
--- a/llvm/test/CodeGen/X86/avx512-gather-scatter-intrin.ll
+++ b/llvm/test/CodeGen/X86/avx512-gather-scatter-intrin.ll
@@ -265,30 +265,6 @@ define dso_local void @gather_qps(<8 x i64> %ind, <8 x float> %src, ptr %base, p
ret void
}
-declare void @llvm.x86.avx512.gatherpf.qps.512(i8, <8 x i64>, ptr , i32, i32);
-declare void @llvm.x86.avx512.scatterpf.qps.512(i8, <8 x i64>, ptr , i32, i32);
-define dso_local void @prefetch(<8 x i64> %ind, ptr %base) {
-; CHECK-LABEL: prefetch:
-; CHECK: # %bb.0:
-; CHECK-NEXT: kxnorw %k0, %k0, %k1
-; CHECK-NEXT: vgatherpf0qps (%rdi,%zmm0,4) {%k1}
-; CHECK-NEXT: kxorw %k0, %k0, %k1
-; CHECK-NEXT: vgatherpf1qps (%rdi,%zmm0,4) {%k1}
-; CHECK-NEXT: movb $1, %al
-; CHECK-NEXT: kmovd %eax, %k1
-; CHECK-NEXT: vscatterpf0qps (%rdi,%zmm0,2) {%k1}
-; CHECK-NEXT: movb $120, %al
-; CHECK-NEXT: kmovd %eax, %k1
-; CHECK-NEXT: vscatterpf1qps (%rdi,%zmm0,2) {%k1}
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
- call void @llvm.x86.avx512.gatherpf.qps.512(i8 -1, <8 x i64> %ind, ptr %base, i32 4, i32 3)
- call void @llvm.x86.avx512.gatherpf.qps.512(i8 0, <8 x i64> %ind, ptr %base, i32 4, i32 2)
- call void @llvm.x86.avx512.scatterpf.qps.512(i8 1, <8 x i64> %ind, ptr %base, i32 2, i32 3)
- call void @llvm.x86.avx512.scatterpf.qps.512(i8 120, <8 x i64> %ind, ptr %base, i32 2, i32 2)
- ret void
-}
-
define <2 x double> @test_int_x86_avx512_mask_gather3div2_df(<2 x double> %x0, ptr %x1, <2 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_gather3div2_df:
; CHECK: # %bb.0:
diff --git a/llvm/test/CodeGen/X86/insert-prefetch-invalid-instr.ll b/llvm/test/CodeGen/X86/insert-prefetch-invalid-instr.ll
index 2f5a36865d4ae..7bdbb19d1714d 100644
--- a/llvm/test/CodeGen/X86/insert-prefetch-invalid-instr.ll
+++ b/llvm/test/CodeGen/X86/insert-prefetch-invalid-instr.ll
@@ -8,16 +8,12 @@ target triple = "x86_64-unknown-linux-gnu"
define dso_local i32 @main() local_unnamed_addr #0 !dbg !7 {
entry:
tail call void @llvm.prefetch(ptr inttoptr (i64 291 to ptr), i32 0, i32 0, i32 1), !dbg !9
- tail call void @llvm.x86.avx512.gatherpf.dpd.512(i8 97, <8 x i32> undef, ptr null, i32 1, i32 2), !dbg !10
ret i32 291, !dbg !11
}
; Function Attrs: inaccessiblemem_or_argmemonly nounwind
declare void @llvm.prefetch(ptr nocapture readonly, i32, i32, i32) #1
-; Function Attrs: argmemonly nounwind
-declare void @llvm.x86.avx512.gatherpf.dpd.512(i8, <8 x i32>, ptr, i32, i32) #2
-
attributes #0 = {"target-cpu"="x86-64" "target-features"="+avx512pf,+sse4.2,+ssse3"}
attributes #1 = { inaccessiblemem_or_argmemonly nounwind }
attributes #2 = { argmemonly nounwind }
@@ -43,4 +39,3 @@ attributes #2 = { argmemonly nounwind }
;CHECK: # %bb.0:
;CHECK: prefetchnta 291
;CHECK-NOT: prefetchnta 42(%rax,%ymm0)
-;CHECK: vgatherpf1dpd (%rax,%ymm0) {%k1}
diff --git a/llvm/test/CodeGen/X86/speculative-load-hardening-gather.ll b/llvm/test/CodeGen/X86/speculative-load-hardening-gather.ll
index 6e89445bead63..7b3667420ec6d 100644
--- a/llvm/test/CodeGen/X86/speculative-load-hardening-gather.ll
+++ b/llvm/test/CodeGen/X86/speculative-load-hardening-gather.ll
@@ -558,28 +558,6 @@ entry:
ret <8 x i64> %v
}
-declare void @llvm.x86.avx512.gatherpf.qps.512(i8, <8 x i64>, ptr, i32, i32);
-
-define void @test_llvm_x86_avx512_gatherpf_qps_512(<8 x i64> %iv, ptr %b) #1 {
-; CHECK-LABEL: test_llvm_x86_avx512_gatherpf_qps_512:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: movq %rsp, %rax
-; CHECK-NEXT: movq $-1, %rcx
-; CHECK-NEXT: sarq $63, %rax
-; CHECK-NEXT: kxnorw %k0, %k0, %k1
-; CHECK-NEXT: orq %rax, %rdi
-; CHECK-NEXT: vpbroadcastq %rax, %zmm1
-; CHECK-NEXT: vporq %zmm0, %zmm1, %zmm0
-; CHECK-NEXT: vgatherpf0qps (%rdi,%zmm0,4) {%k1}
-; CHECK-NEXT: shlq $47, %rax
-; CHECK-NEXT: orq %rax, %rsp
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
-entry:
- call void @llvm.x86.avx512.gatherpf.qps.512(i8 -1, <8 x i64> %iv, ptr %b, i32 4, i32 3)
- ret void
-}
-
declare <4 x float> @llvm.x86.avx512.gather3siv4.sf(<4 x float>, ptr, <4 x i32>, i8, i32)
define <4 x float> @test_llvm_x86_avx512_gather3siv4_sf(ptr %b, <4 x i32> %iv) #2 {
>From 4c509ac5ab9f85b688cba06b309aa129b067795b Mon Sep 17 00:00:00 2001
From: Freddy Ye <freddy.ye at intel.com>
Date: Tue, 26 Dec 2023 13:33:16 +0800
Subject: [PATCH 3/6] Remove PREFETCHWT1 intrinsic supports.
---
clang/include/clang/Driver/Options.td | 2 --
clang/lib/Basic/Targets/X86.cpp | 7 -------
clang/lib/Basic/Targets/X86.h | 1 -
clang/lib/Headers/cpuid.h | 2 ++
clang/test/CodeGen/attr-cpuspecific.c | 10 +++++-----
clang/test/CodeGen/attr-target-x86.c | 4 ++--
clang/test/Driver/cl-x86-flags.c | 6 ++----
clang/test/Driver/x86-target-features.c | 5 -----
clang/test/Frontend/x86-target-cpu.c | 6 ++----
.../test/Preprocessor/predefined-arch-macros.c | 4 ----
.../llvm/TargetParser/X86TargetParser.def | 9 +++------
llvm/lib/Target/X86/X86.td | 4 ----
llvm/lib/Target/X86/X86Instr3DNow.td | 3 +--
llvm/lib/Target/X86/X86InstrFragments.td | 8 +-------
llvm/lib/Target/X86/X86InstrPredicates.td | 1 -
llvm/lib/Target/X86/X86Subtarget.h | 7 +++----
llvm/lib/TargetParser/Host.cpp | 1 -
llvm/lib/TargetParser/X86TargetParser.cpp | 2 +-
.../test/CodeGen/X86/avx512-cmp-kor-sequence.ll | 2 +-
llvm/test/CodeGen/X86/prefetch.ll | 17 -----------------
.../LoopStrengthReduce/X86/pr40514.ll | 2 +-
.../Transforms/LoopVectorize/X86/pr23997.ll | 2 +-
.../Transforms/LoopVectorize/X86/pr54634.ll | 2 +-
.../LoopVectorize/X86/scatter_crash.ll | 2 +-
.../Transforms/SLPVectorizer/X86/vector_gep.ll | 2 +-
.../pattern-matching-based-opts-after-delicm.ll | 2 +-
26 files changed, 29 insertions(+), 84 deletions(-)
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index dfd0cac68ea95..515f632f503dd 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -6205,8 +6205,6 @@ def mpopcnt : Flag<["-"], "mpopcnt">, Group<m_x86_Features_Group>;
def mno_popcnt : Flag<["-"], "mno-popcnt">, Group<m_x86_Features_Group>;
def mprefetchi : Flag<["-"], "mprefetchi">, Group<m_x86_Features_Group>;
def mno_prefetchi : Flag<["-"], "mno-prefetchi">, Group<m_x86_Features_Group>;
-def mprefetchwt1 : Flag<["-"], "mprefetchwt1">, Group<m_x86_Features_Group>;
-def mno_prefetchwt1 : Flag<["-"], "mno-prefetchwt1">, Group<m_x86_Features_Group>;
def mprfchw : Flag<["-"], "mprfchw">, Group<m_x86_Features_Group>;
def mno_prfchw : Flag<["-"], "mno-prfchw">, Group<m_x86_Features_Group>;
def mptwrite : Flag<["-"], "mptwrite">, Group<m_x86_Features_Group>;
diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp
index edded5759ad6d..3a30cff917bb4 100644
--- a/clang/lib/Basic/Targets/X86.cpp
+++ b/clang/lib/Basic/Targets/X86.cpp
@@ -369,9 +369,6 @@ bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
HasWBNOINVD = true;
} else if (Feature == "+prefetchi") {
HasPREFETCHI = true;
- } else if (Feature == "+prefetchwt1") {
- HasPREFETCHWT1 = true;
- Diags.Report(diag::warn_knl_knm_isa_support_removed);
} else if (Feature == "+clzero") {
HasCLZERO = true;
} else if (Feature == "+cldemote") {
@@ -887,8 +884,6 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts,
Builder.defineMacro("__SM4__");
if (HasPREFETCHI)
Builder.defineMacro("__PREFETCHI__");
- if (HasPREFETCHWT1)
- Builder.defineMacro("__PREFETCHWT1__");
if (HasCLZERO)
Builder.defineMacro("__CLZERO__");
if (HasKL)
@@ -1122,7 +1117,6 @@ bool X86TargetInfo::isValidFeatureName(StringRef Name) const {
.Case("pku", true)
.Case("popcnt", true)
.Case("prefetchi", true)
- .Case("prefetchwt1", true)
.Case("prfchw", true)
.Case("ptwrite", true)
.Case("raoint", true)
@@ -1239,7 +1233,6 @@ bool X86TargetInfo::hasFeature(StringRef Feature) const {
.Case("pku", HasPKU)
.Case("popcnt", HasPOPCNT)
.Case("prefetchi", HasPREFETCHI)
- .Case("prefetchwt1", HasPREFETCHWT1)
.Case("prfchw", HasPRFCHW)
.Case("ptwrite", HasPTWRITE)
.Case("raoint", HasRAOINT)
diff --git a/clang/lib/Basic/Targets/X86.h b/clang/lib/Basic/Targets/X86.h
index 281c5ce949ce6..0633b7e0da96a 100644
--- a/clang/lib/Basic/Targets/X86.h
+++ b/clang/lib/Basic/Targets/X86.h
@@ -134,7 +134,6 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo {
bool HasCLWB = false;
bool HasMOVBE = false;
bool HasPREFETCHI = false;
- bool HasPREFETCHWT1 = false;
bool HasRDPID = false;
bool HasRDPRU = false;
bool HasRetpolineExternalThunk = false;
diff --git a/clang/lib/Headers/cpuid.h b/clang/lib/Headers/cpuid.h
index 378d1a9a8bdaa..bb7692efb78ff 100644
--- a/clang/lib/Headers/cpuid.h
+++ b/clang/lib/Headers/cpuid.h
@@ -159,6 +159,8 @@
#define bit_AVX512IFMA 0x00200000
#define bit_CLFLUSHOPT 0x00800000
#define bit_CLWB 0x01000000
+#define bit_AVX512PF 0x04000000
+#define bit_AVX512ER 0x08000000
#define bit_AVX512CD 0x10000000
#define bit_SHA 0x20000000
#define bit_AVX512BW 0x40000000
diff --git a/clang/test/CodeGen/attr-cpuspecific.c b/clang/test/CodeGen/attr-cpuspecific.c
index 2c3e6931800cd..628892d5809b4 100644
--- a/clang/test/CodeGen/attr-cpuspecific.c
+++ b/clang/test/CodeGen/attr-cpuspecific.c
@@ -75,8 +75,8 @@ void TwoVersions(void);
// LINUX: define weak_odr ptr @TwoVersions.resolver()
// LINUX: call void @__cpu_indicator_init
// LINUX: %[[FEAT_INIT:.+]] = load i32, ptr getelementptr inbounds ({ i32, i32, i32, [1 x i32] }, ptr @__cpu_model, i32 0, i32 3, i32 0), align 4
-// LINUX: %[[FEAT_JOIN:.+]] = and i32 %[[FEAT_INIT]], 59754495
-// LINUX: %[[FEAT_CHECK:.+]] = icmp eq i32 %[[FEAT_JOIN]], 59754495
+// LINUX: %[[FEAT_JOIN:.+]] = and i32 %[[FEAT_INIT]], 9422847
+// LINUX: %[[FEAT_CHECK:.+]] = icmp eq i32 %[[FEAT_JOIN]], 9422847
// LINUX: ret ptr @TwoVersions.Z
// LINUX: ret ptr @TwoVersions.S
// LINUX: call void @llvm.trap
@@ -85,8 +85,8 @@ void TwoVersions(void);
// WINDOWS: define weak_odr dso_local void @TwoVersions() comdat
// WINDOWS: call void @__cpu_indicator_init()
// WINDOWS: %[[FEAT_INIT:.+]] = load i32, ptr getelementptr inbounds ({ i32, i32, i32, [1 x i32] }, ptr @__cpu_model, i32 0, i32 3, i32 0), align 4
-// WINDOWS: %[[FEAT_JOIN:.+]] = and i32 %[[FEAT_INIT]], 59754495
-// WINDOWS: %[[FEAT_CHECK:.+]] = icmp eq i32 %[[FEAT_JOIN]], 59754495
+// WINDOWS: %[[FEAT_JOIN:.+]] = and i32 %[[FEAT_INIT]], 9422847
+// WINDOWS: %[[FEAT_CHECK:.+]] = icmp eq i32 %[[FEAT_JOIN]], 9422847
// WINDOWS: call void @TwoVersions.Z()
// WINDOWS-NEXT: ret void
// WINDOWS: call void @TwoVersions.S()
@@ -354,7 +354,7 @@ void OrderDispatchUsageSpecific(void) {}
// CHECK: attributes #[[S]] = {{.*}}"target-features"="+avx,+cmov,+crc32,+cx16,+cx8,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt"
// CHECK-SAME: "tune-cpu"="ivybridge"
-// CHECK: attributes #[[K]] = {{.*}}"target-features"="+adx,+aes,+avx,+avx2,+avx512cd,+avx512er,+avx512f,+avx512pf,+bmi,+bmi2,+cmov,+crc32,+cx16,+cx8,+evex512,+f16c,+fma,+fsgsbase,+fxsr,+invpcid,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+prefetchwt1,+prfchw,+rdrnd,+rdseed,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt"
+// CHECK: attributes #[[K]] = {{.*}}"target-features"="+adx,+aes,+avx,+avx2,+avx512cd,+avx512f,+bmi,+bmi2,+cmov,+crc32,+cx16,+cx8,+evex512,+f16c,+fma,+fsgsbase,+fxsr,+invpcid,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+prfchw,+rdrnd,+rdseed,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt"
// CHECK-SAME: "tune-cpu"="knl"
// CHECK: attributes #[[O]] = {{.*}}"target-features"="+cmov,+cx16,+cx8,+fxsr,+mmx,+movbe,+sahf,+sse,+sse2,+sse3,+ssse3,+x87"
// CHECK-SAME: "tune-cpu"="atom"
diff --git a/clang/test/CodeGen/attr-target-x86.c b/clang/test/CodeGen/attr-target-x86.c
index 304398678216f..3c2b511157f99 100644
--- a/clang/test/CodeGen/attr-target-x86.c
+++ b/clang/test/CodeGen/attr-target-x86.c
@@ -59,9 +59,9 @@ void __attribute__((target("avx10.1-512"))) avx10_1_512(void) {}
// CHECK: #0 = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87" "tune-cpu"="i686"
// CHECK: #1 = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+cmov,+crc32,+cx16,+cx8,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt"
// CHECK-NOT: tune-cpu
-// CHECK: #2 = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87,-aes,-avx,-avx10.1-256,-avx10.1-512,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512fp16,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxifma,-avxneconvert,-avxvnni,-avxvnniint16,-avxvnniint8,-f16c,-fma,-fma4,-gfni,-kl,-pclmul,-sha,-sha512,-sm3,-sm4,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-widekl,-xop" "tune-cpu"="i686"
+// CHECK: #2 = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87,-aes,-avx,-avx10.1-256,-avx10.1-512,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512f,-avx512fp16,-avx512ifma,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxifma,-avxneconvert,-avxvnni,-avxvnniint16,-avxvnniint8,-f16c,-fma,-fma4,-gfni,-kl,-pclmul,-sha,-sha512,-sm3,-sm4,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-widekl,-xop" "tune-cpu"="i686"
// CHECK: #3 = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+crc32,+cx8,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87" "tune-cpu"="i686"
-// CHECK: #4 = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87,-avx,-avx10.1-256,-avx10.1-512,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512fp16,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxifma,-avxneconvert,-avxvnni,-avxvnniint16,-avxvnniint8,-f16c,-fma,-fma4,-sha512,-sm3,-sm4,-sse4.1,-sse4.2,-vaes,-vpclmulqdq,-xop" "tune-cpu"="i686"
+// CHECK: #4 = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87,-avx,-avx10.1-256,-avx10.1-512,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512f,-avx512fp16,-avx512ifma,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxifma,-avxneconvert,-avxvnni,-avxvnniint16,-avxvnniint8,-f16c,-fma,-fma4,-sha512,-sm3,-sm4,-sse4.1,-sse4.2,-vaes,-vpclmulqdq,-xop" "tune-cpu"="i686"
// CHECK: #5 = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+cmov,+crc32,+cx16,+cx8,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt,-aes,-avx10.1-256,-avx10.1-512,-vaes"
// CHECK-NOT: tune-cpu
// CHECK: #6 = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87,-3dnow,-3dnowa,-mmx"
diff --git a/clang/test/Driver/cl-x86-flags.c b/clang/test/Driver/cl-x86-flags.c
index ffa4602fdbeec..51b16f0ce3546 100644
--- a/clang/test/Driver/cl-x86-flags.c
+++ b/clang/test/Driver/cl-x86-flags.c
@@ -69,8 +69,7 @@
// RUN: %clang_cl -m32 -arch:avx2 --target=i386-pc-windows -### -- 2>&1 %s | FileCheck -check-prefix=avx2 %s
// avx2: invalid /arch: argument
-// RUN: %clang_cl -m32 -arch:AVX512F --target=i386-pc-windows /c /Fo%t.obj -Xclang -verify=KNL1 -DTEST_32_ARCH_AVX512F -- %s
-// KNL1-warning@*:* {{KNL, KNM related Intel Xeon Phi CPU's specific ISA's supports will be removed in LLVM 19.}}
+// RUN: %clang_cl -m32 -arch:AVX512F --target=i386-pc-windows /c /Fo%t.obj -Xclang -verify -DTEST_32_ARCH_AVX512F -- %s
#if defined(TEST_32_ARCH_AVX512F)
#if _M_IX86_FP != 2 || !__AVX__ || !__AVX2__ || !__AVX512F__ || __AVX512BW__
#error fail
@@ -110,8 +109,7 @@
// RUN: %clang_cl -m64 -arch:avx2 --target=x86_64-pc-windows -### -- 2>&1 %s | FileCheck -check-prefix=avx264 %s
// avx264: invalid /arch: argument
-// RUN: %clang_cl -m64 -arch:AVX512F --target=i386-pc-windows /c /Fo%t.obj -Xclang -verify=KNL2 -DTEST_64_ARCH_AVX512F -- %s
-// KNL2-warning@*:* {{KNL, KNM related Intel Xeon Phi CPU's specific ISA's supports will be removed in LLVM 19.}}
+// RUN: %clang_cl -m64 -arch:AVX512F --target=i386-pc-windows /c /Fo%t.obj -Xclang -verify -DTEST_64_ARCH_AVX512F -- %s
#if defined(TEST_64_ARCH_AVX512F)
#if _M_IX86_FP || !__AVX__ || !__AVX2__ || !__AVX512F__ || __AVX512BW__
#error fail
diff --git a/clang/test/Driver/x86-target-features.c b/clang/test/Driver/x86-target-features.c
index 08674b6e7172e..1d5f001c23fcc 100644
--- a/clang/test/Driver/x86-target-features.c
+++ b/clang/test/Driver/x86-target-features.c
@@ -86,11 +86,6 @@
// SGX: "-target-feature" "+sgx"
// NO-SGX: "-target-feature" "-sgx"
-// RUN: %clang --target=i386 -march=i386 -mprefetchwt1 %s -### 2>&1 | FileCheck -check-prefix=PREFETCHWT1 %s
-// RUN: %clang --target=i386 -march=i386 -mno-prefetchwt1 %s -### 2>&1 | FileCheck -check-prefix=NO-PREFETCHWT1 %s
-// PREFETCHWT1: "-target-feature" "+prefetchwt1"
-// NO-PREFETCHWT1: "-target-feature" "-prefetchwt1"
-
// RUN: %clang --target=i386 -march=i386 -mprefetchi %s -### -o %t.o 2>&1 | FileCheck -check-prefix=PREFETCHI %s
// RUN: %clang --target=i386 -march=i386 -mno-prefetchi %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-PREFETCHI %s
// PREFETCHI: "-target-feature" "+prefetchi"
diff --git a/clang/test/Frontend/x86-target-cpu.c b/clang/test/Frontend/x86-target-cpu.c
index d2184538594e1..6c8502ac2c21e 100644
--- a/clang/test/Frontend/x86-target-cpu.c
+++ b/clang/test/Frontend/x86-target-cpu.c
@@ -15,10 +15,8 @@
// RUN: %clang_cc1 -triple x86_64-unknown-unknown -target-cpu cannonlake -verify %s
// RUN: %clang_cc1 -triple x86_64-unknown-unknown -target-cpu icelake-client -verify %s
// RUN: %clang_cc1 -triple x86_64-unknown-unknown -target-cpu icelake-server -verify %s
-// RUN: %clang_cc1 -triple x86_64-unknown-unknown -target-cpu knl -verify=knl %s
-// knl-warning@*:* {{KNL, KNM related Intel Xeon Phi CPU's specific ISA's supports will be removed in LLVM 19.}}
-// RUN: %clang_cc1 -triple x86_64-unknown-unknown -target-cpu knm -verify=knm %s
-// knm-warning@*:* {{KNL, KNM related Intel Xeon Phi CPU's specific ISA's supports will be removed in LLVM 19.}}
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown -target-cpu knl -verify %s
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown -target-cpu knm -verify %s
// RUN: %clang_cc1 -triple x86_64-unknown-unknown -target-cpu bonnell -verify %s
// RUN: %clang_cc1 -triple x86_64-unknown-unknown -target-cpu silvermont -verify %s
// RUN: %clang_cc1 -triple x86_64-unknown-unknown -target-cpu k8 -verify %s
diff --git a/clang/test/Preprocessor/predefined-arch-macros.c b/clang/test/Preprocessor/predefined-arch-macros.c
index 0152682ba5728..f0a2ef851287f 100644
--- a/clang/test/Preprocessor/predefined-arch-macros.c
+++ b/clang/test/Preprocessor/predefined-arch-macros.c
@@ -806,7 +806,6 @@
// CHECK_KNL_M32: #define __MOVBE__ 1
// CHECK_KNL_M32: #define __PCLMUL__ 1
// CHECK_KNL_M32: #define __POPCNT__ 1
-// CHECK_KNL_M32: #define __PREFETCHWT1__ 1
// CHECK_KNL_M32: #define __PRFCHW__ 1
// CHECK_KNL_M32: #define __RDRND__ 1
// CHECK_KNL_M32: #define __SSE2__ 1
@@ -843,7 +842,6 @@
// CHECK_KNL_M64: #define __MOVBE__ 1
// CHECK_KNL_M64: #define __PCLMUL__ 1
// CHECK_KNL_M64: #define __POPCNT__ 1
-// CHECK_KNL_M64: #define __PREFETCHWT1__ 1
// CHECK_KNL_M64: #define __PRFCHW__ 1
// CHECK_KNL_M64: #define __RDRND__ 1
// CHECK_KNL_M64: #define __SSE2_MATH__ 1
@@ -884,7 +882,6 @@
// CHECK_KNM_M32: #define __MOVBE__ 1
// CHECK_KNM_M32: #define __PCLMUL__ 1
// CHECK_KNM_M32: #define __POPCNT__ 1
-// CHECK_KNM_M32: #define __PREFETCHWT1__ 1
// CHECK_KNM_M32: #define __PRFCHW__ 1
// CHECK_KNM_M32: #define __RDRND__ 1
// CHECK_KNM_M32: #define __SSE2__ 1
@@ -919,7 +916,6 @@
// CHECK_KNM_M64: #define __MOVBE__ 1
// CHECK_KNM_M64: #define __PCLMUL__ 1
// CHECK_KNM_M64: #define __POPCNT__ 1
-// CHECK_KNM_M64: #define __PREFETCHWT1__ 1
// CHECK_KNM_M64: #define __PRFCHW__ 1
// CHECK_KNM_M64: #define __RDRND__ 1
// CHECK_KNM_M64: #define __SSE2_MATH__ 1
diff --git a/llvm/include/llvm/TargetParser/X86TargetParser.def b/llvm/include/llvm/TargetParser/X86TargetParser.def
index 5670767ff7edf..ecd74447cb68f 100644
--- a/llvm/include/llvm/TargetParser/X86TargetParser.def
+++ b/llvm/include/llvm/TargetParser/X86TargetParser.def
@@ -159,8 +159,8 @@ X86_FEATURE_COMPAT(AVX512VL, "avx512vl", 20)
X86_FEATURE_COMPAT(AVX512BW, "avx512bw", 21)
X86_FEATURE_COMPAT(AVX512DQ, "avx512dq", 22)
X86_FEATURE_COMPAT(AVX512CD, "avx512cd", 23)
-X86_FEATURE_COMPAT(AVX512ER, "avx512er", 24)
-X86_FEATURE_COMPAT(AVX512PF, "avx512pf", 25)
+X86_FEATURE (NF, "nf")
+X86_FEATURE (CF, "cf")
X86_FEATURE_COMPAT(AVX512VBMI, "avx512vbmi", 26)
X86_FEATURE_COMPAT(AVX512IFMA, "avx512ifma", 27)
X86_FEATURE_COMPAT(AVX5124VNNIW, "avx5124vnniw", 28)
@@ -202,7 +202,7 @@ X86_FEATURE_COMPAT(MWAITX, "mwaitx", 0)
X86_FEATURE (X87, "x87")
X86_FEATURE_COMPAT(PCONFIG, "pconfig", 0)
X86_FEATURE_COMPAT(PKU, "pku", 0)
-X86_FEATURE_COMPAT(PREFETCHWT1, "prefetchwt1", 0)
+X86_FEATURE (EVEX512, "evex512")
X86_FEATURE_COMPAT(PRFCHW, "prfchw", 0)
X86_FEATURE_COMPAT(PTWRITE, "ptwrite", 0)
X86_FEATURE_COMPAT(RDPID, "rdpid", 0)
@@ -252,9 +252,6 @@ X86_FEATURE (EGPR, "egpr")
X86_FEATURE_COMPAT(USERMSR, "usermsr", 0)
X86_FEATURE_COMPAT(AVX10_1, "avx10.1-256", 0)
X86_FEATURE_COMPAT(AVX10_1_512, "avx10.1-512", 0)
-X86_FEATURE (EVEX512, "evex512")
-X86_FEATURE (NF, "nf")
-X86_FEATURE (CF, "cf")
// These features aren't really CPU features, but the frontend can set them.
X86_FEATURE (RETPOLINE_EXTERNAL_THUNK, "retpoline-external-thunk")
X86_FEATURE (RETPOLINE_INDIRECT_BRANCHES, "retpoline-indirect-branches")
diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td
index 32de52f3aea51..7e8133e3e1ac4 100644
--- a/llvm/lib/Target/X86/X86.td
+++ b/llvm/lib/Target/X86/X86.td
@@ -133,9 +133,6 @@ def FeatureVPOPCNTDQ : SubtargetFeature<"avx512vpopcntdq", "HasVPOPCNTDQ",
def FeaturePREFETCHI : SubtargetFeature<"prefetchi", "HasPREFETCHI",
"true",
"Prefetch instruction with T0 or T1 Hint">;
-def FeaturePREFETCHWT1 : SubtargetFeature<"prefetchwt1", "HasPREFETCHWT1",
- "true",
- "Prefetch with Intent to Write and T1 Hint">;
def FeatureDQI : SubtargetFeature<"avx512dq", "HasDQI", "true",
"Enable AVX-512 Doubleword and Quadword Instructions",
[FeatureAVX512]>;
@@ -1307,7 +1304,6 @@ def ProcessorFeatures {
FeatureAVX512,
FeatureEVEX512,
FeatureCDI,
- FeaturePREFETCHWT1,
FeatureADX,
FeatureRDSEED,
FeatureMOVBE,
diff --git a/llvm/lib/Target/X86/X86Instr3DNow.td b/llvm/lib/Target/X86/X86Instr3DNow.td
index 3be03ab0f4332..03612de0fad94 100644
--- a/llvm/lib/Target/X86/X86Instr3DNow.td
+++ b/llvm/lib/Target/X86/X86Instr3DNow.td
@@ -90,8 +90,7 @@ def PREFETCHW : I<0x0D, MRM1m, (outs), (ins i8mem:$addr), "prefetchw\t$addr",
TB, Requires<[HasPrefetchW]>;
def PREFETCHWT1 : I<0x0D, MRM2m, (outs), (ins i8mem:$addr), "prefetchwt1\t$addr",
- [(prefetch addr:$addr, (i32 1), (i32 PrefetchWT1Level), (i32 1))]>,
- TB, Requires<[HasPREFETCHWT1]>;
+ []>, TB;
}
// "3DNowA" instructions
diff --git a/llvm/lib/Target/X86/X86InstrFragments.td b/llvm/lib/Target/X86/X86InstrFragments.td
index f14c7200af968..142e1867e6160 100644
--- a/llvm/lib/Target/X86/X86InstrFragments.td
+++ b/llvm/lib/Target/X86/X86InstrFragments.td
@@ -607,14 +607,8 @@ def X86any_fcmp : PatFrags<(ops node:$lhs, node:$rhs),
[(X86strict_fcmp node:$lhs, node:$rhs),
(X86fcmp node:$lhs, node:$rhs)]>;
-// PREFETCHWT1 is supported we want to use it for everything but T0.
def PrefetchWLevel : PatFrag<(ops), (i32 timm), [{
- return N->getSExtValue() == 3 || !Subtarget->hasPREFETCHWT1();
-}]>;
-
-// Use PREFETCHWT1 for NTA, T2, T1.
-def PrefetchWT1Level : TImmLeaf<i32, [{
- return Imm < 3;
+ return N->getSExtValue() <= 3;
}]>;
def X86lock_add_nocf : PatFrag<(ops node:$lhs, node:$rhs),
diff --git a/llvm/lib/Target/X86/X86InstrPredicates.td b/llvm/lib/Target/X86/X86InstrPredicates.td
index 570097bf8dcb4..419ff9e6f5c0f 100644
--- a/llvm/lib/Target/X86/X86InstrPredicates.td
+++ b/llvm/lib/Target/X86/X86InstrPredicates.td
@@ -145,7 +145,6 @@ def NoSSEPrefetch : Predicate<"!Subtarget->hasSSEPrefetch()">;
def HasPRFCHW : Predicate<"Subtarget->hasPRFCHW()">;
def HasPREFETCHI : Predicate<"Subtarget->hasPREFETCHI()">;
def HasPrefetchW : Predicate<"Subtarget->hasPrefetchW()">;
-def HasPREFETCHWT1 : Predicate<"Subtarget->hasPREFETCHWT1()">;
def HasLAHFSAHF : Predicate<"Subtarget->hasLAHFSAHF()">;
def HasLAHFSAHF64 : Predicate<"Subtarget->hasLAHFSAHF64()">;
def HasMWAITX : Predicate<"Subtarget->hasMWAITX()">;
diff --git a/llvm/lib/Target/X86/X86Subtarget.h b/llvm/lib/Target/X86/X86Subtarget.h
index 4d55a084b730e..de0f0d622de9d 100644
--- a/llvm/lib/Target/X86/X86Subtarget.h
+++ b/llvm/lib/Target/X86/X86Subtarget.h
@@ -213,16 +213,15 @@ class X86Subtarget final : public X86GenSubtargetInfo {
bool hasAnyFMA() const { return hasFMA() || hasFMA4(); }
bool hasPrefetchW() const {
// The PREFETCHW instruction was added with 3DNow but later CPUs gave it
- // its own CPUID bit as part of deprecating 3DNow. Intel eventually added
- // it and KNL has another that prefetches to L2 cache. We assume the
+ // its own CPUID bit as part of deprecating 3DNow. We assume the
// L1 version exists if the L2 version does.
- return hasThreeDNow() || hasPRFCHW() || hasPREFETCHWT1();
+ return hasThreeDNow() || hasPRFCHW();
}
bool hasSSEPrefetch() const {
// We implicitly enable these when we have a write prefix supporting cache
// level OR if we have prfchw, but don't already have a read prefetch from
// 3dnow.
- return hasSSE1() || (hasPRFCHW() && !hasThreeDNow()) || hasPREFETCHWT1() ||
+ return hasSSE1() || (hasPRFCHW() && !hasThreeDNow()) ||
hasPREFETCHI();
}
bool canUseLAHFSAHF() const { return hasLAHFSAHF64() || !is64Bit(); }
diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp
index db465fdc6b44c..68155acd9e5bc 100644
--- a/llvm/lib/TargetParser/Host.cpp
+++ b/llvm/lib/TargetParser/Host.cpp
@@ -1809,7 +1809,6 @@ bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
Features["avx512bw"] = HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save;
Features["avx512vl"] = HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save;
- Features["prefetchwt1"] = HasLeaf7 && ((ECX >> 0) & 1);
Features["avx512vbmi"] = HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save;
Features["pku"] = HasLeaf7 && ((ECX >> 4) & 1);
Features["waitpkg"] = HasLeaf7 && ((ECX >> 5) & 1);
diff --git a/llvm/lib/TargetParser/X86TargetParser.cpp b/llvm/lib/TargetParser/X86TargetParser.cpp
index ebdb4b09d79ce..a2946b4aec6c7 100644
--- a/llvm/lib/TargetParser/X86TargetParser.cpp
+++ b/llvm/lib/TargetParser/X86TargetParser.cpp
@@ -97,7 +97,7 @@ constexpr FeatureBitset FeaturesBroadwell =
// Knights Landing has feature parity with Broadwell.
constexpr FeatureBitset FeaturesKNL =
FeaturesBroadwell | FeatureAES | FeatureAVX512F | FeatureEVEX512 |
- FeatureAVX512CD | FeatureAVX512PF | FeaturePREFETCHWT1;
+ FeatureAVX512CD;
constexpr FeatureBitset FeaturesKNM = FeaturesKNL | FeatureAVX512VPOPCNTDQ;
// Intel Skylake processors.
diff --git a/llvm/test/CodeGen/X86/avx512-cmp-kor-sequence.ll b/llvm/test/CodeGen/X86/avx512-cmp-kor-sequence.ll
index bb86f307afa81..b4ba23934d54d 100644
--- a/llvm/test/CodeGen/X86/avx512-cmp-kor-sequence.ll
+++ b/llvm/test/CodeGen/X86/avx512-cmp-kor-sequence.ll
@@ -48,5 +48,5 @@ entry:
; Function Attrs: nounwind readnone
declare <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float>, <16 x float>, i32, <16 x i1>, i32) #1
-attributes #0 = { nounwind readnone uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="knl" "target-features"="+adx,+aes,+avx,+avx2,+avx512cd,+avx512er,+avx512f,+avx512pf,+bmi,+bmi2,+cx16,+f16c,+fma,+fsgsbase,+fxsr,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+prefetchwt1,+rdrnd,+rdseed,+rtm,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind readnone uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="broadwell" "target-features"="+adx,+aes,+avx,+avx2,+avx512cd,+avx512f,+bmi,+bmi2,+cx16,+f16c,+fma,+fsgsbase,+fxsr,+evex512,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+rdrnd,+rdseed,+rtm,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt,-vzeroupper" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/X86/prefetch.ll b/llvm/test/CodeGen/X86/prefetch.ll
index 404d49b63f25c..c10e0526787d5 100644
--- a/llvm/test/CodeGen/X86/prefetch.ll
+++ b/llvm/test/CodeGen/X86/prefetch.ll
@@ -6,9 +6,6 @@
; RUN: llc < %s -mtriple=i686-- -mcpu=slm | FileCheck %s -check-prefix=X86-PRFCHWSSE
; RUN: llc < %s -mtriple=i686-- -mcpu=btver2 | FileCheck %s -check-prefix=X86-PRFCHWSSE
; RUN: llc < %s -mtriple=i686-- -mcpu=btver2 -mattr=-prfchw | FileCheck %s -check-prefix=X86-SSE
-; RUN: llc < %s -mtriple=i686-- -mattr=+sse,+prefetchwt1 | FileCheck %s -check-prefix=X86-PREFETCHWT1
-; RUN: llc < %s -mtriple=i686-- -mattr=-sse,+prefetchwt1 | FileCheck %s -check-prefix=X86-PREFETCHWT1
-; RUN: llc < %s -mtriple=i686-- -mattr=-sse,+3dnow,+prefetchwt1 | FileCheck %s -check-prefix=X86-PREFETCHWT1
; RUN: llc < %s -mtriple=i686-- -mattr=+3dnow | FileCheck %s -check-prefix=X86-3DNOW
; RUN: llc < %s -mtriple=i686-- -mattr=+3dnow,+prfchw | FileCheck %s -check-prefix=X86-3DNOW
@@ -16,7 +13,6 @@
; 3dnow by itself get you just the single prefetch instruction with no hints
; sse provides prefetch0/1/2/nta
; supporting prefetchw, but not 3dnow implicitly provides prefetcht0/1/2/nta regardless of sse setting as we need something to fall back to for the non-write hint.
-; supporting prefetchwt1 implies prefetcht0/1/2/nta and prefetchw regardless of other settings. this allows levels for non-write and gives us an instruction for write+T0
; 3dnow prefetch instruction will only get used if you have no other prefetch instructions enabled
; rdar://10538297
@@ -48,19 +44,6 @@ define void @t(ptr %ptr) nounwind {
; X86-PRFCHWSSE-NEXT: prefetchw (%eax)
; X86-PRFCHWSSE-NEXT: retl
;
-; X86-PREFETCHWT1-LABEL: t:
-; X86-PREFETCHWT1: # %bb.0: # %entry
-; X86-PREFETCHWT1-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-PREFETCHWT1-NEXT: prefetcht2 (%eax)
-; X86-PREFETCHWT1-NEXT: prefetcht1 (%eax)
-; X86-PREFETCHWT1-NEXT: prefetcht0 (%eax)
-; X86-PREFETCHWT1-NEXT: prefetchnta (%eax)
-; X86-PREFETCHWT1-NEXT: prefetchwt1 (%eax)
-; X86-PREFETCHWT1-NEXT: prefetchwt1 (%eax)
-; X86-PREFETCHWT1-NEXT: prefetchw (%eax)
-; X86-PREFETCHWT1-NEXT: prefetchwt1 (%eax)
-; X86-PREFETCHWT1-NEXT: retl
-;
; X86-3DNOW-LABEL: t:
; X86-3DNOW: # %bb.0: # %entry
; X86-3DNOW-NEXT: movl {{[0-9]+}}(%esp), %eax
diff --git a/llvm/test/Transforms/LoopStrengthReduce/X86/pr40514.ll b/llvm/test/Transforms/LoopStrengthReduce/X86/pr40514.ll
index a461f35d00dc9..a6bff63dfc715 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/X86/pr40514.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/X86/pr40514.ll
@@ -54,4 +54,4 @@ bb10: ; preds = %bb10, %bb
}
-attributes #0 = { "target-cpu"="broadwell" "target-features"="+sse2,+cx16,+sahf,-tbm,-avx512ifma,-sha,-gfni,-fma4,-vpclmulqdq,+prfchw,+bmi2,-cldemote,+fsgsbase,-ptwrite,-xsavec,+popcnt,+aes,-avx512bitalg,-movdiri,-xsaves,-avx512vnni,-avx512vpopcntdq,-pconfig,-clwb,-avx512f,-clzero,-pku,+mmx,-lwp,-rdpid,-xop,+rdseed,-waitpkg,-movdir64b,-sse4a,-avx512bw,-clflushopt,+xsave,-avx512vbmi2,+64bit,-avx512vl,+invpcid,-avx512cd,+avx,-vaes,+rtm,+fma,+bmi,+rdrnd,-mwaitx,+sse4.1,+sse4.2,+avx2,-wbnoinvd,+sse,+lzcnt,+pclmul,-prefetchwt1,+f16c,+ssse3,-sgx,-shstk,+cmov,-avx512vbmi,+movbe,+xsaveopt,-avx512dq,+adx,-avx512pf,+sse3" }
+attributes #0 = { "target-cpu"="broadwell" "target-features"="+sse2,+cx16,+sahf,-tbm,-avx512ifma,-sha,-gfni,-fma4,-vpclmulqdq,+prfchw,+bmi2,-cldemote,+fsgsbase,-ptwrite,-xsavec,+popcnt,+aes,-avx512bitalg,-movdiri,-xsaves,-avx512vnni,-avx512vpopcntdq,-pconfig,-clwb,-avx512f,-clzero,-pku,+mmx,-lwp,-rdpid,-xop,+rdseed,-waitpkg,-movdir64b,-sse4a,-avx512bw,-clflushopt,+xsave,-avx512vbmi2,+64bit,-avx512vl,+invpcid,-avx512cd,+avx,-vaes,+rtm,+fma,+bmi,+rdrnd,-mwaitx,+sse4.1,+sse4.2,+avx2,-wbnoinvd,+sse,+lzcnt,+pclmul,+f16c,+ssse3,-sgx,-shstk,+cmov,-avx512vbmi,+movbe,+xsaveopt,-avx512dq,+adx,-avx512pf,+sse3" }
diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr23997.ll b/llvm/test/Transforms/LoopVectorize/X86/pr23997.ll
index 2cba2452440be..3d7153e66fc66 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/pr23997.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/pr23997.ll
@@ -88,7 +88,7 @@ loopexit:
ret void
}
-attributes #0 = { uwtable "target-cpu"="skylake" "target-features"="+sse2,+cx16,+sahf,-tbm,-avx512ifma,-sha,-gfni,-fma4,-vpclmulqdq,+prfchw,+bmi2,-cldemote,+fsgsbase,+xsavec,+popcnt,+aes,-avx512bitalg,+xsaves,-avx512vnni,-avx512vpopcntdq,-clwb,-avx512f,-clzero,-pku,+mmx,-lwp,-rdpid,-xop,+rdseed,-waitpkg,-sse4a,-avx512bw,+clflushopt,+xsave,-avx512vbmi2,-avx512vl,-avx512cd,+avx,-vaes,+rtm,+fma,+bmi,+rdrnd,-mwaitx,+sse4.1,+sse4.2,+avx2,-wbnoinvd,+sse,+lzcnt,+pclmul,-prefetchwt1,+f16c,+ssse3,+sgx,-shstk,+cmov,-avx512vbmi,+movbe,+xsaveopt,-avx512dq,+adx,-avx512pf,+sse3" }
+attributes #0 = { uwtable "target-cpu"="skylake" "target-features"="+sse2,+cx16,+sahf,-tbm,-avx512ifma,-sha,-gfni,-fma4,-vpclmulqdq,+prfchw,+bmi2,-cldemote,+fsgsbase,+xsavec,+popcnt,+aes,-avx512bitalg,+xsaves,-avx512vnni,-avx512vpopcntdq,-clwb,-avx512f,-clzero,-pku,+mmx,-lwp,-rdpid,-xop,+rdseed,-waitpkg,-sse4a,-avx512bw,+clflushopt,+xsave,-avx512vbmi2,-avx512vl,-avx512cd,+avx,-vaes,+rtm,+fma,+bmi,+rdrnd,-mwaitx,+sse4.1,+sse4.2,+avx2,-wbnoinvd,+sse,+lzcnt,+pclmul,+f16c,+ssse3,+sgx,-shstk,+cmov,-avx512vbmi,+movbe,+xsaveopt,-avx512dq,+adx,-avx512pf,+sse3" }
!0 = !{i32 0, i32 2147483646}
!1 = !{}
diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr54634.ll b/llvm/test/Transforms/LoopVectorize/X86/pr54634.ll
index 20566005c93df..743ca20f92b49 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/pr54634.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/pr54634.ll
@@ -118,7 +118,7 @@ L44: ; preds = %L26
ret ptr addrspace(10) null
}
-attributes #0 = { "target-cpu"="skylake-avx512" "target-features"="+xsaves,+xsavec,+prfchw,+lzcnt,+sahf,+pku,+avx512vl,+avx512bw,+avx512cd,+clwb,+clflushopt,+adx,+avx512dq,+avx512f,+bmi2,+avx2,+bmi,+fsgsbase,+f16c,+avx,+xsave,+aes,+popcnt,+movbe,+sse4.2,+sse4.1,+cx16,+fma,+ssse3,+pclmul,+sse3,-rdrnd,-rtm,-rdseed,-avx512ifma,-avx512pf,-sha,-prefetchwt1,-avx512vbmi,-waitpkg,-avx512vbmi2,-shstk,-gfni,-vaes,-vpclmulqdq,-avx512vnni,-avx512bitalg,-avx512vpopcntdq,-rdpid,-cldemote,-movdiri,-movdir64b,-enqcmd,-avx512vp2intersect,-serialize,-tsxldtrk,-pconfig,-amx-bf16,-amx-tile,-amx-int8,-sse4a,-xop,-lwp,-fma4,-tbm,-mwaitx,-xsaveopt,-clzero,-wbnoinvd,-avx512bf16,-ptwrite,+sse2,+mmx,+fxsr,+64bit,+cx8" }
+attributes #0 = { "target-cpu"="skylake-avx512" "target-features"="+xsaves,+xsavec,+prfchw,+lzcnt,+sahf,+pku,+avx512vl,+avx512bw,+avx512cd,+clwb,+clflushopt,+adx,+avx512dq,+avx512f,+bmi2,+avx2,+bmi,+fsgsbase,+f16c,+avx,+xsave,+aes,+popcnt,+movbe,+sse4.2,+sse4.1,+cx16,+fma,+ssse3,+pclmul,+sse3,-rdrnd,-rtm,-rdseed,-avx512ifma,-avx512pf,-sha,-avx512vbmi,-waitpkg,-avx512vbmi2,-shstk,-gfni,-vaes,-vpclmulqdq,-avx512vnni,-avx512bitalg,-avx512vpopcntdq,-rdpid,-cldemote,-movdiri,-movdir64b,-enqcmd,-avx512vp2intersect,-serialize,-tsxldtrk,-pconfig,-amx-bf16,-amx-tile,-amx-int8,-sse4a,-xop,-lwp,-fma4,-tbm,-mwaitx,-xsaveopt,-clzero,-wbnoinvd,-avx512bf16,-ptwrite,+sse2,+mmx,+fxsr,+64bit,+cx8" }
attributes #1 = { inaccessiblemem_or_argmemonly }
attributes #2 = { allocsize(1) }
diff --git a/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll b/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll
index bf2b9e2aef85a..ce460f4fe3542 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll
@@ -111,4 +111,4 @@ for.body: ; preds = %for.body.preheader,
br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit99
}
-attributes #0 = { norecurse nounwind ssp uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="knl" "target-features"="+adx,+aes,+avx,+avx2,+avx512cd,+avx512er,+avx512f,+avx512pf,+bmi,+bmi2,+cx16,+f16c,+fma,+fsgsbase,+fxsr,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+prefetchwt1,+rdrnd,+rdseed,+rtm,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { norecurse nounwind ssp uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="broadwell" "target-features"="+adx,+aes,+avx,+avx2,+avx512cd,+avx512f,+bmi,+bmi2,+cx16,+f16c,+fma,+fsgsbase,+fxsr,+evex512,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+rdrnd,+rdseed,+rtm,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt,-vzeroupper" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vector_gep.ll b/llvm/test/Transforms/SLPVectorizer/X86/vector_gep.ll
index b8c551c7b771d..9e8cdc62c729a 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/vector_gep.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/vector_gep.ll
@@ -26,5 +26,5 @@ entry:
unreachable
}
-attributes #0 = { noreturn readonly uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="knl" "target-features"="+adx,+aes,+avx,+avx2,+avx512cd,+avx512er,+avx512f,+avx512pf,+bmi,+bmi2,+cx16,+f16c,+fma,+fsgsbase,+fxsr,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+prefetchwt1,+rdrnd,+rdseed,+rtm,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { noreturn readonly uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="broadwell" "target-features"="+adx,+aes,+avx,+avx2,+avx512cd,+avx512f,+bmi,+bmi2,+cx16,+f16c,+fma,+fsgsbase,+fxsr,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+rdrnd,+rdseed,+rtm,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/polly/test/ScheduleOptimizer/pattern-matching-based-opts-after-delicm.ll b/polly/test/ScheduleOptimizer/pattern-matching-based-opts-after-delicm.ll
index 66011168fcc13..060140da0babe 100644
--- a/polly/test/ScheduleOptimizer/pattern-matching-based-opts-after-delicm.ll
+++ b/polly/test/ScheduleOptimizer/pattern-matching-based-opts-after-delicm.ll
@@ -93,7 +93,7 @@ for.end27: ; preds = %for.inc25
ret void
}
-attributes #0 = { norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="haswell" "target-features"="+aes,+avx,+avx2,+bmi,+bmi2,+cmov,+cx16,+f16c,+fma,+fsgsbase,+fxsr,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+rdrnd,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt,-adx,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vl,-avx512vpopcntdq,-clflushopt,-clwb,-clzero,-fma4,-lwp,-mwaitx,-pku,-prefetchwt1,-prfchw,-rdseed,-rtm,-sgx,-sha,-sse4a,-tbm,-xop,-xsavec,-xsaves" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="haswell" "target-features"="+aes,+avx,+avx2,+bmi,+bmi2,+cmov,+cx16,+f16c,+fma,+fsgsbase,+fxsr,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+rdrnd,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt,-adx,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vl,-avx512vpopcntdq,-clflushopt,-clwb,-clzero,-fma4,-lwp,-mwaitx,-pku,-prfchw,-rdseed,-rtm,-sgx,-sha,-sse4a,-tbm,-xop,-xsavec,-xsaves" "unsafe-fp-math"="false" "use-soft-float"="false" }
!llvm.module.flags = !{!0}
!llvm.ident = !{!1}
>From fb5b85a6c2e71721aaeb16396fb0eeac962646b7 Mon Sep 17 00:00:00 2001
From: Freddy Ye <freddy.ye at intel.com>
Date: Tue, 21 May 2024 17:12:48 +0800
Subject: [PATCH 4/6] MISC
---
llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn | 1 +
.../pattern-matching-based-opts-after-delicm.ll | 2 +-
2 files changed, 2 insertions(+), 1 deletion(-)
diff --git a/llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn b/llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn
index bfebd944d7a88..0a7cc3854056c 100644
--- a/llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn
+++ b/llvm/utils/gn/secondary/clang/lib/Headers/BUILD.gn
@@ -140,6 +140,7 @@ copy("Headers") {
"avx512bwintrin.h",
"avx512cdintrin.h",
"avx512dqintrin.h",
+ "avx512erintrin.h",
"avx512fintrin.h",
"avx512fp16intrin.h",
"avx512ifmaintrin.h",
diff --git a/polly/test/ScheduleOptimizer/pattern-matching-based-opts-after-delicm.ll b/polly/test/ScheduleOptimizer/pattern-matching-based-opts-after-delicm.ll
index 060140da0babe..66011168fcc13 100644
--- a/polly/test/ScheduleOptimizer/pattern-matching-based-opts-after-delicm.ll
+++ b/polly/test/ScheduleOptimizer/pattern-matching-based-opts-after-delicm.ll
@@ -93,7 +93,7 @@ for.end27: ; preds = %for.inc25
ret void
}
-attributes #0 = { norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="haswell" "target-features"="+aes,+avx,+avx2,+bmi,+bmi2,+cmov,+cx16,+f16c,+fma,+fsgsbase,+fxsr,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+rdrnd,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt,-adx,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vl,-avx512vpopcntdq,-clflushopt,-clwb,-clzero,-fma4,-lwp,-mwaitx,-pku,-prfchw,-rdseed,-rtm,-sgx,-sha,-sse4a,-tbm,-xop,-xsavec,-xsaves" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { norecurse nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="haswell" "target-features"="+aes,+avx,+avx2,+bmi,+bmi2,+cmov,+cx16,+f16c,+fma,+fsgsbase,+fxsr,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+rdrnd,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt,-adx,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vl,-avx512vpopcntdq,-clflushopt,-clwb,-clzero,-fma4,-lwp,-mwaitx,-pku,-prefetchwt1,-prfchw,-rdseed,-rtm,-sgx,-sha,-sse4a,-tbm,-xop,-xsavec,-xsaves" "unsafe-fp-math"="false" "use-soft-float"="false" }
!llvm.module.flags = !{!0}
!llvm.ident = !{!1}
>From 9cc7322f2b9640420a04d533b7d9f72f091e31d4 Mon Sep 17 00:00:00 2001
From: Freddy Ye <freddy.ye at intel.com>
Date: Tue, 21 May 2024 17:15:51 +0800
Subject: [PATCH 5/6] clang-format
---
llvm/lib/Target/X86/X86Subtarget.h | 3 +--
llvm/lib/TargetParser/X86TargetParser.cpp | 6 +++---
2 files changed, 4 insertions(+), 5 deletions(-)
diff --git a/llvm/lib/Target/X86/X86Subtarget.h b/llvm/lib/Target/X86/X86Subtarget.h
index de0f0d622de9d..4532db134fcb4 100644
--- a/llvm/lib/Target/X86/X86Subtarget.h
+++ b/llvm/lib/Target/X86/X86Subtarget.h
@@ -221,8 +221,7 @@ class X86Subtarget final : public X86GenSubtargetInfo {
// We implicitly enable these when we have a write prefix supporting cache
// level OR if we have prfchw, but don't already have a read prefetch from
// 3dnow.
- return hasSSE1() || (hasPRFCHW() && !hasThreeDNow()) ||
- hasPREFETCHI();
+ return hasSSE1() || (hasPRFCHW() && !hasThreeDNow()) || hasPREFETCHI();
}
bool canUseLAHFSAHF() const { return hasLAHFSAHF64() || !is64Bit(); }
// These are generic getters that OR together all of the thunk types
diff --git a/llvm/lib/TargetParser/X86TargetParser.cpp b/llvm/lib/TargetParser/X86TargetParser.cpp
index a2946b4aec6c7..018a7f74bc43d 100644
--- a/llvm/lib/TargetParser/X86TargetParser.cpp
+++ b/llvm/lib/TargetParser/X86TargetParser.cpp
@@ -95,9 +95,9 @@ constexpr FeatureBitset FeaturesBroadwell =
// Intel Knights Landing and Knights Mill
// Knights Landing has feature parity with Broadwell.
-constexpr FeatureBitset FeaturesKNL =
- FeaturesBroadwell | FeatureAES | FeatureAVX512F | FeatureEVEX512 |
- FeatureAVX512CD;
+constexpr FeatureBitset FeaturesKNL = FeaturesBroadwell | FeatureAES |
+ FeatureAVX512F | FeatureEVEX512 |
+ FeatureAVX512CD;
constexpr FeatureBitset FeaturesKNM = FeaturesKNL | FeatureAVX512VPOPCNTDQ;
// Intel Skylake processors.
>From 13a0f8d87343fd1a6380a66b7986ad1ce34b06fc Mon Sep 17 00:00:00 2001
From: Freddy Ye <freddy.ye at intel.com>
Date: Wed, 22 May 2024 13:58:26 +0800
Subject: [PATCH 6/6] address comments and cont. remove redundant code
---
clang/docs/ReleaseNotes.rst | 2 +
llvm/docs/ReleaseNotes.rst | 3 +
llvm/lib/Target/X86/X86ISelLowering.cpp | 10 -
llvm/lib/Target/X86/X86ISelLowering.h | 12 -
llvm/lib/Target/X86/X86InstrAVX512.td | 66 ++--
llvm/lib/Target/X86/X86InstrFragmentsSIMD.td | 11 -
.../X86/insert-prefetch-invalid-instr.ll | 2 +-
.../llvm-mca/X86/Generic/resources-avx512er.s | 373 ++++++++++++++++++
8 files changed, 417 insertions(+), 62 deletions(-)
create mode 100644 llvm/test/tools/llvm-mca/X86/Generic/resources-avx512er.s
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 81e9d0423f96a..04a1ed037b2da 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -780,6 +780,8 @@ AMDGPU Support
X86 Support
^^^^^^^^^^^
+- Removed support for the knl/knm specific ISAs: AVX512PF, AVX512ER, PREFETCHWT1
+
Arm and AArch64 Support
^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst
index 84320461fa9e1..28fd474242a4c 100644
--- a/llvm/docs/ReleaseNotes.rst
+++ b/llvm/docs/ReleaseNotes.rst
@@ -139,6 +139,9 @@ Changes to the Windows Target
Changes to the X86 Backend
--------------------------
+- Removed knl/knm specific ISA lowerings: AVX512PF, AVX512ER, PREFETCHWT1,
+ while support for assembly encoding/decoding is kept.
+
Changes to the OCaml bindings
-----------------------------
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 5d0846453685f..0070e12267030 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -33849,18 +33849,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(ADDSUB)
NODE_NAME_CASE(RCP14)
NODE_NAME_CASE(RCP14S)
- NODE_NAME_CASE(RCP28)
- NODE_NAME_CASE(RCP28_SAE)
- NODE_NAME_CASE(RCP28S)
- NODE_NAME_CASE(RCP28S_SAE)
- NODE_NAME_CASE(EXP2)
- NODE_NAME_CASE(EXP2_SAE)
NODE_NAME_CASE(RSQRT14)
NODE_NAME_CASE(RSQRT14S)
- NODE_NAME_CASE(RSQRT28)
- NODE_NAME_CASE(RSQRT28_SAE)
- NODE_NAME_CASE(RSQRT28S)
- NODE_NAME_CASE(RSQRT28S_SAE)
NODE_NAME_CASE(FADD_RND)
NODE_NAME_CASE(FADDS)
NODE_NAME_CASE(FADDS_RND)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index ade54f73bff09..14b9eb7329432 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -699,18 +699,6 @@ namespace llvm {
// Test if in transactional execution.
XTEST,
- // ERI instructions.
- RSQRT28,
- RSQRT28_SAE,
- RSQRT28S,
- RSQRT28S_SAE,
- RCP28,
- RCP28_SAE,
- RCP28S,
- RCP28S_SAE,
- EXP2,
- EXP2_SAE,
-
// Conversions between float and half-float.
CVTPS2PH,
CVTPS2PH_SAE,
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 8e122f775a8a7..da690aea43f5c 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -9265,32 +9265,36 @@ multiclass avx512_fp28_s<bits<8> opc, string OpcodeStr,X86VectorVTInfo _,
}
}
-multiclass avx512_fp28_s_ass<bits<8> opc, string OpcodeStr, X86VectorVTInfo _> {
- let ExeDomain = _.ExeDomain, hasNoSchedulingInfo = 1 in {
+multiclass avx512_fp28_s_ass<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
+ X86FoldableSchedWrite sched> {
+ let ExeDomain = _.ExeDomain, Uses = [MXCSR], hasSideEffects = 0 in {
defm r : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
- (null_frag)>, Sched<[WriteMove]>;
+ (null_frag)>, Sched<[sched]>, SIMD_EXC;
defm rb : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2), OpcodeStr,
"{sae}, $src2, $src1", "$src1, $src2, {sae}",
- (null_frag)>, Sched<[WriteMove]>, EVEX_B;
+ (null_frag)>, EVEX_B, Sched<[sched]>;
+ let mayLoad = 1 in
defm m : AVX512_maskable_scalar<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.IntScalarMemOp:$src2), OpcodeStr,
"$src2, $src1", "$src1, $src2",
- (null_frag)>, Sched<[WriteMove]>;
+ (null_frag)>,
+ Sched<[sched.Folded, sched.ReadAfterFold]>, SIMD_EXC;
}
}
-multiclass avx512_eri_s_ass<bits<8> opc, string OpcodeStr> {
- defm SSZ : avx512_fp28_s_ass<opc, OpcodeStr#"ss", f32x_info>,
+multiclass avx512_eri_s_ass<bits<8> opc, string OpcodeStr,
+ X86FoldableSchedWrite sched> {
+ defm SSZ : avx512_fp28_s_ass<opc, OpcodeStr#"ss", f32x_info, sched>,
EVEX_CD8<32, CD8VT1>, VEX_LIG, T8, PD, EVEX, VVVV;
- defm SDZ : avx512_fp28_s_ass<opc, OpcodeStr#"sd", f64x_info>,
+ defm SDZ : avx512_fp28_s_ass<opc, OpcodeStr#"sd", f64x_info, sched>,
EVEX_CD8<64, CD8VT1>, VEX_LIG, REX_W, T8, PD, EVEX, VVVV;
}
-defm VRCP28 : avx512_eri_s_ass<0xCB, "vrcp28">;
-defm VRSQRT28 : avx512_eri_s_ass<0xCD, "vrsqrt28">;
+defm VRCP28 : avx512_eri_s_ass<0xCB, "vrcp28", SchedWriteFRcp.Scl>;
+defm VRSQRT28 : avx512_eri_s_ass<0xCD, "vrsqrt28", SchedWriteFRsqrt.Scl>;
multiclass avx512_eri_s<bits<8> opc, string OpcodeStr, SDNode OpNode,
SDNode OpNodeSAE, X86FoldableSchedWrite sched> {
@@ -9345,42 +9349,48 @@ multiclass avx512_fp28_p_sae<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
EVEX_B, Sched<[sched]>;
}
-multiclass avx512_fp28_p_ass<bits<8> opc, string OpcodeStr, X86VectorVTInfo _> {
- let ExeDomain = _.ExeDomain, hasNoSchedulingInfo = 1 in {
+multiclass avx512_fp28_p_ass<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
+ X86FoldableSchedWrite sched> {
+ let ExeDomain = _.ExeDomain, Uses = [MXCSR], mayRaiseFPException = 1,
+ hasSideEffects = 0 in {
defm r : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src), OpcodeStr, "$src", "$src",
- (null_frag)>, Sched<[WriteMove]>;
-
+ (null_frag)>, Sched<[sched]>;
+ let mayLoad = 1 in
defm m : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.MemOp:$src), OpcodeStr, "$src", "$src",
- (null_frag)>, Sched<[WriteMove]>;
-
+ (null_frag)>,
+ Sched<[sched.Folded, sched.ReadAfterFold]>;
+ let mayLoad = 1 in
defm mb : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
(ins _.ScalarMemOp:$src), OpcodeStr,
"${src}"#_.BroadcastStr, "${src}"#_.BroadcastStr,
- (null_frag)>, Sched<[WriteMove]>, EVEX_B;
+ (null_frag)>,
+ EVEX_B, Sched<[sched.Folded, sched.ReadAfterFold]>;
}
}
-multiclass avx512_fp28_p_sae_ass<bits<8> opc, string OpcodeStr, X86VectorVTInfo _> {
- let ExeDomain = _.ExeDomain, Uses = [MXCSR] in
+multiclass avx512_fp28_p_sae_ass<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
+ X86FoldableSchedWrite sched> {
+ let ExeDomain = _.ExeDomain, Uses = [MXCSR], hasSideEffects = 0 in
defm rb : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src), OpcodeStr,
"{sae}, $src", "$src, {sae}",
- (null_frag)>, Sched<[WriteMove]>, EVEX_B;
+ (null_frag)>, Sched<[sched]>, EVEX_B;
}
-multiclass avx512_eri_ass<bits<8> opc, string OpcodeStr> {
- defm PSZ : avx512_fp28_p_ass<opc, OpcodeStr#"ps", v16f32_info>,
- avx512_fp28_p_sae_ass<opc, OpcodeStr#"ps", v16f32_info>,
+multiclass avx512_eri_ass<bits<8> opc, string OpcodeStr,
+ X86SchedWriteWidths sched> {
+ defm PSZ : avx512_fp28_p_ass<opc, OpcodeStr#"ps", v16f32_info, sched.ZMM>,
+ avx512_fp28_p_sae_ass<opc, OpcodeStr#"ps", v16f32_info, sched.ZMM>,
T8, PD, EVEX_V512, EVEX_CD8<32, CD8VF>;
- defm PDZ : avx512_fp28_p_ass<opc, OpcodeStr#"pd", v8f64_info>,
- avx512_fp28_p_sae_ass<opc, OpcodeStr#"pd", v8f64_info>,
+ defm PDZ : avx512_fp28_p_ass<opc, OpcodeStr#"pd", v8f64_info, sched.ZMM>,
+ avx512_fp28_p_sae_ass<opc, OpcodeStr#"pd", v8f64_info, sched.ZMM>,
T8, PD, EVEX_V512, REX_W, EVEX_CD8<64, CD8VF>;
}
-defm VRSQRT28 : avx512_eri_ass<0xCC, "vrsqrt28">, EVEX;
-defm VRCP28 : avx512_eri_ass<0xCA, "vrcp28">, EVEX;
-defm VEXP2 : avx512_eri_ass<0xC8, "vexp2">, EVEX;
+defm VRSQRT28 : avx512_eri_ass<0xCC, "vrsqrt28", SchedWriteFRsqrt>, EVEX;
+defm VRCP28 : avx512_eri_ass<0xCA, "vrcp28", SchedWriteFRcp>, EVEX;
+defm VEXP2 : avx512_eri_ass<0xC8, "vexp2", SchedWriteFAdd>, EVEX;
multiclass avx512_eri<bits<8> opc, string OpcodeStr, SDNode OpNode,
SDNode OpNodeSAE, X86SchedWriteWidths sched> {
diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
index f86e15b3ed5d5..dff33a469b97a 100644
--- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -600,19 +600,8 @@ def X86Vpdpbusds : SDNode<"X86ISD::VPDPBUSDS", SDTVnni>;
def X86Vpdpwssd : SDNode<"X86ISD::VPDPWSSD", SDTVnni>;
def X86Vpdpwssds : SDNode<"X86ISD::VPDPWSSDS", SDTVnni>;
-def X86rsqrt28 : SDNode<"X86ISD::RSQRT28", SDTFPUnaryOp>;
-def X86rsqrt28SAE: SDNode<"X86ISD::RSQRT28_SAE", SDTFPUnaryOp>;
-def X86rcp28 : SDNode<"X86ISD::RCP28", SDTFPUnaryOp>;
-def X86rcp28SAE : SDNode<"X86ISD::RCP28_SAE", SDTFPUnaryOp>;
-def X86exp2 : SDNode<"X86ISD::EXP2", SDTFPUnaryOp>;
-def X86exp2SAE : SDNode<"X86ISD::EXP2_SAE", SDTFPUnaryOp>;
-
def X86rsqrt14s : SDNode<"X86ISD::RSQRT14S", SDTFPBinOp>;
def X86rcp14s : SDNode<"X86ISD::RCP14S", SDTFPBinOp>;
-def X86rsqrt28s : SDNode<"X86ISD::RSQRT28S", SDTFPBinOp>;
-def X86rsqrt28SAEs : SDNode<"X86ISD::RSQRT28S_SAE", SDTFPBinOp>;
-def X86rcp28s : SDNode<"X86ISD::RCP28S", SDTFPBinOp>;
-def X86rcp28SAEs : SDNode<"X86ISD::RCP28S_SAE", SDTFPBinOp>;
def X86Ranges : SDNode<"X86ISD::VRANGES", SDTFPBinOpImm>;
def X86RndScales : SDNode<"X86ISD::VRNDSCALES", SDTFPBinOpImm>;
def X86Reduces : SDNode<"X86ISD::VREDUCES", SDTFPBinOpImm>;
diff --git a/llvm/test/CodeGen/X86/insert-prefetch-invalid-instr.ll b/llvm/test/CodeGen/X86/insert-prefetch-invalid-instr.ll
index 7bdbb19d1714d..f8e25028cfdee 100644
--- a/llvm/test/CodeGen/X86/insert-prefetch-invalid-instr.ll
+++ b/llvm/test/CodeGen/X86/insert-prefetch-invalid-instr.ll
@@ -14,7 +14,7 @@ entry:
; Function Attrs: inaccessiblemem_or_argmemonly nounwind
declare void @llvm.prefetch(ptr nocapture readonly, i32, i32, i32) #1
-attributes #0 = {"target-cpu"="x86-64" "target-features"="+avx512pf,+sse4.2,+ssse3"}
+attributes #0 = {"target-cpu"="x86-64" "target-features"="+sse4.2,+ssse3"}
attributes #1 = { inaccessiblemem_or_argmemonly nounwind }
attributes #2 = { argmemonly nounwind }
diff --git a/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512er.s b/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512er.s
new file mode 100644
index 0000000000000..034fc6d83d153
--- /dev/null
+++ b/llvm/test/tools/llvm-mca/X86/Generic/resources-avx512er.s
@@ -0,0 +1,373 @@
+# NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py
+# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -instruction-tables < %s | FileCheck %s
+
+vexp2pd %zmm16, %zmm19
+vexp2pd (%rax), %zmm19
+vexp2pd (%rax){1to8}, %zmm19
+vexp2pd %zmm16, %zmm19 {k1}
+vexp2pd (%rax), %zmm19 {k1}
+vexp2pd (%rax){1to8}, %zmm19 {k1}
+vexp2pd %zmm16, %zmm19 {z}{k1}
+vexp2pd (%rax), %zmm19 {z}{k1}
+vexp2pd (%rax){1to8}, %zmm19 {z}{k1}
+
+vexp2pd {sae}, %zmm16, %zmm19
+vexp2pd {sae}, %zmm16, %zmm19 {k1}
+vexp2pd {sae}, %zmm16, %zmm19 {z}{k1}
+
+vexp2ps %zmm16, %zmm19
+vexp2ps (%rax), %zmm19
+vexp2ps (%rax){1to16}, %zmm19
+vexp2ps %zmm16, %zmm19 {k1}
+vexp2ps (%rax), %zmm19 {k1}
+vexp2ps (%rax){1to16}, %zmm19 {k1}
+vexp2ps %zmm16, %zmm19 {z}{k1}
+vexp2ps (%rax), %zmm19 {z}{k1}
+vexp2ps (%rax){1to16}, %zmm19 {z}{k1}
+
+vexp2ps {sae}, %zmm16, %zmm19
+vexp2ps {sae}, %zmm16, %zmm19 {k1}
+vexp2ps {sae}, %zmm16, %zmm19 {z}{k1}
+
+vrcp28pd %zmm16, %zmm19
+vrcp28pd (%rax), %zmm19
+vrcp28pd (%rax){1to8}, %zmm19
+vrcp28pd %zmm16, %zmm19 {k1}
+vrcp28pd (%rax), %zmm19 {k1}
+vrcp28pd (%rax){1to8}, %zmm19 {k1}
+vrcp28pd %zmm16, %zmm19 {z}{k1}
+vrcp28pd (%rax), %zmm19 {z}{k1}
+vrcp28pd (%rax){1to8}, %zmm19 {z}{k1}
+
+vrcp28pd {sae}, %zmm16, %zmm19
+vrcp28pd {sae}, %zmm16, %zmm19 {k1}
+vrcp28pd {sae}, %zmm16, %zmm19 {z}{k1}
+
+vrcp28ps %zmm16, %zmm19
+vrcp28ps (%rax), %zmm19
+vrcp28ps (%rax){1to16}, %zmm19
+vrcp28ps %zmm16, %zmm19 {k1}
+vrcp28ps (%rax), %zmm19 {k1}
+vrcp28ps (%rax){1to16}, %zmm19 {k1}
+vrcp28ps %zmm16, %zmm19 {z}{k1}
+vrcp28ps (%rax), %zmm19 {z}{k1}
+vrcp28ps (%rax){1to16}, %zmm19 {z}{k1}
+
+vrcp28ps {sae}, %zmm16, %zmm19
+vrcp28ps {sae}, %zmm16, %zmm19 {k1}
+vrcp28ps {sae}, %zmm16, %zmm19 {z}{k1}
+
+vrcp28sd %xmm16, %xmm17, %xmm19
+vrcp28sd (%rax), %xmm17, %xmm19
+vrcp28sd %xmm16, %xmm17, %xmm19 {k1}
+vrcp28sd (%rax), %xmm17, %xmm19 {k1}
+vrcp28sd %xmm16, %xmm17, %xmm19 {z}{k1}
+vrcp28sd (%rax), %xmm17, %xmm19 {z}{k1}
+
+vrcp28sd {sae}, %xmm16, %xmm17, %xmm19
+vrcp28sd {sae}, %xmm16, %xmm17, %xmm19 {k1}
+vrcp28sd {sae}, %xmm16, %xmm17, %xmm19 {z}{k1}
+
+vrcp28ss %xmm16, %xmm17, %xmm19
+vrcp28ss (%rax), %xmm17, %xmm19
+vrcp28ss %xmm16, %xmm17, %xmm19 {k1}
+vrcp28ss (%rax), %xmm17, %xmm19 {k1}
+vrcp28ss %xmm16, %xmm17, %xmm19 {z}{k1}
+vrcp28ss (%rax), %xmm17, %xmm19 {z}{k1}
+
+vrcp28ss {sae}, %xmm16, %xmm17, %xmm19
+vrcp28ss {sae}, %xmm16, %xmm17, %xmm19 {k1}
+vrcp28ss {sae}, %xmm16, %xmm17, %xmm19 {z}{k1}
+
+vrsqrt28pd %zmm16, %zmm19
+vrsqrt28pd (%rax), %zmm19
+vrsqrt28pd (%rax){1to8}, %zmm19
+vrsqrt28pd %zmm16, %zmm19 {k1}
+vrsqrt28pd (%rax), %zmm19 {k1}
+vrsqrt28pd (%rax){1to8}, %zmm19 {k1}
+vrsqrt28pd %zmm16, %zmm19 {z}{k1}
+vrsqrt28pd (%rax), %zmm19 {z}{k1}
+vrsqrt28pd (%rax){1to8}, %zmm19 {z}{k1}
+
+vrsqrt28pd {sae}, %zmm16, %zmm19
+vrsqrt28pd {sae}, %zmm16, %zmm19 {k1}
+vrsqrt28pd {sae}, %zmm16, %zmm19 {z}{k1}
+
+vrsqrt28ps %zmm16, %zmm19
+vrsqrt28ps (%rax), %zmm19
+vrsqrt28ps (%rax){1to16}, %zmm19
+vrsqrt28ps %zmm16, %zmm19 {k1}
+vrsqrt28ps (%rax), %zmm19 {k1}
+vrsqrt28ps (%rax){1to16}, %zmm19 {k1}
+vrsqrt28ps %zmm16, %zmm19 {z}{k1}
+vrsqrt28ps (%rax), %zmm19 {z}{k1}
+vrsqrt28ps (%rax){1to16}, %zmm19 {z}{k1}
+
+vrsqrt28ps {sae}, %zmm16, %zmm19
+vrsqrt28ps {sae}, %zmm16, %zmm19 {k1}
+vrsqrt28ps {sae}, %zmm16, %zmm19 {z}{k1}
+
+vrsqrt28sd %xmm16, %xmm17, %xmm19
+vrsqrt28sd (%rax), %xmm17, %xmm19
+vrsqrt28sd %xmm16, %xmm17, %xmm19 {k1}
+vrsqrt28sd (%rax), %xmm17, %xmm19 {k1}
+vrsqrt28sd %xmm16, %xmm17, %xmm19 {z}{k1}
+vrsqrt28sd (%rax), %xmm17, %xmm19 {z}{k1}
+
+vrsqrt28sd {sae}, %xmm16, %xmm17, %xmm19
+vrsqrt28sd {sae}, %xmm16, %xmm17, %xmm19 {k1}
+vrsqrt28sd {sae}, %xmm16, %xmm17, %xmm19 {z}{k1}
+
+vrsqrt28ss %xmm16, %xmm17, %xmm19
+vrsqrt28ss (%rax), %xmm17, %xmm19
+vrsqrt28ss %xmm16, %xmm17, %xmm19 {k1}
+vrsqrt28ss (%rax), %xmm17, %xmm19 {k1}
+vrsqrt28ss %xmm16, %xmm17, %xmm19 {z}{k1}
+vrsqrt28ss (%rax), %xmm17, %xmm19 {z}{k1}
+
+vrsqrt28ss {sae}, %xmm16, %xmm17, %xmm19
+vrsqrt28ss {sae}, %xmm16, %xmm17, %xmm19 {k1}
+vrsqrt28ss {sae}, %xmm16, %xmm17, %xmm19 {z}{k1}
+
+# CHECK: Instruction Info:
+# CHECK-NEXT: [1]: #uOps
+# CHECK-NEXT: [2]: Latency
+# CHECK-NEXT: [3]: RThroughput
+# CHECK-NEXT: [4]: MayLoad
+# CHECK-NEXT: [5]: MayStore
+# CHECK-NEXT: [6]: HasSideEffects (U)
+
+# CHECK: [1] [2] [3] [4] [5] [6] Instructions:
+# CHECK-NEXT: 1 3 1.00 vexp2pd %zmm16, %zmm19
+# CHECK-NEXT: 2 10 1.00 * vexp2pd (%rax), %zmm19
+# CHECK-NEXT: 2 10 1.00 * vexp2pd (%rax){1to8}, %zmm19
+# CHECK-NEXT: 1 3 1.00 vexp2pd %zmm16, %zmm19 {%k1}
+# CHECK-NEXT: 2 10 1.00 * vexp2pd (%rax), %zmm19 {%k1}
+# CHECK-NEXT: 2 10 1.00 * vexp2pd (%rax){1to8}, %zmm19 {%k1}
+# CHECK-NEXT: 1 3 1.00 vexp2pd %zmm16, %zmm19 {%k1} {z}
+# CHECK-NEXT: 2 10 1.00 * vexp2pd (%rax), %zmm19 {%k1} {z}
+# CHECK-NEXT: 2 10 1.00 * vexp2pd (%rax){1to8}, %zmm19 {%k1} {z}
+# CHECK-NEXT: 1 3 1.00 vexp2pd {sae}, %zmm16, %zmm19
+# CHECK-NEXT: 1 3 1.00 vexp2pd {sae}, %zmm16, %zmm19 {%k1}
+# CHECK-NEXT: 1 3 1.00 vexp2pd {sae}, %zmm16, %zmm19 {%k1} {z}
+# CHECK-NEXT: 1 3 1.00 vexp2ps %zmm16, %zmm19
+# CHECK-NEXT: 2 10 1.00 * vexp2ps (%rax), %zmm19
+# CHECK-NEXT: 2 10 1.00 * vexp2ps (%rax){1to16}, %zmm19
+# CHECK-NEXT: 1 3 1.00 vexp2ps %zmm16, %zmm19 {%k1}
+# CHECK-NEXT: 2 10 1.00 * vexp2ps (%rax), %zmm19 {%k1}
+# CHECK-NEXT: 2 10 1.00 * vexp2ps (%rax){1to16}, %zmm19 {%k1}
+# CHECK-NEXT: 1 3 1.00 vexp2ps %zmm16, %zmm19 {%k1} {z}
+# CHECK-NEXT: 2 10 1.00 * vexp2ps (%rax), %zmm19 {%k1} {z}
+# CHECK-NEXT: 2 10 1.00 * vexp2ps (%rax){1to16}, %zmm19 {%k1} {z}
+# CHECK-NEXT: 1 3 1.00 vexp2ps {sae}, %zmm16, %zmm19
+# CHECK-NEXT: 1 3 1.00 vexp2ps {sae}, %zmm16, %zmm19 {%k1}
+# CHECK-NEXT: 1 3 1.00 vexp2ps {sae}, %zmm16, %zmm19 {%k1} {z}
+# CHECK-NEXT: 3 7 2.00 vrcp28pd %zmm16, %zmm19
+# CHECK-NEXT: 4 14 2.00 * vrcp28pd (%rax), %zmm19
+# CHECK-NEXT: 4 14 2.00 * vrcp28pd (%rax){1to8}, %zmm19
+# CHECK-NEXT: 3 7 2.00 vrcp28pd %zmm16, %zmm19 {%k1}
+# CHECK-NEXT: 4 14 2.00 * vrcp28pd (%rax), %zmm19 {%k1}
+# CHECK-NEXT: 4 14 2.00 * vrcp28pd (%rax){1to8}, %zmm19 {%k1}
+# CHECK-NEXT: 3 7 2.00 vrcp28pd %zmm16, %zmm19 {%k1} {z}
+# CHECK-NEXT: 4 14 2.00 * vrcp28pd (%rax), %zmm19 {%k1} {z}
+# CHECK-NEXT: 4 14 2.00 * vrcp28pd (%rax){1to8}, %zmm19 {%k1} {z}
+# CHECK-NEXT: 3 7 2.00 vrcp28pd {sae}, %zmm16, %zmm19
+# CHECK-NEXT: 3 7 2.00 vrcp28pd {sae}, %zmm16, %zmm19 {%k1}
+# CHECK-NEXT: 3 7 2.00 vrcp28pd {sae}, %zmm16, %zmm19 {%k1} {z}
+# CHECK-NEXT: 3 7 2.00 vrcp28ps %zmm16, %zmm19
+# CHECK-NEXT: 4 14 2.00 * vrcp28ps (%rax), %zmm19
+# CHECK-NEXT: 4 14 2.00 * vrcp28ps (%rax){1to16}, %zmm19
+# CHECK-NEXT: 3 7 2.00 vrcp28ps %zmm16, %zmm19 {%k1}
+# CHECK-NEXT: 4 14 2.00 * vrcp28ps (%rax), %zmm19 {%k1}
+# CHECK-NEXT: 4 14 2.00 * vrcp28ps (%rax){1to16}, %zmm19 {%k1}
+# CHECK-NEXT: 3 7 2.00 vrcp28ps %zmm16, %zmm19 {%k1} {z}
+# CHECK-NEXT: 4 14 2.00 * vrcp28ps (%rax), %zmm19 {%k1} {z}
+# CHECK-NEXT: 4 14 2.00 * vrcp28ps (%rax){1to16}, %zmm19 {%k1} {z}
+# CHECK-NEXT: 3 7 2.00 vrcp28ps {sae}, %zmm16, %zmm19
+# CHECK-NEXT: 3 7 2.00 vrcp28ps {sae}, %zmm16, %zmm19 {%k1}
+# CHECK-NEXT: 3 7 2.00 vrcp28ps {sae}, %zmm16, %zmm19 {%k1} {z}
+# CHECK-NEXT: 1 5 1.00 vrcp28sd %xmm16, %xmm17, %xmm19
+# CHECK-NEXT: 2 11 1.00 * vrcp28sd (%rax), %xmm17, %xmm19
+# CHECK-NEXT: 1 5 1.00 vrcp28sd %xmm16, %xmm17, %xmm19 {%k1}
+# CHECK-NEXT: 2 11 1.00 * vrcp28sd (%rax), %xmm17, %xmm19 {%k1}
+# CHECK-NEXT: 1 5 1.00 vrcp28sd %xmm16, %xmm17, %xmm19 {%k1} {z}
+# CHECK-NEXT: 2 11 1.00 * vrcp28sd (%rax), %xmm17, %xmm19 {%k1} {z}
+# CHECK-NEXT: 1 5 1.00 vrcp28sd {sae}, %xmm16, %xmm17, %xmm19
+# CHECK-NEXT: 1 5 1.00 vrcp28sd {sae}, %xmm16, %xmm17, %xmm19 {%k1}
+# CHECK-NEXT: 1 5 1.00 vrcp28sd {sae}, %xmm16, %xmm17, %xmm19 {%k1} {z}
+# CHECK-NEXT: 1 5 1.00 vrcp28ss %xmm16, %xmm17, %xmm19
+# CHECK-NEXT: 2 11 1.00 * vrcp28ss (%rax), %xmm17, %xmm19
+# CHECK-NEXT: 1 5 1.00 vrcp28ss %xmm16, %xmm17, %xmm19 {%k1}
+# CHECK-NEXT: 2 11 1.00 * vrcp28ss (%rax), %xmm17, %xmm19 {%k1}
+# CHECK-NEXT: 1 5 1.00 vrcp28ss %xmm16, %xmm17, %xmm19 {%k1} {z}
+# CHECK-NEXT: 2 11 1.00 * vrcp28ss (%rax), %xmm17, %xmm19 {%k1} {z}
+# CHECK-NEXT: 1 5 1.00 vrcp28ss {sae}, %xmm16, %xmm17, %xmm19
+# CHECK-NEXT: 1 5 1.00 vrcp28ss {sae}, %xmm16, %xmm17, %xmm19 {%k1}
+# CHECK-NEXT: 1 5 1.00 vrcp28ss {sae}, %xmm16, %xmm17, %xmm19 {%k1} {z}
+# CHECK-NEXT: 3 7 2.00 vrsqrt28pd %zmm16, %zmm19
+# CHECK-NEXT: 4 14 2.00 * vrsqrt28pd (%rax), %zmm19
+# CHECK-NEXT: 4 14 2.00 * vrsqrt28pd (%rax){1to8}, %zmm19
+# CHECK-NEXT: 3 7 2.00 vrsqrt28pd %zmm16, %zmm19 {%k1}
+# CHECK-NEXT: 4 14 2.00 * vrsqrt28pd (%rax), %zmm19 {%k1}
+# CHECK-NEXT: 4 14 2.00 * vrsqrt28pd (%rax){1to8}, %zmm19 {%k1}
+# CHECK-NEXT: 3 7 2.00 vrsqrt28pd %zmm16, %zmm19 {%k1} {z}
+# CHECK-NEXT: 4 14 2.00 * vrsqrt28pd (%rax), %zmm19 {%k1} {z}
+# CHECK-NEXT: 4 14 2.00 * vrsqrt28pd (%rax){1to8}, %zmm19 {%k1} {z}
+# CHECK-NEXT: 3 7 2.00 vrsqrt28pd {sae}, %zmm16, %zmm19
+# CHECK-NEXT: 3 7 2.00 vrsqrt28pd {sae}, %zmm16, %zmm19 {%k1}
+# CHECK-NEXT: 3 7 2.00 vrsqrt28pd {sae}, %zmm16, %zmm19 {%k1} {z}
+# CHECK-NEXT: 3 7 2.00 vrsqrt28ps %zmm16, %zmm19
+# CHECK-NEXT: 4 14 2.00 * vrsqrt28ps (%rax), %zmm19
+# CHECK-NEXT: 4 14 2.00 * vrsqrt28ps (%rax){1to16}, %zmm19
+# CHECK-NEXT: 3 7 2.00 vrsqrt28ps %zmm16, %zmm19 {%k1}
+# CHECK-NEXT: 4 14 2.00 * vrsqrt28ps (%rax), %zmm19 {%k1}
+# CHECK-NEXT: 4 14 2.00 * vrsqrt28ps (%rax){1to16}, %zmm19 {%k1}
+# CHECK-NEXT: 3 7 2.00 vrsqrt28ps %zmm16, %zmm19 {%k1} {z}
+# CHECK-NEXT: 4 14 2.00 * vrsqrt28ps (%rax), %zmm19 {%k1} {z}
+# CHECK-NEXT: 4 14 2.00 * vrsqrt28ps (%rax){1to16}, %zmm19 {%k1} {z}
+# CHECK-NEXT: 3 7 2.00 vrsqrt28ps {sae}, %zmm16, %zmm19
+# CHECK-NEXT: 3 7 2.00 vrsqrt28ps {sae}, %zmm16, %zmm19 {%k1}
+# CHECK-NEXT: 3 7 2.00 vrsqrt28ps {sae}, %zmm16, %zmm19 {%k1} {z}
+# CHECK-NEXT: 1 5 1.00 vrsqrt28sd %xmm16, %xmm17, %xmm19
+# CHECK-NEXT: 2 11 1.00 * vrsqrt28sd (%rax), %xmm17, %xmm19
+# CHECK-NEXT: 1 5 1.00 vrsqrt28sd %xmm16, %xmm17, %xmm19 {%k1}
+# CHECK-NEXT: 2 11 1.00 * vrsqrt28sd (%rax), %xmm17, %xmm19 {%k1}
+# CHECK-NEXT: 1 5 1.00 vrsqrt28sd %xmm16, %xmm17, %xmm19 {%k1} {z}
+# CHECK-NEXT: 2 11 1.00 * vrsqrt28sd (%rax), %xmm17, %xmm19 {%k1} {z}
+# CHECK-NEXT: 1 5 1.00 vrsqrt28sd {sae}, %xmm16, %xmm17, %xmm19
+# CHECK-NEXT: 1 5 1.00 vrsqrt28sd {sae}, %xmm16, %xmm17, %xmm19 {%k1}
+# CHECK-NEXT: 1 5 1.00 vrsqrt28sd {sae}, %xmm16, %xmm17, %xmm19 {%k1} {z}
+# CHECK-NEXT: 1 5 1.00 vrsqrt28ss %xmm16, %xmm17, %xmm19
+# CHECK-NEXT: 2 11 1.00 * vrsqrt28ss (%rax), %xmm17, %xmm19
+# CHECK-NEXT: 1 5 1.00 vrsqrt28ss %xmm16, %xmm17, %xmm19 {%k1}
+# CHECK-NEXT: 2 11 1.00 * vrsqrt28ss (%rax), %xmm17, %xmm19 {%k1}
+# CHECK-NEXT: 1 5 1.00 vrsqrt28ss %xmm16, %xmm17, %xmm19 {%k1} {z}
+# CHECK-NEXT: 2 11 1.00 * vrsqrt28ss (%rax), %xmm17, %xmm19 {%k1} {z}
+# CHECK-NEXT: 1 5 1.00 vrsqrt28ss {sae}, %xmm16, %xmm17, %xmm19
+# CHECK-NEXT: 1 5 1.00 vrsqrt28ss {sae}, %xmm16, %xmm17, %xmm19 {%k1}
+# CHECK-NEXT: 1 5 1.00 vrsqrt28ss {sae}, %xmm16, %xmm17, %xmm19 {%k1} {z}
+
+# CHECK: Resources:
+# CHECK-NEXT: [0] - SBDivider
+# CHECK-NEXT: [1] - SBFPDivider
+# CHECK-NEXT: [2] - SBPort0
+# CHECK-NEXT: [3] - SBPort1
+# CHECK-NEXT: [4] - SBPort4
+# CHECK-NEXT: [5] - SBPort5
+# CHECK-NEXT: [6.0] - SBPort23
+# CHECK-NEXT: [6.1] - SBPort23
+
+# CHECK: Resource pressure per iteration:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1]
+# CHECK-NEXT: - - 156.00 24.00 - 24.00 24.00 24.00
+
+# CHECK: Resource pressure by instruction:
+# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions:
+# CHECK-NEXT: - - - 1.00 - - - - vexp2pd %zmm16, %zmm19
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vexp2pd (%rax), %zmm19
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vexp2pd (%rax){1to8}, %zmm19
+# CHECK-NEXT: - - - 1.00 - - - - vexp2pd %zmm16, %zmm19 {%k1}
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vexp2pd (%rax), %zmm19 {%k1}
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vexp2pd (%rax){1to8}, %zmm19 {%k1}
+# CHECK-NEXT: - - - 1.00 - - - - vexp2pd %zmm16, %zmm19 {%k1} {z}
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vexp2pd (%rax), %zmm19 {%k1} {z}
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vexp2pd (%rax){1to8}, %zmm19 {%k1} {z}
+# CHECK-NEXT: - - - 1.00 - - - - vexp2pd {sae}, %zmm16, %zmm19
+# CHECK-NEXT: - - - 1.00 - - - - vexp2pd {sae}, %zmm16, %zmm19 {%k1}
+# CHECK-NEXT: - - - 1.00 - - - - vexp2pd {sae}, %zmm16, %zmm19 {%k1} {z}
+# CHECK-NEXT: - - - 1.00 - - - - vexp2ps %zmm16, %zmm19
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vexp2ps (%rax), %zmm19
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vexp2ps (%rax){1to16}, %zmm19
+# CHECK-NEXT: - - - 1.00 - - - - vexp2ps %zmm16, %zmm19 {%k1}
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vexp2ps (%rax), %zmm19 {%k1}
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vexp2ps (%rax){1to16}, %zmm19 {%k1}
+# CHECK-NEXT: - - - 1.00 - - - - vexp2ps %zmm16, %zmm19 {%k1} {z}
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vexp2ps (%rax), %zmm19 {%k1} {z}
+# CHECK-NEXT: - - - 1.00 - - 0.50 0.50 vexp2ps (%rax){1to16}, %zmm19 {%k1} {z}
+# CHECK-NEXT: - - - 1.00 - - - - vexp2ps {sae}, %zmm16, %zmm19
+# CHECK-NEXT: - - - 1.00 - - - - vexp2ps {sae}, %zmm16, %zmm19 {%k1}
+# CHECK-NEXT: - - - 1.00 - - - - vexp2ps {sae}, %zmm16, %zmm19 {%k1} {z}
+# CHECK-NEXT: - - 2.50 - - 0.50 - - vrcp28pd %zmm16, %zmm19
+# CHECK-NEXT: - - 2.50 - - 0.50 0.50 0.50 vrcp28pd (%rax), %zmm19
+# CHECK-NEXT: - - 2.50 - - 0.50 0.50 0.50 vrcp28pd (%rax){1to8}, %zmm19
+# CHECK-NEXT: - - 2.50 - - 0.50 - - vrcp28pd %zmm16, %zmm19 {%k1}
+# CHECK-NEXT: - - 2.50 - - 0.50 0.50 0.50 vrcp28pd (%rax), %zmm19 {%k1}
+# CHECK-NEXT: - - 2.50 - - 0.50 0.50 0.50 vrcp28pd (%rax){1to8}, %zmm19 {%k1}
+# CHECK-NEXT: - - 2.50 - - 0.50 - - vrcp28pd %zmm16, %zmm19 {%k1} {z}
+# CHECK-NEXT: - - 2.50 - - 0.50 0.50 0.50 vrcp28pd (%rax), %zmm19 {%k1} {z}
+# CHECK-NEXT: - - 2.50 - - 0.50 0.50 0.50 vrcp28pd (%rax){1to8}, %zmm19 {%k1} {z}
+# CHECK-NEXT: - - 2.50 - - 0.50 - - vrcp28pd {sae}, %zmm16, %zmm19
+# CHECK-NEXT: - - 2.50 - - 0.50 - - vrcp28pd {sae}, %zmm16, %zmm19 {%k1}
+# CHECK-NEXT: - - 2.50 - - 0.50 - - vrcp28pd {sae}, %zmm16, %zmm19 {%k1} {z}
+# CHECK-NEXT: - - 2.50 - - 0.50 - - vrcp28ps %zmm16, %zmm19
+# CHECK-NEXT: - - 2.50 - - 0.50 0.50 0.50 vrcp28ps (%rax), %zmm19
+# CHECK-NEXT: - - 2.50 - - 0.50 0.50 0.50 vrcp28ps (%rax){1to16}, %zmm19
+# CHECK-NEXT: - - 2.50 - - 0.50 - - vrcp28ps %zmm16, %zmm19 {%k1}
+# CHECK-NEXT: - - 2.50 - - 0.50 0.50 0.50 vrcp28ps (%rax), %zmm19 {%k1}
+# CHECK-NEXT: - - 2.50 - - 0.50 0.50 0.50 vrcp28ps (%rax){1to16}, %zmm19 {%k1}
+# CHECK-NEXT: - - 2.50 - - 0.50 - - vrcp28ps %zmm16, %zmm19 {%k1} {z}
+# CHECK-NEXT: - - 2.50 - - 0.50 0.50 0.50 vrcp28ps (%rax), %zmm19 {%k1} {z}
+# CHECK-NEXT: - - 2.50 - - 0.50 0.50 0.50 vrcp28ps (%rax){1to16}, %zmm19 {%k1} {z}
+# CHECK-NEXT: - - 2.50 - - 0.50 - - vrcp28ps {sae}, %zmm16, %zmm19
+# CHECK-NEXT: - - 2.50 - - 0.50 - - vrcp28ps {sae}, %zmm16, %zmm19 {%k1}
+# CHECK-NEXT: - - 2.50 - - 0.50 - - vrcp28ps {sae}, %zmm16, %zmm19 {%k1} {z}
+# CHECK-NEXT: - - 1.00 - - - - - vrcp28sd %xmm16, %xmm17, %xmm19
+# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vrcp28sd (%rax), %xmm17, %xmm19
+# CHECK-NEXT: - - 1.00 - - - - - vrcp28sd %xmm16, %xmm17, %xmm19 {%k1}
+# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vrcp28sd (%rax), %xmm17, %xmm19 {%k1}
+# CHECK-NEXT: - - 1.00 - - - - - vrcp28sd %xmm16, %xmm17, %xmm19 {%k1} {z}
+# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vrcp28sd (%rax), %xmm17, %xmm19 {%k1} {z}
+# CHECK-NEXT: - - 1.00 - - - - - vrcp28sd {sae}, %xmm16, %xmm17, %xmm19
+# CHECK-NEXT: - - 1.00 - - - - - vrcp28sd {sae}, %xmm16, %xmm17, %xmm19 {%k1}
+# CHECK-NEXT: - - 1.00 - - - - - vrcp28sd {sae}, %xmm16, %xmm17, %xmm19 {%k1} {z}
+# CHECK-NEXT: - - 1.00 - - - - - vrcp28ss %xmm16, %xmm17, %xmm19
+# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vrcp28ss (%rax), %xmm17, %xmm19
+# CHECK-NEXT: - - 1.00 - - - - - vrcp28ss %xmm16, %xmm17, %xmm19 {%k1}
+# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vrcp28ss (%rax), %xmm17, %xmm19 {%k1}
+# CHECK-NEXT: - - 1.00 - - - - - vrcp28ss %xmm16, %xmm17, %xmm19 {%k1} {z}
+# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vrcp28ss (%rax), %xmm17, %xmm19 {%k1} {z}
+# CHECK-NEXT: - - 1.00 - - - - - vrcp28ss {sae}, %xmm16, %xmm17, %xmm19
+# CHECK-NEXT: - - 1.00 - - - - - vrcp28ss {sae}, %xmm16, %xmm17, %xmm19 {%k1}
+# CHECK-NEXT: - - 1.00 - - - - - vrcp28ss {sae}, %xmm16, %xmm17, %xmm19 {%k1} {z}
+# CHECK-NEXT: - - 2.50 - - 0.50 - - vrsqrt28pd %zmm16, %zmm19
+# CHECK-NEXT: - - 2.50 - - 0.50 0.50 0.50 vrsqrt28pd (%rax), %zmm19
+# CHECK-NEXT: - - 2.50 - - 0.50 0.50 0.50 vrsqrt28pd (%rax){1to8}, %zmm19
+# CHECK-NEXT: - - 2.50 - - 0.50 - - vrsqrt28pd %zmm16, %zmm19 {%k1}
+# CHECK-NEXT: - - 2.50 - - 0.50 0.50 0.50 vrsqrt28pd (%rax), %zmm19 {%k1}
+# CHECK-NEXT: - - 2.50 - - 0.50 0.50 0.50 vrsqrt28pd (%rax){1to8}, %zmm19 {%k1}
+# CHECK-NEXT: - - 2.50 - - 0.50 - - vrsqrt28pd %zmm16, %zmm19 {%k1} {z}
+# CHECK-NEXT: - - 2.50 - - 0.50 0.50 0.50 vrsqrt28pd (%rax), %zmm19 {%k1} {z}
+# CHECK-NEXT: - - 2.50 - - 0.50 0.50 0.50 vrsqrt28pd (%rax){1to8}, %zmm19 {%k1} {z}
+# CHECK-NEXT: - - 2.50 - - 0.50 - - vrsqrt28pd {sae}, %zmm16, %zmm19
+# CHECK-NEXT: - - 2.50 - - 0.50 - - vrsqrt28pd {sae}, %zmm16, %zmm19 {%k1}
+# CHECK-NEXT: - - 2.50 - - 0.50 - - vrsqrt28pd {sae}, %zmm16, %zmm19 {%k1} {z}
+# CHECK-NEXT: - - 2.50 - - 0.50 - - vrsqrt28ps %zmm16, %zmm19
+# CHECK-NEXT: - - 2.50 - - 0.50 0.50 0.50 vrsqrt28ps (%rax), %zmm19
+# CHECK-NEXT: - - 2.50 - - 0.50 0.50 0.50 vrsqrt28ps (%rax){1to16}, %zmm19
+# CHECK-NEXT: - - 2.50 - - 0.50 - - vrsqrt28ps %zmm16, %zmm19 {%k1}
+# CHECK-NEXT: - - 2.50 - - 0.50 0.50 0.50 vrsqrt28ps (%rax), %zmm19 {%k1}
+# CHECK-NEXT: - - 2.50 - - 0.50 0.50 0.50 vrsqrt28ps (%rax){1to16}, %zmm19 {%k1}
+# CHECK-NEXT: - - 2.50 - - 0.50 - - vrsqrt28ps %zmm16, %zmm19 {%k1} {z}
+# CHECK-NEXT: - - 2.50 - - 0.50 0.50 0.50 vrsqrt28ps (%rax), %zmm19 {%k1} {z}
+# CHECK-NEXT: - - 2.50 - - 0.50 0.50 0.50 vrsqrt28ps (%rax){1to16}, %zmm19 {%k1} {z}
+# CHECK-NEXT: - - 2.50 - - 0.50 - - vrsqrt28ps {sae}, %zmm16, %zmm19
+# CHECK-NEXT: - - 2.50 - - 0.50 - - vrsqrt28ps {sae}, %zmm16, %zmm19 {%k1}
+# CHECK-NEXT: - - 2.50 - - 0.50 - - vrsqrt28ps {sae}, %zmm16, %zmm19 {%k1} {z}
+# CHECK-NEXT: - - 1.00 - - - - - vrsqrt28sd %xmm16, %xmm17, %xmm19
+# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vrsqrt28sd (%rax), %xmm17, %xmm19
+# CHECK-NEXT: - - 1.00 - - - - - vrsqrt28sd %xmm16, %xmm17, %xmm19 {%k1}
+# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vrsqrt28sd (%rax), %xmm17, %xmm19 {%k1}
+# CHECK-NEXT: - - 1.00 - - - - - vrsqrt28sd %xmm16, %xmm17, %xmm19 {%k1} {z}
+# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vrsqrt28sd (%rax), %xmm17, %xmm19 {%k1} {z}
+# CHECK-NEXT: - - 1.00 - - - - - vrsqrt28sd {sae}, %xmm16, %xmm17, %xmm19
+# CHECK-NEXT: - - 1.00 - - - - - vrsqrt28sd {sae}, %xmm16, %xmm17, %xmm19 {%k1}
+# CHECK-NEXT: - - 1.00 - - - - - vrsqrt28sd {sae}, %xmm16, %xmm17, %xmm19 {%k1} {z}
+# CHECK-NEXT: - - 1.00 - - - - - vrsqrt28ss %xmm16, %xmm17, %xmm19
+# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vrsqrt28ss (%rax), %xmm17, %xmm19
+# CHECK-NEXT: - - 1.00 - - - - - vrsqrt28ss %xmm16, %xmm17, %xmm19 {%k1}
+# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vrsqrt28ss (%rax), %xmm17, %xmm19 {%k1}
+# CHECK-NEXT: - - 1.00 - - - - - vrsqrt28ss %xmm16, %xmm17, %xmm19 {%k1} {z}
+# CHECK-NEXT: - - 1.00 - - - 0.50 0.50 vrsqrt28ss (%rax), %xmm17, %xmm19 {%k1} {z}
+# CHECK-NEXT: - - 1.00 - - - - - vrsqrt28ss {sae}, %xmm16, %xmm17, %xmm19
+# CHECK-NEXT: - - 1.00 - - - - - vrsqrt28ss {sae}, %xmm16, %xmm17, %xmm19 {%k1}
+# CHECK-NEXT: - - 1.00 - - - - - vrsqrt28ss {sae}, %xmm16, %xmm17, %xmm19 {%k1} {z}
More information about the cfe-commits
mailing list