[clang] [llvm] Revert "[X86] Remove knl/knm specific ISAs supports (#92883)" (PR #93123)
via llvm-commits
llvm-commits at lists.llvm.org
Wed May 22 19:25:42 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-clang-driver
@llvm/pr-subscribers-clang
Author: Freddy Ye (FreddyLeaf)
<details>
<summary>Changes</summary>
This reverts commit 282d2ab58f56c89510f810a43d4569824a90c538.
---
Patch is 141.61 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/93123.diff
53 Files Affected:
- (modified) clang/docs/ReleaseNotes.rst (-2)
- (modified) clang/include/clang/Basic/BuiltinsX86.def (+21)
- (modified) clang/include/clang/Basic/DiagnosticCommonKinds.td (+3)
- (modified) clang/include/clang/Driver/Options.td (+6)
- (modified) clang/lib/Basic/Targets/X86.cpp (+21)
- (modified) clang/lib/Basic/Targets/X86.h (+3)
- (modified) clang/lib/Headers/CMakeLists.txt (+2)
- (added) clang/lib/Headers/avx512erintrin.h (+271)
- (added) clang/lib/Headers/avx512pfintrin.h (+92)
- (modified) clang/lib/Headers/immintrin.h (+8)
- (modified) clang/lib/Headers/module.modulemap (+1)
- (modified) clang/lib/Sema/SemaChecking.cpp (+30)
- (added) clang/test/CodeGen/X86/avx512er-builtins.c (+347)
- (added) clang/test/CodeGen/X86/avx512pf-builtins.c (+100)
- (modified) clang/test/CodeGen/attr-cpuspecific.c (+5-5)
- (modified) clang/test/CodeGen/attr-target-x86.c (+2-2)
- (modified) clang/test/CodeGen/function-target-features.c (+2-2)
- (modified) clang/test/CodeGen/target-builtin-noerror.c (+2)
- (modified) clang/test/Driver/cl-x86-flags.c (+8-2)
- (modified) clang/test/Driver/x86-target-features.c (+9-4)
- (modified) clang/test/Frontend/x86-target-cpu.c (+8-2)
- (modified) clang/test/Preprocessor/predefined-arch-macros.c (+12)
- (modified) clang/test/Preprocessor/x86_target_features.c (+50)
- (modified) clang/test/Sema/builtins-x86.c (+8)
- (modified) llvm/docs/ReleaseNotes.rst (-3)
- (modified) llvm/include/llvm/IR/IntrinsicsX86.td (+84)
- (modified) llvm/include/llvm/TargetParser/X86TargetParser.def (+6-3)
- (modified) llvm/lib/Target/X86/X86.td (+12)
- (modified) llvm/lib/Target/X86/X86ISelLowering.cpp (+10)
- (modified) llvm/lib/Target/X86/X86ISelLowering.h (+12)
- (modified) llvm/lib/Target/X86/X86Instr3DNow.td (+2-1)
- (modified) llvm/lib/Target/X86/X86InstrAVX512.td (+16-75)
- (modified) llvm/lib/Target/X86/X86InstrFragments.td (+7-1)
- (modified) llvm/lib/Target/X86/X86InstrFragmentsSIMD.td (+11)
- (modified) llvm/lib/Target/X86/X86InstrPredicates.td (+3)
- (modified) llvm/lib/Target/X86/X86IntrinsicsInfo.h (+27)
- (modified) llvm/lib/Target/X86/X86Subtarget.h (+5-3)
- (modified) llvm/lib/TargetParser/Host.cpp (+9)
- (modified) llvm/lib/TargetParser/X86TargetParser.cpp (+3-3)
- (modified) llvm/test/CodeGen/X86/avx512-cmp-kor-sequence.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/avx512-gather-scatter-intrin-deprecated.ll (+24)
- (modified) llvm/test/CodeGen/X86/avx512-gather-scatter-intrin.ll (+24)
- (added) llvm/test/CodeGen/X86/avx512er-intrinsics.ll (+306)
- (modified) llvm/test/CodeGen/X86/crc32-target-feature.ll (+2-2)
- (modified) llvm/test/CodeGen/X86/insert-prefetch-invalid-instr.ll (+6-1)
- (modified) llvm/test/CodeGen/X86/prefetch.ll (+17)
- (modified) llvm/test/CodeGen/X86/speculative-load-hardening-gather.ll (+22)
- (modified) llvm/test/CodeGen/X86/unfoldMemoryOperand.mir (+1-1)
- (modified) llvm/test/Transforms/LoopStrengthReduce/X86/pr40514.ll (+1-1)
- (modified) llvm/test/Transforms/LoopVectorize/X86/pr23997.ll (+1-1)
- (modified) llvm/test/Transforms/LoopVectorize/X86/pr54634.ll (+1-1)
- (modified) llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll (+1-1)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/vector_gep.ll (+1-1)
``````````diff
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index d410d8acd135b..0c4a343b70009 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -801,8 +801,6 @@ AMDGPU Support
X86 Support
^^^^^^^^^^^
-- Remove knl/knm specific ISA supports: AVX512PF, AVX512ER, PREFETCHWT1
-
Arm and AArch64 Support
^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def
index 7074479786b97..eafcc219c1096 100644
--- a/clang/include/clang/Basic/BuiltinsX86.def
+++ b/clang/include/clang/Basic/BuiltinsX86.def
@@ -832,11 +832,23 @@ TARGET_BUILTIN(__builtin_ia32_rsqrt14ss_mask, "V4fV4fV4fV4fUc", "ncV:128:", "avx
TARGET_BUILTIN(__builtin_ia32_rsqrt14pd512_mask, "V8dV8dV8dUc", "ncV:512:", "avx512f,evex512")
TARGET_BUILTIN(__builtin_ia32_rsqrt14ps512_mask, "V16fV16fV16fUs", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_rsqrt28sd_round_mask, "V2dV2dV2dV2dUcIi", "ncV:128:", "avx512er")
+TARGET_BUILTIN(__builtin_ia32_rsqrt28ss_round_mask, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512er")
+TARGET_BUILTIN(__builtin_ia32_rsqrt28pd_mask, "V8dV8dV8dUcIi", "ncV:512:", "avx512er,evex512")
+TARGET_BUILTIN(__builtin_ia32_rsqrt28ps_mask, "V16fV16fV16fUsIi", "ncV:512:", "avx512er,evex512")
+
TARGET_BUILTIN(__builtin_ia32_rcp14sd_mask, "V2dV2dV2dV2dUc", "ncV:128:", "avx512f")
TARGET_BUILTIN(__builtin_ia32_rcp14ss_mask, "V4fV4fV4fV4fUc", "ncV:128:", "avx512f")
TARGET_BUILTIN(__builtin_ia32_rcp14pd512_mask, "V8dV8dV8dUc", "ncV:512:", "avx512f,evex512")
TARGET_BUILTIN(__builtin_ia32_rcp14ps512_mask, "V16fV16fV16fUs", "ncV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_rcp28sd_round_mask, "V2dV2dV2dV2dUcIi", "ncV:128:", "avx512er")
+TARGET_BUILTIN(__builtin_ia32_rcp28ss_round_mask, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512er")
+TARGET_BUILTIN(__builtin_ia32_rcp28pd_mask, "V8dV8dV8dUcIi", "ncV:512:", "avx512er,evex512")
+TARGET_BUILTIN(__builtin_ia32_rcp28ps_mask, "V16fV16fV16fUsIi", "ncV:512:", "avx512er,evex512")
+TARGET_BUILTIN(__builtin_ia32_exp2pd_mask, "V8dV8dV8dUcIi", "ncV:512:", "avx512er,evex512")
+TARGET_BUILTIN(__builtin_ia32_exp2ps_mask, "V16fV16fV16fUsIi", "ncV:512:", "avx512er,evex512")
+
TARGET_BUILTIN(__builtin_ia32_cvttps2dq512_mask, "V16iV16fV16iUsIi", "ncV:512:", "avx512f,evex512")
TARGET_BUILTIN(__builtin_ia32_cvttps2udq512_mask, "V16iV16fV16iUsIi", "ncV:512:", "avx512f,evex512")
TARGET_BUILTIN(__builtin_ia32_cvttpd2dq512_mask, "V8iV8dV8iUcIi", "ncV:512:", "avx512f,evex512")
@@ -948,6 +960,15 @@ TARGET_BUILTIN(__builtin_ia32_scattersiv16si, "vv*UsV16iV16iIi", "nV:512:", "avx
TARGET_BUILTIN(__builtin_ia32_scatterdiv8di, "vv*UcV8OiV8OiIi", "nV:512:", "avx512f,evex512")
TARGET_BUILTIN(__builtin_ia32_scatterdiv16si, "vv*UcV8OiV8iIi", "nV:512:", "avx512f,evex512")
+TARGET_BUILTIN(__builtin_ia32_gatherpfdpd, "vUcV8ivC*IiIi", "nV:512:", "avx512pf,evex512")
+TARGET_BUILTIN(__builtin_ia32_gatherpfdps, "vUsV16ivC*IiIi", "nV:512:", "avx512pf,evex512")
+TARGET_BUILTIN(__builtin_ia32_gatherpfqpd, "vUcV8OivC*IiIi", "nV:512:", "avx512pf,evex512")
+TARGET_BUILTIN(__builtin_ia32_gatherpfqps, "vUcV8OivC*IiIi", "nV:512:", "avx512pf,evex512")
+TARGET_BUILTIN(__builtin_ia32_scatterpfdpd, "vUcV8iv*IiIi", "nV:512:", "avx512pf,evex512")
+TARGET_BUILTIN(__builtin_ia32_scatterpfdps, "vUsV16iv*IiIi", "nV:512:", "avx512pf,evex512")
+TARGET_BUILTIN(__builtin_ia32_scatterpfqpd, "vUcV8Oiv*IiIi", "nV:512:", "avx512pf,evex512")
+TARGET_BUILTIN(__builtin_ia32_scatterpfqps, "vUcV8Oiv*IiIi", "nV:512:", "avx512pf,evex512")
+
TARGET_BUILTIN(__builtin_ia32_knotqi, "UcUc", "nc", "avx512dq")
TARGET_BUILTIN(__builtin_ia32_knothi, "UsUs", "nc", "avx512f")
TARGET_BUILTIN(__builtin_ia32_knotsi, "UiUi", "nc", "avx512bw")
diff --git a/clang/include/clang/Basic/DiagnosticCommonKinds.td b/clang/include/clang/Basic/DiagnosticCommonKinds.td
index 1e44bc4ad09b6..0738f43ca555c 100644
--- a/clang/include/clang/Basic/DiagnosticCommonKinds.td
+++ b/clang/include/clang/Basic/DiagnosticCommonKinds.td
@@ -361,6 +361,9 @@ def warn_invalid_feature_combination : Warning<
def warn_target_unrecognized_env : Warning<
"mismatch between architecture and environment in target triple '%0'; did you mean '%1'?">,
InGroup<InvalidCommandLineArgument>;
+def warn_knl_knm_isa_support_removed : Warning<
+ "KNL, KNM related Intel Xeon Phi CPU's specific ISA's supports will be removed in LLVM 19.">,
+ InGroup<DiagGroup<"knl-knm-isa-support-removed">>;
def err_target_unsupported_abi_with_fpu : Error<
"'%0' ABI is not supported with FPU">;
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 9a5bffce20460..8cbb7f854ee72 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -6111,10 +6111,14 @@ def mavx512cd : Flag<["-"], "mavx512cd">, Group<m_x86_Features_Group>;
def mno_avx512cd : Flag<["-"], "mno-avx512cd">, Group<m_x86_Features_Group>;
def mavx512dq : Flag<["-"], "mavx512dq">, Group<m_x86_Features_Group>;
def mno_avx512dq : Flag<["-"], "mno-avx512dq">, Group<m_x86_Features_Group>;
+def mavx512er : Flag<["-"], "mavx512er">, Group<m_x86_Features_Group>;
+def mno_avx512er : Flag<["-"], "mno-avx512er">, Group<m_x86_Features_Group>;
def mavx512fp16 : Flag<["-"], "mavx512fp16">, Group<m_x86_Features_Group>;
def mno_avx512fp16 : Flag<["-"], "mno-avx512fp16">, Group<m_x86_Features_Group>;
def mavx512ifma : Flag<["-"], "mavx512ifma">, Group<m_x86_Features_Group>;
def mno_avx512ifma : Flag<["-"], "mno-avx512ifma">, Group<m_x86_Features_Group>;
+def mavx512pf : Flag<["-"], "mavx512pf">, Group<m_x86_Features_Group>;
+def mno_avx512pf : Flag<["-"], "mno-avx512pf">, Group<m_x86_Features_Group>;
def mavx512vbmi : Flag<["-"], "mavx512vbmi">, Group<m_x86_Features_Group>;
def mno_avx512vbmi : Flag<["-"], "mno-avx512vbmi">, Group<m_x86_Features_Group>;
def mavx512vbmi2 : Flag<["-"], "mavx512vbmi2">, Group<m_x86_Features_Group>;
@@ -6205,6 +6209,8 @@ def mpopcnt : Flag<["-"], "mpopcnt">, Group<m_x86_Features_Group>;
def mno_popcnt : Flag<["-"], "mno-popcnt">, Group<m_x86_Features_Group>;
def mprefetchi : Flag<["-"], "mprefetchi">, Group<m_x86_Features_Group>;
def mno_prefetchi : Flag<["-"], "mno-prefetchi">, Group<m_x86_Features_Group>;
+def mprefetchwt1 : Flag<["-"], "mprefetchwt1">, Group<m_x86_Features_Group>;
+def mno_prefetchwt1 : Flag<["-"], "mno-prefetchwt1">, Group<m_x86_Features_Group>;
def mprfchw : Flag<["-"], "mprfchw">, Group<m_x86_Features_Group>;
def mno_prfchw : Flag<["-"], "mno-prfchw">, Group<m_x86_Features_Group>;
def mptwrite : Flag<["-"], "mptwrite">, Group<m_x86_Features_Group>;
diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp
index 3a30cff917bb4..b823eaf6ce336 100644
--- a/clang/lib/Basic/Targets/X86.cpp
+++ b/clang/lib/Basic/Targets/X86.cpp
@@ -310,9 +310,15 @@ bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
HasAVX512VNNI = true;
} else if (Feature == "+avx512bf16") {
HasAVX512BF16 = true;
+ } else if (Feature == "+avx512er") {
+ HasAVX512ER = true;
+ Diags.Report(diag::warn_knl_knm_isa_support_removed);
} else if (Feature == "+avx512fp16") {
HasAVX512FP16 = true;
HasLegalHalfType = true;
+ } else if (Feature == "+avx512pf") {
+ HasAVX512PF = true;
+ Diags.Report(diag::warn_knl_knm_isa_support_removed);
} else if (Feature == "+avx512dq") {
HasAVX512DQ = true;
} else if (Feature == "+avx512bitalg") {
@@ -369,6 +375,9 @@ bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
HasWBNOINVD = true;
} else if (Feature == "+prefetchi") {
HasPREFETCHI = true;
+ } else if (Feature == "+prefetchwt1") {
+ HasPREFETCHWT1 = true;
+ Diags.Report(diag::warn_knl_knm_isa_support_removed);
} else if (Feature == "+clzero") {
HasCLZERO = true;
} else if (Feature == "+cldemote") {
@@ -831,8 +840,12 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts,
Builder.defineMacro("__AVX512VNNI__");
if (HasAVX512BF16)
Builder.defineMacro("__AVX512BF16__");
+ if (HasAVX512ER)
+ Builder.defineMacro("__AVX512ER__");
if (HasAVX512FP16)
Builder.defineMacro("__AVX512FP16__");
+ if (HasAVX512PF)
+ Builder.defineMacro("__AVX512PF__");
if (HasAVX512DQ)
Builder.defineMacro("__AVX512DQ__");
if (HasAVX512BITALG)
@@ -884,6 +897,8 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts,
Builder.defineMacro("__SM4__");
if (HasPREFETCHI)
Builder.defineMacro("__PREFETCHI__");
+ if (HasPREFETCHWT1)
+ Builder.defineMacro("__PREFETCHWT1__");
if (HasCLZERO)
Builder.defineMacro("__CLZERO__");
if (HasKL)
@@ -1069,7 +1084,9 @@ bool X86TargetInfo::isValidFeatureName(StringRef Name) const {
.Case("avx512vpopcntdq", true)
.Case("avx512vnni", true)
.Case("avx512bf16", true)
+ .Case("avx512er", true)
.Case("avx512fp16", true)
+ .Case("avx512pf", true)
.Case("avx512dq", true)
.Case("avx512bitalg", true)
.Case("avx512bw", true)
@@ -1117,6 +1134,7 @@ bool X86TargetInfo::isValidFeatureName(StringRef Name) const {
.Case("pku", true)
.Case("popcnt", true)
.Case("prefetchi", true)
+ .Case("prefetchwt1", true)
.Case("prfchw", true)
.Case("ptwrite", true)
.Case("raoint", true)
@@ -1183,7 +1201,9 @@ bool X86TargetInfo::hasFeature(StringRef Feature) const {
.Case("avx512vpopcntdq", HasAVX512VPOPCNTDQ)
.Case("avx512vnni", HasAVX512VNNI)
.Case("avx512bf16", HasAVX512BF16)
+ .Case("avx512er", HasAVX512ER)
.Case("avx512fp16", HasAVX512FP16)
+ .Case("avx512pf", HasAVX512PF)
.Case("avx512dq", HasAVX512DQ)
.Case("avx512bitalg", HasAVX512BITALG)
.Case("avx512bw", HasAVX512BW)
@@ -1233,6 +1253,7 @@ bool X86TargetInfo::hasFeature(StringRef Feature) const {
.Case("pku", HasPKU)
.Case("popcnt", HasPOPCNT)
.Case("prefetchi", HasPREFETCHI)
+ .Case("prefetchwt1", HasPREFETCHWT1)
.Case("prfchw", HasPRFCHW)
.Case("ptwrite", HasPTWRITE)
.Case("raoint", HasRAOINT)
diff --git a/clang/lib/Basic/Targets/X86.h b/clang/lib/Basic/Targets/X86.h
index 0633b7e0da96a..6a0a6cb84203d 100644
--- a/clang/lib/Basic/Targets/X86.h
+++ b/clang/lib/Basic/Targets/X86.h
@@ -103,6 +103,8 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo {
bool HasAVX512VNNI = false;
bool HasAVX512FP16 = false;
bool HasAVX512BF16 = false;
+ bool HasAVX512ER = false;
+ bool HasAVX512PF = false;
bool HasAVX512DQ = false;
bool HasAVX512BITALG = false;
bool HasAVX512BW = false;
@@ -134,6 +136,7 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo {
bool HasCLWB = false;
bool HasMOVBE = false;
bool HasPREFETCHI = false;
+ bool HasPREFETCHWT1 = false;
bool HasRDPID = false;
bool HasRDPRU = false;
bool HasRetpolineExternalThunk = false;
diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt
index dbff92b4e59b4..5f02c71f6ca51 100644
--- a/clang/lib/Headers/CMakeLists.txt
+++ b/clang/lib/Headers/CMakeLists.txt
@@ -153,10 +153,12 @@ set(x86_files
avx512bwintrin.h
avx512cdintrin.h
avx512dqintrin.h
+ avx512erintrin.h
avx512fintrin.h
avx512fp16intrin.h
avx512ifmaintrin.h
avx512ifmavlintrin.h
+ avx512pfintrin.h
avx512vbmi2intrin.h
avx512vbmiintrin.h
avx512vbmivlintrin.h
diff --git a/clang/lib/Headers/avx512erintrin.h b/clang/lib/Headers/avx512erintrin.h
new file mode 100644
index 0000000000000..1c5a2d2d208ff
--- /dev/null
+++ b/clang/lib/Headers/avx512erintrin.h
@@ -0,0 +1,271 @@
+/*===---- avx512erintrin.h - AVX512ER intrinsics ---------------------------===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===-----------------------------------------------------------------------===
+ */
+#ifndef __IMMINTRIN_H
+#error "Never use <avx512erintrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef __AVX512ERINTRIN_H
+#define __AVX512ERINTRIN_H
+
+/* exp2a23 */
+#define _mm512_exp2a23_round_pd(A, R) \
+ ((__m512d)__builtin_ia32_exp2pd_mask((__v8df)(__m512d)(A), \
+ (__v8df)_mm512_setzero_pd(), \
+ (__mmask8)-1, (int)(R)))
+
+#define _mm512_mask_exp2a23_round_pd(S, M, A, R) \
+ ((__m512d)__builtin_ia32_exp2pd_mask((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(S), (__mmask8)(M), \
+ (int)(R)))
+
+#define _mm512_maskz_exp2a23_round_pd(M, A, R) \
+ ((__m512d)__builtin_ia32_exp2pd_mask((__v8df)(__m512d)(A), \
+ (__v8df)_mm512_setzero_pd(), \
+ (__mmask8)(M), (int)(R)))
+
+#define _mm512_exp2a23_pd(A) \
+ _mm512_exp2a23_round_pd((A), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm512_mask_exp2a23_pd(S, M, A) \
+ _mm512_mask_exp2a23_round_pd((S), (M), (A), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm512_maskz_exp2a23_pd(M, A) \
+ _mm512_maskz_exp2a23_round_pd((M), (A), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm512_exp2a23_round_ps(A, R) \
+ ((__m512)__builtin_ia32_exp2ps_mask((__v16sf)(__m512)(A), \
+ (__v16sf)_mm512_setzero_ps(), \
+ (__mmask16)-1, (int)(R)))
+
+#define _mm512_mask_exp2a23_round_ps(S, M, A, R) \
+ ((__m512)__builtin_ia32_exp2ps_mask((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(S), (__mmask16)(M), \
+ (int)(R)))
+
+#define _mm512_maskz_exp2a23_round_ps(M, A, R) \
+ ((__m512)__builtin_ia32_exp2ps_mask((__v16sf)(__m512)(A), \
+ (__v16sf)_mm512_setzero_ps(), \
+ (__mmask16)(M), (int)(R)))
+
+#define _mm512_exp2a23_ps(A) \
+ _mm512_exp2a23_round_ps((A), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm512_mask_exp2a23_ps(S, M, A) \
+ _mm512_mask_exp2a23_round_ps((S), (M), (A), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm512_maskz_exp2a23_ps(M, A) \
+ _mm512_maskz_exp2a23_round_ps((M), (A), _MM_FROUND_CUR_DIRECTION)
+
+/* rsqrt28 */
+#define _mm512_rsqrt28_round_pd(A, R) \
+ ((__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)(__m512d)(A), \
+ (__v8df)_mm512_setzero_pd(), \
+ (__mmask8)-1, (int)(R)))
+
+#define _mm512_mask_rsqrt28_round_pd(S, M, A, R) \
+ ((__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(S), (__mmask8)(M), \
+ (int)(R)))
+
+#define _mm512_maskz_rsqrt28_round_pd(M, A, R) \
+ ((__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)(__m512d)(A), \
+ (__v8df)_mm512_setzero_pd(), \
+ (__mmask8)(M), (int)(R)))
+
+#define _mm512_rsqrt28_pd(A) \
+ _mm512_rsqrt28_round_pd((A), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm512_mask_rsqrt28_pd(S, M, A) \
+ _mm512_mask_rsqrt28_round_pd((S), (M), (A), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm512_maskz_rsqrt28_pd(M, A) \
+ _mm512_maskz_rsqrt28_round_pd((M), (A), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm512_rsqrt28_round_ps(A, R) \
+ ((__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)(__m512)(A), \
+ (__v16sf)_mm512_setzero_ps(), \
+ (__mmask16)-1, (int)(R)))
+
+#define _mm512_mask_rsqrt28_round_ps(S, M, A, R) \
+ ((__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)(__m512)(A), \
+ (__v16sf)(__m512)(S), (__mmask16)(M), \
+ (int)(R)))
+
+#define _mm512_maskz_rsqrt28_round_ps(M, A, R) \
+ ((__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)(__m512)(A), \
+ (__v16sf)_mm512_setzero_ps(), \
+ (__mmask16)(M), (int)(R)))
+
+#define _mm512_rsqrt28_ps(A) \
+ _mm512_rsqrt28_round_ps((A), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm512_mask_rsqrt28_ps(S, M, A) \
+ _mm512_mask_rsqrt28_round_ps((S), (M), A, _MM_FROUND_CUR_DIRECTION)
+
+#define _mm512_maskz_rsqrt28_ps(M, A) \
+ _mm512_maskz_rsqrt28_round_ps((M), (A), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm_rsqrt28_round_ss(A, B, R) \
+ ((__m128)__builtin_ia32_rsqrt28ss_round_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)_mm_setzero_ps(), \
+ (__mmask8)-1, (int)(R)))
+
+#define _mm_mask_rsqrt28_round_ss(S, M, A, B, R) \
+ ((__m128)__builtin_ia32_rsqrt28ss_round_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)(__m128)(S), \
+ (__mmask8)(M), (int)(R)))
+
+#define _mm_maskz_rsqrt28_round_ss(M, A, B, R) \
+ ((__m128)__builtin_ia32_rsqrt28ss_round_mask((__v4sf)(__m128)(A), \
+ (__v4sf)(__m128)(B), \
+ (__v4sf)_mm_setzero_ps(), \
+ (__mmask8)(M), (int)(R)))
+
+#define _mm_rsqrt28_ss(A, B) \
+ _mm_rsqrt28_round_ss((A), (B), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm_mask_rsqrt28_ss(S, M, A, B) \
+ _mm_mask_rsqrt28_round_ss((S), (M), (A), (B), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm_maskz_rsqrt28_ss(M, A, B) \
+ _mm_maskz_rsqrt28_round_ss((M), (A), (B), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm_rsqrt28_round_sd(A, B, R) \
+ ((__m128d)__builtin_ia32_rsqrt28sd_round_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)_mm_setzero_pd(), \
+ (__mmask8)-1, (int)(R)))
+
+#define _mm_mask_rsqrt28_round_sd(S, M, A, B, R) \
+ ((__m128d)__builtin_ia32_rsqrt28sd_round_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)(__m128d)(S), \
+ (__mmask8)(M), (int)(R)))
+
+#define _mm_maskz_rsqrt28_round_sd(M, A, B, R) \
+ ((__m128d)__builtin_ia32_rsqrt28sd_round_mask((__v2df)(__m128d)(A), \
+ (__v2df)(__m128d)(B), \
+ (__v2df)_mm_setzero_pd(), \
+ (__mmask8)(M), (int)(R)))
+
+#define _mm_rsqrt28_sd(A, B) \
+ _mm_rsqrt28_round_sd((A), (B), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm_mask_rsqrt28_sd(S, M, A, B) \
+ _mm_mask_rsqrt28_round_sd((S), (M), (A), (B), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm_maskz_rsqrt28_sd(M, A, B) \
+ _mm_maskz_rsqrt28_round_sd((M), (A), (B), _MM_FROUND_CUR_DIRECTION)
+
+/* rcp28 */
+#define _mm512_rcp28_round_pd(A, R) \
+ ((__m512d)__builtin_ia32_rcp28pd_mask((__v8df)(__m512d)(A), \
+ (__v8df)_mm512_setzero_pd(), \
+ (__mmask8)-1, (int)(R)))
+
+#define _mm512_mask_rcp28_round_pd(S, M, A, R) \
+ ((__m512d)__builtin_ia32_rcp28pd_mask((__v8df)(__m512d)(A), \
+ (__v8df)(__m512d)(S), (__mmask8)(M), \
+ (int)(R)))
+
+#define _mm512_maskz_rcp28_round_pd(M, A, R) \
+ ((__m512d)__builtin_ia32_rcp28pd_mask((__v8df)(__m512d)(A), \
+ (__v8df)_mm512_setzero_pd(), \
+ (__mmask8)(M), (int)(R)))
+
+#define _mm512_rcp28_pd(A) \
+ _mm512_rcp28_round_pd((A), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm512_mask_rcp28_pd(S, M, A) \
+ _mm512_mask_rcp28_round_pd((S), (M), (A), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm512_maskz_rcp28_pd(M, A) \
+ _mm512_maskz_rcp28_round_pd((M), (A), _MM_FROUND_CUR_DIRECTION)
+
+#define _mm512_rcp28_round_ps(A, R) \
+ ((__m...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/93123
More information about the llvm-commits
mailing list