[clang] [llvm] [polly] [X86] Remove knl/knm specific ISAs supports (PR #92883)
via llvm-commits
llvm-commits at lists.llvm.org
Tue May 21 02:19:25 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-clang-driver
@llvm/pr-subscribers-clang
Author: Freddy Ye (FreddyLeaf)
<details>
<summary>Changes</summary>
Cont. patch after https://github.com/llvm/llvm-project/pull/75580
---
Patch is 163.02 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/92883.diff
47 Files Affected:
- (modified) clang/include/clang/Basic/BuiltinsX86.def (-21)
- (modified) clang/include/clang/Driver/Options.td (-6)
- (modified) clang/lib/Basic/Targets/X86.cpp (-21)
- (modified) clang/lib/Basic/Targets/X86.h (-3)
- (modified) clang/lib/Headers/CMakeLists.txt (-2)
- (removed) clang/lib/Headers/avx512erintrin.h (-271)
- (removed) clang/lib/Headers/avx512pfintrin.h (-92)
- (modified) clang/lib/Headers/immintrin.h (-8)
- (modified) clang/lib/Sema/SemaChecking.cpp (-30)
- (removed) clang/test/CodeGen/X86/avx512er-builtins.c (-347)
- (removed) clang/test/CodeGen/X86/avx512pf-builtins.c (-100)
- (modified) clang/test/CodeGen/attr-cpuspecific.c (+5-5)
- (modified) clang/test/CodeGen/attr-target-x86.c (+2-2)
- (modified) clang/test/CodeGen/function-target-features.c (+2-2)
- (modified) clang/test/CodeGen/target-builtin-noerror.c (-2)
- (modified) clang/test/Driver/cl-x86-flags.c (+2-8)
- (modified) clang/test/Driver/x86-target-features.c (+4-9)
- (modified) clang/test/Frontend/x86-target-cpu.c (+2-8)
- (modified) clang/test/Preprocessor/predefined-arch-macros.c (-12)
- (modified) clang/test/Preprocessor/x86_target_features.c (-50)
- (modified) clang/test/Sema/builtins-x86.c (-8)
- (modified) llvm/include/llvm/IR/IntrinsicsX86.td (-84)
- (modified) llvm/include/llvm/TargetParser/X86TargetParser.def (+3-6)
- (modified) llvm/lib/Target/X86/X86.td (-12)
- (modified) llvm/lib/Target/X86/X86Instr3DNow.td (+1-2)
- (modified) llvm/lib/Target/X86/X86InstrAVX512.td (+65-16)
- (modified) llvm/lib/Target/X86/X86InstrFragments.td (+1-7)
- (modified) llvm/lib/Target/X86/X86InstrPredicates.td (-3)
- (modified) llvm/lib/Target/X86/X86IntrinsicsInfo.h (-27)
- (modified) llvm/lib/Target/X86/X86Subtarget.h (+3-5)
- (modified) llvm/lib/TargetParser/Host.cpp (-9)
- (modified) llvm/lib/TargetParser/X86TargetParser.cpp (+3-3)
- (modified) llvm/test/CodeGen/X86/avx512-cmp-kor-sequence.ll (+1-1)
- (modified) llvm/test/CodeGen/X86/avx512-gather-scatter-intrin-deprecated.ll (-24)
- (modified) llvm/test/CodeGen/X86/avx512-gather-scatter-intrin.ll (-24)
- (removed) llvm/test/CodeGen/X86/avx512er-intrinsics.ll (-306)
- (modified) llvm/test/CodeGen/X86/crc32-target-feature.ll (+2-2)
- (modified) llvm/test/CodeGen/X86/insert-prefetch-invalid-instr.ll (-5)
- (modified) llvm/test/CodeGen/X86/prefetch.ll (-17)
- (modified) llvm/test/CodeGen/X86/speculative-load-hardening-gather.ll (-22)
- (modified) llvm/test/CodeGen/X86/unfoldMemoryOperand.mir (+1-1)
- (modified) llvm/test/Transforms/LoopStrengthReduce/X86/pr40514.ll (+1-1)
- (modified) llvm/test/Transforms/LoopVectorize/X86/pr23997.ll (+1-1)
- (modified) llvm/test/Transforms/LoopVectorize/X86/pr54634.ll (+1-1)
- (modified) llvm/test/Transforms/LoopVectorize/X86/scatter_crash.ll (+1-1)
- (modified) llvm/test/Transforms/SLPVectorizer/X86/vector_gep.ll (+1-1)
- (removed) llvm/test/tools/llvm-mca/X86/Generic/resources-avx512er.s (-373)
``````````diff
diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def
index eafcc219c1096..7074479786b97 100644
--- a/clang/include/clang/Basic/BuiltinsX86.def
+++ b/clang/include/clang/Basic/BuiltinsX86.def
@@ -832,23 +832,11 @@ TARGET_BUILTIN(__builtin_ia32_rsqrt14ss_mask, "V4fV4fV4fV4fUc", "ncV:128:", "avx
TARGET_BUILTIN(__builtin_ia32_rsqrt14pd512_mask, "V8dV8dV8dUc", "ncV:512:", "avx512f,evex512")
TARGET_BUILTIN(__builtin_ia32_rsqrt14ps512_mask, "V16fV16fV16fUs", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_rsqrt28sd_round_mask, "V2dV2dV2dV2dUcIi", "ncV:128:", "avx512er")
-TARGET_BUILTIN(__builtin_ia32_rsqrt28ss_round_mask, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512er")
-TARGET_BUILTIN(__builtin_ia32_rsqrt28pd_mask, "V8dV8dV8dUcIi", "ncV:512:", "avx512er,evex512")
-TARGET_BUILTIN(__builtin_ia32_rsqrt28ps_mask, "V16fV16fV16fUsIi", "ncV:512:", "avx512er,evex512")
-
TARGET_BUILTIN(__builtin_ia32_rcp14sd_mask, "V2dV2dV2dV2dUc", "ncV:128:", "avx512f")
TARGET_BUILTIN(__builtin_ia32_rcp14ss_mask, "V4fV4fV4fV4fUc", "ncV:128:", "avx512f")
TARGET_BUILTIN(__builtin_ia32_rcp14pd512_mask, "V8dV8dV8dUc", "ncV:512:", "avx512f,evex512")
TARGET_BUILTIN(__builtin_ia32_rcp14ps512_mask, "V16fV16fV16fUs", "ncV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_rcp28sd_round_mask, "V2dV2dV2dV2dUcIi", "ncV:128:", "avx512er")
-TARGET_BUILTIN(__builtin_ia32_rcp28ss_round_mask, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512er")
-TARGET_BUILTIN(__builtin_ia32_rcp28pd_mask, "V8dV8dV8dUcIi", "ncV:512:", "avx512er,evex512")
-TARGET_BUILTIN(__builtin_ia32_rcp28ps_mask, "V16fV16fV16fUsIi", "ncV:512:", "avx512er,evex512")
-TARGET_BUILTIN(__builtin_ia32_exp2pd_mask, "V8dV8dV8dUcIi", "ncV:512:", "avx512er,evex512")
-TARGET_BUILTIN(__builtin_ia32_exp2ps_mask, "V16fV16fV16fUsIi", "ncV:512:", "avx512er,evex512")
-
TARGET_BUILTIN(__builtin_ia32_cvttps2dq512_mask, "V16iV16fV16iUsIi", "ncV:512:", "avx512f,evex512")
TARGET_BUILTIN(__builtin_ia32_cvttps2udq512_mask, "V16iV16fV16iUsIi", "ncV:512:", "avx512f,evex512")
TARGET_BUILTIN(__builtin_ia32_cvttpd2dq512_mask, "V8iV8dV8iUcIi", "ncV:512:", "avx512f,evex512")
@@ -960,15 +948,6 @@ TARGET_BUILTIN(__builtin_ia32_scattersiv16si, "vv*UsV16iV16iIi", "nV:512:", "avx
TARGET_BUILTIN(__builtin_ia32_scatterdiv8di, "vv*UcV8OiV8OiIi", "nV:512:", "avx512f,evex512")
TARGET_BUILTIN(__builtin_ia32_scatterdiv16si, "vv*UcV8OiV8iIi", "nV:512:", "avx512f,evex512")
-TARGET_BUILTIN(__builtin_ia32_gatherpfdpd, "vUcV8ivC*IiIi", "nV:512:", "avx512pf,evex512")
-TARGET_BUILTIN(__builtin_ia32_gatherpfdps, "vUsV16ivC*IiIi", "nV:512:", "avx512pf,evex512")
-TARGET_BUILTIN(__builtin_ia32_gatherpfqpd, "vUcV8OivC*IiIi", "nV:512:", "avx512pf,evex512")
-TARGET_BUILTIN(__builtin_ia32_gatherpfqps, "vUcV8OivC*IiIi", "nV:512:", "avx512pf,evex512")
-TARGET_BUILTIN(__builtin_ia32_scatterpfdpd, "vUcV8iv*IiIi", "nV:512:", "avx512pf,evex512")
-TARGET_BUILTIN(__builtin_ia32_scatterpfdps, "vUsV16iv*IiIi", "nV:512:", "avx512pf,evex512")
-TARGET_BUILTIN(__builtin_ia32_scatterpfqpd, "vUcV8Oiv*IiIi", "nV:512:", "avx512pf,evex512")
-TARGET_BUILTIN(__builtin_ia32_scatterpfqps, "vUcV8Oiv*IiIi", "nV:512:", "avx512pf,evex512")
-
TARGET_BUILTIN(__builtin_ia32_knotqi, "UcUc", "nc", "avx512dq")
TARGET_BUILTIN(__builtin_ia32_knothi, "UsUs", "nc", "avx512f")
TARGET_BUILTIN(__builtin_ia32_knotsi, "UiUi", "nc", "avx512bw")
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 7bb781667e926..515f632f503dd 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -6111,14 +6111,10 @@ def mavx512cd : Flag<["-"], "mavx512cd">, Group<m_x86_Features_Group>;
def mno_avx512cd : Flag<["-"], "mno-avx512cd">, Group<m_x86_Features_Group>;
def mavx512dq : Flag<["-"], "mavx512dq">, Group<m_x86_Features_Group>;
def mno_avx512dq : Flag<["-"], "mno-avx512dq">, Group<m_x86_Features_Group>;
-def mavx512er : Flag<["-"], "mavx512er">, Group<m_x86_Features_Group>;
-def mno_avx512er : Flag<["-"], "mno-avx512er">, Group<m_x86_Features_Group>;
def mavx512fp16 : Flag<["-"], "mavx512fp16">, Group<m_x86_Features_Group>;
def mno_avx512fp16 : Flag<["-"], "mno-avx512fp16">, Group<m_x86_Features_Group>;
def mavx512ifma : Flag<["-"], "mavx512ifma">, Group<m_x86_Features_Group>;
def mno_avx512ifma : Flag<["-"], "mno-avx512ifma">, Group<m_x86_Features_Group>;
-def mavx512pf : Flag<["-"], "mavx512pf">, Group<m_x86_Features_Group>;
-def mno_avx512pf : Flag<["-"], "mno-avx512pf">, Group<m_x86_Features_Group>;
def mavx512vbmi : Flag<["-"], "mavx512vbmi">, Group<m_x86_Features_Group>;
def mno_avx512vbmi : Flag<["-"], "mno-avx512vbmi">, Group<m_x86_Features_Group>;
def mavx512vbmi2 : Flag<["-"], "mavx512vbmi2">, Group<m_x86_Features_Group>;
@@ -6209,8 +6205,6 @@ def mpopcnt : Flag<["-"], "mpopcnt">, Group<m_x86_Features_Group>;
def mno_popcnt : Flag<["-"], "mno-popcnt">, Group<m_x86_Features_Group>;
def mprefetchi : Flag<["-"], "mprefetchi">, Group<m_x86_Features_Group>;
def mno_prefetchi : Flag<["-"], "mno-prefetchi">, Group<m_x86_Features_Group>;
-def mprefetchwt1 : Flag<["-"], "mprefetchwt1">, Group<m_x86_Features_Group>;
-def mno_prefetchwt1 : Flag<["-"], "mno-prefetchwt1">, Group<m_x86_Features_Group>;
def mprfchw : Flag<["-"], "mprfchw">, Group<m_x86_Features_Group>;
def mno_prfchw : Flag<["-"], "mno-prfchw">, Group<m_x86_Features_Group>;
def mptwrite : Flag<["-"], "mptwrite">, Group<m_x86_Features_Group>;
diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp
index b823eaf6ce336..3a30cff917bb4 100644
--- a/clang/lib/Basic/Targets/X86.cpp
+++ b/clang/lib/Basic/Targets/X86.cpp
@@ -310,15 +310,9 @@ bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
HasAVX512VNNI = true;
} else if (Feature == "+avx512bf16") {
HasAVX512BF16 = true;
- } else if (Feature == "+avx512er") {
- HasAVX512ER = true;
- Diags.Report(diag::warn_knl_knm_isa_support_removed);
} else if (Feature == "+avx512fp16") {
HasAVX512FP16 = true;
HasLegalHalfType = true;
- } else if (Feature == "+avx512pf") {
- HasAVX512PF = true;
- Diags.Report(diag::warn_knl_knm_isa_support_removed);
} else if (Feature == "+avx512dq") {
HasAVX512DQ = true;
} else if (Feature == "+avx512bitalg") {
@@ -375,9 +369,6 @@ bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
HasWBNOINVD = true;
} else if (Feature == "+prefetchi") {
HasPREFETCHI = true;
- } else if (Feature == "+prefetchwt1") {
- HasPREFETCHWT1 = true;
- Diags.Report(diag::warn_knl_knm_isa_support_removed);
} else if (Feature == "+clzero") {
HasCLZERO = true;
} else if (Feature == "+cldemote") {
@@ -840,12 +831,8 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts,
Builder.defineMacro("__AVX512VNNI__");
if (HasAVX512BF16)
Builder.defineMacro("__AVX512BF16__");
- if (HasAVX512ER)
- Builder.defineMacro("__AVX512ER__");
if (HasAVX512FP16)
Builder.defineMacro("__AVX512FP16__");
- if (HasAVX512PF)
- Builder.defineMacro("__AVX512PF__");
if (HasAVX512DQ)
Builder.defineMacro("__AVX512DQ__");
if (HasAVX512BITALG)
@@ -897,8 +884,6 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts,
Builder.defineMacro("__SM4__");
if (HasPREFETCHI)
Builder.defineMacro("__PREFETCHI__");
- if (HasPREFETCHWT1)
- Builder.defineMacro("__PREFETCHWT1__");
if (HasCLZERO)
Builder.defineMacro("__CLZERO__");
if (HasKL)
@@ -1084,9 +1069,7 @@ bool X86TargetInfo::isValidFeatureName(StringRef Name) const {
.Case("avx512vpopcntdq", true)
.Case("avx512vnni", true)
.Case("avx512bf16", true)
- .Case("avx512er", true)
.Case("avx512fp16", true)
- .Case("avx512pf", true)
.Case("avx512dq", true)
.Case("avx512bitalg", true)
.Case("avx512bw", true)
@@ -1134,7 +1117,6 @@ bool X86TargetInfo::isValidFeatureName(StringRef Name) const {
.Case("pku", true)
.Case("popcnt", true)
.Case("prefetchi", true)
- .Case("prefetchwt1", true)
.Case("prfchw", true)
.Case("ptwrite", true)
.Case("raoint", true)
@@ -1201,9 +1183,7 @@ bool X86TargetInfo::hasFeature(StringRef Feature) const {
.Case("avx512vpopcntdq", HasAVX512VPOPCNTDQ)
.Case("avx512vnni", HasAVX512VNNI)
.Case("avx512bf16", HasAVX512BF16)
- .Case("avx512er", HasAVX512ER)
.Case("avx512fp16", HasAVX512FP16)
- .Case("avx512pf", HasAVX512PF)
.Case("avx512dq", HasAVX512DQ)
.Case("avx512bitalg", HasAVX512BITALG)
.Case("avx512bw", HasAVX512BW)
@@ -1253,7 +1233,6 @@ bool X86TargetInfo::hasFeature(StringRef Feature) const {
.Case("pku", HasPKU)
.Case("popcnt", HasPOPCNT)
.Case("prefetchi", HasPREFETCHI)
- .Case("prefetchwt1", HasPREFETCHWT1)
.Case("prfchw", HasPRFCHW)
.Case("ptwrite", HasPTWRITE)
.Case("raoint", HasRAOINT)
diff --git a/clang/lib/Basic/Targets/X86.h b/clang/lib/Basic/Targets/X86.h
index 6a0a6cb84203d..0633b7e0da96a 100644
--- a/clang/lib/Basic/Targets/X86.h
+++ b/clang/lib/Basic/Targets/X86.h
@@ -103,8 +103,6 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo {
bool HasAVX512VNNI = false;
bool HasAVX512FP16 = false;
bool HasAVX512BF16 = false;
- bool HasAVX512ER = false;
- bool HasAVX512PF = false;
bool HasAVX512DQ = false;
bool HasAVX512BITALG = false;
bool HasAVX512BW = false;
@@ -136,7 +134,6 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo {
bool HasCLWB = false;
bool HasMOVBE = false;
bool HasPREFETCHI = false;
- bool HasPREFETCHWT1 = false;
bool HasRDPID = false;
bool HasRDPRU = false;
bool HasRetpolineExternalThunk = false;
diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt
index 5f02c71f6ca51..dbff92b4e59b4 100644
--- a/clang/lib/Headers/CMakeLists.txt
+++ b/clang/lib/Headers/CMakeLists.txt
@@ -153,12 +153,10 @@ set(x86_files
avx512bwintrin.h
avx512cdintrin.h
avx512dqintrin.h
- avx512erintrin.h
avx512fintrin.h
avx512fp16intrin.h
avx512ifmaintrin.h
avx512ifmavlintrin.h
- avx512pfintrin.h
avx512vbmi2intrin.h
avx512vbmiintrin.h
avx512vbmivlintrin.h
diff --git a/clang/lib/Headers/avx512erintrin.h b/clang/lib/Headers/avx512erintrin.h
deleted file mode 100644
index 1c5a2d2d208ff..0000000000000
--- a/clang/lib/Headers/avx512erintrin.h
+++ /dev/null
@@ -1,271 +0,0 @@
-/*===---- avx512erintrin.h - AVX512ER intrinsics ---------------------------===
- *
- * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
- * See https://llvm.org/LICENSE.txt for license information.
- * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- *
- *===-----------------------------------------------------------------------===
- */
-#ifndef __IMMINTRIN_H
-#error "Never use <avx512erintrin.h> directly; include <immintrin.h> instead."
-#endif
-
-#ifndef __AVX512ERINTRIN_H
-#define __AVX512ERINTRIN_H
-
-/* exp2a23 */
-#define _mm512_exp2a23_round_pd(A, R) \
- ((__m512d)__builtin_ia32_exp2pd_mask((__v8df)(__m512d)(A), \
- (__v8df)_mm512_setzero_pd(), \
- (__mmask8)-1, (int)(R)))
-
-#define _mm512_mask_exp2a23_round_pd(S, M, A, R) \
- ((__m512d)__builtin_ia32_exp2pd_mask((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(S), (__mmask8)(M), \
- (int)(R)))
-
-#define _mm512_maskz_exp2a23_round_pd(M, A, R) \
- ((__m512d)__builtin_ia32_exp2pd_mask((__v8df)(__m512d)(A), \
- (__v8df)_mm512_setzero_pd(), \
- (__mmask8)(M), (int)(R)))
-
-#define _mm512_exp2a23_pd(A) \
- _mm512_exp2a23_round_pd((A), _MM_FROUND_CUR_DIRECTION)
-
-#define _mm512_mask_exp2a23_pd(S, M, A) \
- _mm512_mask_exp2a23_round_pd((S), (M), (A), _MM_FROUND_CUR_DIRECTION)
-
-#define _mm512_maskz_exp2a23_pd(M, A) \
- _mm512_maskz_exp2a23_round_pd((M), (A), _MM_FROUND_CUR_DIRECTION)
-
-#define _mm512_exp2a23_round_ps(A, R) \
- ((__m512)__builtin_ia32_exp2ps_mask((__v16sf)(__m512)(A), \
- (__v16sf)_mm512_setzero_ps(), \
- (__mmask16)-1, (int)(R)))
-
-#define _mm512_mask_exp2a23_round_ps(S, M, A, R) \
- ((__m512)__builtin_ia32_exp2ps_mask((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(S), (__mmask16)(M), \
- (int)(R)))
-
-#define _mm512_maskz_exp2a23_round_ps(M, A, R) \
- ((__m512)__builtin_ia32_exp2ps_mask((__v16sf)(__m512)(A), \
- (__v16sf)_mm512_setzero_ps(), \
- (__mmask16)(M), (int)(R)))
-
-#define _mm512_exp2a23_ps(A) \
- _mm512_exp2a23_round_ps((A), _MM_FROUND_CUR_DIRECTION)
-
-#define _mm512_mask_exp2a23_ps(S, M, A) \
- _mm512_mask_exp2a23_round_ps((S), (M), (A), _MM_FROUND_CUR_DIRECTION)
-
-#define _mm512_maskz_exp2a23_ps(M, A) \
- _mm512_maskz_exp2a23_round_ps((M), (A), _MM_FROUND_CUR_DIRECTION)
-
-/* rsqrt28 */
-#define _mm512_rsqrt28_round_pd(A, R) \
- ((__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)(__m512d)(A), \
- (__v8df)_mm512_setzero_pd(), \
- (__mmask8)-1, (int)(R)))
-
-#define _mm512_mask_rsqrt28_round_pd(S, M, A, R) \
- ((__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(S), (__mmask8)(M), \
- (int)(R)))
-
-#define _mm512_maskz_rsqrt28_round_pd(M, A, R) \
- ((__m512d)__builtin_ia32_rsqrt28pd_mask((__v8df)(__m512d)(A), \
- (__v8df)_mm512_setzero_pd(), \
- (__mmask8)(M), (int)(R)))
-
-#define _mm512_rsqrt28_pd(A) \
- _mm512_rsqrt28_round_pd((A), _MM_FROUND_CUR_DIRECTION)
-
-#define _mm512_mask_rsqrt28_pd(S, M, A) \
- _mm512_mask_rsqrt28_round_pd((S), (M), (A), _MM_FROUND_CUR_DIRECTION)
-
-#define _mm512_maskz_rsqrt28_pd(M, A) \
- _mm512_maskz_rsqrt28_round_pd((M), (A), _MM_FROUND_CUR_DIRECTION)
-
-#define _mm512_rsqrt28_round_ps(A, R) \
- ((__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)(__m512)(A), \
- (__v16sf)_mm512_setzero_ps(), \
- (__mmask16)-1, (int)(R)))
-
-#define _mm512_mask_rsqrt28_round_ps(S, M, A, R) \
- ((__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(S), (__mmask16)(M), \
- (int)(R)))
-
-#define _mm512_maskz_rsqrt28_round_ps(M, A, R) \
- ((__m512)__builtin_ia32_rsqrt28ps_mask((__v16sf)(__m512)(A), \
- (__v16sf)_mm512_setzero_ps(), \
- (__mmask16)(M), (int)(R)))
-
-#define _mm512_rsqrt28_ps(A) \
- _mm512_rsqrt28_round_ps((A), _MM_FROUND_CUR_DIRECTION)
-
-#define _mm512_mask_rsqrt28_ps(S, M, A) \
- _mm512_mask_rsqrt28_round_ps((S), (M), A, _MM_FROUND_CUR_DIRECTION)
-
-#define _mm512_maskz_rsqrt28_ps(M, A) \
- _mm512_maskz_rsqrt28_round_ps((M), (A), _MM_FROUND_CUR_DIRECTION)
-
-#define _mm_rsqrt28_round_ss(A, B, R) \
- ((__m128)__builtin_ia32_rsqrt28ss_round_mask((__v4sf)(__m128)(A), \
- (__v4sf)(__m128)(B), \
- (__v4sf)_mm_setzero_ps(), \
- (__mmask8)-1, (int)(R)))
-
-#define _mm_mask_rsqrt28_round_ss(S, M, A, B, R) \
- ((__m128)__builtin_ia32_rsqrt28ss_round_mask((__v4sf)(__m128)(A), \
- (__v4sf)(__m128)(B), \
- (__v4sf)(__m128)(S), \
- (__mmask8)(M), (int)(R)))
-
-#define _mm_maskz_rsqrt28_round_ss(M, A, B, R) \
- ((__m128)__builtin_ia32_rsqrt28ss_round_mask((__v4sf)(__m128)(A), \
- (__v4sf)(__m128)(B), \
- (__v4sf)_mm_setzero_ps(), \
- (__mmask8)(M), (int)(R)))
-
-#define _mm_rsqrt28_ss(A, B) \
- _mm_rsqrt28_round_ss((A), (B), _MM_FROUND_CUR_DIRECTION)
-
-#define _mm_mask_rsqrt28_ss(S, M, A, B) \
- _mm_mask_rsqrt28_round_ss((S), (M), (A), (B), _MM_FROUND_CUR_DIRECTION)
-
-#define _mm_maskz_rsqrt28_ss(M, A, B) \
- _mm_maskz_rsqrt28_round_ss((M), (A), (B), _MM_FROUND_CUR_DIRECTION)
-
-#define _mm_rsqrt28_round_sd(A, B, R) \
- ((__m128d)__builtin_ia32_rsqrt28sd_round_mask((__v2df)(__m128d)(A), \
- (__v2df)(__m128d)(B), \
- (__v2df)_mm_setzero_pd(), \
- (__mmask8)-1, (int)(R)))
-
-#define _mm_mask_rsqrt28_round_sd(S, M, A, B, R) \
- ((__m128d)__builtin_ia32_rsqrt28sd_round_mask((__v2df)(__m128d)(A), \
- (__v2df)(__m128d)(B), \
- (__v2df)(__m128d)(S), \
- (__mmask8)(M), (int)(R)))
-
-#define _mm_maskz_rsqrt28_round_sd(M, A, B, R) \
- ((__m128d)__builtin_ia32_rsqrt28sd_round_mask((__v2df)(__m128d)(A), \
- (__v2df)(__m128d)(B), \
- (__v2df)_mm_setzero_pd(), \
- (__mmask8)(M), (int)(R)))
-
-#define _mm_rsqrt28_sd(A, B) \
- _mm_rsqrt28_round_sd((A), (B), _MM_FROUND_CUR_DIRECTION)
-
-#define _mm_mask_rsqrt28_sd(S, M, A, B) \
- _mm_mask_rsqrt28_round_sd((S), (M), (A), (B), _MM_FROUND_CUR_DIRECTION)
-
-#define _mm_maskz_rsqrt28_sd(M, A, B) \
- _mm_maskz_rsqrt28_round_sd((M), (A), (B), _MM_FROUND_CUR_DIRECTION)
-
-/* rcp28 */
-#define _mm512_rcp28_round_pd(A, R) \
- ((__m512d)__builtin_ia32_rcp28pd_mask((__v8df)(__m512d)(A), \
- (__v8df)_mm512_setzero_pd(), \
- (__mmask8)-1, (int)(R)))
-
-#define _mm512_mask_rcp28_round_pd(S, M, A, R) \
- ((__m512d)__builtin_ia32_rcp28pd_mask((__v8df)(__m512d)(A), \
- (__v8df)(__m512d)(S), (__mmask8)(M), \
- (int)(R)))
-
-#define _mm512_maskz_rcp28_round_pd(M, A, R) \
- ((__m512d)__builtin_ia32_rcp28pd_mask((__v8df)(__m512d)(A), \
- (__v8df)_mm512_setzero_pd(), \
- (__mmask8)(M), (int)(R)))
-
-#define _mm512_rcp28_pd(A) \
- _mm512_rcp28_round_pd((A), _MM_FROUND_CUR_DIRECTION)
-
-#define _mm512_mask_rcp28_pd(S, M, A) \
- _mm512_mask_rcp28_round_pd((S), (M), (A), _MM_FROUND_CUR_DIRECTION)
-
-#define _mm512_maskz_rcp28_pd(M, A) \
- _mm512_maskz_rcp28_round_pd((M), (A), _MM_FROUND_CUR_DIRECTION)
-
-#define _mm512_rcp28_round_ps(A, R) \
- ((__m512)__builtin_ia32_rcp28ps_mask((__v16sf)(__m512)(A), \
- (__v16sf)_mm512_setzero_ps(), \
- (__mmask16)-1, (int)(R)))
-
-#define _mm512_mask_rcp28_round_ps(S, M, A, R) \
- ((__m512)__builtin_ia32_rcp28ps_mask((__v16sf)(__m512)(A), \
- (__v16sf)(__m512)(S), (__mmask16)(M), \
- (int)(R)))
-
-#define _mm512_maskz_rcp28_round_ps(M, A, R) \
- ((__m512)__builtin_ia32_rcp28ps_mask((__v16sf)(__m512)(A), \
- (__v16sf)_mm512_setzero_ps(), \
- (__mmask16)(M), (int)(R)))
-
-#define _mm512_rcp28_ps(A) \
- _mm512_rcp28_round_ps((A), _MM_FROUND_CUR_DIRECTION)
-
-#define _mm512_mask_rcp28_ps(S, M, A) \
- _mm512_mask_rcp28_round_ps((S), (M), (A), _MM_FROUND_CUR_DIRECTION)
-
-#define _mm512_maskz_rcp28_ps(M, A) \
- _mm512_maskz_rcp28_round_ps((M), (A), _MM_FROUND_CUR_DIRECTION)
-
-#define _mm_rcp28_round_ss(A, B, R) \
- ((__m128)__builtin_ia32_rcp28ss_round_mask((__v4sf)(__m128)(A), \
- (__v4sf)(__m128)(B), \...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/92883
More information about the llvm-commits
mailing list