[clang] [llvm] [X86][AVX10.2] Support AVX10.2-SATCVT new instructions. (PR #101599)
via cfe-commits
cfe-commits at lists.llvm.org
Sun Aug 4 20:30:23 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-clang
@llvm/pr-subscribers-llvm-ir
Author: Freddy Ye (FreddyLeaf)
<details>
<summary>Changes</summary>
Ref.: https://cdrdv2.intel.com/v1/dl/getContent/828965
---
Patch is 611.58 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/101599.diff
25 Files Affected:
- (modified) clang/include/clang/Basic/BuiltinsX86.def (+38)
- (modified) clang/lib/Headers/CMakeLists.txt (+2)
- (added) clang/lib/Headers/avx10_2_512satcvtintrin.h (+327)
- (added) clang/lib/Headers/avx10_2satcvtintrin.h (+448)
- (modified) clang/lib/Headers/immintrin.h (+2)
- (modified) clang/lib/Sema/SemaX86.cpp (+16)
- (added) clang/test/CodeGen/X86/avx10_2_512satcvt-builtins-error.c (+198)
- (added) clang/test/CodeGen/X86/avx10_2_512satcvt-builtins.c (+379)
- (added) clang/test/CodeGen/X86/avx10_2satcvt-builtins.c (+603)
- (modified) llvm/include/llvm/IR/IntrinsicsX86.td (+112)
- (modified) llvm/lib/Target/X86/X86ISelLowering.cpp (+20)
- (modified) llvm/lib/Target/X86/X86ISelLowering.h (+21)
- (modified) llvm/lib/Target/X86/X86InstrAVX10.td (+170)
- (modified) llvm/lib/Target/X86/X86InstrFragmentsSIMD.td (+82)
- (modified) llvm/lib/Target/X86/X86InstrUtils.td (+3-3)
- (modified) llvm/lib/Target/X86/X86IntrinsicsInfo.h (+72)
- (added) llvm/test/CodeGen/X86/avx10_2_512satcvt-intrinsics.ll (+1003)
- (added) llvm/test/CodeGen/X86/avx10_2satcvt-intrinsics.ll (+1618)
- (added) llvm/test/MC/Disassembler/X86/avx10.2-satcvt-32.txt (+1363)
- (added) llvm/test/MC/Disassembler/X86/avx10.2-satcvt-64.txt (+1363)
- (added) llvm/test/MC/X86/avx10.2satcvt-32-att.s (+1362)
- (added) llvm/test/MC/X86/avx10.2satcvt-32-intel.s (+1362)
- (added) llvm/test/MC/X86/avx10.2satcvt-64-att.s (+1362)
- (added) llvm/test/MC/X86/avx10.2satcvt-64-intel.s (+1362)
- (modified) llvm/test/TableGen/x86-fold-tables.inc (+216)
``````````diff
diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def
index c49b5c36da4fc..fb55057b8cbc3 100644
--- a/clang/include/clang/Basic/BuiltinsX86.def
+++ b/clang/include/clang/Basic/BuiltinsX86.def
@@ -2158,6 +2158,44 @@ TARGET_BUILTIN(__builtin_ia32_vminmaxps512_round_mask, "V16fV16fV16fIiV16fUsIi",
TARGET_BUILTIN(__builtin_ia32_vminmaxsd_round_mask, "V2dV2dV2dIiV2dUcIi", "nV:128:", "avx10.2-256")
TARGET_BUILTIN(__builtin_ia32_vminmaxsh_round_mask, "V8xV8xV8xIiV8xUcIi", "nV:128:", "avx10.2-256")
TARGET_BUILTIN(__builtin_ia32_vminmaxss_round_mask, "V4fV4fV4fIiV4fUcIi", "nV:128:", "avx10.2-256")
+
+// AVX10.2 SATCVT
+TARGET_BUILTIN(__builtin_ia32_vcvtnebf162ibs128, "V8UsV8y", "nV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtnebf162ibs256, "V16UsV16y", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtnebf162ibs512, "V32UsV32y", "nV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vcvtnebf162iubs128, "V8UsV8y", "nV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtnebf162iubs256, "V16UsV16y", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtnebf162iubs512, "V32UsV32y", "nV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2ibs128_mask, "V8UsV8xV8UsUc", "nV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2ibs256_mask, "V16UsV16xV16UsUsIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2ibs512_mask, "V32UsV32xV32UsUiIi", "nV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2iubs128_mask, "V8UsV8xV8UsUc", "nV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2iubs256_mask, "V16UsV16xV16UsUsIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2iubs512_mask, "V32UsV32xV32UsUiIi", "nV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vcvtps2ibs128_mask, "V4UiV4fV4UiUc", "nV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtps2ibs256_mask, "V8UiV8fV8UiUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtps2ibs512_mask, "V16UiV16fV16UiUsIi", "nV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vcvtps2iubs128_mask, "V4UiV4fV4UiUc", "nV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtps2iubs256_mask, "V8UiV8fV8UiUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtps2iubs512_mask, "V16UiV16fV16UiUsIi", "nV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vcvttnebf162ibs128, "V8UsV8y", "nV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttnebf162ibs256, "V16UsV16y", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttnebf162ibs512, "V32UsV32y", "nV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vcvttnebf162iubs128, "V8UsV8y", "nV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttnebf162iubs256, "V16UsV16y", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttnebf162iubs512, "V32UsV32y", "nV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vcvttph2ibs128_mask, "V8UsV8xV8UsUc", "nV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttph2ibs256_mask, "V16UsV16xV16UsUsIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttph2ibs512_mask, "V32UsV32xV32UsUiIi", "nV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vcvttph2iubs128_mask, "V8UsV8xV8UsUc", "nV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttph2iubs256_mask, "V16UsV16xV16UsUsIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttph2iubs512_mask, "V32UsV32xV32UsUiIi", "nV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vcvttps2ibs128_mask, "V4UiV4fV4UiUc", "nV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttps2ibs256_mask, "V8UiV8fV8UiUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttps2ibs512_mask, "V16UiV16fV16UiUsIi", "nV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vcvttps2iubs128_mask, "V4UiV4fV4UiUc", "nV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttps2iubs256_mask, "V8UiV8fV8UiUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttps2iubs512_mask, "V16UiV16fV16UiUsIi", "nV:512:", "avx10.2-512")
#undef BUILTIN
#undef TARGET_BUILTIN
#undef TARGET_HEADER_BUILTIN
diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt
index f3d19e38f8f2b..91e106427ba1d 100644
--- a/clang/lib/Headers/CMakeLists.txt
+++ b/clang/lib/Headers/CMakeLists.txt
@@ -149,7 +149,9 @@ set(x86_files
amxintrin.h
avx10_2_512minmaxintrin.h
avx10_2_512niintrin.h
+ avx10_2_512satcvtintrin.h
avx10_2minmaxintrin.h
avx10_2niintrin.h
+ avx10_2satcvtintrin.h
avx2intrin.h
avx512bf16intrin.h
diff --git a/clang/lib/Headers/avx10_2_512satcvtintrin.h b/clang/lib/Headers/avx10_2_512satcvtintrin.h
new file mode 100644
index 0000000000000..0ea645bee22f9
--- /dev/null
+++ b/clang/lib/Headers/avx10_2_512satcvtintrin.h
@@ -0,0 +1,327 @@
+/*===------ avx10_2_512satcvtintrin.h - AVX10_2_512SATCVT intrinsics -------===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===-----------------------------------------------------------------------===
+ */
+#ifndef __IMMINTRIN_H
+#error \
+ "Never use <avx10_2_512satcvtintrin.h> directly; include <immintrin.h> instead."
+#endif // __IMMINTRIN_H
+
+#ifndef __AVX10_2_512SATCVTINTRIN_H
+#define __AVX10_2_512SATCVTINTRIN_H
+
+/* Packed BF16 conversions built on __builtin_ia32_vcvtnebf162ibs512 (epi8)
+ * and __builtin_ia32_vcvtnebf162iubs512 (epu8). Each builtin receives A
+ * viewed as __v32bf, and its result is returned as __m512i.
+ *
+ * The _mask_ and _maskz_ forms wrap the unmasked macro in
+ * __builtin_ia32_selectw_512, keyed by U, with W (mask) or an all-zero
+ * vector (maskz) as the remaining operand. */
+#define _mm512_ipcvtnebf16_epi8(A) \
+  ((__m512i)__builtin_ia32_vcvtnebf162ibs512( \
+      (__v32bf)(__m512bh)(A)))
+
+#define _mm512_mask_ipcvtnebf16_epi8(W, U, A) \
+  ((__m512i)__builtin_ia32_selectw_512( \
+      (__mmask32)(U), \
+      (__v32hi)_mm512_ipcvtnebf16_epi8(A), \
+      (__v32hi)(__m512i)(W)))
+
+#define _mm512_maskz_ipcvtnebf16_epi8(U, A) \
+  ((__m512i)__builtin_ia32_selectw_512( \
+      (__mmask32)(U), \
+      (__v32hi)_mm512_ipcvtnebf16_epi8(A), \
+      (__v32hi)_mm512_setzero_si512()))
+
+#define _mm512_ipcvtnebf16_epu8(A) \
+  ((__m512i)__builtin_ia32_vcvtnebf162iubs512( \
+      (__v32bf)(__m512bh)(A)))
+
+#define _mm512_mask_ipcvtnebf16_epu8(W, U, A) \
+  ((__m512i)__builtin_ia32_selectw_512( \
+      (__mmask32)(U), \
+      (__v32hi)_mm512_ipcvtnebf16_epu8(A), \
+      (__v32hi)(__m512i)(W)))
+
+#define _mm512_maskz_ipcvtnebf16_epu8(U, A) \
+  ((__m512i)__builtin_ia32_selectw_512( \
+      (__mmask32)(U), \
+      (__v32hi)_mm512_ipcvtnebf16_epu8(A), \
+      (__v32hi)_mm512_setzero_si512()))
+
+/* Packed BF16 conversions built on __builtin_ia32_vcvttnebf162ibs512 (epi8)
+ * and __builtin_ia32_vcvttnebf162iubs512 (epu8). Each builtin receives A
+ * viewed as __v32bf, and its result is returned as __m512i.
+ *
+ * The _mask_ and _maskz_ forms wrap the unmasked macro in
+ * __builtin_ia32_selectw_512, keyed by U, with W (mask) or an all-zero
+ * vector (maskz) as the remaining operand. */
+#define _mm512_ipcvttnebf16_epi8(A) \
+  ((__m512i)__builtin_ia32_vcvttnebf162ibs512( \
+      (__v32bf)(__m512bh)(A)))
+
+#define _mm512_mask_ipcvttnebf16_epi8(W, U, A) \
+  ((__m512i)__builtin_ia32_selectw_512( \
+      (__mmask32)(U), \
+      (__v32hi)_mm512_ipcvttnebf16_epi8(A), \
+      (__v32hi)(__m512i)(W)))
+
+#define _mm512_maskz_ipcvttnebf16_epi8(U, A) \
+  ((__m512i)__builtin_ia32_selectw_512( \
+      (__mmask32)(U), \
+      (__v32hi)_mm512_ipcvttnebf16_epi8(A), \
+      (__v32hi)_mm512_setzero_si512()))
+
+#define _mm512_ipcvttnebf16_epu8(A) \
+  ((__m512i)__builtin_ia32_vcvttnebf162iubs512( \
+      (__v32bf)(__m512bh)(A)))
+
+#define _mm512_mask_ipcvttnebf16_epu8(W, U, A) \
+  ((__m512i)__builtin_ia32_selectw_512( \
+      (__mmask32)(U), \
+      (__v32hi)_mm512_ipcvttnebf16_epu8(A), \
+      (__v32hi)(__m512i)(W)))
+
+#define _mm512_maskz_ipcvttnebf16_epu8(U, A) \
+  ((__m512i)__builtin_ia32_selectw_512( \
+      (__mmask32)(U), \
+      (__v32hi)_mm512_ipcvttnebf16_epu8(A), \
+      (__v32hi)_mm512_setzero_si512()))
+
+/* FP16 conversions through __builtin_ia32_vcvtph2ibs512_mask.
+ *
+ * Builtin operands, in order: A viewed as __v32hf, a __v32hu vector (W, or
+ * all zeros when no W is given), a __mmask32 (U, or all ones for the
+ * unmasked forms), and an int rounding operand. The plain forms pass
+ * _MM_FROUND_CUR_DIRECTION; the _round forms forward R.
+ *
+ * R is parenthesized before the cast so that an argument written as an
+ * expression (e.g. two flags OR-ed together) is cast as a whole. */
+#define _mm512_ipcvtph_epi8(A) \
+  ((__m512i)__builtin_ia32_vcvtph2ibs512_mask( \
+      (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)-1, \
+      _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_mask_ipcvtph_epi8(W, U, A) \
+  ((__m512i)__builtin_ia32_vcvtph2ibs512_mask( \
+      (__v32hf)(__m512h)(A), (__v32hu)(W), (__mmask32)(U), \
+      _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_maskz_ipcvtph_epi8(U, A) \
+  ((__m512i)__builtin_ia32_vcvtph2ibs512_mask( \
+      (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)(U), \
+      _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_ipcvt_roundph_epi8(A, R) \
+  ((__m512i)__builtin_ia32_vcvtph2ibs512_mask( \
+      (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)-1, \
+      (const int)(R)))
+
+#define _mm512_mask_ipcvt_roundph_epi8(W, U, A, R) \
+  ((__m512i)__builtin_ia32_vcvtph2ibs512_mask( \
+      (__v32hf)(__m512h)(A), (__v32hu)(W), (__mmask32)(U), (const int)(R)))
+
+#define _mm512_maskz_ipcvt_roundph_epi8(U, A, R) \
+  ((__m512i)__builtin_ia32_vcvtph2ibs512_mask( \
+      (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)(U), \
+      (const int)(R)))
+
+/* FP16 conversions through __builtin_ia32_vcvtph2iubs512_mask.
+ *
+ * Builtin operands, in order: A viewed as __v32hf, a __v32hu vector (W, or
+ * all zeros when no W is given), a __mmask32 (U, or all ones for the
+ * unmasked forms), and an int rounding operand. The plain forms pass
+ * _MM_FROUND_CUR_DIRECTION; the _round forms forward R.
+ *
+ * R is parenthesized before the cast so that an argument written as an
+ * expression (e.g. two flags OR-ed together) is cast as a whole. */
+#define _mm512_ipcvtph_epu8(A) \
+  ((__m512i)__builtin_ia32_vcvtph2iubs512_mask( \
+      (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)-1, \
+      _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_mask_ipcvtph_epu8(W, U, A) \
+  ((__m512i)__builtin_ia32_vcvtph2iubs512_mask( \
+      (__v32hf)(__m512h)(A), (__v32hu)(W), (__mmask32)(U), \
+      _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_maskz_ipcvtph_epu8(U, A) \
+  ((__m512i)__builtin_ia32_vcvtph2iubs512_mask( \
+      (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)(U), \
+      _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_ipcvt_roundph_epu8(A, R) \
+  ((__m512i)__builtin_ia32_vcvtph2iubs512_mask( \
+      (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)-1, \
+      (const int)(R)))
+
+#define _mm512_mask_ipcvt_roundph_epu8(W, U, A, R) \
+  ((__m512i)__builtin_ia32_vcvtph2iubs512_mask( \
+      (__v32hf)(__m512h)(A), (__v32hu)(W), (__mmask32)(U), (const int)(R)))
+
+#define _mm512_maskz_ipcvt_roundph_epu8(U, A, R) \
+  ((__m512i)__builtin_ia32_vcvtph2iubs512_mask( \
+      (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)(U), \
+      (const int)(R)))
+
+/* FP32 conversions through __builtin_ia32_vcvtps2ibs512_mask.
+ *
+ * Builtin operands, in order: A viewed as __v16sf, a __v16su vector (W, or
+ * all zeros when no W is given), a __mmask16 (U, or all ones for the
+ * unmasked forms), and an int rounding operand. The plain forms pass
+ * _MM_FROUND_CUR_DIRECTION; the _round forms forward R.
+ *
+ * R is parenthesized before the cast so that an argument written as an
+ * expression (e.g. two flags OR-ed together) is cast as a whole. */
+#define _mm512_ipcvtps_epi8(A) \
+  ((__m512i)__builtin_ia32_vcvtps2ibs512_mask( \
+      (__v16sf)(__m512)(A), (__v16su)_mm512_setzero_si512(), (__mmask16)-1, \
+      _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_mask_ipcvtps_epi8(W, U, A) \
+  ((__m512i)__builtin_ia32_vcvtps2ibs512_mask( \
+      (__v16sf)(__m512)(A), (__v16su)(W), (__mmask16)(U), \
+      _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_maskz_ipcvtps_epi8(U, A) \
+  ((__m512i)__builtin_ia32_vcvtps2ibs512_mask( \
+      (__v16sf)(__m512)(A), (__v16su)_mm512_setzero_si512(), (__mmask16)(U), \
+      _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_ipcvt_roundps_epi8(A, R) \
+  ((__m512i)__builtin_ia32_vcvtps2ibs512_mask( \
+      (__v16sf)(__m512)(A), (__v16su)_mm512_setzero_si512(), (__mmask16)-1, \
+      (const int)(R)))
+
+#define _mm512_mask_ipcvt_roundps_epi8(W, U, A, R) \
+  ((__m512i)__builtin_ia32_vcvtps2ibs512_mask( \
+      (__v16sf)(__m512)(A), (__v16su)(W), (__mmask16)(U), (const int)(R)))
+
+#define _mm512_maskz_ipcvt_roundps_epi8(U, A, R) \
+  ((__m512i)__builtin_ia32_vcvtps2ibs512_mask( \
+      (__v16sf)(__m512)(A), (__v16su)_mm512_setzero_si512(), (__mmask16)(U), \
+      (const int)(R)))
+
+/* FP32 conversions through __builtin_ia32_vcvtps2iubs512_mask.
+ *
+ * Builtin operands, in order: A viewed as __v16sf, a __v16su vector (W, or
+ * all zeros when no W is given), a __mmask16 (U, or all ones for the
+ * unmasked forms), and an int rounding operand. The plain forms pass
+ * _MM_FROUND_CUR_DIRECTION; the _round forms forward R.
+ *
+ * R is parenthesized before the cast so that an argument written as an
+ * expression (e.g. two flags OR-ed together) is cast as a whole. */
+#define _mm512_ipcvtps_epu8(A) \
+  ((__m512i)__builtin_ia32_vcvtps2iubs512_mask( \
+      (__v16sf)(__m512)(A), (__v16su)_mm512_setzero_si512(), (__mmask16)-1, \
+      _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_mask_ipcvtps_epu8(W, U, A) \
+  ((__m512i)__builtin_ia32_vcvtps2iubs512_mask( \
+      (__v16sf)(__m512)(A), (__v16su)(W), (__mmask16)(U), \
+      _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_maskz_ipcvtps_epu8(U, A) \
+  ((__m512i)__builtin_ia32_vcvtps2iubs512_mask( \
+      (__v16sf)(__m512)(A), (__v16su)_mm512_setzero_si512(), (__mmask16)(U), \
+      _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_ipcvt_roundps_epu8(A, R) \
+  ((__m512i)__builtin_ia32_vcvtps2iubs512_mask( \
+      (__v16sf)(__m512)(A), (__v16su)_mm512_setzero_si512(), (__mmask16)-1, \
+      (const int)(R)))
+
+#define _mm512_mask_ipcvt_roundps_epu8(W, U, A, R) \
+  ((__m512i)__builtin_ia32_vcvtps2iubs512_mask( \
+      (__v16sf)(__m512)(A), (__v16su)(W), (__mmask16)(U), (const int)(R)))
+
+#define _mm512_maskz_ipcvt_roundps_epu8(U, A, R) \
+  ((__m512i)__builtin_ia32_vcvtps2iubs512_mask( \
+      (__v16sf)(__m512)(A), (__v16su)_mm512_setzero_si512(), (__mmask16)(U), \
+      (const int)(R)))
+
+/* _mm512_ipcvttnebf16_epi8 / _mm512_ipcvttnebf16_epu8 and their _mask_ and
+ * _maskz_ forms are defined once, earlier in this header; the verbatim
+ * second copy that used to sit here was redundant and has been dropped. */
+
+/* FP16 conversions through __builtin_ia32_vcvttph2ibs512_mask.
+ *
+ * Builtin operands, in order: A viewed as __v32hf, a __v32hu vector (W, or
+ * all zeros when no W is given), a __mmask32 (U, or all ones for the
+ * unmasked forms), and an int operand. The plain forms pass
+ * _MM_FROUND_CUR_DIRECTION; the _round forms forward S.
+ *
+ * S is parenthesized and cast to int, matching how the non-truncating
+ * _round macros in this header treat R, so that an argument written as an
+ * expression is forwarded as a whole. */
+#define _mm512_ipcvttph_epi8(A) \
+  ((__m512i)__builtin_ia32_vcvttph2ibs512_mask( \
+      (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)-1, \
+      _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_mask_ipcvttph_epi8(W, U, A) \
+  ((__m512i)__builtin_ia32_vcvttph2ibs512_mask( \
+      (__v32hf)(__m512h)(A), (__v32hu)(W), (__mmask32)(U), \
+      _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_maskz_ipcvttph_epi8(U, A) \
+  ((__m512i)__builtin_ia32_vcvttph2ibs512_mask( \
+      (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)(U), \
+      _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_ipcvtt_roundph_epi8(A, S) \
+  ((__m512i)__builtin_ia32_vcvttph2ibs512_mask( \
+      (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)-1, \
+      (const int)(S)))
+
+#define _mm512_mask_ipcvtt_roundph_epi8(W, U, A, S) \
+  ((__m512i)__builtin_ia32_vcvttph2ibs512_mask( \
+      (__v32hf)(__m512h)(A), (__v32hu)(W), (__mmask32)(U), (const int)(S)))
+
+#define _mm512_maskz_ipcvtt_roundph_epi8(U, A, S) \
+  ((__m512i)__builtin_ia32_vcvttph2ibs512_mask( \
+      (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)(U), \
+      (const int)(S)))
+
+/* FP16 conversions through __builtin_ia32_vcvttph2iubs512_mask.
+ *
+ * Builtin operands, in order: A viewed as __v32hf, a __v32hu vector (W, or
+ * all zeros when no W is given), a __mmask32 (U, or all ones for the
+ * unmasked forms), and an int operand. The plain forms pass
+ * _MM_FROUND_CUR_DIRECTION; the _round forms forward S.
+ *
+ * S is parenthesized and cast to int, matching how the non-truncating
+ * _round macros in this header treat R, so that an argument written as an
+ * expression is forwarded as a whole. */
+#define _mm512_ipcvttph_epu8(A) \
+  ((__m512i)__builtin_ia32_vcvttph2iubs512_mask( \
+      (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)-1, \
+      _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_mask_ipcvttph_epu8(W, U, A) \
+  ((__m512i)__builtin_ia32_vcvttph2iubs512_mask( \
+      (__v32hf)(__m512h)(A), (__v32hu)(W), (__mmask32)(U), \
+      _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_maskz_ipcvttph_epu8(U, A) \
+  ((__m512i)__builtin_ia32_vcvttph2iubs512_mask( \
+      (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)(U), \
+      _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_ipcvtt_roundph_epu8(A, S) \
+  ((__m512i)__builtin_ia32_vcvttph2iubs512_mask( \
+      (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)-1, \
+      (const int)(S)))
+
+#define _mm512_mask_ipcvtt_roundph_epu8(W, U, A, S) \
+  ((__m512i)__builtin_ia32_vcvttph2iubs512_mask( \
+      (__v32hf)(__m512h)(A), (__v32hu)(W), (__mmask32)(U), (const int)(S)))
+
+#define _mm512_maskz_ipcvtt_roundph_epu8(U, A, S) \
+  ((__m512i)__builtin_ia32_vcvttph2iubs512_mask( \
+      (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)(U), \
+      (const int)(S)))
+
+/* FP32 conversion through __builtin_ia32_vcvttps2ibs512_mask, unmasked form:
+ * A is viewed as __v16sf, the second operand is an all-zero __v16su vector,
+ * the mask is all ones, and the last operand is _MM_FROUND_CUR_DIRECTION.
+ *
+ * A is cast through __m512 (the float vector type), as in
+ * _mm512_ipcvtps_epi8, rather than through the half-precision __m512h. */
+#define _mm512_ipcvttps_epi8(A) \
+  ((__m512i)__builtin_ia32_vcvttps2ibs512_mask( \
+      (__v16sf)(__m512)(A), (__v16su)_mm512_setzero_si512(), (__mmask16)-1, \
+      _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_mask_ipcvttps_epi8(W, U, A) \
+ ((__m512i)__builtin_ia32_vcvttps2ibs512_mask((__v16sf)(__m512h)(A), \
+ ...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/101599
More information about the cfe-commits mailing list