[clang] [llvm] [X86][AVX10.2] Support AVX10.2-CONVERT new instructions. (PR #101600)

via cfe-commits cfe-commits at lists.llvm.org
Mon Aug 5 20:26:14 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-llvm-ir

Author: Freddy Ye (FreddyLeaf)

<details>
<summary>Changes</summary>

Ref.: https://cdrdv2.intel.com/v1/dl/getContent/828965

---

Patch is 613.01 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/101600.diff


23 Files Affected:

- (modified) clang/include/clang/Basic/BuiltinsX86.def (+47) 
- (modified) clang/lib/Headers/CMakeLists.txt (+2) 
- (added) clang/lib/Headers/avx10_2_512convertintrin.h (+304) 
- (added) clang/lib/Headers/avx10_2convertintrin.h (+560) 
- (modified) clang/lib/Headers/immintrin.h (+2) 
- (modified) clang/lib/Sema/SemaX86.cpp (+2) 
- (added) clang/test/CodeGen/X86/avx10_2_512convert-builtins.c (+274) 
- (added) clang/test/CodeGen/X86/avx10_2convert-builtins.c (+530) 
- (modified) llvm/include/llvm/IR/IntrinsicsX86.td (+130) 
- (modified) llvm/lib/Target/X86/X86ISelLowering.cpp (+38) 
- (modified) llvm/lib/Target/X86/X86ISelLowering.h (+24) 
- (modified) llvm/lib/Target/X86/X86InstrAVX10.td (+431) 
- (modified) llvm/lib/Target/X86/X86InstrFragmentsSIMD.td (+89) 
- (modified) llvm/lib/Target/X86/X86IntrinsicsInfo.h (+85) 
- (added) llvm/test/CodeGen/X86/avx10_2_512convert-intrinsics.ll (+578) 
- (added) llvm/test/CodeGen/X86/avx10_2convert-intrinsics.ll (+1147) 
- (added) llvm/test/MC/Disassembler/X86/avx10.2convert-32.txt (+1491) 
- (added) llvm/test/MC/Disassembler/X86/avx10.2convert-64.txt (+1491) 
- (added) llvm/test/MC/X86/avx10.2convert-32-att.s (+1490) 
- (added) llvm/test/MC/X86/avx10.2convert-32-intel.s (+1490) 
- (added) llvm/test/MC/X86/avx10.2convert-64-att.s (+1490) 
- (added) llvm/test/MC/X86/avx10.2convert-64-intel.s (+1490) 
- (modified) llvm/test/TableGen/x86-fold-tables.inc (+243) 


``````````diff
diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def
index c49b5c36da4fc..1deac6247a361 100644
--- a/clang/include/clang/Basic/BuiltinsX86.def
+++ b/clang/include/clang/Basic/BuiltinsX86.def
@@ -2158,6 +2158,53 @@ TARGET_BUILTIN(__builtin_ia32_vminmaxps512_round_mask, "V16fV16fV16fIiV16fUsIi",
 TARGET_BUILTIN(__builtin_ia32_vminmaxsd_round_mask, "V2dV2dV2dIiV2dUcIi", "nV:128:", "avx10.2-256")
 TARGET_BUILTIN(__builtin_ia32_vminmaxsh_round_mask, "V8xV8xV8xIiV8xUcIi", "nV:128:", "avx10.2-256")
 TARGET_BUILTIN(__builtin_ia32_vminmaxss_round_mask, "V4fV4fV4fIiV4fUcIi", "nV:128:", "avx10.2-256")
+
+// AVX10.2 CONVERT
+TARGET_BUILTIN(__builtin_ia32_vcvt2ps2phx128_mask, "V8xV4fV4fV8xUc", "ncV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvt2ps2phx256_mask, "V16xV8fV8fV16xUsIi", "ncV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvt2ps2phx512_mask, "V32xV16fV16fV32xUiIi", "ncV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vcvtbiasph2bf8_128_mask, "V16cV16cV8xV16cUc", "nV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtbiasph2bf8_256_mask, "V16cV32cV16xV16cUs", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtbiasph2bf8_512_mask, "V32cV64cV32xV32cUi", "nV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vcvtbiasph2bf8s_128_mask, "V16cV16cV8xV16cUc", "nV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtbiasph2bf8s_256_mask, "V16cV32cV16xV16cUs", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtbiasph2bf8s_512_mask, "V32cV64cV32xV32cUi", "nV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vcvtbiasph2hf8_128_mask, "V16cV16cV8xV16cUc", "nV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtbiasph2hf8_256_mask, "V16cV32cV16xV16cUs", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtbiasph2hf8_512_mask, "V32cV64cV32xV32cUi", "nV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vcvtbiasph2hf8s_128_mask, "V16cV16cV8xV16cUc", "nV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtbiasph2hf8s_256_mask, "V16cV32cV16xV16cUs", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtbiasph2hf8s_512_mask, "V32cV64cV32xV32cUi", "nV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vcvtne2ph2bf8_128, "V16cV8xV8x", "nV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtne2ph2bf8_256, "V32cV16xV16x", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtne2ph2bf8_512, "V64cV32xV32x", "nV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vcvtne2ph2bf8s_128, "V16cV8xV8x", "nV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtne2ph2bf8s_256, "V32cV16xV16x", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtne2ph2bf8s_512, "V64cV32xV32x", "nV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vcvtne2ph2hf8_128, "V16cV8xV8x", "nV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtne2ph2hf8_256, "V32cV16xV16x", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtne2ph2hf8_512, "V64cV32xV32x", "nV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vcvtne2ph2hf8s_128, "V16cV8xV8x", "nV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtne2ph2hf8s_256, "V32cV16xV16x", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtne2ph2hf8s_512, "V64cV32xV32x", "nV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vcvtnebf8_2ph128_mask, "V8xV16cV8xUc", "nV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtnebf8_2ph256_mask, "V16xV16cV16xUs", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtnebf8_2ph512_mask, "V32xV32cV32xUi", "nV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vcvtnehf8_2ph128_mask, "V8xV16cV8xUc", "nV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtnehf8_2ph256_mask, "V16xV16cV16xUs", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtnehf8_2ph512_mask, "V32xV32cV32xUi", "nV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vcvtneph2bf8_128_mask, "V16cV8xV16cUc", "nV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtneph2bf8_256_mask, "V16cV16xV16cUs", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtneph2bf8_512_mask, "V32cV32xV32cUi", "nV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vcvtneph2bf8s_128_mask, "V16cV8xV16cUc", "nV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtneph2bf8s_256_mask, "V16cV16xV16cUs", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtneph2bf8s_512_mask, "V32cV32xV32cUi", "nV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vcvtneph2hf8_128_mask, "V16cV8xV16cUc", "nV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtneph2hf8_256_mask, "V16cV16xV16cUs", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtneph2hf8_512_mask, "V32cV32xV32cUi", "nV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vcvtneph2hf8s_128_mask, "V16cV8xV16cUc", "nV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtneph2hf8s_256_mask, "V16cV16xV16cUs", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtneph2hf8s_512_mask, "V32cV32xV32cUi", "nV:512:", "avx10.2-512")
 #undef BUILTIN
 #undef TARGET_BUILTIN
 #undef TARGET_HEADER_BUILTIN
diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt
index f3d19e38f8f2b..2eb550d07afaa 100644
--- a/clang/lib/Headers/CMakeLists.txt
+++ b/clang/lib/Headers/CMakeLists.txt
@@ -147,8 +147,10 @@ set(x86_files
   amxcomplexintrin.h
   amxfp16intrin.h
   amxintrin.h
+  avx10_2_512convertintrin.h
   avx10_2_512minmaxintrin.h
   avx10_2_512niintrin.h
+  avx10_2convertintrin.h
   avx10_2minmaxintrin.h
   avx10_2niintrin.h
   avx2intrin.h
diff --git a/clang/lib/Headers/avx10_2_512convertintrin.h b/clang/lib/Headers/avx10_2_512convertintrin.h
new file mode 100644
index 0000000000000..c56c917772b32
--- /dev/null
+++ b/clang/lib/Headers/avx10_2_512convertintrin.h
@@ -0,0 +1,304 @@
+/*===--------- avx10_2_512convertintrin.h - AVX10_2_512CONVERT -------------===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===-----------------------------------------------------------------------===
+ */
+#ifndef __IMMINTRIN_H
+#error                                                                         \
+    "Never use <avx10_2_512convertintrin.h> directly; include <immintrin.h> instead."
+#endif // __IMMINTRIN_H
+
+#ifdef __SSE2__
+
+#ifndef __AVX10_2_512CONVERTINTRIN_H
+#define __AVX10_2_512CONVERTINTRIN_H
+
+/* Define the default attributes for the functions in this file. */
+#define __DEFAULT_FN_ATTRS512                                                  \
+  __attribute__((__always_inline__, __nodebug__, __target__("avx10.2-512"),    \
+                 __min_vector_width__(512)))
+
+static __inline__ __m512h __DEFAULT_FN_ATTRS512 _mm512_cvtx2ps_ph(__m512 __A,
+                                                                  __m512 __B) {
+  return (__m512h)__builtin_ia32_vcvt2ps2phx512_mask(
+      (__v16sf)__A, (__v16sf)__B, (__v32hf)_mm512_setzero_ph(), (__mmask32)(-1),
+      _MM_FROUND_CUR_DIRECTION);
+}
+
+static __inline__ __m512h __DEFAULT_FN_ATTRS512
+_mm512_mask_cvtx2ps_ph(__m512h __W, __mmask32 __U, __m512 __A, __m512 __B) {
+  return (__m512h)__builtin_ia32_vcvt2ps2phx512_mask(
+      (__v16sf)__A, (__v16sf)__B, (__v32hf)__W, (__mmask32)__U,
+      _MM_FROUND_CUR_DIRECTION);
+}
+
+#define _mm512_cvtx_round2ps_ph(A, B, R)                                       \
+  ((__m512h)__builtin_ia32_vcvt2ps2phx512_mask(                                \
+      (__v16sf)(A), (__v16sf)(B), (__v32hf)_mm512_undefined_ph(),              \
+      (__mmask32)(-1), (const int)(R)))
+
+#define _mm512_mask_cvtx_round2ps_ph(W, U, A, B, R)                            \
+  ((__m512h)__builtin_ia32_vcvt2ps2phx512_mask((__v16sf)(A), (__v16sf)(B),     \
+                                               (__v32hf)(W), (__mmask32)(U),   \
+                                               (const int)(R)))
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS512
+_mm512_cvtbiasph_pbf8(__m512i __A, __m512h __B) {
+  return (__m256i)__builtin_ia32_vcvtbiasph2bf8_512_mask(
+      (__v64qi)__A, (__v32hf)__B, (__v32qi)_mm256_undefined_si256(),
+      (__mmask32)-1);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtbiasph_pbf8(
+    __m256i __W, __mmask32 __U, __m512i __A, __m512h __B) {
+  return (__m256i)__builtin_ia32_vcvtbiasph2bf8_512_mask(
+      (__v64qi)__A, (__v32hf)__B, (__v32qi)(__m256i)__W, (__mmask32)__U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS512
+_mm512_maskz_cvtbiasph_pbf8(__mmask32 __U, __m512i __A, __m512h __B) {
+  return (__m256i)__builtin_ia32_vcvtbiasph2bf8_512_mask(
+      (__v64qi)__A, (__v32hf)__B, (__v32qi)(__m256i)_mm256_setzero_si256(),
+      (__mmask32)__U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS512
+_mm512_cvtbiassph_pbf8(__m512i __A, __m512h __B) {
+  return (__m256i)__builtin_ia32_vcvtbiasph2bf8s_512_mask(
+      (__v64qi)__A, (__v32hf)__B, (__v32qi)_mm256_undefined_si256(),
+      (__mmask32)-1);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtbiassph_pbf8(
+    __m256i __W, __mmask32 __U, __m512i __A, __m512h __B) {
+  return (__m256i)__builtin_ia32_vcvtbiasph2bf8s_512_mask(
+      (__v64qi)__A, (__v32hf)__B, (__v32qi)(__m256i)__W, (__mmask32)__U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS512
+_mm512_maskz_cvtbiassph_pbf8(__mmask32 __U, __m512i __A, __m512h __B) {
+  return (__m256i)__builtin_ia32_vcvtbiasph2bf8s_512_mask(
+      (__v64qi)__A, (__v32hf)__B, (__v32qi)(__m256i)_mm256_setzero_si256(),
+      (__mmask32)__U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS512
+_mm512_cvtbiasph_phf8(__m512i __A, __m512h __B) {
+  return (__m256i)__builtin_ia32_vcvtbiasph2hf8_512_mask(
+      (__v64qi)__A, (__v32hf)__B, (__v32qi)_mm256_undefined_si256(),
+      (__mmask32)-1);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtbiasph_phf8(
+    __m256i __W, __mmask32 __U, __m512i __A, __m512h __B) {
+  return (__m256i)__builtin_ia32_vcvtbiasph2hf8_512_mask(
+      (__v64qi)__A, (__v32hf)__B, (__v32qi)(__m256i)__W, (__mmask32)__U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS512
+_mm512_maskz_cvtbiasph_phf8(__mmask32 __U, __m512i __A, __m512h __B) {
+  return (__m256i)__builtin_ia32_vcvtbiasph2hf8_512_mask(
+      (__v64qi)__A, (__v32hf)__B, (__v32qi)(__m256i)_mm256_setzero_si256(),
+      (__mmask32)__U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS512
+_mm512_cvtbiassph_phf8(__m512i __A, __m512h __B) {
+  return (__m256i)__builtin_ia32_vcvtbiasph2hf8s_512_mask(
+      (__v64qi)__A, (__v32hf)__B, (__v32qi)_mm256_undefined_si256(),
+      (__mmask32)-1);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtbiassph_phf8(
+    __m256i __W, __mmask32 __U, __m512i __A, __m512h __B) {
+  return (__m256i)__builtin_ia32_vcvtbiasph2hf8s_512_mask(
+      (__v64qi)__A, (__v32hf)__B, (__v32qi)(__m256i)__W, (__mmask32)__U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS512
+_mm512_maskz_cvtbiassph_phf8(__mmask32 __U, __m512i __A, __m512h __B) {
+  return (__m256i)__builtin_ia32_vcvtbiasph2hf8s_512_mask(
+      (__v64qi)__A, (__v32hf)__B, (__v32qi)(__m256i)_mm256_setzero_si256(),
+      (__mmask32)__U);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_cvtne2ph_pbf8(__m512h __A, __m512h __B) {
+  return (__m512i)__builtin_ia32_vcvtne2ph2bf8_512((__v32hf)(__A),
+                                                   (__v32hf)(__B));
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtne2ph_pbf8(
+    __m512i __W, __mmask64 __U, __m512h __A, __m512h __B) {
+  return (__m512i)__builtin_ia32_selectb_512(
+      (__mmask64)__U, (__v64qi)_mm512_cvtne2ph_pbf8(__A, __B), (__v64qi)__W);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_cvtnes2ph_pbf8(__m512h __A, __m512h __B) {
+  return (__m512i)__builtin_ia32_vcvtne2ph2bf8s_512((__v32hf)(__A),
+                                                    (__v32hf)(__B));
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtnes2ph_pbf8(
+    __m512i __W, __mmask64 __U, __m512h __A, __m512h __B) {
+  return (__m512i)__builtin_ia32_selectb_512(
+      (__mmask64)__U, (__v64qi)_mm512_cvtnes2ph_pbf8(__A, __B), (__v64qi)__W);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_cvtne2ph_phf8(__m512h __A, __m512h __B) {
+  return (__m512i)__builtin_ia32_vcvtne2ph2hf8_512((__v32hf)(__A),
+                                                   (__v32hf)(__B));
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtne2ph_phf8(
+    __m512i __W, __mmask64 __U, __m512h __A, __m512h __B) {
+  return (__m512i)__builtin_ia32_selectb_512(
+      (__mmask64)__U, (__v64qi)_mm512_cvtne2ph_phf8(__A, __B), (__v64qi)__W);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_cvtne2ph2hf8s_phf8(__m512h __A, __m512h __B) {
+  return (__m512i)__builtin_ia32_vcvtne2ph2hf8s_512((__v32hf)(__A),
+                                                    (__v32hf)(__B));
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtne2ph2hf8s_phf8(
+    __m512i __W, __mmask64 __U, __m512h __A, __m512h __B) {
+  return (__m512i)__builtin_ia32_selectb_512(
+      (__mmask64)__U, (__v64qi)_mm512_cvtne2ph2hf8s_phf8(__A, __B),
+      (__v64qi)__W);
+}
+
+static __inline__ __m512h __DEFAULT_FN_ATTRS512
+_mm512_cvtnebf8_ph(__m256i __A) {
+  return (__m512h)__builtin_ia32_vcvtnebf8_2ph512_mask(
+      (__v32qi)__A, (__v32hf)(__m512h)_mm512_undefined_ph(), (__mmask32)-1);
+}
+
+static __inline__ __m512h __DEFAULT_FN_ATTRS512
+_mm512_mask_cvtnebf8_ph(__m512h __W, __mmask32 __U, __m256i __A) {
+  return (__m512h)__builtin_ia32_vcvtnebf8_2ph512_mask(
+      (__v32qi)__A, (__v32hf)(__m512h)__W, (__mmask32)__U);
+}
+
+static __inline__ __m512h __DEFAULT_FN_ATTRS512
+_mm512_maskz_cvtnebf8_ph(__mmask32 __U, __m256i __A) {
+  return (__m512h)__builtin_ia32_vcvtnebf8_2ph512_mask(
+      (__v32qi)__A, (__v32hf)(__m512h)_mm512_setzero_ph(), (__mmask32)__U);
+}
+
+static __inline__ __m512h __DEFAULT_FN_ATTRS512
+_mm512_cvtnehf8_ph(__m256i __A) {
+  return (__m512h)__builtin_ia32_vcvtnehf8_2ph512_mask(
+      (__v32qi)__A, (__v32hf)(__m512h)_mm512_undefined_ph(), (__mmask32)-1);
+}
+
+static __inline__ __m512h __DEFAULT_FN_ATTRS512
+_mm512_mask_cvtnehf8_ph(__m512h __W, __mmask32 __U, __m256i __A) {
+  return (__m512h)__builtin_ia32_vcvtnehf8_2ph512_mask(
+      (__v32qi)__A, (__v32hf)(__m512h)__W, (__mmask32)__U);
+}
+
+static __inline__ __m512h __DEFAULT_FN_ATTRS512
+_mm512_maskz_cvtnehf8_ph(__mmask32 __U, __m256i __A) {
+  return (__m512h)__builtin_ia32_vcvtnehf8_2ph512_mask(
+      (__v32qi)__A, (__v32hf)(__m512h)_mm512_setzero_ph(), (__mmask32)__U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS512
+_mm512_cvtneph_pbf8(__m512h __A) {
+  return (__m256i)__builtin_ia32_vcvtneph2bf8_512_mask(
+      (__v32hf)__A, (__v32qi)(__m256i)_mm256_undefined_si256(), (__mmask32)-1);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS512
+_mm512_mask_cvtneph_pbf8(__m256i __W, __mmask32 __U, __m512h __A) {
+  return (__m256i)__builtin_ia32_vcvtneph2bf8_512_mask(
+      (__v32hf)__A, (__v32qi)(__m256i)__W, (__mmask32)__U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS512
+_mm512_maskz_cvtneph_pbf8(__mmask32 __U, __m512h __A) {
+  return (__m256i)__builtin_ia32_vcvtneph2bf8_512_mask(
+      (__v32hf)__A, (__v32qi)(__m256i)_mm256_setzero_si256(), (__mmask32)__U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS512
+_mm512_cvtnesph_pbf8(__m512h __A) {
+  return (__m256i)__builtin_ia32_vcvtneph2bf8s_512_mask(
+      (__v32hf)__A, (__v32qi)(__m256i)_mm256_undefined_si256(), (__mmask32)-1);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS512
+_mm512_mask_cvtnesph_pbf8(__m256i __W, __mmask32 __U, __m512h __A) {
+  return (__m256i)__builtin_ia32_vcvtneph2bf8s_512_mask(
+      (__v32hf)__A, (__v32qi)(__m256i)__W, (__mmask32)__U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS512
+_mm512_maskz_cvtnesph_pbf8(__mmask32 __U, __m512h __A) {
+  return (__m256i)__builtin_ia32_vcvtneph2bf8s_512_mask(
+      (__v32hf)__A, (__v32qi)(__m256i)_mm256_setzero_si256(), (__mmask32)__U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS512
+_mm512_cvtneph_phf8(__m512h __A) {
+  return (__m256i)__builtin_ia32_vcvtneph2hf8_512_mask(
+      (__v32hf)__A, (__v32qi)(__m256i)_mm256_undefined_si256(), (__mmask32)-1);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS512
+_mm512_mask_cvtneph_phf8(__m256i __W, __mmask32 __U, __m512h __A) {
+  return (__m256i)__builtin_ia32_vcvtneph2hf8_512_mask(
+      (__v32hf)__A, (__v32qi)(__m256i)__W, (__mmask32)__U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS512
+_mm512_maskz_cvtneph_phf8(__mmask32 __U, __m512h __A) {
+  return (__m256i)__builtin_ia32_vcvtneph2hf8_512_mask(
+      (__v32hf)__A, (__v32qi)(__m256i)_mm256_setzero_si256(), (__mmask32)__U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS512
+_mm512_cvtnesph_phf8(__m512h __A) {
+  return (__m256i)__builtin_ia32_vcvtneph2hf8s_512_mask(
+      (__v32hf)__A, (__v32qi)(__m256i)_mm256_undefined_si256(), (__mmask32)-1);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS512
+_mm512_mask_cvtnesph_phf8(__m256i __W, __mmask32 __U, __m512h __A) {
+  return (__m256i)__builtin_ia32_vcvtneph2hf8s_512_mask(
+      (__v32hf)__A, (__v32qi)(__m256i)__W, (__mmask32)__U);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS512
+_mm512_maskz_cvtnesph_phf8(__mmask32 __U, __m512h __A) {
+  return (__m256i)__builtin_ia32_vcvtneph2hf8s_512_mask(
+      (__v32hf)__A, (__v32qi)(__m256i)_mm256_setzero_si256(), (__mmask32)__U);
+}
+
+static __inline __m512h __DEFAULT_FN_ATTRS512 _mm512_cvtpbf8_ph(__m256i __A) {
+  return _mm512_castsi512_ph(_mm512_slli_epi16(_mm512_cvtepi8_epi16(__A), 8));
+}
+
+static __inline __m512h __DEFAULT_FN_ATTRS512
+_mm512_mask_cvtpbf8_ph(__m512h __S, __mmask16 __U, __m256i __A) {
+  return _mm512_castsi512_ph(
+      _mm512_mask_slli_epi16((__m512i)__S, __U, _mm512_cvtepi8_epi16(__A), 8));
+}
+
+static __inline __m512h __DEFAULT_FN_ATTRS512
+_mm512_maskz_cvtpbf8_ph(__mmask16 __U, __m256i __A) {
+  return _mm512_castsi512_ph(
+      _mm512_slli_epi16(_mm512_maskz_cvtepi8_epi16(__U, __A), 8));
+}
+
+#undef __DEFAULT_FN_ATTRS512
+
+#endif // __AVX10_2_512CONVERTINTRIN_H
+#endif // __SSE2__
diff --git a/clang/lib/Headers/avx10_2convertintrin.h b/clang/lib/Headers/avx10_2convertintrin.h
new file mode 100644
index 0000000000000..9d7b6008cb6af
--- /dev/null
+++ b/clang/lib/Headers/avx10_2convertintrin.h
@@ -0,0 +1,560 @@
+/*===---------- avx10_2convertintrin.h - AVX512NECONVERTFP8 ----------===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===-----------------------------------------------------------------------===
+ */
+#ifndef __IMMINTRIN_H
+#error                                                                         \
+    "Never use <avx10_2convertintrin.h> directly; include <immintrin.h> instead."
+#endif // __IMMINTRIN_H
+
+#ifdef __SSE2__
+
+#ifndef __AVX10_2CONVERTINTRIN_H
+#define __AVX10_2CONVERTINTRIN_H
+
+/* Define the default attributes for the functions in this file. */
+#define __DEFAULT_FN_ATTRS128                                                  \
+  __attribute__((__always_inline__, __nodebug__, __target__("avx10.2-256"),    \
+                 __min_vector_width__(128)))
+#define __DEFAULT_FN_ATTRS256                                                  \
+  __attribute__((__always_inline__, __nodebug__, __target__("avx10.2-256"),    \
+                 __min_vector_width__(256)))
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtx2ps_ph(__m128 __A,
+                                                               __m128 __B) {
+  return (__m128h)__builtin_ia32_vcvt2ps2phx128_mask(
+      (__v4sf)__A, (__v4sf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)(-1));
+}
+
+static __inline__ __m128h __DEFAULT_FN_ATTRS128
+_mm_mask_cvtx2ps_ph(__m128h __W, __mmask8 __U, __m128 __A, __m128 __B) {
+  return (__m128h)__builtin_ia32_vcvt2ps2phx128_mask(
+      (__v4sf)__A, (__v4sf)__B, (__v8hf)__W, (__mmask8)__U);
+}
+
+static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_cvtx2ps_ph(__m256 __A,
+                                                            ...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/101600


More information about the cfe-commits mailing list