[clang] [compiler-rt] [llvm] [X86] Support MOVRS and AVX10.2 instructions. (PR #113274)

via llvm-commits llvm-commits at lists.llvm.org
Tue Oct 22 19:25:47 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-x86

Author: Freddy Ye (FreddyLeaf)

<details>
<summary>Changes</summary>

Ref.: https://cdrdv2.intel.com/v1/dl/getContent/671368


---

Patch is 83.30 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/113274.diff


30 Files Affected:

- (modified) clang/docs/ReleaseNotes.rst (+3) 
- (modified) clang/include/clang/Basic/BuiltinsX86.def (+13) 
- (modified) clang/include/clang/Driver/Options.td (+2) 
- (modified) clang/lib/Basic/Targets/X86.cpp (+6) 
- (modified) clang/lib/Basic/Targets/X86.h (+1) 
- (modified) clang/lib/Headers/CMakeLists.txt (+2) 
- (modified) clang/lib/Headers/immintrin.h (+10) 
- (added) clang/lib/Headers/movrs_avx10_2_512intrin.h (+98) 
- (added) clang/lib/Headers/movrs_avx10_2intrin.h (+174) 
- (added) clang/test/CodeGen/X86/movrs-avx10.2-512-builtins-error-32.c (+50) 
- (added) clang/test/CodeGen/X86/movrs-avx10.2-512-builtins.c (+87) 
- (added) clang/test/CodeGen/X86/movrs-avx10.2-builtins-error-32.c (+98) 
- (added) clang/test/CodeGen/X86/movrs-avx10.2-builtins.c (+171) 
- (modified) clang/test/CodeGen/target-builtin-noerror.c (+1) 
- (modified) clang/test/Driver/x86-target-features.c (+5) 
- (modified) clang/test/Preprocessor/x86_target_features.c (+6) 
- (modified) compiler-rt/lib/builtins/cpu_model/x86.c (+3) 
- (modified) llvm/docs/ReleaseNotes.md (+2) 
- (modified) llvm/include/llvm/IR/IntrinsicsX86.td (+39) 
- (modified) llvm/include/llvm/TargetParser/X86TargetParser.def (+1) 
- (modified) llvm/lib/Target/X86/X86.td (+2) 
- (modified) llvm/lib/Target/X86/X86InstrAVX10.td (+28) 
- (modified) llvm/lib/Target/X86/X86InstrPredicates.td (+1) 
- (modified) llvm/lib/TargetParser/Host.cpp (+1) 
- (modified) llvm/lib/TargetParser/X86TargetParser.cpp (+2) 
- (added) llvm/test/CodeGen/X86/movrs-avx10.2-512-intrinsics.ll (+163) 
- (added) llvm/test/CodeGen/X86/movrs-avx10.2-intrinsics.ll (+329) 
- (added) llvm/test/MC/Disassembler/X86/movrs-avx10-64.txt (+98) 
- (added) llvm/test/MC/X86/movrs-avx10-att-64.s (+98) 
- (added) llvm/test/MC/X86/movrs-avx10-intel-64.s (+97) 


``````````diff
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index b7a6ace8bb895d..f212cdef7864e3 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -618,6 +618,9 @@ X86 Support
 
 - All intrinsics in tbmintrin.h can now be used in constant expressions.
 
+- Supported intrinsics for `MOVRS AND AVX10.2`
+  * Supported intrinsics of ``_mm(256|512)_(mask(z))_loadrs_epi(8|16|32|64)``.
+
 Arm and AArch64 Support
 ^^^^^^^^^^^^^^^^^^^^^^^
 
diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def
index 4c6b22cca421ca..17ee3df85ff7a6 100644
--- a/clang/include/clang/Basic/BuiltinsX86.def
+++ b/clang/include/clang/Basic/BuiltinsX86.def
@@ -2339,6 +2339,19 @@ TARGET_BUILTIN(__builtin_ia32_vfmaddnepbh512, "V32yV32yV32yV32y", "ncV:512:", "a
 TARGET_BUILTIN(__builtin_ia32_vfmaddnepbh256, "V16yV16yV16yV16y", "ncV:256:", "avx10.2-256")
 TARGET_BUILTIN(__builtin_ia32_vfmaddnepbh128, "V8yV8yV8yV8y", "ncV:128:", "avx10.2-256")
 
+// MOVRS and AVX10.2
+TARGET_BUILTIN(__builtin_ia32_vmovrsb128, "V16cV16cC*", "nV:128:", "movrs,avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vmovrsb256, "V32cV32cC*", "nV:256:", "movrs,avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vmovrsb512, "V64cV64cC*", "nV:512:", "movrs,avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vmovrsd128, "V4iV4iC*", "nV:128:", "movrs,avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vmovrsd256, "V8iV8iC*", "nV:256:", "movrs,avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vmovrsd512, "V16iV16iC*", "nV:512:", "movrs,avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vmovrsq128, "V2OiV2OiC*", "nV:128:", "movrs,avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vmovrsq256, "V4OiV4OiC*", "nV:256:", "movrs,avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vmovrsq512, "V8OiV8OiC*", "nV:512:", "movrs,avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vmovrsw128, "V8sV8sC*", "nV:128:", "movrs,avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vmovrsw256, "V16sV16sC*", "nV:256:", "movrs,avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vmovrsw512, "V32sV32sC*", "nV:512:", "movrs,avx10.2-512")
 #undef BUILTIN
 #undef TARGET_BUILTIN
 #undef TARGET_HEADER_BUILTIN
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 152c43d7908ff8..4cb2e77f8d9f61 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -6415,6 +6415,8 @@ def mmovdiri : Flag<["-"], "mmovdiri">, Group<m_x86_Features_Group>;
 def mno_movdiri : Flag<["-"], "mno-movdiri">, Group<m_x86_Features_Group>;
 def mmovdir64b : Flag<["-"], "mmovdir64b">, Group<m_x86_Features_Group>;
 def mno_movdir64b : Flag<["-"], "mno-movdir64b">, Group<m_x86_Features_Group>;
+def mmovrs : Flag<["-"], "mmovrs">, Group<m_x86_Features_Group>;
+def mno_movrs : Flag<["-"], "mno-movrs">, Group<m_x86_Features_Group>;
 def mmwaitx : Flag<["-"], "mmwaitx">, Group<m_x86_Features_Group>;
 def mno_mwaitx : Flag<["-"], "mno-mwaitx">, Group<m_x86_Features_Group>;
 def mpku : Flag<["-"], "mpku">, Group<m_x86_Features_Group>;
diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp
index 5448bd841959f4..d067ec218b5270 100644
--- a/clang/lib/Basic/Targets/X86.cpp
+++ b/clang/lib/Basic/Targets/X86.cpp
@@ -348,6 +348,8 @@ bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
       HasSM4 = true;
     } else if (Feature == "+movbe") {
       HasMOVBE = true;
+    } else if (Feature == "+movrs") {
+      HasMOVRS = true;
     } else if (Feature == "+sgx") {
       HasSGX = true;
     } else if (Feature == "+cx8") {
@@ -915,6 +917,8 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts,
     Builder.defineMacro("__MOVDIRI__");
   if (HasMOVDIR64B)
     Builder.defineMacro("__MOVDIR64B__");
+  if (HasMOVRS)
+    Builder.defineMacro("__MOVRS__");
   if (HasPCONFIG)
     Builder.defineMacro("__PCONFIG__");
   if (HasPTWRITE)
@@ -1116,6 +1120,7 @@ bool X86TargetInfo::isValidFeatureName(StringRef Name) const {
       .Case("lzcnt", true)
       .Case("mmx", true)
       .Case("movbe", true)
+      .Case("movrs", true)
       .Case("movdiri", true)
       .Case("movdir64b", true)
       .Case("mwaitx", true)
@@ -1233,6 +1238,7 @@ bool X86TargetInfo::hasFeature(StringRef Feature) const {
       .Case("lzcnt", HasLZCNT)
       .Case("mmx", HasMMX)
       .Case("movbe", HasMOVBE)
+      .Case("movrs", HasMOVRS)
       .Case("movdiri", HasMOVDIRI)
       .Case("movdir64b", HasMOVDIR64B)
       .Case("mwaitx", HasMWAITX)
diff --git a/clang/lib/Basic/Targets/X86.h b/clang/lib/Basic/Targets/X86.h
index a99ae62984c7d5..05fef8c1344853 100644
--- a/clang/lib/Basic/Targets/X86.h
+++ b/clang/lib/Basic/Targets/X86.h
@@ -130,6 +130,7 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo {
   bool HasCLFLUSHOPT = false;
   bool HasCLWB = false;
   bool HasMOVBE = false;
+  bool HasMOVRS = false;
   bool HasPREFETCHI = false;
   bool HasRDPID = false;
   bool HasRDPRU = false;
diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt
index ff392e7122a448..e97953d87a2ff9 100644
--- a/clang/lib/Headers/CMakeLists.txt
+++ b/clang/lib/Headers/CMakeLists.txt
@@ -221,6 +221,8 @@ set(x86_files
   mm3dnow.h
   mmintrin.h
   movdirintrin.h
+  movrs_avx10_2_512intrin.h
+  movrs_avx10_2intrin.h
   mwaitxintrin.h
   nmmintrin.h
   pconfigintrin.h
diff --git a/clang/lib/Headers/immintrin.h b/clang/lib/Headers/immintrin.h
index 3fbabffa98df20..5f296d0a3324d0 100644
--- a/clang/lib/Headers/immintrin.h
+++ b/clang/lib/Headers/immintrin.h
@@ -605,6 +605,16 @@ _storebe_i64(void * __P, long long __D) {
 #include <movdirintrin.h>
 #endif
 
+#if !defined(__SCE__) || __has_feature(modules) ||                             \
+    (defined(__AVX10_2__) && defined(__MOVRS__))
+#include <movrs_avx10_2intrin.h>
+#endif
+
+#if !defined(__SCE__) || __has_feature(modules) ||                             \
+    (defined(__AVX10_2_512__) && defined(__MOVRS__))
+#include <movrs_avx10_2_512intrin.h>
+#endif
+
 #if !defined(__SCE__) || __has_feature(modules) || defined(__PCONFIG__)
 #include <pconfigintrin.h>
 #endif
diff --git a/clang/lib/Headers/movrs_avx10_2_512intrin.h b/clang/lib/Headers/movrs_avx10_2_512intrin.h
new file mode 100644
index 00000000000000..1d04d3122f2c5f
--- /dev/null
+++ b/clang/lib/Headers/movrs_avx10_2_512intrin.h
@@ -0,0 +1,98 @@
+/*===-------- movrs_avx10_2_512intrin.h - AVX512MOVRS intrinsics -----------===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===-----------------------------------------------------------------------===
+ */
+#ifndef __IMMINTRIN_H
+#error                                                                         \
+    "Never use <movrs_avx10_2_512intrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef __MOVRS_AVX10_2_512INTRIN_H
+#define __MOVRS_AVX10_2_512INTRIN_H
+#ifdef __x86_64__
+
+/* Define the default attributes for the functions in this file. */
+#define __DEFAULT_FN_ATTRS512                                                  \
+  __attribute__((__always_inline__, __nodebug__,                               \
+                 __target__("movrs, avx10.2-512"), __min_vector_width__(512)))
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_loadrs_epi8(void const *__A) {
+  return (__m512i)__builtin_ia32_vmovrsb512((const __v64qi *)(__A));
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_mask_loadrs_epi8(__m512i __W, __mmask64 __U, void const *__A) {
+  return (__m512i)__builtin_ia32_selectb_512(
+      (__mmask64)__U, (__v64qi)_mm512_loadrs_epi8(__A), (__v64qi)__W);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_maskz_loadrs_epi8(__mmask64 __U, void const *__A) {
+  return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
+                                             (__v64qi)_mm512_loadrs_epi8(__A),
+                                             (__v64qi)_mm512_setzero_si512());
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_loadrs_epi32(void const *__A) {
+  return (__m512i)__builtin_ia32_vmovrsd512((const __v16si *)(__A));
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_mask_loadrs_epi32(__m512i __W, __mmask16 __U, void const *__A) {
+  return (__m512i)__builtin_ia32_selectd_512(
+      (__mmask16)__U, (__v16si)_mm512_loadrs_epi32(__A), (__v16si)__W);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_maskz_loadrs_epi32(__mmask16 __U, void const *__A) {
+  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
+                                             (__v16si)_mm512_loadrs_epi32(__A),
+                                             (__v16si)_mm512_setzero_si512());
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_loadrs_epi64(void const *__A) {
+  return (__m512i)__builtin_ia32_vmovrsq512((const __v8di *)(__A));
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_mask_loadrs_epi64(__m512i __W, __mmask8 __U, void const *__A) {
+  return (__m512i)__builtin_ia32_selectq_512(
+      (__mmask8)__U, (__v8di)_mm512_loadrs_epi64(__A), (__v8di)__W);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_maskz_loadrs_epi64(__mmask8 __U, void const *__A) {
+  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
+                                             (__v8di)_mm512_loadrs_epi64(__A),
+                                             (__v8di)_mm512_setzero_si512());
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_loadrs_epi16(void const *__A) {
+  return (__m512i)__builtin_ia32_vmovrsw512((const __v32hi *)(__A));
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_mask_loadrs_epi16(__m512i __W, __mmask32 __U, void const *__A) {
+  return (__m512i)__builtin_ia32_selectw_512(
+      (__mmask32)__U, (__v32hi)_mm512_loadrs_epi16(__A), (__v32hi)__W);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_maskz_loadrs_epi16(__mmask32 __U, void const *__A) {
+  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
+                                             (__v32hi)_mm512_loadrs_epi16(__A),
+                                             (__v32hi)_mm512_setzero_si512());
+}
+
+#undef __DEFAULT_FN_ATTRS512
+
+#endif /* __x86_64__ */
+#endif /* __MOVRS_AVX10_2_512INTRIN_H */
diff --git a/clang/lib/Headers/movrs_avx10_2intrin.h b/clang/lib/Headers/movrs_avx10_2intrin.h
new file mode 100644
index 00000000000000..f38c78afe2ef94
--- /dev/null
+++ b/clang/lib/Headers/movrs_avx10_2intrin.h
@@ -0,0 +1,174 @@
+/*===---------- movrs_avx10_2intrin.h - AVX512MOVRS intrinsics -------------===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===-----------------------------------------------------------------------===
+ */
+#ifndef __IMMINTRIN_H
+#error                                                                         \
+    "Never use <movrs_avx10_2intrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef __MOVRS_AVX10_2INTRIN_H
+#define __MOVRS_AVX10_2INTRIN_H
+#ifdef __x86_64__
+
+/* Define the default attributes for the functions in this file. */
+#define __DEFAULT_FN_ATTRS128                                                  \
+  __attribute__((__always_inline__, __nodebug__,                               \
+                 __target__("movrs,avx10.2-256"), __min_vector_width__(128)))
+#define __DEFAULT_FN_ATTRS256                                                  \
+  __attribute__((__always_inline__, __nodebug__,                               \
+                 __target__("movrs,avx10.2-256"), __min_vector_width__(256)))
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_loadrs_epi8(void const *__A) {
+  return (__m128i)__builtin_ia32_vmovrsb128((const __v16qi *)(__A));
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_mask_loadrs_epi8(__m128i __W, __mmask16 __U, void const *__A) {
+  return (__m128i)__builtin_ia32_selectb_128(
+      (__mmask16)__U, (__v16qi)_mm_loadrs_epi8(__A), (__v16qi)__W);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_maskz_loadrs_epi8(__mmask16 __U, void const *__A) {
+  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
+                                             (__v16qi)_mm_loadrs_epi8(__A),
+                                             (__v16qi)_mm_setzero_si128());
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_loadrs_epi8(void const *__A) {
+  return (__m256i)__builtin_ia32_vmovrsb256((const __v32qi *)(__A));
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_mask_loadrs_epi8(__m256i __W, __mmask32 __U, void const *__A) {
+  return (__m256i)__builtin_ia32_selectb_256(
+      (__mmask32)__U, (__v32qi)_mm256_loadrs_epi8(__A), (__v32qi)__W);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_maskz_loadrs_epi8(__mmask32 __U, void const *__A) {
+  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
+                                             (__v32qi)_mm256_loadrs_epi8(__A),
+                                             (__v32qi)_mm256_setzero_si256());
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_loadrs_epi32(void const *__A) {
+  return (__m128i)__builtin_ia32_vmovrsd128((const __v4si *)(__A));
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_mask_loadrs_epi32(__m128i __W, __mmask8 __U, void const *__A) {
+  return (__m128i)__builtin_ia32_selectd_128(
+      (__mmask8)__U, (__v4si)_mm_loadrs_epi32(__A), (__v4si)__W);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_maskz_loadrs_epi32(__mmask8 __U, void const *__A) {
+  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
+                                             (__v4si)_mm_loadrs_epi32(__A),
+                                             (__v4si)_mm_setzero_si128());
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_loadrs_epi32(void const *__A) {
+  return (__m256i)__builtin_ia32_vmovrsd256((const __v8si *)(__A));
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_mask_loadrs_epi32(__m256i __W, __mmask8 __U, void const *__A) {
+  return (__m256i)__builtin_ia32_selectd_256(
+      (__mmask8)__U, (__v8si)_mm256_loadrs_epi32(__A), (__v8si)__W);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_maskz_loadrs_epi32(__mmask8 __U, void const *__A) {
+  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
+                                             (__v8si)_mm256_loadrs_epi32(__A),
+                                             (__v8si)_mm256_setzero_si256());
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_loadrs_epi64(void const *__A) {
+  return (__m128i)__builtin_ia32_vmovrsq128((const __v2di *)(__A));
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_mask_loadrs_epi64(__m128i __W, __mmask8 __U, void const *__A) {
+  return (__m128i)__builtin_ia32_selectq_128(
+      (__mmask8)__U, (__v2di)_mm_loadrs_epi64(__A), (__v2di)__W);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_maskz_loadrs_epi64(__mmask8 __U, void const *__A) {
+  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
+                                             (__v2di)_mm_loadrs_epi64(__A),
+                                             (__v2di)_mm_setzero_si128());
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_loadrs_epi64(void const *__A) {
+  return (__m256i)__builtin_ia32_vmovrsq256((const __v4di *)(__A));
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_mask_loadrs_epi64(__m256i __W, __mmask8 __U, void const *__A) {
+  return (__m256i)__builtin_ia32_selectq_256(
+      (__mmask8)__U, (__v4di)_mm256_loadrs_epi64(__A), (__v4di)__W);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_maskz_loadrs_epi64(__mmask8 __U, void const *__A) {
+  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
+                                             (__v4di)_mm256_loadrs_epi64(__A),
+                                             (__v4di)_mm256_setzero_si256());
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_loadrs_epi16(void const *__A) {
+  return (__m128i)__builtin_ia32_vmovrsw128((const __v8hi *)(__A));
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_mask_loadrs_epi16(__m128i __W, __mmask8 __U, void const *__A) {
+  return (__m128i)__builtin_ia32_selectw_128(
+      (__mmask8)__U, (__v8hi)_mm_loadrs_epi16(__A), (__v8hi)__W);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_maskz_loadrs_epi16(__mmask8 __U, void const *__A) {
+  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
+                                             (__v8hi)_mm_loadrs_epi16(__A),
+                                             (__v8hi)_mm_setzero_si128());
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_loadrs_epi16(void const *__A) {
+  return (__m256i)__builtin_ia32_vmovrsw256((const __v16hi *)(__A));
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_mask_loadrs_epi16(__m256i __W, __mmask16 __U, void const *__A) {
+  return (__m256i)__builtin_ia32_selectw_256(
+      (__mmask16)__U, (__v16hi)_mm256_loadrs_epi16(__A), (__v16hi)__W);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_maskz_loadrs_epi16(__mmask16 __U, void const *__A) {
+  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
+                                             (__v16hi)_mm256_loadrs_epi16(__A),
+                                             (__v16hi)_mm256_setzero_si256());
+}
+
+#undef __DEFAULT_FN_ATTRS128
+#undef __DEFAULT_FN_ATTRS256
+
+#endif /* __x86_64__ */
+#endif /* __MOVRS_AVX10_2INTRIN_H */
diff --git a/clang/test/CodeGen/X86/movrs-avx10.2-512-builtins-error-32.c b/clang/test/CodeGen/X86/movrs-avx10.2-512-builtins-error-32.c
new file mode 100644
index 00000000000000..944033724a6a2b
--- /dev/null
+++ b/clang/test/CodeGen/X86/movrs-avx10.2-512-builtins-error-32.c
@@ -0,0 +1,50 @@
+// RUN: %clang_cc1 -ffreestanding %s -Wno-implicit-function-declaration -triple=i386-- -target-feature +movrs -target-feature +avx10.2-512 -emit-llvm -verify
+
+#include <immintrin.h>
+__m512i test_mm512_loadrs_epi8(const __m512i * __A) {
+  return _mm512_loadrs_epi8(__A); // expected-error {{returning 'int' from a function with incompatible result type '__m512i' (vector of 8 'long long' values)}}
+}
+
+__m512i test_mm512_mask_loadrs_epi8(__m512i __A, __mmask64 __B, const __m512i * __C) {
+  return _mm512_mask_loadrs_epi8(__A, __B, __C); // expected-error {{returning 'int' from a function with incompatible result type '__m512i' (vector of 8 'long long' values)}}
+}
+
+__m512i test_mm512_maskz_loadrs_epi8(__mmask64 __A, const __m512i * __B) {
+  return _mm512_maskz_loadrs_epi8(__A, __B); // expected-error {{returning 'int' from a function with incompatible result type '__m512i' (vector of 8 'long long' values)}}
+}
+
+__m512i test_mm512_loadrs_epi32(const __m512i * __A) {
+  return _mm512_loadrs_epi32(__A); // expected-error {{returning 'int' from a function with incompatible result type '__m512i' (vector of 8 'long long' values)}}
+}
+
+__m512i test_mm512_mask_loadrs_epi32(__m512i __A, __mmask16 __B, const __m512i * __C) {
+  return _mm512_mask_loadrs_epi32(__A, __B, __C); // expected-error {{returning 'int' from a function with incompatible result type '__m512i' (vector of 8 'long long' values)}}
+}
+
+__m512i test_mm512_maskz_loadrs_epi32(__mmask16 __A, const __m512i * __B) {
+  return _mm512_maskz_loadrs_epi32(__A, __B); // expected-error {{returning 'int' from a function with incompatible result type '__m512i' (vector of 8 'long long' values)}}
+}
+
+__m512i test_mm512_loadrs_epi64(const __m512i * __A) {
+  return _mm512_loadrs_epi64(__A); // expected-error {{returning 'int' from a function with incompatible result type '__m512i' (vector of 8 'long long' values)}}
+}
+
+__m512i test_mm512_mask_loadrs_epi64(__m512i __A, __mmask8 __B, const __m512i * __C) {
+  return _mm512_mask_loadrs_epi64(__A, __B, __C); // expected-error {{returning 'int' from a function with incompatible result type '__m512i' (vector of 8 'long long' values)}}
+}
+
+__m512i test_mm512_maskz_loadrs_epi64(__mmask8 __A, const __m512i * __B) {
+  return _mm512_maskz_loadrs_epi64(__A, __B); // expected-...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/113274


More information about the llvm-commits mailing list