[clang] [compiler-rt] [llvm] [X86] Support MOVRS and AVX10.2 instructions. (PR #113274)

Freddy Ye via cfe-commits cfe-commits at lists.llvm.org
Tue Oct 22 19:26:22 PDT 2024


https://github.com/FreddyLeaf updated https://github.com/llvm/llvm-project/pull/113274

>From 19c6400ac7127860ac1712941acbd1585614d17d Mon Sep 17 00:00:00 2001
From: Freddy Ye <freddy.ye at intel.com>
Date: Mon, 21 Oct 2024 10:24:14 +0800
Subject: [PATCH 1/3] [X86] Support MOVRS and AVX10.2 instructions.

Ref.: https://cdrdv2.intel.com/v1/dl/getContent/671368
---
 clang/include/clang/Basic/BuiltinsX86.def     |  13 +
 clang/lib/Basic/Targets/X86.cpp               |   4 +
 clang/lib/Basic/Targets/X86.h                 |   1 +
 clang/lib/Headers/CMakeLists.txt              |   2 +
 clang/lib/Headers/immintrin.h                 |  10 +
 clang/lib/Headers/movrs_avx10_2_512intrin.h   |  98 ++++++
 clang/lib/Headers/movrs_avx10_2intrin.h       | 174 +++++++++
 .../X86/movrs-avx10.2-512-builtins-error-32.c |  50 +++
 .../CodeGen/X86/movrs-avx10.2-512-builtins.c  |  87 +++++
 .../X86/movrs-avx10.2-builtins-error-32.c     |  98 ++++++
 .../test/CodeGen/X86/movrs-avx10.2-builtins.c | 171 +++++++++
 llvm/include/llvm/IR/IntrinsicsX86.td         |  39 +++
 .../llvm/TargetParser/X86TargetParser.def     |   1 +
 llvm/lib/Target/X86/X86.td                    |   2 +
 llvm/lib/Target/X86/X86InstrAVX10.td          |  28 ++
 llvm/lib/Target/X86/X86InstrPredicates.td     |   1 +
 llvm/lib/TargetParser/X86TargetParser.cpp     |   2 +
 .../X86/movrs-avx10.2-512-intrinsics.ll       | 163 +++++++++
 .../CodeGen/X86/movrs-avx10.2-intrinsics.ll   | 329 ++++++++++++++++++
 .../MC/Disassembler/X86/movrs-avx10-64.txt    |  98 ++++++
 llvm/test/MC/X86/movrs-avx10-att-64.s         |  98 ++++++
 llvm/test/MC/X86/movrs-avx10-intel-64.s       |  97 ++++++
 22 files changed, 1566 insertions(+)
 create mode 100644 clang/lib/Headers/movrs_avx10_2_512intrin.h
 create mode 100644 clang/lib/Headers/movrs_avx10_2intrin.h
 create mode 100644 clang/test/CodeGen/X86/movrs-avx10.2-512-builtins-error-32.c
 create mode 100644 clang/test/CodeGen/X86/movrs-avx10.2-512-builtins.c
 create mode 100644 clang/test/CodeGen/X86/movrs-avx10.2-builtins-error-32.c
 create mode 100644 clang/test/CodeGen/X86/movrs-avx10.2-builtins.c
 create mode 100644 llvm/test/CodeGen/X86/movrs-avx10.2-512-intrinsics.ll
 create mode 100644 llvm/test/CodeGen/X86/movrs-avx10.2-intrinsics.ll
 create mode 100644 llvm/test/MC/Disassembler/X86/movrs-avx10-64.txt
 create mode 100644 llvm/test/MC/X86/movrs-avx10-att-64.s
 create mode 100644 llvm/test/MC/X86/movrs-avx10-intel-64.s

diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def
index 4c6b22cca421ca..17ee3df85ff7a6 100644
--- a/clang/include/clang/Basic/BuiltinsX86.def
+++ b/clang/include/clang/Basic/BuiltinsX86.def
@@ -2339,6 +2339,19 @@ TARGET_BUILTIN(__builtin_ia32_vfmaddnepbh512, "V32yV32yV32yV32y", "ncV:512:", "a
 TARGET_BUILTIN(__builtin_ia32_vfmaddnepbh256, "V16yV16yV16yV16y", "ncV:256:", "avx10.2-256")
 TARGET_BUILTIN(__builtin_ia32_vfmaddnepbh128, "V8yV8yV8yV8y", "ncV:128:", "avx10.2-256")
 
+// MOVRS and AVX10.2
+TARGET_BUILTIN(__builtin_ia32_vmovrsb128, "V16cV16cC*", "nV:128:", "movrs,avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vmovrsb256, "V32cV32cC*", "nV:256:", "movrs,avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vmovrsb512, "V64cV64cC*", "nV:512:", "movrs,avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vmovrsd128, "V4iV4iC*", "nV:128:", "movrs,avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vmovrsd256, "V8iV8iC*", "nV:256:", "movrs,avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vmovrsd512, "V16iV16iC*", "nV:512:", "movrs,avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vmovrsq128, "V2OiV2OiC*", "nV:128:", "movrs,avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vmovrsq256, "V4OiV4OiC*", "nV:256:", "movrs,avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vmovrsq512, "V8OiV8OiC*", "nV:512:", "movrs,avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vmovrsw128, "V8sV8sC*", "nV:128:", "movrs,avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vmovrsw256, "V16sV16sC*", "nV:256:", "movrs,avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vmovrsw512, "V32sV32sC*", "nV:512:", "movrs,avx10.2-512")
 #undef BUILTIN
 #undef TARGET_BUILTIN
 #undef TARGET_HEADER_BUILTIN
diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp
index 5448bd841959f4..d4d099504a6a71 100644
--- a/clang/lib/Basic/Targets/X86.cpp
+++ b/clang/lib/Basic/Targets/X86.cpp
@@ -348,6 +348,8 @@ bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
       HasSM4 = true;
     } else if (Feature == "+movbe") {
       HasMOVBE = true;
+    } else if (Feature == "+movrs") {
+      HasMOVRS = true;
     } else if (Feature == "+sgx") {
       HasSGX = true;
     } else if (Feature == "+cx8") {
@@ -1116,6 +1118,7 @@ bool X86TargetInfo::isValidFeatureName(StringRef Name) const {
       .Case("lzcnt", true)
       .Case("mmx", true)
       .Case("movbe", true)
+      .Case("movrs", true)
       .Case("movdiri", true)
       .Case("movdir64b", true)
       .Case("mwaitx", true)
@@ -1233,6 +1236,7 @@ bool X86TargetInfo::hasFeature(StringRef Feature) const {
       .Case("lzcnt", HasLZCNT)
       .Case("mmx", HasMMX)
       .Case("movbe", HasMOVBE)
+      .Case("movrs", HasMOVRS)
       .Case("movdiri", HasMOVDIRI)
       .Case("movdir64b", HasMOVDIR64B)
       .Case("mwaitx", HasMWAITX)
diff --git a/clang/lib/Basic/Targets/X86.h b/clang/lib/Basic/Targets/X86.h
index a99ae62984c7d5..05fef8c1344853 100644
--- a/clang/lib/Basic/Targets/X86.h
+++ b/clang/lib/Basic/Targets/X86.h
@@ -130,6 +130,7 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo {
   bool HasCLFLUSHOPT = false;
   bool HasCLWB = false;
   bool HasMOVBE = false;
+  bool HasMOVRS = false;
   bool HasPREFETCHI = false;
   bool HasRDPID = false;
   bool HasRDPRU = false;
diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt
index ff392e7122a448..e97953d87a2ff9 100644
--- a/clang/lib/Headers/CMakeLists.txt
+++ b/clang/lib/Headers/CMakeLists.txt
@@ -221,6 +221,8 @@ set(x86_files
   mm3dnow.h
   mmintrin.h
   movdirintrin.h
+  movrs_avx10_2_512intrin.h
+  movrs_avx10_2intrin.h
   mwaitxintrin.h
   nmmintrin.h
   pconfigintrin.h
diff --git a/clang/lib/Headers/immintrin.h b/clang/lib/Headers/immintrin.h
index 3fbabffa98df20..5f296d0a3324d0 100644
--- a/clang/lib/Headers/immintrin.h
+++ b/clang/lib/Headers/immintrin.h
@@ -605,6 +605,16 @@ _storebe_i64(void * __P, long long __D) {
 #include <movdirintrin.h>
 #endif
 
+#if !defined(__SCE__) || __has_feature(modules) ||                             \
+    (defined(__AVX10_2__) && defined(__MOVRS__))
+#include <movrs_avx10_2intrin.h>
+#endif
+
+#if !defined(__SCE__) || __has_feature(modules) ||                             \
+    (defined(__AVX10_2_512__) && defined(__MOVRS__))
+#include <movrs_avx10_2_512intrin.h>
+#endif
+
 #if !defined(__SCE__) || __has_feature(modules) || defined(__PCONFIG__)
 #include <pconfigintrin.h>
 #endif
diff --git a/clang/lib/Headers/movrs_avx10_2_512intrin.h b/clang/lib/Headers/movrs_avx10_2_512intrin.h
new file mode 100644
index 00000000000000..1d04d3122f2c5f
--- /dev/null
+++ b/clang/lib/Headers/movrs_avx10_2_512intrin.h
@@ -0,0 +1,98 @@
+/*===----- movrs_avx10_2_512intrin.h - AVX10.2-512-MOVRS intrinsics --------===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===-----------------------------------------------------------------------===
+ */
+#ifndef __IMMINTRIN_H
+#error                                                                         \
+    "Never use <movrs_avx10_2_512intrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef __MOVRS_AVX10_2_512INTRIN_H
+#define __MOVRS_AVX10_2_512INTRIN_H
+#ifdef __x86_64__
+
+/* Default attributes: always inline, no debug info, MOVRS + AVX10.2-512. */
+#define __DEFAULT_FN_ATTRS512                                                  \
+  __attribute__((__always_inline__, __nodebug__,                               \
+                 __target__("movrs,avx10.2-512"), __min_vector_width__(512)))
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_loadrs_epi8(void const *__A) {
+  return (__m512i)__builtin_ia32_vmovrsb512((const __v64qi *)(__A));
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_mask_loadrs_epi8(__m512i __W, __mmask64 __U, void const *__A) {
+  return (__m512i)__builtin_ia32_selectb_512(
+      (__mmask64)__U, (__v64qi)_mm512_loadrs_epi8(__A), (__v64qi)__W);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_maskz_loadrs_epi8(__mmask64 __U, void const *__A) {
+  return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
+                                             (__v64qi)_mm512_loadrs_epi8(__A),
+                                             (__v64qi)_mm512_setzero_si512());
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_loadrs_epi32(void const *__A) {
+  return (__m512i)__builtin_ia32_vmovrsd512((const __v16si *)(__A));
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_mask_loadrs_epi32(__m512i __W, __mmask16 __U, void const *__A) {
+  return (__m512i)__builtin_ia32_selectd_512(
+      (__mmask16)__U, (__v16si)_mm512_loadrs_epi32(__A), (__v16si)__W);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_maskz_loadrs_epi32(__mmask16 __U, void const *__A) {
+  return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
+                                             (__v16si)_mm512_loadrs_epi32(__A),
+                                             (__v16si)_mm512_setzero_si512());
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_loadrs_epi64(void const *__A) {
+  return (__m512i)__builtin_ia32_vmovrsq512((const __v8di *)(__A));
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_mask_loadrs_epi64(__m512i __W, __mmask8 __U, void const *__A) {
+  return (__m512i)__builtin_ia32_selectq_512(
+      (__mmask8)__U, (__v8di)_mm512_loadrs_epi64(__A), (__v8di)__W);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_maskz_loadrs_epi64(__mmask8 __U, void const *__A) {
+  return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
+                                             (__v8di)_mm512_loadrs_epi64(__A),
+                                             (__v8di)_mm512_setzero_si512());
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_loadrs_epi16(void const *__A) {
+  return (__m512i)__builtin_ia32_vmovrsw512((const __v32hi *)(__A));
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_mask_loadrs_epi16(__m512i __W, __mmask32 __U, void const *__A) {
+  return (__m512i)__builtin_ia32_selectw_512(
+      (__mmask32)__U, (__v32hi)_mm512_loadrs_epi16(__A), (__v32hi)__W);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_maskz_loadrs_epi16(__mmask32 __U, void const *__A) {
+  return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
+                                             (__v32hi)_mm512_loadrs_epi16(__A),
+                                             (__v32hi)_mm512_setzero_si512());
+}
+
+#undef __DEFAULT_FN_ATTRS512
+
+#endif /* __x86_64__ */
+#endif /* __MOVRS_AVX10_2_512INTRIN_H */
diff --git a/clang/lib/Headers/movrs_avx10_2intrin.h b/clang/lib/Headers/movrs_avx10_2intrin.h
new file mode 100644
index 00000000000000..f38c78afe2ef94
--- /dev/null
+++ b/clang/lib/Headers/movrs_avx10_2intrin.h
@@ -0,0 +1,174 @@
+/*===--------- movrs_avx10_2intrin.h - AVX10.2-MOVRS intrinsics ------------===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===-----------------------------------------------------------------------===
+ */
+#ifndef __IMMINTRIN_H
+#error                                                                         \
+    "Never use <movrs_avx10_2intrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef __MOVRS_AVX10_2INTRIN_H
+#define __MOVRS_AVX10_2INTRIN_H
+#ifdef __x86_64__
+
+/* Define the default attributes for the functions in this file. */
+#define __DEFAULT_FN_ATTRS128                                                  \
+  __attribute__((__always_inline__, __nodebug__,                               \
+                 __target__("movrs,avx10.2-256"), __min_vector_width__(128)))
+#define __DEFAULT_FN_ATTRS256                                                  \
+  __attribute__((__always_inline__, __nodebug__,                               \
+                 __target__("movrs,avx10.2-256"), __min_vector_width__(256)))
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_loadrs_epi8(void const *__A) {
+  return (__m128i)__builtin_ia32_vmovrsb128((const __v16qi *)(__A));
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_mask_loadrs_epi8(__m128i __W, __mmask16 __U, void const *__A) {
+  return (__m128i)__builtin_ia32_selectb_128(
+      (__mmask16)__U, (__v16qi)_mm_loadrs_epi8(__A), (__v16qi)__W);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_maskz_loadrs_epi8(__mmask16 __U, void const *__A) {
+  return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
+                                             (__v16qi)_mm_loadrs_epi8(__A),
+                                             (__v16qi)_mm_setzero_si128());
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_loadrs_epi8(void const *__A) {
+  return (__m256i)__builtin_ia32_vmovrsb256((const __v32qi *)(__A));
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_mask_loadrs_epi8(__m256i __W, __mmask32 __U, void const *__A) {
+  return (__m256i)__builtin_ia32_selectb_256(
+      (__mmask32)__U, (__v32qi)_mm256_loadrs_epi8(__A), (__v32qi)__W);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_maskz_loadrs_epi8(__mmask32 __U, void const *__A) {
+  return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
+                                             (__v32qi)_mm256_loadrs_epi8(__A),
+                                             (__v32qi)_mm256_setzero_si256());
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_loadrs_epi32(void const *__A) {
+  return (__m128i)__builtin_ia32_vmovrsd128((const __v4si *)(__A));
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_mask_loadrs_epi32(__m128i __W, __mmask8 __U, void const *__A) {
+  return (__m128i)__builtin_ia32_selectd_128(
+      (__mmask8)__U, (__v4si)_mm_loadrs_epi32(__A), (__v4si)__W);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_maskz_loadrs_epi32(__mmask8 __U, void const *__A) {
+  return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
+                                             (__v4si)_mm_loadrs_epi32(__A),
+                                             (__v4si)_mm_setzero_si128());
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_loadrs_epi32(void const *__A) {
+  return (__m256i)__builtin_ia32_vmovrsd256((const __v8si *)(__A));
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_mask_loadrs_epi32(__m256i __W, __mmask8 __U, void const *__A) {
+  return (__m256i)__builtin_ia32_selectd_256(
+      (__mmask8)__U, (__v8si)_mm256_loadrs_epi32(__A), (__v8si)__W);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_maskz_loadrs_epi32(__mmask8 __U, void const *__A) {
+  return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
+                                             (__v8si)_mm256_loadrs_epi32(__A),
+                                             (__v8si)_mm256_setzero_si256());
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_loadrs_epi64(void const *__A) {
+  return (__m128i)__builtin_ia32_vmovrsq128((const __v2di *)(__A));
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_mask_loadrs_epi64(__m128i __W, __mmask8 __U, void const *__A) {
+  return (__m128i)__builtin_ia32_selectq_128(
+      (__mmask8)__U, (__v2di)_mm_loadrs_epi64(__A), (__v2di)__W);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_maskz_loadrs_epi64(__mmask8 __U, void const *__A) {
+  return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
+                                             (__v2di)_mm_loadrs_epi64(__A),
+                                             (__v2di)_mm_setzero_si128());
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_loadrs_epi64(void const *__A) {
+  return (__m256i)__builtin_ia32_vmovrsq256((const __v4di *)(__A));
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_mask_loadrs_epi64(__m256i __W, __mmask8 __U, void const *__A) {
+  return (__m256i)__builtin_ia32_selectq_256(
+      (__mmask8)__U, (__v4di)_mm256_loadrs_epi64(__A), (__v4di)__W);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_maskz_loadrs_epi64(__mmask8 __U, void const *__A) {
+  return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
+                                             (__v4di)_mm256_loadrs_epi64(__A),
+                                             (__v4di)_mm256_setzero_si256());
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_loadrs_epi16(void const *__A) {
+  return (__m128i)__builtin_ia32_vmovrsw128((const __v8hi *)(__A));
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_mask_loadrs_epi16(__m128i __W, __mmask8 __U, void const *__A) {
+  return (__m128i)__builtin_ia32_selectw_128(
+      (__mmask8)__U, (__v8hi)_mm_loadrs_epi16(__A), (__v8hi)__W);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_maskz_loadrs_epi16(__mmask8 __U, void const *__A) {
+  return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
+                                             (__v8hi)_mm_loadrs_epi16(__A),
+                                             (__v8hi)_mm_setzero_si128());
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_loadrs_epi16(void const *__A) {
+  return (__m256i)__builtin_ia32_vmovrsw256((const __v16hi *)(__A));
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_mask_loadrs_epi16(__m256i __W, __mmask16 __U, void const *__A) {
+  return (__m256i)__builtin_ia32_selectw_256(
+      (__mmask16)__U, (__v16hi)_mm256_loadrs_epi16(__A), (__v16hi)__W);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_maskz_loadrs_epi16(__mmask16 __U, void const *__A) {
+  return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
+                                             (__v16hi)_mm256_loadrs_epi16(__A),
+                                             (__v16hi)_mm256_setzero_si256());
+}
+
+#undef __DEFAULT_FN_ATTRS128
+#undef __DEFAULT_FN_ATTRS256
+
+#endif /* __x86_64__ */
+#endif /* __MOVRS_AVX10_2INTRIN_H */
diff --git a/clang/test/CodeGen/X86/movrs-avx10.2-512-builtins-error-32.c b/clang/test/CodeGen/X86/movrs-avx10.2-512-builtins-error-32.c
new file mode 100644
index 00000000000000..944033724a6a2b
--- /dev/null
+++ b/clang/test/CodeGen/X86/movrs-avx10.2-512-builtins-error-32.c
@@ -0,0 +1,50 @@
+// RUN: %clang_cc1 -ffreestanding %s -Wno-implicit-function-declaration -triple=i386-- -target-feature +movrs -target-feature +avx10.2-512 -emit-llvm -verify
+
+#include <immintrin.h>
+__m512i test_mm512_loadrs_epi8(const __m512i * __A) {
+  return _mm512_loadrs_epi8(__A); // expected-error {{returning 'int' from a function with incompatible result type '__m512i' (vector of 8 'long long' values)}}
+}
+
+__m512i test_mm512_mask_loadrs_epi8(__m512i __A, __mmask64 __B, const __m512i * __C) {
+  return _mm512_mask_loadrs_epi8(__A, __B, __C); // expected-error {{returning 'int' from a function with incompatible result type '__m512i' (vector of 8 'long long' values)}}
+}
+
+__m512i test_mm512_maskz_loadrs_epi8(__mmask64 __A, const __m512i * __B) {
+  return _mm512_maskz_loadrs_epi8(__A, __B); // expected-error {{returning 'int' from a function with incompatible result type '__m512i' (vector of 8 'long long' values)}}
+}
+
+__m512i test_mm512_loadrs_epi32(const __m512i * __A) {
+  return _mm512_loadrs_epi32(__A); // expected-error {{returning 'int' from a function with incompatible result type '__m512i' (vector of 8 'long long' values)}}
+}
+
+__m512i test_mm512_mask_loadrs_epi32(__m512i __A, __mmask16 __B, const __m512i * __C) {
+  return _mm512_mask_loadrs_epi32(__A, __B, __C); // expected-error {{returning 'int' from a function with incompatible result type '__m512i' (vector of 8 'long long' values)}}
+}
+
+__m512i test_mm512_maskz_loadrs_epi32(__mmask16 __A, const __m512i * __B) {
+  return _mm512_maskz_loadrs_epi32(__A, __B); // expected-error {{returning 'int' from a function with incompatible result type '__m512i' (vector of 8 'long long' values)}}
+}
+
+__m512i test_mm512_loadrs_epi64(const __m512i * __A) {
+  return _mm512_loadrs_epi64(__A); // expected-error {{returning 'int' from a function with incompatible result type '__m512i' (vector of 8 'long long' values)}}
+}
+
+__m512i test_mm512_mask_loadrs_epi64(__m512i __A, __mmask8 __B, const __m512i * __C) {
+  return _mm512_mask_loadrs_epi64(__A, __B, __C); // expected-error {{returning 'int' from a function with incompatible result type '__m512i' (vector of 8 'long long' values)}}
+}
+
+__m512i test_mm512_maskz_loadrs_epi64(__mmask8 __A, const __m512i * __B) {
+  return _mm512_maskz_loadrs_epi64(__A, __B); // expected-error {{returning 'int' from a function with incompatible result type '__m512i' (vector of 8 'long long' values)}}
+}
+
+__m512i test_mm512_loadrs_epi16(const __m512i * __A) {
+  return _mm512_loadrs_epi16(__A); // expected-error {{returning 'int' from a function with incompatible result type '__m512i' (vector of 8 'long long' values)}}
+}
+
+__m512i test_mm512_mask_loadrs_epi16(__m512i __A, __mmask32 __B, const __m512i * __C) {
+  return _mm512_mask_loadrs_epi16(__A, __B, __C); // expected-error {{returning 'int' from a function with incompatible result type '__m512i' (vector of 8 'long long' values)}}
+}
+
+__m512i test_mm512_maskz_loadrs_epi16(__mmask32 __A, const __m512i * __B) {
+  return _mm512_maskz_loadrs_epi16(__A, __B); // expected-error {{returning 'int' from a function with incompatible result type '__m512i' (vector of 8 'long long' values)}}
+}
diff --git a/clang/test/CodeGen/X86/movrs-avx10.2-512-builtins.c b/clang/test/CodeGen/X86/movrs-avx10.2-512-builtins.c
new file mode 100644
index 00000000000000..997d6dbc53a8b0
--- /dev/null
+++ b/clang/test/CodeGen/X86/movrs-avx10.2-512-builtins.c
@@ -0,0 +1,87 @@
+// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-- -target-feature +movrs -target-feature +avx10.2-512 -emit-llvm -o - -Wall -Werror | FileCheck %s
+
+#include <immintrin.h>
+
+__m512i test_mm512_loadrs_epi8(const __m512i * __A) {
+  // CHECK-LABEL: @test_mm512_loadrs_epi8(
+  // CHECK: call <64 x i8> @llvm.x86.avx10.vmovrsb512(
+  return _mm512_loadrs_epi8(__A);
+}
+
+__m512i test_mm512_mask_loadrs_epi8(__m512i __A, __mmask64 __B, const __m512i * __C) {
+  // CHECK-LABEL: @test_mm512_mask_loadrs_epi8(
+  // CHECK: call <64 x i8> @llvm.x86.avx10.vmovrsb512(
+  // CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}
+  return _mm512_mask_loadrs_epi8(__A, __B, __C);
+}
+
+__m512i test_mm512_maskz_loadrs_epi8(__mmask64 __A, const __m512i * __B) {
+  // CHECK-LABEL: @test_mm512_maskz_loadrs_epi8(
+  // CHECK: call <64 x i8> @llvm.x86.avx10.vmovrsb512(
+  // CHECK: store <8 x i64> zeroinitializer
+  // CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}
+  return _mm512_maskz_loadrs_epi8(__A, __B);
+}
+
+__m512i test_mm512_loadrs_epi32(const __m512i * __A) {
+  // CHECK-LABEL: @test_mm512_loadrs_epi32(
+  // CHECK: call <16 x i32> @llvm.x86.avx10.vmovrsd512(
+  return _mm512_loadrs_epi32(__A);
+}
+
+__m512i test_mm512_mask_loadrs_epi32(__m512i __A, __mmask16 __B, const __m512i * __C) {
+  // CHECK-LABEL: @test_mm512_mask_loadrs_epi32(
+  // CHECK: call <16 x i32> @llvm.x86.avx10.vmovrsd512(
+  // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
+  return _mm512_mask_loadrs_epi32(__A, __B, __C);
+}
+
+__m512i test_mm512_maskz_loadrs_epi32(__mmask16 __A, const __m512i * __B) {
+  // CHECK-LABEL: @test_mm512_maskz_loadrs_epi32(
+  // CHECK: call <16 x i32> @llvm.x86.avx10.vmovrsd512(
+  // CHECK: store <8 x i64> zeroinitializer
+  // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
+  return _mm512_maskz_loadrs_epi32(__A, __B);
+}
+
+__m512i test_mm512_loadrs_epi64(const __m512i * __A) {
+  // CHECK-LABEL: @test_mm512_loadrs_epi64(
+  // CHECK: call <8 x i64> @llvm.x86.avx10.vmovrsq512(
+  return _mm512_loadrs_epi64(__A);
+}
+
+__m512i test_mm512_mask_loadrs_epi64(__m512i __A, __mmask8 __B, const __m512i * __C) {
+  // CHECK-LABEL: @test_mm512_mask_loadrs_epi64(
+  // CHECK: call <8 x i64> @llvm.x86.avx10.vmovrsq512(
+  // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
+  return _mm512_mask_loadrs_epi64(__A, __B, __C);
+}
+
+__m512i test_mm512_maskz_loadrs_epi64(__mmask8 __A, const __m512i * __B) {
+  // CHECK-LABEL: @test_mm512_maskz_loadrs_epi64(
+  // CHECK: call <8 x i64> @llvm.x86.avx10.vmovrsq512(
+  // CHECK: store <8 x i64> zeroinitializer
+  // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
+  return _mm512_maskz_loadrs_epi64(__A, __B);
+}
+
+__m512i test_mm512_loadrs_epi16(const __m512i * __A) {
+  // CHECK-LABEL: @test_mm512_loadrs_epi16(
+  // CHECK: call <32 x i16> @llvm.x86.avx10.vmovrsw512(
+  return _mm512_loadrs_epi16(__A);
+}
+
+__m512i test_mm512_mask_loadrs_epi16(__m512i __A, __mmask32 __B, const __m512i * __C) {
+  // CHECK-LABEL: @test_mm512_mask_loadrs_epi16(
+  // CHECK: call <32 x i16> @llvm.x86.avx10.vmovrsw512(
+  // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
+  return _mm512_mask_loadrs_epi16(__A, __B, __C);
+}
+
+__m512i test_mm512_maskz_loadrs_epi16(__mmask32 __A, const __m512i * __B) {
+  // CHECK-LABEL: @test_mm512_maskz_loadrs_epi16(
+  // CHECK: call <32 x i16> @llvm.x86.avx10.vmovrsw512(
+  // CHECK: store <8 x i64> zeroinitializer
+  // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
+  return _mm512_maskz_loadrs_epi16(__A, __B);
+}
diff --git a/clang/test/CodeGen/X86/movrs-avx10.2-builtins-error-32.c b/clang/test/CodeGen/X86/movrs-avx10.2-builtins-error-32.c
new file mode 100644
index 00000000000000..68608b0cbff09f
--- /dev/null
+++ b/clang/test/CodeGen/X86/movrs-avx10.2-builtins-error-32.c
@@ -0,0 +1,98 @@
+// RUN: %clang_cc1 -ffreestanding %s -Wno-implicit-function-declaration -triple=i386-unknown-unknown -target-feature +movrs -target-feature +avx10.2-256 -emit-llvm -verify
+
+#include <immintrin.h>
+__m128i test_mm_loadrs_epi8(const __m128i * __A) {
+  return _mm_loadrs_epi8(__A); // expected-error {{returning 'int' from a function with incompatible result type '__m128i' (vector of 2 'long long' values)}}
+}
+
+__m128i test_mm_mask_loadrs_epi8(__m128i __A, __mmask16 __B, const __m128i * __C) {
+  return _mm_mask_loadrs_epi8(__A, __B, __C); // expected-error {{returning 'int' from a function with incompatible result type '__m128i' (vector of 2 'long long' values)}}
+}
+
+__m128i test_mm_maskz_loadrs_epi8(__mmask16 __A, const __m128i * __B) {
+  return _mm_maskz_loadrs_epi8(__A, __B); // expected-error {{returning 'int' from a function with incompatible result type '__m128i' (vector of 2 'long long' values)}}
+}
+
+__m256i test_mm256_loadrs_epi8(const __m256i * __A) {
+  return _mm256_loadrs_epi8(__A); // expected-error {{returning 'int' from a function with incompatible result type '__m256i' (vector of 4 'long long' values)}}
+}
+
+__m256i test_mm256_mask_loadrs_epi8(__m256i __A, __mmask32 __B, const __m256i * __C) {
+  return _mm256_mask_loadrs_epi8(__A, __B, __C); // expected-error {{returning 'int' from a function with incompatible result type '__m256i' (vector of 4 'long long' values)}}
+}
+
+__m256i test_mm256_maskz_loadrs_epi8(__mmask32 __A, const __m256i * __B) {
+  return _mm256_maskz_loadrs_epi8(__A, __B); // expected-error {{returning 'int' from a function with incompatible result type '__m256i' (vector of 4 'long long' values)}}
+}
+
+__m128i test_mm_loadrs_epi32(const __m128i * __A) {
+  return _mm_loadrs_epi32(__A); // expected-error {{returning 'int' from a function with incompatible result type '__m128i' (vector of 2 'long long' values)}}
+}
+
+__m128i test_mm_mask_loadrs_epi32(__m128i __A, __mmask8 __B, const __m128i * __C) {
+  return _mm_mask_loadrs_epi32(__A, __B, __C); // expected-error {{returning 'int' from a function with incompatible result type '__m128i' (vector of 2 'long long' values)}}
+}
+
+__m128i test_mm_maskz_loadrs_epi32(__mmask8 __A, const __m128i * __B) {
+  return _mm_maskz_loadrs_epi32(__A, __B); // expected-error {{returning 'int' from a function with incompatible result type '__m128i' (vector of 2 'long long' values)}}
+}
+
+__m256i test_mm256_loadrs_epi32(const __m256i * __A) {
+  return _mm256_loadrs_epi32(__A); // expected-error {{returning 'int' from a function with incompatible result type '__m256i' (vector of 4 'long long' values)}}
+}
+
+__m256i test_mm256_mask_loadrs_epi32(__m256i __A, __mmask8 __B, const __m256i * __C) {
+  return _mm256_mask_loadrs_epi32(__A, __B, __C); // expected-error {{returning 'int' from a function with incompatible result type '__m256i' (vector of 4 'long long' values)}}
+}
+
+__m256i test_mm256_maskz_loadrs_epi32(__mmask8 __A, const __m256i * __B) {
+  return _mm256_maskz_loadrs_epi32(__A, __B); // expected-error {{returning 'int' from a function with incompatible result type '__m256i' (vector of 4 'long long' values)}}
+}
+
+__m128i test_mm_loadrs_epi64(const __m128i * __A) {
+  return _mm_loadrs_epi64(__A); // expected-error {{returning 'int' from a function with incompatible result type '__m128i' (vector of 2 'long long' values)}}
+}
+
+__m128i test_mm_mask_loadrs_epi64(__m128i __A, __mmask8 __B, const __m128i * __C) {
+  return _mm_mask_loadrs_epi64(__A, __B, __C); // expected-error {{returning 'int' from a function with incompatible result type '__m128i' (vector of 2 'long long' values)}}
+}
+
+__m128i test_mm_maskz_loadrs_epi64(__mmask8 __A, const __m128i * __B) {
+  return _mm_maskz_loadrs_epi64(__A, __B); // expected-error {{returning 'int' from a function with incompatible result type '__m128i' (vector of 2 'long long' values)}}
+}
+
+__m256i test_mm256_loadrs_epi64(const __m256i * __A) {
+  return _mm256_loadrs_epi64(__A); // expected-error {{returning 'int' from a function with incompatible result type '__m256i' (vector of 4 'long long' values)}}
+}
+
+__m256i test_mm256_mask_loadrs_epi64(__m256i __A, __mmask8 __B, const __m256i * __C) {
+  return _mm256_mask_loadrs_epi64(__A, __B, __C); // expected-error {{returning 'int' from a function with incompatible result type '__m256i' (vector of 4 'long long' values)}}
+}
+
+__m256i test_mm256_maskz_loadrs_epi64(__mmask8 __A, const __m256i * __B) {
+  return _mm256_maskz_loadrs_epi64(__A, __B); // expected-error {{returning 'int' from a function with incompatible result type '__m256i' (vector of 4 'long long' values)}}
+}
+
+__m128i test_mm_loadrs_epi16(const __m128i * __A) {
+  return _mm_loadrs_epi16(__A); // expected-error {{returning 'int' from a function with incompatible result type '__m128i' (vector of 2 'long long' values)}}
+}
+
+__m128i test_mm_mask_loadrs_epi16(__m128i __A, __mmask8 __B, const __m128i * __C) {
+  return _mm_mask_loadrs_epi16(__A, __B, __C); // expected-error {{returning 'int' from a function with incompatible result type '__m128i' (vector of 2 'long long' values)}}
+}
+
+__m128i test_mm_maskz_loadrs_epi16(__mmask8 __A, const __m128i * __B) {
+  return _mm_maskz_loadrs_epi16(__A, __B); // expected-error {{returning 'int' from a function with incompatible result type '__m128i' (vector of 2 'long long' values)}}
+}
+
+__m256i test_mm256_loadrs_epi16(const __m256i * __A) {
+  return _mm256_loadrs_epi16(__A); // expected-error {{returning 'int' from a function with incompatible result type '__m256i' (vector of 4 'long long' values)}}
+}
+
+__m256i test_mm256_mask_loadrs_epi16(__m256i __A, __mmask16 __B, const __m256i * __C) {
+  return _mm256_mask_loadrs_epi16(__A, __B, __C); // expected-error {{returning 'int' from a function with incompatible result type '__m256i' (vector of 4 'long long' values)}}
+}
+
+__m256i test_mm256_maskz_loadrs_epi16(__mmask16 __A, const __m256i * __B) {
+  return _mm256_maskz_loadrs_epi16(__A, __B); // expected-error {{returning 'int' from a function with incompatible result type '__m256i' (vector of 4 'long long' values)}}
+}
diff --git a/clang/test/CodeGen/X86/movrs-avx10.2-builtins.c b/clang/test/CodeGen/X86/movrs-avx10.2-builtins.c
new file mode 100644
index 00000000000000..2011b2a8624738
--- /dev/null
+++ b/clang/test/CodeGen/X86/movrs-avx10.2-builtins.c
@@ -0,0 +1,171 @@
+// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-- -target-feature +movrs -target-feature +avx10.2-256 -emit-llvm -o - -Wall -Werror | FileCheck %s
+
+#include <immintrin.h>
+
+__m128i test_mm_loadrs_epi8(const __m128i * __A) {
+  // CHECK-LABEL: @test_mm_loadrs_epi8(
+  // CHECK: call <16 x i8> @llvm.x86.avx10.vmovrsb128(
+  return _mm_loadrs_epi8(__A);
+}
+
+__m128i test_mm_mask_loadrs_epi8(__m128i __A, __mmask16 __B, const __m128i * __C) {
+  // CHECK-LABEL: @test_mm_mask_loadrs_epi8(
+  // CHECK: call <16 x i8> @llvm.x86.avx10.vmovrsb128(
+  // CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}
+  return _mm_mask_loadrs_epi8(__A, __B, __C);
+}
+
+__m128i test_mm_maskz_loadrs_epi8(__mmask16 __A, const __m128i * __B) {
+  // CHECK-LABEL: @test_mm_maskz_loadrs_epi8(
+  // CHECK: call <16 x i8> @llvm.x86.avx10.vmovrsb128(
+  // CHECK: store <2 x i64> zeroinitializer
+  // CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}
+  return _mm_maskz_loadrs_epi8(__A, __B);
+}
+
+__m256i test_mm256_loadrs_epi8(const __m256i * __A) {
+  // CHECK-LABEL: @test_mm256_loadrs_epi8(
+  // CHECK: call <32 x i8> @llvm.x86.avx10.vmovrsb256(
+  return _mm256_loadrs_epi8(__A);
+}
+
+__m256i test_mm256_mask_loadrs_epi8(__m256i __A, __mmask32 __B, const __m256i * __C) {
+  // CHECK-LABEL: @test_mm256_mask_loadrs_epi8(
+  // CHECK: call <32 x i8> @llvm.x86.avx10.vmovrsb256(
+  // CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}
+  return _mm256_mask_loadrs_epi8(__A, __B, __C);
+}
+
+__m256i test_mm256_maskz_loadrs_epi8(__mmask32 __A, const __m256i * __B) {
+  // CHECK-LABEL: @test_mm256_maskz_loadrs_epi8(
+  // CHECK: call <32 x i8> @llvm.x86.avx10.vmovrsb256(
+  // CHECK: store <4 x i64> zeroinitializer
+  // CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}
+  return _mm256_maskz_loadrs_epi8(__A, __B);
+}
+
+__m128i test_mm_loadrs_epi32(const __m128i * __A) {
+  // CHECK-LABEL: @test_mm_loadrs_epi32(
+  // CHECK: call <4 x i32> @llvm.x86.avx10.vmovrsd128(
+  return _mm_loadrs_epi32(__A);
+}
+
+__m128i test_mm_mask_loadrs_epi32(__m128i __A, __mmask8 __B, const __m128i * __C) {
+  // CHECK-LABEL: @test_mm_mask_loadrs_epi32(
+  // CHECK: call <4 x i32> @llvm.x86.avx10.vmovrsd128(
+  // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
+  return _mm_mask_loadrs_epi32(__A, __B, __C);
+}
+
+__m128i test_mm_maskz_loadrs_epi32(__mmask8 __A, const __m128i * __B) {
+  // CHECK-LABEL: @test_mm_maskz_loadrs_epi32(
+  // CHECK: call <4 x i32> @llvm.x86.avx10.vmovrsd128(
+  // CHECK: store <2 x i64> zeroinitializer
+  // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
+  return _mm_maskz_loadrs_epi32(__A, __B);
+}
+
+__m256i test_mm256_loadrs_epi32(const __m256i * __A) {
+  // CHECK-LABEL: @test_mm256_loadrs_epi32(
+  // CHECK: call <8 x i32> @llvm.x86.avx10.vmovrsd256(
+  return _mm256_loadrs_epi32(__A);
+}
+
+__m256i test_mm256_mask_loadrs_epi32(__m256i __A, __mmask8 __B, const __m256i * __C) {
+  // CHECK-LABEL: @test_mm256_mask_loadrs_epi32(
+  // CHECK: call <8 x i32> @llvm.x86.avx10.vmovrsd256(
+  // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
+  return _mm256_mask_loadrs_epi32(__A, __B, __C);
+}
+
+__m256i test_mm256_maskz_loadrs_epi32(__mmask8 __A, const __m256i * __B) {
+  // CHECK-LABEL: @test_mm256_maskz_loadrs_epi32(
+  // CHECK: call <8 x i32> @llvm.x86.avx10.vmovrsd256(
+  // CHECK: store <4 x i64> zeroinitializer
+  // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
+  return _mm256_maskz_loadrs_epi32(__A, __B);
+}
+
+__m128i test_mm_loadrs_epi64(const __m128i * __A) {
+  // CHECK-LABEL: @test_mm_loadrs_epi64(
+  // CHECK: call <2 x i64> @llvm.x86.avx10.vmovrsq128(
+  return _mm_loadrs_epi64(__A);
+}
+
+__m128i test_mm_mask_loadrs_epi64(__m128i __A, __mmask8 __B, const __m128i * __C) {
+  // CHECK-LABEL: @test_mm_mask_loadrs_epi64(
+  // CHECK: call <2 x i64> @llvm.x86.avx10.vmovrsq128(
+  // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}
+  return _mm_mask_loadrs_epi64(__A, __B, __C);
+}
+
+__m128i test_mm_maskz_loadrs_epi64(__mmask8 __A, const __m128i * __B) {
+  // CHECK-LABEL: @test_mm_maskz_loadrs_epi64(
+  // CHECK: call <2 x i64> @llvm.x86.avx10.vmovrsq128(
+  // CHECK: store <2 x i64> zeroinitializer
+  // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}
+  return _mm_maskz_loadrs_epi64(__A, __B);
+}
+
+__m256i test_mm256_loadrs_epi64(const __m256i * __A) {
+  // CHECK-LABEL: @test_mm256_loadrs_epi64(
+  // CHECK: call <4 x i64> @llvm.x86.avx10.vmovrsq256(
+  return _mm256_loadrs_epi64(__A);
+}
+
+__m256i test_mm256_mask_loadrs_epi64(__m256i __A, __mmask8 __B, const __m256i * __C) {
+  // CHECK-LABEL: @test_mm256_mask_loadrs_epi64(
+  // CHECK: call <4 x i64> @llvm.x86.avx10.vmovrsq256(
+  // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}
+  return _mm256_mask_loadrs_epi64(__A, __B, __C);
+}
+
+__m256i test_mm256_maskz_loadrs_epi64(__mmask8 __A, const __m256i * __B) {
+  // CHECK-LABEL: @test_mm256_maskz_loadrs_epi64(
+  // CHECK: call <4 x i64> @llvm.x86.avx10.vmovrsq256(
+  // CHECK: store <4 x i64> zeroinitializer
+  // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}
+  return _mm256_maskz_loadrs_epi64(__A, __B);
+}
+
+__m128i test_mm_loadrs_epi16(const __m128i * __A) {
+  // CHECK-LABEL: @test_mm_loadrs_epi16(
+  // CHECK: call <8 x i16> @llvm.x86.avx10.vmovrsw128(
+  return _mm_loadrs_epi16(__A);
+}
+
+__m128i test_mm_mask_loadrs_epi16(__m128i __A, __mmask8 __B, const __m128i * __C) {
+  // CHECK-LABEL: @test_mm_mask_loadrs_epi16(
+  // CHECK: call <8 x i16> @llvm.x86.avx10.vmovrsw128(
+  // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
+  return _mm_mask_loadrs_epi16(__A, __B, __C);
+}
+
+__m128i test_mm_maskz_loadrs_epi16(__mmask8 __A, const __m128i * __B) {
+  // CHECK-LABEL: @test_mm_maskz_loadrs_epi16(
+  // CHECK: call <8 x i16> @llvm.x86.avx10.vmovrsw128(
+  // CHECK: store <2 x i64> zeroinitializer
+  // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
+  return _mm_maskz_loadrs_epi16(__A, __B);
+}
+
+__m256i test_mm256_loadrs_epi16(const __m256i * __A) {
+  // CHECK-LABEL: @test_mm256_loadrs_epi16(
+  // CHECK: call <16 x i16> @llvm.x86.avx10.vmovrsw256(
+  return _mm256_loadrs_epi16(__A);
+}
+
+__m256i test_mm256_mask_loadrs_epi16(__m256i __A, __mmask16 __B, const __m256i * __C) {
+  // CHECK-LABEL: @test_mm256_mask_loadrs_epi16(
+  // CHECK: call <16 x i16> @llvm.x86.avx10.vmovrsw256(
+  // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
+  return _mm256_mask_loadrs_epi16(__A, __B, __C);
+}
+
+__m256i test_mm256_maskz_loadrs_epi16(__mmask16 __A, const __m256i * __B) {
+  // CHECK-LABEL: @test_mm256_maskz_loadrs_epi16(
+  // CHECK: call <16 x i16> @llvm.x86.avx10.vmovrsw256(
+  // CHECK: store <4 x i64> zeroinitializer
+  // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
+  return _mm256_maskz_loadrs_epi16(__A, __B);
+}
diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td
index 5262e3154ff721..33070fef7a7351 100644
--- a/llvm/include/llvm/IR/IntrinsicsX86.td
+++ b/llvm/include/llvm/IR/IntrinsicsX86.td
@@ -7572,3 +7572,42 @@ def int_x86_avx10_vfnmsub231nepbf16128 : ClangBuiltin<"__builtin_ia32_vfnmsub231
         DefaultAttrsIntrinsic<[llvm_v8bf16_ty], [llvm_v8bf16_ty, llvm_v8bf16_ty, llvm_v8bf16_ty ],
                               [IntrNoMem]>;
 }
+
+let TargetPrefix = "x86" in {  // MOVRS loads: take a ptr, return a vector.
+def int_x86_avx10_vmovrsb128 : ClangBuiltin<"__builtin_ia32_vmovrsb128">,
+        DefaultAttrsIntrinsic<[llvm_v16i8_ty], [llvm_ptr_ty],
+                              [IntrReadMem]>;
+def int_x86_avx10_vmovrsb256 : ClangBuiltin<"__builtin_ia32_vmovrsb256">,
+        DefaultAttrsIntrinsic<[llvm_v32i8_ty], [llvm_ptr_ty],
+                              [IntrReadMem]>;
+def int_x86_avx10_vmovrsb512 : ClangBuiltin<"__builtin_ia32_vmovrsb512">,
+        DefaultAttrsIntrinsic<[llvm_v64i8_ty], [llvm_ptr_ty],
+                              [IntrReadMem]>;
+def int_x86_avx10_vmovrsd128 : ClangBuiltin<"__builtin_ia32_vmovrsd128">,
+        DefaultAttrsIntrinsic<[llvm_v4i32_ty], [llvm_ptr_ty],
+                              [IntrReadMem]>;
+def int_x86_avx10_vmovrsd256 : ClangBuiltin<"__builtin_ia32_vmovrsd256">,
+        DefaultAttrsIntrinsic<[llvm_v8i32_ty], [llvm_ptr_ty],
+                              [IntrReadMem]>;
+def int_x86_avx10_vmovrsd512 : ClangBuiltin<"__builtin_ia32_vmovrsd512">,
+        DefaultAttrsIntrinsic<[llvm_v16i32_ty], [llvm_ptr_ty],
+                              [IntrReadMem]>;
+def int_x86_avx10_vmovrsq128 : ClangBuiltin<"__builtin_ia32_vmovrsq128">,
+        DefaultAttrsIntrinsic<[llvm_v2i64_ty], [llvm_ptr_ty],
+                              [IntrReadMem]>;
+def int_x86_avx10_vmovrsq256 : ClangBuiltin<"__builtin_ia32_vmovrsq256">,
+        DefaultAttrsIntrinsic<[llvm_v4i64_ty], [llvm_ptr_ty],
+                              [IntrReadMem]>;
+def int_x86_avx10_vmovrsq512 : ClangBuiltin<"__builtin_ia32_vmovrsq512">,
+        DefaultAttrsIntrinsic<[llvm_v8i64_ty], [llvm_ptr_ty],
+                              [IntrReadMem]>;
+def int_x86_avx10_vmovrsw128 : ClangBuiltin<"__builtin_ia32_vmovrsw128">,
+        DefaultAttrsIntrinsic<[llvm_v8i16_ty], [llvm_ptr_ty],
+                              [IntrReadMem]>;
+def int_x86_avx10_vmovrsw256 : ClangBuiltin<"__builtin_ia32_vmovrsw256">,
+        DefaultAttrsIntrinsic<[llvm_v16i16_ty], [llvm_ptr_ty],
+                              [IntrReadMem]>;
+def int_x86_avx10_vmovrsw512 : ClangBuiltin<"__builtin_ia32_vmovrsw512">,
+        DefaultAttrsIntrinsic<[llvm_v32i16_ty], [llvm_ptr_ty],
+                              [IntrReadMem]>;
+}
diff --git a/llvm/include/llvm/TargetParser/X86TargetParser.def b/llvm/include/llvm/TargetParser/X86TargetParser.def
index e5bf196559ba63..80468146a02b00 100644
--- a/llvm/include/llvm/TargetParser/X86TargetParser.def
+++ b/llvm/include/llvm/TargetParser/X86TargetParser.def
@@ -261,6 +261,7 @@ X86_FEATURE_COMPAT(AVX10_1,         "avx10.1-256",           36)
 X86_FEATURE_COMPAT(AVX10_1_512,     "avx10.1-512",           37)
 X86_FEATURE_COMPAT(AVX10_2,         "avx10.2-256",            0)
 X86_FEATURE_COMPAT(AVX10_2_512,     "avx10.2-512",            0)
+X86_FEATURE_COMPAT(MOVRS,           "movrs",                  0)
 X86_FEATURE       (ZU,              "zu")
 // These features aren't really CPU features, but the frontend can set them.
 X86_FEATURE       (RETPOLINE_EXTERNAL_THUNK,    "retpoline-external-thunk")
diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td
index d57450d91ea2dd..6bedf9e1d13ac3 100644
--- a/llvm/lib/Target/X86/X86.td
+++ b/llvm/lib/Target/X86/X86.td
@@ -351,6 +351,8 @@ def FeatureZU : SubtargetFeature<"zu", "HasZU", "true",
 def FeatureUseGPR32InInlineAsm
     : SubtargetFeature<"inline-asm-use-gpr32", "UseInlineAsmGPR32", "true",
                        "Enable use of GPR32 in inline assembly for APX">;
+def FeatureMOVRS   : SubtargetFeature<"movrs", "HasMOVRS", "true",
+                           "Enable MOVRS", []>;
 
 // Ivy Bridge and newer processors have enhanced REP MOVSB and STOSB (aka
 // "string operations"). See "REP String Enhancement" in the Intel Software
diff --git a/llvm/lib/Target/X86/X86InstrAVX10.td b/llvm/lib/Target/X86/X86InstrAVX10.td
index 625f2e01d47218..9ef2debb57fa00 100644
--- a/llvm/lib/Target/X86/X86InstrAVX10.td
+++ b/llvm/lib/Target/X86/X86InstrAVX10.td
@@ -1647,3 +1647,31 @@ let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
   def : InstAlias<"vmovw.s\t{$src, $dst|$dst, $src}",
                   (VMOVZPWILo2PWIZrr2 VR128X:$dst, VR128X:$src), 0>;
 }
+
+// MOVRS vector loads. Only selectable in 64-bit mode (see Predicates below).
+multiclass vmovrs_p<bits<8> opc, string OpStr, X86VectorVTInfo _> {
+  let ExeDomain = _.ExeDomain in {
+    defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
+                            (ins _.MemOp:$src), OpStr, "$src", "$src",
+                            (_.VT (!cast<Intrinsic>("int_x86_avx10_"#OpStr#_.Size)
+                                   addr:$src))>, EVEX;
+  }
+}
+
+multiclass vmovrs_p_vl<bits<8> opc, string OpStr, AVX512VLVectorVTInfo _Vec> {
+  let Predicates = [HasMOVRS, HasAVX10_2_512, In64BitMode] in
+    defm Z : vmovrs_p<opc, OpStr, _Vec.info512>, EVEX_V512;
+  let Predicates = [HasMOVRS, HasAVX10_2, In64BitMode] in {
+    defm Z128 : vmovrs_p<opc, OpStr, _Vec.info128>, EVEX_V128;
+    defm Z256 : vmovrs_p<opc, OpStr, _Vec.info256>, EVEX_V256;
+  }
+}
+
+defm VMOVRSB : vmovrs_p_vl<0x6f, "vmovrsb", avx512vl_i8_info>,
+                          T_MAP5, XD, EVEX_CD8<8, CD8VF>, Sched<[WriteVecLoad]>;
+defm VMOVRSW : vmovrs_p_vl<0x6f, "vmovrsw", avx512vl_i16_info>,
+                          T_MAP5, XD, REX_W, EVEX_CD8<16, CD8VF>, Sched<[WriteVecLoad]>;
+defm VMOVRSD : vmovrs_p_vl<0x6f, "vmovrsd", avx512vl_i32_info>,
+                          T_MAP5, XS, EVEX_CD8<32, CD8VF>, Sched<[WriteVecLoad]>;
+defm VMOVRSQ : vmovrs_p_vl<0x6f, "vmovrsq", avx512vl_i64_info>,
+                          T_MAP5, XS, REX_W, EVEX_CD8<64, CD8VF>, Sched<[WriteVecLoad]>;
diff --git a/llvm/lib/Target/X86/X86InstrPredicates.td b/llvm/lib/Target/X86/X86InstrPredicates.td
index a815ddc9714f0c..7fb566fba51818 100644
--- a/llvm/lib/Target/X86/X86InstrPredicates.td
+++ b/llvm/lib/Target/X86/X86InstrPredicates.td
@@ -152,6 +152,7 @@ def HasCLZERO    : Predicate<"Subtarget->hasCLZERO()">;
 def HasCLDEMOTE  : Predicate<"Subtarget->hasCLDEMOTE()">;
 def HasMOVDIRI   : Predicate<"Subtarget->hasMOVDIRI()">;
 def HasMOVDIR64B : Predicate<"Subtarget->hasMOVDIR64B()">;
+def HasMOVRS     : Predicate<"Subtarget->hasMOVRS()">;
 def HasPTWRITE   : Predicate<"Subtarget->hasPTWRITE()">;
 def FPStackf32   : Predicate<"!Subtarget->hasSSE1()">;
 def FPStackf64   : Predicate<"!Subtarget->hasSSE2()">;
diff --git a/llvm/lib/TargetParser/X86TargetParser.cpp b/llvm/lib/TargetParser/X86TargetParser.cpp
index 09d4312918acfe..586df5748aa822 100644
--- a/llvm/lib/TargetParser/X86TargetParser.cpp
+++ b/llvm/lib/TargetParser/X86TargetParser.cpp
@@ -639,6 +639,8 @@ constexpr FeatureBitset ImpliedFeaturesNF = {};
 constexpr FeatureBitset ImpliedFeaturesCF = {};
 constexpr FeatureBitset ImpliedFeaturesZU = {};
 
+constexpr FeatureBitset ImpliedFeaturesMOVRS = {};
+
 constexpr FeatureInfo FeatureInfos[X86::CPU_FEATURE_MAX] = {
 #define X86_FEATURE(ENUM, STR) {{"+" STR}, ImpliedFeatures##ENUM},
 #include "llvm/TargetParser/X86TargetParser.def"
diff --git a/llvm/test/CodeGen/X86/movrs-avx10.2-512-intrinsics.ll b/llvm/test/CodeGen/X86/movrs-avx10.2-512-intrinsics.ll
new file mode 100644
index 00000000000000..a730ef519c015e
--- /dev/null
+++ b/llvm/test/CodeGen/X86/movrs-avx10.2-512-intrinsics.ll
@@ -0,0 +1,163 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+movrs,+avx10.2-512 -verify-machineinstrs --show-mc-encoding | FileCheck %s --check-prefixes=CHECK
+
+declare <64 x i8> @llvm.x86.avx10.vmovrsb512(ptr)
+declare <16 x i32> @llvm.x86.avx10.vmovrsd512(ptr)
+declare <8 x i64> @llvm.x86.avx10.vmovrsq512(ptr)
+declare <32 x i16> @llvm.x86.avx10.vmovrsw512(ptr)
+
+define <8 x i64> @test_mm512_movrsb_epi8(ptr %__A) {
+; CHECK-LABEL: test_mm512_movrsb_epi8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vmovrsb (%rdi), %zmm0 # encoding: [0x62,0xf5,0x7f,0x48,0x6f,0x07]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %0 = tail call <64 x i8> @llvm.x86.avx10.vmovrsb512(ptr %__A)
+  %1 = bitcast <64 x i8> %0 to <8 x i64>
+  ret <8 x i64> %1
+}
+
+define <8 x i64> @test_mm512_mask_movrsb_epi8(<8 x i64> %__A, i64 %__B, ptr %__C) {
+; CHECK-LABEL: test_mm512_mask_movrsb_epi8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
+; CHECK-NEXT:    vmovrsb (%rsi), %zmm0 {%k1} # encoding: [0x62,0xf5,0x7f,0x49,0x6f,0x06]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %0 = tail call <64 x i8> @llvm.x86.avx10.vmovrsb512(ptr %__C)
+  %1 = bitcast <8 x i64> %__A to <64 x i8>
+  %2 = bitcast i64 %__B to <64 x i1>
+  %3 = select <64 x i1> %2, <64 x i8> %0, <64 x i8> %1
+  %4 = bitcast <64 x i8> %3 to <8 x i64>
+  ret <8 x i64> %4
+}
+
+define dso_local <8 x i64> @test_mm512_maskz_movrsb_epi8(i64 %__A, ptr %__B) {
+; CHECK-LABEL: test_mm512_maskz_movrsb_epi8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
+; CHECK-NEXT:    vmovrsb (%rsi), %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7f,0xc9,0x6f,0x06]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %0 = tail call <64 x i8> @llvm.x86.avx10.vmovrsb512(ptr %__B)
+  %1 = bitcast i64 %__A to <64 x i1>
+  %2 = select <64 x i1> %1, <64 x i8> %0, <64 x i8> zeroinitializer
+  %3 = bitcast <64 x i8> %2 to <8 x i64>
+  ret <8 x i64> %3
+}
+
+define <8 x i64> @test_mm512_movrsd_epi32(ptr %__A) {
+; CHECK-LABEL: test_mm512_movrsd_epi32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vmovrsd (%rdi), %zmm0 # encoding: [0x62,0xf5,0x7e,0x48,0x6f,0x07]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %0 = tail call <16 x i32> @llvm.x86.avx10.vmovrsd512(ptr %__A)
+  %1 = bitcast <16 x i32> %0 to <8 x i64>
+  ret <8 x i64> %1
+}
+
+define <8 x i64> @test_mm512_mask_movrsd_epi32(<8 x i64> %__A, i16 zeroext %__B, ptr %__C) {
+; CHECK-LABEL: test_mm512_mask_movrsd_epi32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; CHECK-NEXT:    vmovrsd (%rsi), %zmm0 {%k1} # encoding: [0x62,0xf5,0x7e,0x49,0x6f,0x06]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %0 = tail call <16 x i32> @llvm.x86.avx10.vmovrsd512(ptr %__C)
+  %1 = bitcast <8 x i64> %__A to <16 x i32>
+  %2 = bitcast i16 %__B to <16 x i1>
+  %3 = select <16 x i1> %2, <16 x i32> %0, <16 x i32> %1
+  %4 = bitcast <16 x i32> %3 to <8 x i64>
+  ret <8 x i64> %4
+}
+
+define <8 x i64> @test_mm512_maskz_movrsd_epi32(i16 zeroext %__A, ptr %__B) {
+; CHECK-LABEL: test_mm512_maskz_movrsd_epi32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; CHECK-NEXT:    vmovrsd (%rsi), %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7e,0xc9,0x6f,0x06]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %0 = tail call <16 x i32> @llvm.x86.avx10.vmovrsd512(ptr %__B)
+  %1 = bitcast i16 %__A to <16 x i1>
+  %2 = select <16 x i1> %1, <16 x i32> %0, <16 x i32> zeroinitializer
+  %3 = bitcast <16 x i32> %2 to <8 x i64>
+  ret <8 x i64> %3
+}
+
+define <8 x i64> @test_mm512_movrsq_epi64(ptr %__A) {
+; CHECK-LABEL: test_mm512_movrsq_epi64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vmovrsq (%rdi), %zmm0 # encoding: [0x62,0xf5,0xfe,0x48,0x6f,0x07]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %0 = tail call <8 x i64> @llvm.x86.avx10.vmovrsq512(ptr %__A)
+  ret <8 x i64> %0
+}
+
+define <8 x i64> @test_mm512_mask_movrsq_epi64(<8 x i64> %__A, i8 zeroext %__B, ptr %__C) {
+; CHECK-LABEL: test_mm512_mask_movrsq_epi64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; CHECK-NEXT:    vmovrsq (%rsi), %zmm0 {%k1} # encoding: [0x62,0xf5,0xfe,0x49,0x6f,0x06]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %0 = tail call <8 x i64> @llvm.x86.avx10.vmovrsq512(ptr %__C)
+  %1 = bitcast i8 %__B to <8 x i1>
+  %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> %__A
+  ret <8 x i64> %2
+}
+
+define <8 x i64> @test_mm512_maskz_movrsq_epi64(i8 zeroext %__A, ptr %__B) {
+; CHECK-LABEL: test_mm512_maskz_movrsq_epi64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; CHECK-NEXT:    vmovrsq (%rsi), %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0xfe,0xc9,0x6f,0x06]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %0 = tail call <8 x i64> @llvm.x86.avx10.vmovrsq512(ptr %__B)
+  %1 = bitcast i8 %__A to <8 x i1>
+  %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> zeroinitializer
+  ret <8 x i64> %2
+}
+
+define <8 x i64> @test_mm512_movrsw_epi16(ptr %__A) {
+; CHECK-LABEL: test_mm512_movrsw_epi16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vmovrsw (%rdi), %zmm0 # encoding: [0x62,0xf5,0xff,0x48,0x6f,0x07]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %0 = tail call <32 x i16> @llvm.x86.avx10.vmovrsw512(ptr %__A)
+  %1 = bitcast <32 x i16> %0 to <8 x i64>
+  ret <8 x i64> %1
+}
+
+define <8 x i64> @test_mm512_mask_movrsw_epi16(<8 x i64> %__A, i32 %__B, ptr %__C) {
+; CHECK-LABEL: test_mm512_mask_movrsw_epi16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; CHECK-NEXT:    vmovrsw (%rsi), %zmm0 {%k1} # encoding: [0x62,0xf5,0xff,0x49,0x6f,0x06]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %0 = tail call <32 x i16> @llvm.x86.avx10.vmovrsw512(ptr %__C)
+  %1 = bitcast <8 x i64> %__A to <32 x i16>
+  %2 = bitcast i32 %__B to <32 x i1>
+  %3 = select <32 x i1> %2, <32 x i16> %0, <32 x i16> %1
+  %4 = bitcast <32 x i16> %3 to <8 x i64>
+  ret <8 x i64> %4
+}
+
+define <8 x i64> @test_mm512_maskz_movrsw_epi16(i32 %__A, ptr %__B) {
+; CHECK-LABEL: test_mm512_maskz_movrsw_epi16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; CHECK-NEXT:    vmovrsw (%rsi), %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0xff,0xc9,0x6f,0x06]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %0 = tail call <32 x i16> @llvm.x86.avx10.vmovrsw512(ptr %__B)
+  %1 = bitcast i32 %__A to <32 x i1>
+  %2 = select <32 x i1> %1, <32 x i16> %0, <32 x i16> zeroinitializer
+  %3 = bitcast <32 x i16> %2 to <8 x i64>
+  ret <8 x i64> %3
+}
diff --git a/llvm/test/CodeGen/X86/movrs-avx10.2-intrinsics.ll b/llvm/test/CodeGen/X86/movrs-avx10.2-intrinsics.ll
new file mode 100644
index 00000000000000..583e16351652b2
--- /dev/null
+++ b/llvm/test/CodeGen/X86/movrs-avx10.2-intrinsics.ll
@@ -0,0 +1,329 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+movrs,+avx10.2-256 -verify-machineinstrs --show-mc-encoding | FileCheck %s --check-prefixes=CHECK
+
+define <2 x i64> @test_mm_movrsb_epu8(ptr %__A) {
+; CHECK-LABEL: test_mm_movrsb_epu8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vmovrsb (%rdi), %xmm0 # encoding: [0x62,0xf5,0x7f,0x08,0x6f,0x07]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %0 = tail call <16 x i8> @llvm.x86.avx10.vmovrsb128(ptr %__A)
+  %1 = bitcast <16 x i8> %0 to <2 x i64>
+  ret <2 x i64> %1
+}
+
+define <2 x i64> @test_mm_mask_movrsb_epu8(<2 x i64> %__A, i16 zeroext %__B, ptr %__C) {
+; CHECK-LABEL: test_mm_mask_movrsb_epu8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; CHECK-NEXT:    vmovrsb (%rsi), %xmm0 {%k1} # encoding: [0x62,0xf5,0x7f,0x09,0x6f,0x06]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %0 = tail call <16 x i8> @llvm.x86.avx10.vmovrsb128(ptr %__C)
+  %1 = bitcast <2 x i64> %__A to <16 x i8>
+  %2 = bitcast i16 %__B to <16 x i1>
+  %3 = select <16 x i1> %2, <16 x i8> %0, <16 x i8> %1
+  %4 = bitcast <16 x i8> %3 to <2 x i64>
+  ret <2 x i64> %4
+}
+
+define <2 x i64> @test_mm_maskz_movrsb_epu8(i16 zeroext %__A, ptr %__B) {
+; CHECK-LABEL: test_mm_maskz_movrsb_epu8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; CHECK-NEXT:    vmovrsb (%rsi), %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7f,0x89,0x6f,0x06]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %0 = tail call <16 x i8> @llvm.x86.avx10.vmovrsb128(ptr %__B )
+  %1 = bitcast i16 %__A to <16 x i1>
+  %2 = select <16 x i1> %1, <16 x i8> %0, <16 x i8> zeroinitializer
+  %3 = bitcast <16 x i8> %2 to <2 x i64>
+  ret <2 x i64> %3
+}
+
+define <4 x i64> @test_mm256_movrsb_epu8(ptr %__A) {
+; CHECK-LABEL: test_mm256_movrsb_epu8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vmovrsb (%rdi), %ymm0 # encoding: [0x62,0xf5,0x7f,0x28,0x6f,0x07]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %0 = tail call <32 x i8> @llvm.x86.avx10.vmovrsb256(ptr %__A)
+  %1 = bitcast <32 x i8> %0 to <4 x i64>
+  ret <4 x i64> %1
+}
+
+define <4 x i64> @test_mm256_mask_movrsb_epu8(<4 x i64> %__A, i32 %__B, ptr %__C) {
+; CHECK-LABEL: test_mm256_mask_movrsb_epu8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; CHECK-NEXT:    vmovrsb (%rsi), %ymm0 {%k1} # encoding: [0x62,0xf5,0x7f,0x29,0x6f,0x06]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %0 = tail call <32 x i8> @llvm.x86.avx10.vmovrsb256(ptr %__C)
+  %1 = bitcast <4 x i64> %__A to <32 x i8>
+  %2 = bitcast i32 %__B to <32 x i1>
+  %3 = select <32 x i1> %2, <32 x i8> %0, <32 x i8> %1
+  %4 = bitcast <32 x i8> %3 to <4 x i64>
+  ret <4 x i64> %4
+}
+
+define <4 x i64> @test_mm256_maskz_movrsb_epu8(i32 %__A, ptr %__B) {
+; CHECK-LABEL: test_mm256_maskz_movrsb_epu8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; CHECK-NEXT:    vmovrsb (%rsi), %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x7f,0xa9,0x6f,0x06]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %0 = tail call <32 x i8> @llvm.x86.avx10.vmovrsb256(ptr %__B)
+  %1 = bitcast i32 %__A to <32 x i1>
+  %2 = select <32 x i1> %1, <32 x i8> %0, <32 x i8> zeroinitializer
+  %3 = bitcast <32 x i8> %2 to <4 x i64>
+  ret <4 x i64> %3
+}
+
+define <2 x i64> @test_mm_movrsd_epu32(ptr %__A) {
+; CHECK-LABEL: test_mm_movrsd_epu32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vmovrsd (%rdi), %xmm0 # encoding: [0x62,0xf5,0x7e,0x08,0x6f,0x07]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %0 = tail call <4 x i32> @llvm.x86.avx10.vmovrsd128(ptr %__A)
+  %1 = bitcast <4 x i32> %0 to <2 x i64>
+  ret <2 x i64> %1
+}
+
+define <2 x i64> @test_mm_mask_movrsd_epu32(<2 x i64> %__A, i8 zeroext %__B, ptr %__C) {
+; CHECK-LABEL: test_mm_mask_movrsd_epu32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; CHECK-NEXT:    vmovrsd (%rsi), %xmm0 {%k1} # encoding: [0x62,0xf5,0x7e,0x09,0x6f,0x06]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %0 = tail call <4 x i32> @llvm.x86.avx10.vmovrsd128(ptr %__C)
+  %1 = bitcast <2 x i64> %__A to <4 x i32>
+  %2 = bitcast i8 %__B to <8 x i1>
+  %extract.i = shufflevector <8 x i1> %2, <8 x i1> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %3 = select <4 x i1> %extract.i, <4 x i32> %0, <4 x i32> %1
+  %4 = bitcast <4 x i32> %3 to <2 x i64>
+  ret <2 x i64> %4
+}
+
+define <2 x i64> @test_mm_maskz_movrsd_epu32(i8 zeroext %__A, ptr %__B) {
+; CHECK-LABEL: test_mm_maskz_movrsd_epu32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; CHECK-NEXT:    vmovrsd (%rsi), %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7e,0x89,0x6f,0x06]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %0 = tail call <4 x i32> @llvm.x86.avx10.vmovrsd128(ptr %__B)
+  %1 = bitcast i8 %__A to <8 x i1>
+  %extract.i = shufflevector <8 x i1> %1, <8 x i1> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %2 = select <4 x i1> %extract.i, <4 x i32> %0, <4 x i32> zeroinitializer
+  %3 = bitcast <4 x i32> %2 to <2 x i64>
+  ret <2 x i64> %3
+}
+
+define <4 x i64> @test_mm256_movrsd_epu32(ptr %__A) {
+; CHECK-LABEL: test_mm256_movrsd_epu32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vmovrsd (%rdi), %ymm0 # encoding: [0x62,0xf5,0x7e,0x28,0x6f,0x07]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %0 = tail call <8 x i32> @llvm.x86.avx10.vmovrsd256(ptr %__A)
+  %1 = bitcast <8 x i32> %0 to <4 x i64>
+  ret <4 x i64> %1
+}
+
+define <4 x i64> @test_mm256_mask_movrsd_epu32(<4 x i64> %__A, i8 zeroext %__B, ptr %__C) {
+; CHECK-LABEL: test_mm256_mask_movrsd_epu32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; CHECK-NEXT:    vmovrsd (%rsi), %ymm0 {%k1} # encoding: [0x62,0xf5,0x7e,0x29,0x6f,0x06]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %0 = tail call <8 x i32> @llvm.x86.avx10.vmovrsd256(ptr %__C)
+  %1 = bitcast <4 x i64> %__A to <8 x i32>
+  %2 = bitcast i8 %__B to <8 x i1>
+  %3 = select <8 x i1> %2, <8 x i32> %0, <8 x i32> %1
+  %4 = bitcast <8 x i32> %3 to <4 x i64>
+  ret <4 x i64> %4
+}
+
+define <4 x i64> @test_mm256_maskz_movrsd_epu32(i8 zeroext %__A, ptr %__B) {
+; CHECK-LABEL: test_mm256_maskz_movrsd_epu32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; CHECK-NEXT:    vmovrsd (%rsi), %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x7e,0xa9,0x6f,0x06]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %0 = tail call <8 x i32> @llvm.x86.avx10.vmovrsd256(ptr %__B)
+  %1 = bitcast i8 %__A to <8 x i1>
+  %2 = select <8 x i1> %1, <8 x i32> %0, <8 x i32> zeroinitializer
+  %3 = bitcast <8 x i32> %2 to <4 x i64>
+  ret <4 x i64> %3
+}
+
+define <2 x i64> @test_mm_movrsq_epu64(ptr %__A) {
+; CHECK-LABEL: test_mm_movrsq_epu64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vmovrsq (%rdi), %xmm0 # encoding: [0x62,0xf5,0xfe,0x08,0x6f,0x07]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %0 = tail call <2 x i64> @llvm.x86.avx10.vmovrsq128(ptr %__A)
+  ret <2 x i64> %0
+}
+
+define <2 x i64> @test_mm_mask_movrsq_epu64(<2 x i64> %__A, i8 zeroext %__B, ptr %__C) {
+; CHECK-LABEL: test_mm_mask_movrsq_epu64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; CHECK-NEXT:    vmovrsq (%rsi), %xmm0 {%k1} # encoding: [0x62,0xf5,0xfe,0x09,0x6f,0x06]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %0 = tail call <2 x i64> @llvm.x86.avx10.vmovrsq128(ptr %__C)
+  %1 = bitcast i8 %__B to <8 x i1>
+  %extract.i = shufflevector <8 x i1> %1, <8 x i1> poison, <2 x i32> <i32 0, i32 1>
+  %2 = select <2 x i1> %extract.i, <2 x i64> %0, <2 x i64> %__A
+  ret <2 x i64> %2
+}
+
+define <2 x i64> @test_mm_maskz_movrsq_epu64(i8 zeroext %__A, ptr %__B) {
+; CHECK-LABEL: test_mm_maskz_movrsq_epu64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; CHECK-NEXT:    vmovrsq (%rsi), %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0xfe,0x89,0x6f,0x06]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %0 = tail call <2 x i64> @llvm.x86.avx10.vmovrsq128(ptr %__B)
+  %1 = bitcast i8 %__A to <8 x i1>
+  %extract.i = shufflevector <8 x i1> %1, <8 x i1> poison, <2 x i32> <i32 0, i32 1>
+  %2 = select <2 x i1> %extract.i, <2 x i64> %0, <2 x i64> zeroinitializer
+  ret <2 x i64> %2
+}
+
+define <4 x i64> @test_mm256_movrsq_epu64(ptr %__A) {
+; CHECK-LABEL: test_mm256_movrsq_epu64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vmovrsq (%rdi), %ymm0 # encoding: [0x62,0xf5,0xfe,0x28,0x6f,0x07]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %0 = tail call <4 x i64> @llvm.x86.avx10.vmovrsq256(ptr %__A)
+  ret <4 x i64> %0
+}
+
+define <4 x i64> @test_mm256_mask_movrsq_epu64(<4 x i64> %__A, i8 zeroext %__B, ptr %__C) {
+; CHECK-LABEL: test_mm256_mask_movrsq_epu64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; CHECK-NEXT:    vmovrsq (%rsi), %ymm0 {%k1} # encoding: [0x62,0xf5,0xfe,0x29,0x6f,0x06]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %0 = tail call <4 x i64> @llvm.x86.avx10.vmovrsq256(ptr %__C)
+  %1 = bitcast i8 %__B to <8 x i1>
+  %extract.i = shufflevector <8 x i1> %1, <8 x i1> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %2 = select <4 x i1> %extract.i, <4 x i64> %0, <4 x i64> %__A
+  ret <4 x i64> %2
+}
+
+define <4 x i64> @test_mm256_maskz_movrsq_epu64(i8 zeroext %__A, ptr %__B) {
+; CHECK-LABEL: test_mm256_maskz_movrsq_epu64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; CHECK-NEXT:    vmovrsq (%rsi), %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0xfe,0xa9,0x6f,0x06]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %0 = tail call <4 x i64> @llvm.x86.avx10.vmovrsq256(ptr %__B)
+  %1 = bitcast i8 %__A to <8 x i1>
+  %extract.i = shufflevector <8 x i1> %1, <8 x i1> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %2 = select <4 x i1> %extract.i, <4 x i64> %0, <4 x i64> zeroinitializer
+  ret <4 x i64> %2
+}
+
+define <2 x i64> @test_mm_movrsw_epu16(ptr %__A) {
+; CHECK-LABEL: test_mm_movrsw_epu16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vmovrsw (%rdi), %xmm0 # encoding: [0x62,0xf5,0xff,0x08,0x6f,0x07]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %0 = tail call <8 x i16> @llvm.x86.avx10.vmovrsw128(ptr %__A)
+  %1 = bitcast <8 x i16> %0 to <2 x i64>
+  ret <2 x i64> %1
+}
+
+define <2 x i64> @test_mm_mask_movrsw_epu16(<2 x i64> %__A, i8 zeroext %__B, ptr %__C) {
+; CHECK-LABEL: test_mm_mask_movrsw_epu16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; CHECK-NEXT:    vmovrsw (%rsi), %xmm0 {%k1} # encoding: [0x62,0xf5,0xff,0x09,0x6f,0x06]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %0 = tail call <8 x i16> @llvm.x86.avx10.vmovrsw128(ptr %__C)
+  %1 = bitcast <2 x i64> %__A to <8 x i16>
+  %2 = bitcast i8 %__B to <8 x i1>
+  %3 = select <8 x i1> %2, <8 x i16> %0, <8 x i16> %1
+  %4 = bitcast <8 x i16> %3 to <2 x i64>
+  ret <2 x i64> %4
+}
+
+define <2 x i64> @test_mm_maskz_movrsw_epu16(i8 zeroext %__A, ptr %__B) {
+; CHECK-LABEL: test_mm_maskz_movrsw_epu16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; CHECK-NEXT:    vmovrsw (%rsi), %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0xff,0x89,0x6f,0x06]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %0 = tail call <8 x i16> @llvm.x86.avx10.vmovrsw128(ptr %__B)
+  %1 = bitcast i8 %__A to <8 x i1>
+  %2 = select <8 x i1> %1, <8 x i16> %0, <8 x i16> zeroinitializer
+  %3 = bitcast <8 x i16> %2 to <2 x i64>
+  ret <2 x i64> %3
+}
+
+define <4 x i64> @test_mm256_movrsw_epu16(ptr %__A) {
+; CHECK-LABEL: test_mm256_movrsw_epu16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vmovrsw (%rdi), %ymm0 # encoding: [0x62,0xf5,0xff,0x28,0x6f,0x07]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %0 = tail call <16 x i16> @llvm.x86.avx10.vmovrsw256(ptr %__A)
+  %1 = bitcast <16 x i16> %0 to <4 x i64>
+  ret <4 x i64> %1
+}
+
+define <4 x i64> @test_mm256_mask_movrsw_epu16(<4 x i64> %__A, i16 zeroext %__B, ptr %__C) {
+; CHECK-LABEL: test_mm256_mask_movrsw_epu16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; CHECK-NEXT:    vmovrsw (%rsi), %ymm0 {%k1} # encoding: [0x62,0xf5,0xff,0x29,0x6f,0x06]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %0 = tail call <16 x i16> @llvm.x86.avx10.vmovrsw256(ptr %__C)
+  %1 = bitcast <4 x i64> %__A to <16 x i16>
+  %2 = bitcast i16 %__B to <16 x i1>
+  %3 = select <16 x i1> %2, <16 x i16> %0, <16 x i16> %1
+  %4 = bitcast <16 x i16> %3 to <4 x i64>
+  ret <4 x i64> %4
+}
+
+define <4 x i64> @test_mm256_maskz_movrsw_epu16(i16 zeroext %__A, ptr %__B) {
+; CHECK-LABEL: test_mm256_maskz_movrsw_epu16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; CHECK-NEXT:    vmovrsw (%rsi), %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0xff,0xa9,0x6f,0x06]
+; CHECK-NEXT:    retq # encoding: [0xc3]
+entry:
+  %0 = tail call <16 x i16> @llvm.x86.avx10.vmovrsw256(ptr %__B)
+  %1 = bitcast i16 %__A to <16 x i1>
+  %2 = select <16 x i1> %1, <16 x i16> %0, <16 x i16> zeroinitializer
+  %3 = bitcast <16 x i16> %2 to <4 x i64>
+  ret <4 x i64> %3
+}
+
+declare <16 x i8> @llvm.x86.avx10.vmovrsb128(ptr)
+declare <32 x i8> @llvm.x86.avx10.vmovrsb256(ptr)
+declare <4 x i32> @llvm.x86.avx10.vmovrsd128(ptr)
+declare <8 x i32> @llvm.x86.avx10.vmovrsd256(ptr)
+declare <2 x i64> @llvm.x86.avx10.vmovrsq128(ptr)
+declare <4 x i64> @llvm.x86.avx10.vmovrsq256(ptr)
+declare <8 x i16> @llvm.x86.avx10.vmovrsw128(ptr)
+declare <16 x i16> @llvm.x86.avx10.vmovrsw256(ptr)
diff --git a/llvm/test/MC/Disassembler/X86/movrs-avx10-64.txt b/llvm/test/MC/Disassembler/X86/movrs-avx10-64.txt
new file mode 100644
index 00000000000000..e25e66ae577438
--- /dev/null
+++ b/llvm/test/MC/Disassembler/X86/movrs-avx10-64.txt
@@ -0,0 +1,98 @@
+# RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding --disassemble < %s  | FileCheck %s --check-prefixes=ATT
+# RUN: llvm-mc --disassemble %s -triple=x86_64 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL
+
+# ATT:   vmovrsb  268435456(%rbp,%r14,8), %zmm22
+# INTEL: vmovrsb zmm22, zmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa5,0x7f,0x48,0x6f,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT:   vmovrsb  291(%r8,%rax,4), %zmm22 {%k7}
+# INTEL: vmovrsb zmm22 {k7}, zmmword ptr [r8 + 4*rax + 291]
+0x62,0xc5,0x7f,0x4f,0x6f,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   vmovrsb  (%rip), %zmm22
+# INTEL: vmovrsb zmm22, zmmword ptr [rip]
+0x62,0xe5,0x7f,0x48,0x6f,0x35,0x00,0x00,0x00,0x00
+
+# ATT:   vmovrsb  -2048(,%rbp,2), %zmm22
+# INTEL: vmovrsb zmm22, zmmword ptr [2*rbp - 2048]
+0x62,0xe5,0x7f,0x48,0x6f,0x34,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT:   vmovrsb  8128(%rcx), %zmm22 {%k7} {z}
+# INTEL: vmovrsb zmm22 {k7} {z}, zmmword ptr [rcx + 8128]
+0x62,0xe5,0x7f,0xcf,0x6f,0x71,0x7f
+
+# ATT:   vmovrsb  -8192(%rdx), %zmm22 {%k7} {z}
+# INTEL: vmovrsb zmm22 {k7} {z}, zmmword ptr [rdx - 8192]
+0x62,0xe5,0x7f,0xcf,0x6f,0x72,0x80
+
+# ATT:   vmovrsd  268435456(%rbp,%r14,8), %zmm22
+# INTEL: vmovrsd zmm22, zmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa5,0x7e,0x48,0x6f,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT:   vmovrsd  291(%r8,%rax,4), %zmm22 {%k7}
+# INTEL: vmovrsd zmm22 {k7}, zmmword ptr [r8 + 4*rax + 291]
+0x62,0xc5,0x7e,0x4f,0x6f,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   vmovrsd  (%rip), %zmm22
+# INTEL: vmovrsd zmm22, zmmword ptr [rip]
+0x62,0xe5,0x7e,0x48,0x6f,0x35,0x00,0x00,0x00,0x00
+
+# ATT:   vmovrsd  -2048(,%rbp,2), %zmm22
+# INTEL: vmovrsd zmm22, zmmword ptr [2*rbp - 2048]
+0x62,0xe5,0x7e,0x48,0x6f,0x34,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT:   vmovrsd  8128(%rcx), %zmm22 {%k7} {z}
+# INTEL: vmovrsd zmm22 {k7} {z}, zmmword ptr [rcx + 8128]
+0x62,0xe5,0x7e,0xcf,0x6f,0x71,0x7f
+
+# ATT:   vmovrsd  -8192(%rdx), %zmm22 {%k7} {z}
+# INTEL: vmovrsd zmm22 {k7} {z}, zmmword ptr [rdx - 8192]
+0x62,0xe5,0x7e,0xcf,0x6f,0x72,0x80
+
+# ATT:   vmovrsq  268435456(%rbp,%r14,8), %zmm22
+# INTEL: vmovrsq zmm22, zmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa5,0xfe,0x48,0x6f,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT:   vmovrsq  291(%r8,%rax,4), %zmm22 {%k7}
+# INTEL: vmovrsq zmm22 {k7}, zmmword ptr [r8 + 4*rax + 291]
+0x62,0xc5,0xfe,0x4f,0x6f,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   vmovrsq  (%rip), %zmm22
+# INTEL: vmovrsq zmm22, zmmword ptr [rip]
+0x62,0xe5,0xfe,0x48,0x6f,0x35,0x00,0x00,0x00,0x00
+
+# ATT:   vmovrsq  -2048(,%rbp,2), %zmm22
+# INTEL: vmovrsq zmm22, zmmword ptr [2*rbp - 2048]
+0x62,0xe5,0xfe,0x48,0x6f,0x34,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT:   vmovrsq  8128(%rcx), %zmm22 {%k7} {z}
+# INTEL: vmovrsq zmm22 {k7} {z}, zmmword ptr [rcx + 8128]
+0x62,0xe5,0xfe,0xcf,0x6f,0x71,0x7f
+
+# ATT:   vmovrsq  -8192(%rdx), %zmm22 {%k7} {z}
+# INTEL: vmovrsq zmm22 {k7} {z}, zmmword ptr [rdx - 8192]
+0x62,0xe5,0xfe,0xcf,0x6f,0x72,0x80
+
+# ATT:   vmovrsw  268435456(%rbp,%r14,8), %zmm22
+# INTEL: vmovrsw zmm22, zmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa5,0xff,0x48,0x6f,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT:   vmovrsw  291(%r8,%rax,4), %zmm22 {%k7}
+# INTEL: vmovrsw zmm22 {k7}, zmmword ptr [r8 + 4*rax + 291]
+0x62,0xc5,0xff,0x4f,0x6f,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT:   vmovrsw  (%rip), %zmm22
+# INTEL: vmovrsw zmm22, zmmword ptr [rip]
+0x62,0xe5,0xff,0x48,0x6f,0x35,0x00,0x00,0x00,0x00
+
+# ATT:   vmovrsw  -2048(,%rbp,2), %zmm22
+# INTEL: vmovrsw zmm22, zmmword ptr [2*rbp - 2048]
+0x62,0xe5,0xff,0x48,0x6f,0x34,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT:   vmovrsw  8128(%rcx), %zmm22 {%k7} {z}
+# INTEL: vmovrsw zmm22 {k7} {z}, zmmword ptr [rcx + 8128]
+0x62,0xe5,0xff,0xcf,0x6f,0x71,0x7f
+
+# ATT:   vmovrsw  -8192(%rdx), %zmm22 {%k7} {z}
+# INTEL: vmovrsw zmm22 {k7} {z}, zmmword ptr [rdx - 8192]
+0x62,0xe5,0xff,0xcf,0x6f,0x72,0x80
\ No newline at end of file
diff --git a/llvm/test/MC/X86/movrs-avx10-att-64.s b/llvm/test/MC/X86/movrs-avx10-att-64.s
new file mode 100644
index 00000000000000..982b7a1d41c039
--- /dev/null
+++ b/llvm/test/MC/X86/movrs-avx10-att-64.s
@@ -0,0 +1,98 @@
+// RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding < %s  | FileCheck %s
+
+// CHECK: vmovrsb  268435456(%rbp,%r14,8), %zmm22
+// CHECK: encoding: [0x62,0xa5,0x7f,0x48,0x6f,0xb4,0xf5,0x00,0x00,0x00,0x10]
+          vmovrsb  268435456(%rbp,%r14,8), %zmm22
+
+// CHECK: vmovrsb  291(%r8,%rax,4), %zmm22 {%k7}
+// CHECK: encoding: [0x62,0xc5,0x7f,0x4f,0x6f,0xb4,0x80,0x23,0x01,0x00,0x00]
+          vmovrsb  291(%r8,%rax,4), %zmm22 {%k7}
+
+// CHECK: vmovrsb  (%rip), %zmm22
+// CHECK: encoding: [0x62,0xe5,0x7f,0x48,0x6f,0x35,0x00,0x00,0x00,0x00]
+          vmovrsb  (%rip), %zmm22
+
+// CHECK: vmovrsb  -2048(,%rbp,2), %zmm22
+// CHECK: encoding: [0x62,0xe5,0x7f,0x48,0x6f,0x34,0x6d,0x00,0xf8,0xff,0xff]
+          vmovrsb  -2048(,%rbp,2), %zmm22
+
+// CHECK: vmovrsb  8128(%rcx), %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x7f,0xcf,0x6f,0x71,0x7f]
+          vmovrsb  8128(%rcx), %zmm22 {%k7} {z}
+
+// CHECK: vmovrsb  -8192(%rdx), %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x7f,0xcf,0x6f,0x72,0x80]
+          vmovrsb  -8192(%rdx), %zmm22 {%k7} {z}
+
+// CHECK: vmovrsd  268435456(%rbp,%r14,8), %zmm22
+// CHECK: encoding: [0x62,0xa5,0x7e,0x48,0x6f,0xb4,0xf5,0x00,0x00,0x00,0x10]
+          vmovrsd  268435456(%rbp,%r14,8), %zmm22
+
+// CHECK: vmovrsd  291(%r8,%rax,4), %zmm22 {%k7}
+// CHECK: encoding: [0x62,0xc5,0x7e,0x4f,0x6f,0xb4,0x80,0x23,0x01,0x00,0x00]
+          vmovrsd  291(%r8,%rax,4), %zmm22 {%k7}
+
+// CHECK: vmovrsd  (%rip), %zmm22
+// CHECK: encoding: [0x62,0xe5,0x7e,0x48,0x6f,0x35,0x00,0x00,0x00,0x00]
+          vmovrsd  (%rip), %zmm22
+
+// CHECK: vmovrsd  -2048(,%rbp,2), %zmm22
+// CHECK: encoding: [0x62,0xe5,0x7e,0x48,0x6f,0x34,0x6d,0x00,0xf8,0xff,0xff]
+          vmovrsd  -2048(,%rbp,2), %zmm22
+
+// CHECK: vmovrsd  8128(%rcx), %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x7e,0xcf,0x6f,0x71,0x7f]
+          vmovrsd  8128(%rcx), %zmm22 {%k7} {z}
+
+// CHECK: vmovrsd  -8192(%rdx), %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x7e,0xcf,0x6f,0x72,0x80]
+          vmovrsd  -8192(%rdx), %zmm22 {%k7} {z}
+
+// CHECK: vmovrsq  268435456(%rbp,%r14,8), %zmm22
+// CHECK: encoding: [0x62,0xa5,0xfe,0x48,0x6f,0xb4,0xf5,0x00,0x00,0x00,0x10]
+          vmovrsq  268435456(%rbp,%r14,8), %zmm22
+
+// CHECK: vmovrsq  291(%r8,%rax,4), %zmm22 {%k7}
+// CHECK: encoding: [0x62,0xc5,0xfe,0x4f,0x6f,0xb4,0x80,0x23,0x01,0x00,0x00]
+          vmovrsq  291(%r8,%rax,4), %zmm22 {%k7}
+
+// CHECK: vmovrsq  (%rip), %zmm22
+// CHECK: encoding: [0x62,0xe5,0xfe,0x48,0x6f,0x35,0x00,0x00,0x00,0x00]
+          vmovrsq  (%rip), %zmm22
+
+// CHECK: vmovrsq  -2048(,%rbp,2), %zmm22
+// CHECK: encoding: [0x62,0xe5,0xfe,0x48,0x6f,0x34,0x6d,0x00,0xf8,0xff,0xff]
+          vmovrsq  -2048(,%rbp,2), %zmm22
+
+// CHECK: vmovrsq  8128(%rcx), %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0xfe,0xcf,0x6f,0x71,0x7f]
+          vmovrsq  8128(%rcx), %zmm22 {%k7} {z}
+
+// CHECK: vmovrsq  -8192(%rdx), %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0xfe,0xcf,0x6f,0x72,0x80]
+          vmovrsq  -8192(%rdx), %zmm22 {%k7} {z}
+
+// CHECK: vmovrsw  268435456(%rbp,%r14,8), %zmm22
+// CHECK: encoding: [0x62,0xa5,0xff,0x48,0x6f,0xb4,0xf5,0x00,0x00,0x00,0x10]
+          vmovrsw  268435456(%rbp,%r14,8), %zmm22
+
+// CHECK: vmovrsw  291(%r8,%rax,4), %zmm22 {%k7}
+// CHECK: encoding: [0x62,0xc5,0xff,0x4f,0x6f,0xb4,0x80,0x23,0x01,0x00,0x00]
+          vmovrsw  291(%r8,%rax,4), %zmm22 {%k7}
+
+// CHECK: vmovrsw  (%rip), %zmm22
+// CHECK: encoding: [0x62,0xe5,0xff,0x48,0x6f,0x35,0x00,0x00,0x00,0x00]
+          vmovrsw  (%rip), %zmm22
+
+// CHECK: vmovrsw  -2048(,%rbp,2), %zmm22
+// CHECK: encoding: [0x62,0xe5,0xff,0x48,0x6f,0x34,0x6d,0x00,0xf8,0xff,0xff]
+          vmovrsw  -2048(,%rbp,2), %zmm22
+
+// CHECK: vmovrsw  8128(%rcx), %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0xff,0xcf,0x6f,0x71,0x7f]
+          vmovrsw  8128(%rcx), %zmm22 {%k7} {z}
+
+// CHECK: vmovrsw  -8192(%rdx), %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0xff,0xcf,0x6f,0x72,0x80]
+          vmovrsw  -8192(%rdx), %zmm22 {%k7} {z}
+
diff --git a/llvm/test/MC/X86/movrs-avx10-intel-64.s b/llvm/test/MC/X86/movrs-avx10-intel-64.s
new file mode 100644
index 00000000000000..d61e41abe7d632
--- /dev/null
+++ b/llvm/test/MC/X86/movrs-avx10-intel-64.s
@@ -0,0 +1,97 @@
+// RUN: llvm-mc -triple x86_64-unknown-unknown -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s
+
+// CHECK: vmovrsb zmm22, zmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa5,0x7f,0x48,0x6f,0xb4,0xf5,0x00,0x00,0x00,0x10]
+          vmovrsb zmm22, zmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vmovrsb zmm22 {k7}, zmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc5,0x7f,0x4f,0x6f,0xb4,0x80,0x23,0x01,0x00,0x00]
+          vmovrsb zmm22 {k7}, zmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vmovrsb zmm22, zmmword ptr [rip]
+// CHECK: encoding: [0x62,0xe5,0x7f,0x48,0x6f,0x35,0x00,0x00,0x00,0x00]
+          vmovrsb zmm22, zmmword ptr [rip]
+
+// CHECK: vmovrsb zmm22, zmmword ptr [2*rbp - 2048]
+// CHECK: encoding: [0x62,0xe5,0x7f,0x48,0x6f,0x34,0x6d,0x00,0xf8,0xff,0xff]
+          vmovrsb zmm22, zmmword ptr [2*rbp - 2048]
+
+// CHECK: vmovrsb zmm22 {k7} {z}, zmmword ptr [rcx + 8128]
+// CHECK: encoding: [0x62,0xe5,0x7f,0xcf,0x6f,0x71,0x7f]
+          vmovrsb zmm22 {k7} {z}, zmmword ptr [rcx + 8128]
+
+// CHECK: vmovrsb zmm22 {k7} {z}, zmmword ptr [rdx - 8192]
+// CHECK: encoding: [0x62,0xe5,0x7f,0xcf,0x6f,0x72,0x80]
+          vmovrsb zmm22 {k7} {z}, zmmword ptr [rdx - 8192]
+
+// CHECK: vmovrsd zmm22, zmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa5,0x7e,0x48,0x6f,0xb4,0xf5,0x00,0x00,0x00,0x10]
+          vmovrsd zmm22, zmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vmovrsd zmm22 {k7}, zmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc5,0x7e,0x4f,0x6f,0xb4,0x80,0x23,0x01,0x00,0x00]
+          vmovrsd zmm22 {k7}, zmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vmovrsd zmm22, zmmword ptr [rip]
+// CHECK: encoding: [0x62,0xe5,0x7e,0x48,0x6f,0x35,0x00,0x00,0x00,0x00]
+          vmovrsd zmm22, zmmword ptr [rip]
+
+// CHECK: vmovrsd zmm22, zmmword ptr [2*rbp - 2048]
+// CHECK: encoding: [0x62,0xe5,0x7e,0x48,0x6f,0x34,0x6d,0x00,0xf8,0xff,0xff]
+          vmovrsd zmm22, zmmword ptr [2*rbp - 2048]
+
+// CHECK: vmovrsd zmm22 {k7} {z}, zmmword ptr [rcx + 8128]
+// CHECK: encoding: [0x62,0xe5,0x7e,0xcf,0x6f,0x71,0x7f]
+          vmovrsd zmm22 {k7} {z}, zmmword ptr [rcx + 8128]
+
+// CHECK: vmovrsd zmm22 {k7} {z}, zmmword ptr [rdx - 8192]
+// CHECK: encoding: [0x62,0xe5,0x7e,0xcf,0x6f,0x72,0x80]
+          vmovrsd zmm22 {k7} {z}, zmmword ptr [rdx - 8192]
+
+// CHECK: vmovrsq zmm22, zmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa5,0xfe,0x48,0x6f,0xb4,0xf5,0x00,0x00,0x00,0x10]
+          vmovrsq zmm22, zmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vmovrsq zmm22 {k7}, zmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc5,0xfe,0x4f,0x6f,0xb4,0x80,0x23,0x01,0x00,0x00]
+          vmovrsq zmm22 {k7}, zmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vmovrsq zmm22, zmmword ptr [rip]
+// CHECK: encoding: [0x62,0xe5,0xfe,0x48,0x6f,0x35,0x00,0x00,0x00,0x00]
+          vmovrsq zmm22, zmmword ptr [rip]
+
+// CHECK: vmovrsq zmm22, zmmword ptr [2*rbp - 2048]
+// CHECK: encoding: [0x62,0xe5,0xfe,0x48,0x6f,0x34,0x6d,0x00,0xf8,0xff,0xff]
+          vmovrsq zmm22, zmmword ptr [2*rbp - 2048]
+
+// CHECK: vmovrsq zmm22 {k7} {z}, zmmword ptr [rcx + 8128]
+// CHECK: encoding: [0x62,0xe5,0xfe,0xcf,0x6f,0x71,0x7f]
+          vmovrsq zmm22 {k7} {z}, zmmword ptr [rcx + 8128]
+
+// CHECK: vmovrsq zmm22 {k7} {z}, zmmword ptr [rdx - 8192]
+// CHECK: encoding: [0x62,0xe5,0xfe,0xcf,0x6f,0x72,0x80]
+          vmovrsq zmm22 {k7} {z}, zmmword ptr [rdx - 8192]
+
+// CHECK: vmovrsw zmm22, zmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa5,0xff,0x48,0x6f,0xb4,0xf5,0x00,0x00,0x00,0x10]
+          vmovrsw zmm22, zmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vmovrsw zmm22 {k7}, zmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc5,0xff,0x4f,0x6f,0xb4,0x80,0x23,0x01,0x00,0x00]
+          vmovrsw zmm22 {k7}, zmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vmovrsw zmm22, zmmword ptr [rip]
+// CHECK: encoding: [0x62,0xe5,0xff,0x48,0x6f,0x35,0x00,0x00,0x00,0x00]
+          vmovrsw zmm22, zmmword ptr [rip]
+
+// CHECK: vmovrsw zmm22, zmmword ptr [2*rbp - 2048]
+// CHECK: encoding: [0x62,0xe5,0xff,0x48,0x6f,0x34,0x6d,0x00,0xf8,0xff,0xff]
+          vmovrsw zmm22, zmmword ptr [2*rbp - 2048]
+
+// CHECK: vmovrsw zmm22 {k7} {z}, zmmword ptr [rcx + 8128]
+// CHECK: encoding: [0x62,0xe5,0xff,0xcf,0x6f,0x71,0x7f]
+          vmovrsw zmm22 {k7} {z}, zmmword ptr [rcx + 8128]
+
+// CHECK: vmovrsw zmm22 {k7} {z}, zmmword ptr [rdx - 8192]
+// CHECK: encoding: [0x62,0xe5,0xff,0xcf,0x6f,0x72,0x80]
+          vmovrsw zmm22 {k7} {z}, zmmword ptr [rdx - 8192]

>From dc9387dbf69b806180702465c406630194c27bee Mon Sep 17 00:00:00 2001
From: Freddy Ye <freddy.ye at intel.com>
Date: Wed, 23 Oct 2024 10:21:04 +0800
Subject: [PATCH 2/3] Add release notes and CFE tests for movrs.

---
 clang/docs/ReleaseNotes.rst                   | 3 +++
 clang/include/clang/Driver/Options.td         | 2 ++
 clang/lib/Basic/Targets/X86.cpp               | 2 ++
 clang/test/CodeGen/target-builtin-noerror.c   | 1 +
 clang/test/Driver/x86-target-features.c       | 5 +++++
 clang/test/Preprocessor/x86_target_features.c | 6 ++++++
 compiler-rt/lib/builtins/cpu_model/x86.c      | 3 +++
 llvm/docs/ReleaseNotes.md                     | 2 ++
 llvm/lib/TargetParser/Host.cpp                | 1 +
 9 files changed, 25 insertions(+)

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index b7a6ace8bb895d..f212cdef7864e3 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -618,6 +618,9 @@ X86 Support
 
 - All intrinsics in tbmintrin.h can now be used in constant expressions.
 
+- Supported intrinsics for ``MOVRS AND AVX10.2``.
+  * Supported intrinsics of ``_mm(256|512)_(mask(z))_loadrs_epi(8|16|32|64)``.
+
 Arm and AArch64 Support
 ^^^^^^^^^^^^^^^^^^^^^^^
 
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 152c43d7908ff8..4cb2e77f8d9f61 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -6415,6 +6415,8 @@ def mmovdiri : Flag<["-"], "mmovdiri">, Group<m_x86_Features_Group>;
 def mno_movdiri : Flag<["-"], "mno-movdiri">, Group<m_x86_Features_Group>;
 def mmovdir64b : Flag<["-"], "mmovdir64b">, Group<m_x86_Features_Group>;
 def mno_movdir64b : Flag<["-"], "mno-movdir64b">, Group<m_x86_Features_Group>;
+def mmovrs : Flag<["-"], "mmovrs">, Group<m_x86_Features_Group>;
+def mno_movrs : Flag<["-"], "mno-movrs">, Group<m_x86_Features_Group>;
 def mmwaitx : Flag<["-"], "mmwaitx">, Group<m_x86_Features_Group>;
 def mno_mwaitx : Flag<["-"], "mno-mwaitx">, Group<m_x86_Features_Group>;
 def mpku : Flag<["-"], "mpku">, Group<m_x86_Features_Group>;
diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp
index d4d099504a6a71..d067ec218b5270 100644
--- a/clang/lib/Basic/Targets/X86.cpp
+++ b/clang/lib/Basic/Targets/X86.cpp
@@ -917,6 +917,8 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts,
     Builder.defineMacro("__MOVDIRI__");
   if (HasMOVDIR64B)
     Builder.defineMacro("__MOVDIR64B__");
+  if (HasMOVRS)
+    Builder.defineMacro("__MOVRS__");
   if (HasPCONFIG)
     Builder.defineMacro("__PCONFIG__");
   if (HasPTWRITE)
diff --git a/clang/test/CodeGen/target-builtin-noerror.c b/clang/test/CodeGen/target-builtin-noerror.c
index 2a05074d7c2b68..1e53621bc6b5ae 100644
--- a/clang/test/CodeGen/target-builtin-noerror.c
+++ b/clang/test/CodeGen/target-builtin-noerror.c
@@ -145,6 +145,7 @@ void verifyfeaturestrings(void) {
   (void)__builtin_cpu_supports("avx10.1-512");
   (void)__builtin_cpu_supports("avx10.2-256");
   (void)__builtin_cpu_supports("avx10.2-512");
+  (void)__builtin_cpu_supports("movrs");
 }
 
 void verifycpustrings(void) {
diff --git a/clang/test/Driver/x86-target-features.c b/clang/test/Driver/x86-target-features.c
index ddfbb29a48f8d5..02370ef60b7feb 100644
--- a/clang/test/Driver/x86-target-features.c
+++ b/clang/test/Driver/x86-target-features.c
@@ -404,6 +404,11 @@
 // USERMSR: "-target-feature" "+usermsr"
 // NO-USERMSR: "-target-feature" "-usermsr"
 
+// RUN: %clang --target=i386 -mmovrs %s -### -o %t.o 2>&1 | FileCheck -check-prefix=MOVRS %s
+// RUN: %clang --target=i386 -mno-movrs %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-MOVRS %s
+// MOVRS: "-target-feature" "+movrs"
+// NO-MOVRS: "-target-feature" "-movrs"
+
 // RUN: %clang --target=i386 -march=i386 -mcrc32 %s -### 2>&1 | FileCheck -check-prefix=CRC32 %s
 // RUN: %clang --target=i386 -march=i386 -mno-crc32 %s -### 2>&1 | FileCheck -check-prefix=NO-CRC32 %s
 // CRC32: "-target-feature" "+crc32"
diff --git a/clang/test/Preprocessor/x86_target_features.c b/clang/test/Preprocessor/x86_target_features.c
index 8b4e6bdc09226a..2d1d2e57bdc772 100644
--- a/clang/test/Preprocessor/x86_target_features.c
+++ b/clang/test/Preprocessor/x86_target_features.c
@@ -740,6 +740,12 @@
 // RUN: %clang -target i686-unknown-linux-gnu -march=atom -mno-usermsr -x c -E -dM -o - %s | FileCheck  -check-prefix=NO-USERMSR %s
 // NO-USERMSR-NOT: #define __USERMSR__ 1
 
+// RUN: %clang -target i686-unknown-linux-gnu -march=atom -mmovrs -x c -E -dM -o - %s | FileCheck  -check-prefix=MOVRS %s
+// MOVRS: #define __MOVRS__ 1
+
+// RUN: %clang -target i686-unknown-linux-gnu -march=atom -mno-movrs -x c -E -dM -o - %s | FileCheck  -check-prefix=NO-MOVRS %s
+// NO-MOVRS-NOT: #define __MOVRS__ 1
+
 // RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mcrc32 -x c -E -dM -o - %s | FileCheck -check-prefix=CRC32 %s
 
 // CRC32: #define __CRC32__ 1
diff --git a/compiler-rt/lib/builtins/cpu_model/x86.c b/compiler-rt/lib/builtins/cpu_model/x86.c
index 23f8fa3e1fd490..c7ed9867f3dbd9 100644
--- a/compiler-rt/lib/builtins/cpu_model/x86.c
+++ b/compiler-rt/lib/builtins/cpu_model/x86.c
@@ -229,6 +229,7 @@ enum ProcessorFeatures {
   FEATURE_AVX10_1_512,
   FEATURE_AVX10_2_256,
   FEATURE_AVX10_2_512,
+  FEATURE_MOVRS,
   CPU_FEATURE_MAX
 };
 
@@ -970,6 +971,8 @@ static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
     setFeature(FEATURE_HRESET);
   if (HasLeaf7Subleaf1 && ((EAX >> 23) & 1) && HasAVXSave)
     setFeature(FEATURE_AVXIFMA);
+  if (HasLeaf7Subleaf1 && ((EAX >> 31) & 1))
+    setFeature(FEATURE_MOVRS);
 
   if (HasLeaf7Subleaf1 && ((EDX >> 4) & 1) && HasAVXSave)
     setFeature(FEATURE_AVXVNNIINT8);
diff --git a/llvm/docs/ReleaseNotes.md b/llvm/docs/ReleaseNotes.md
index e5853789c78b63..7e4a8258d8058e 100644
--- a/llvm/docs/ReleaseNotes.md
+++ b/llvm/docs/ReleaseNotes.md
@@ -196,6 +196,8 @@ Changes to the X86 Backend
 
 * Support ISA of `AVX10.2-256` and `AVX10.2-512`.
 
+* Supported instructions of `MOVRS AND AVX10.2`.
+
 Changes to the OCaml bindings
 -----------------------------
 
diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp
index 9834aaacba18d0..a6d1ce64765dc0 100644
--- a/llvm/lib/TargetParser/Host.cpp
+++ b/llvm/lib/TargetParser/Host.cpp
@@ -1839,6 +1839,7 @@ const StringMap<bool> sys::getHostCPUFeatures() {
   Features["cmpccxadd"]  = HasLeaf7Subleaf1 && ((EAX >> 7) & 1);
   Features["hreset"]     = HasLeaf7Subleaf1 && ((EAX >> 22) & 1);
   Features["avxifma"]    = HasLeaf7Subleaf1 && ((EAX >> 23) & 1) && HasAVXSave;
+  Features["movrs"]    = HasLeaf7Subleaf1 && ((EAX >> 31) & 1);
   Features["avxvnniint8"] = HasLeaf7Subleaf1 && ((EDX >> 4) & 1) && HasAVXSave;
   Features["avxneconvert"] = HasLeaf7Subleaf1 && ((EDX >> 5) & 1) && HasAVXSave;
   Features["amx-complex"] = HasLeaf7Subleaf1 && ((EDX >> 8) & 1) && HasAMXSave;

>From 38ff3cf94f8427bf946cde0819092f2707f78909 Mon Sep 17 00:00:00 2001
From: Freddy Ye <freddy.ye at intel.com>
Date: Wed, 23 Oct 2024 10:26:08 +0800
Subject: [PATCH 3/3] clang format

---
 llvm/lib/TargetParser/Host.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp
index a6d1ce64765dc0..489457d010064d 100644
--- a/llvm/lib/TargetParser/Host.cpp
+++ b/llvm/lib/TargetParser/Host.cpp
@@ -1839,7 +1839,7 @@ const StringMap<bool> sys::getHostCPUFeatures() {
   Features["cmpccxadd"]  = HasLeaf7Subleaf1 && ((EAX >> 7) & 1);
   Features["hreset"]     = HasLeaf7Subleaf1 && ((EAX >> 22) & 1);
   Features["avxifma"]    = HasLeaf7Subleaf1 && ((EAX >> 23) & 1) && HasAVXSave;
-  Features["movrs"]    = HasLeaf7Subleaf1 && ((EAX >> 31) & 1);
+  Features["movrs"] = HasLeaf7Subleaf1 && ((EAX >> 31) & 1);
   Features["avxvnniint8"] = HasLeaf7Subleaf1 && ((EDX >> 4) & 1) && HasAVXSave;
   Features["avxneconvert"] = HasLeaf7Subleaf1 && ((EDX >> 5) & 1) && HasAVXSave;
   Features["amx-complex"] = HasLeaf7Subleaf1 && ((EDX >> 8) & 1) && HasAMXSave;



More information about the cfe-commits mailing list