[clang] [compiler-rt] [llvm] [X86] Support MOVRS and AVX10.2 instructions. (PR #113274)
Freddy Ye via cfe-commits
cfe-commits at lists.llvm.org
Wed Oct 23 22:24:57 PDT 2024
https://github.com/FreddyLeaf updated https://github.com/llvm/llvm-project/pull/113274
>From 19c6400ac7127860ac1712941acbd1585614d17d Mon Sep 17 00:00:00 2001
From: Freddy Ye <freddy.ye at intel.com>
Date: Mon, 21 Oct 2024 10:24:14 +0800
Subject: [PATCH 1/4] [X86] Support MOVRS and AVX10.2 instructions.
Ref.: https://cdrdv2.intel.com/v1/dl/getContent/671368
---
clang/include/clang/Basic/BuiltinsX86.def | 13 +
clang/lib/Basic/Targets/X86.cpp | 4 +
clang/lib/Basic/Targets/X86.h | 1 +
clang/lib/Headers/CMakeLists.txt | 2 +
clang/lib/Headers/immintrin.h | 10 +
clang/lib/Headers/movrs_avx10_2_512intrin.h | 98 ++++++
clang/lib/Headers/movrs_avx10_2intrin.h | 174 +++++++++
.../X86/movrs-avx10.2-512-builtins-error-32.c | 50 +++
.../CodeGen/X86/movrs-avx10.2-512-builtins.c | 87 +++++
.../X86/movrs-avx10.2-builtins-error-32.c | 98 ++++++
.../test/CodeGen/X86/movrs-avx10.2-builtins.c | 171 +++++++++
llvm/include/llvm/IR/IntrinsicsX86.td | 39 +++
.../llvm/TargetParser/X86TargetParser.def | 1 +
llvm/lib/Target/X86/X86.td | 2 +
llvm/lib/Target/X86/X86InstrAVX10.td | 28 ++
llvm/lib/Target/X86/X86InstrPredicates.td | 1 +
llvm/lib/TargetParser/X86TargetParser.cpp | 2 +
.../X86/movrs-avx10.2-512-intrinsics.ll | 163 +++++++++
.../CodeGen/X86/movrs-avx10.2-intrinsics.ll | 329 ++++++++++++++++++
.../MC/Disassembler/X86/movrs-avx10-64.txt | 98 ++++++
llvm/test/MC/X86/movrs-avx10-att-64.s | 98 ++++++
llvm/test/MC/X86/movrs-avx10-intel-64.s | 97 ++++++
22 files changed, 1566 insertions(+)
create mode 100644 clang/lib/Headers/movrs_avx10_2_512intrin.h
create mode 100644 clang/lib/Headers/movrs_avx10_2intrin.h
create mode 100644 clang/test/CodeGen/X86/movrs-avx10.2-512-builtins-error-32.c
create mode 100644 clang/test/CodeGen/X86/movrs-avx10.2-512-builtins.c
create mode 100644 clang/test/CodeGen/X86/movrs-avx10.2-builtins-error-32.c
create mode 100644 clang/test/CodeGen/X86/movrs-avx10.2-builtins.c
create mode 100644 llvm/test/CodeGen/X86/movrs-avx10.2-512-intrinsics.ll
create mode 100644 llvm/test/CodeGen/X86/movrs-avx10.2-intrinsics.ll
create mode 100644 llvm/test/MC/Disassembler/X86/movrs-avx10-64.txt
create mode 100644 llvm/test/MC/X86/movrs-avx10-att-64.s
create mode 100644 llvm/test/MC/X86/movrs-avx10-intel-64.s
diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def
index 4c6b22cca421ca..17ee3df85ff7a6 100644
--- a/clang/include/clang/Basic/BuiltinsX86.def
+++ b/clang/include/clang/Basic/BuiltinsX86.def
@@ -2339,6 +2339,19 @@ TARGET_BUILTIN(__builtin_ia32_vfmaddnepbh512, "V32yV32yV32yV32y", "ncV:512:", "a
TARGET_BUILTIN(__builtin_ia32_vfmaddnepbh256, "V16yV16yV16yV16y", "ncV:256:", "avx10.2-256")
TARGET_BUILTIN(__builtin_ia32_vfmaddnepbh128, "V8yV8yV8yV8y", "ncV:128:", "avx10.2-256")
+// MOVRS and AVX10.2
+TARGET_BUILTIN(__builtin_ia32_vmovrsb128, "V16cV16cC*", "nV:128:", "movrs,avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vmovrsb256, "V32cV32cC*", "nV:256:", "movrs,avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vmovrsb512, "V64cV64cC*", "nV:512:", "movrs,avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vmovrsd128, "V4iV4iC*", "nV:128:", "movrs,avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vmovrsd256, "V8iV8iC*", "nV:256:", "movrs,avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vmovrsd512, "V16iV16iC*", "nV:512:", "movrs,avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vmovrsq128, "V2OiV2OiC*", "nV:128:", "movrs,avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vmovrsq256, "V4OiV4OiC*", "nV:256:", "movrs,avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vmovrsq512, "V8OiV8OiC*", "nV:512:", "movrs,avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vmovrsw128, "V8sV8sC*", "nV:128:", "movrs,avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vmovrsw256, "V16sV16sC*", "nV:256:", "movrs,avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vmovrsw512, "V32sV32sC*", "nV:512:", "movrs,avx10.2-512")
#undef BUILTIN
#undef TARGET_BUILTIN
#undef TARGET_HEADER_BUILTIN
diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp
index 5448bd841959f4..d4d099504a6a71 100644
--- a/clang/lib/Basic/Targets/X86.cpp
+++ b/clang/lib/Basic/Targets/X86.cpp
@@ -348,6 +348,8 @@ bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
HasSM4 = true;
} else if (Feature == "+movbe") {
HasMOVBE = true;
+ } else if (Feature == "+movrs") {
+ HasMOVRS = true;
} else if (Feature == "+sgx") {
HasSGX = true;
} else if (Feature == "+cx8") {
@@ -1116,6 +1118,7 @@ bool X86TargetInfo::isValidFeatureName(StringRef Name) const {
.Case("lzcnt", true)
.Case("mmx", true)
.Case("movbe", true)
+ .Case("movrs", true)
.Case("movdiri", true)
.Case("movdir64b", true)
.Case("mwaitx", true)
@@ -1233,6 +1236,7 @@ bool X86TargetInfo::hasFeature(StringRef Feature) const {
.Case("lzcnt", HasLZCNT)
.Case("mmx", HasMMX)
.Case("movbe", HasMOVBE)
+ .Case("movrs", HasMOVRS)
.Case("movdiri", HasMOVDIRI)
.Case("movdir64b", HasMOVDIR64B)
.Case("mwaitx", HasMWAITX)
diff --git a/clang/lib/Basic/Targets/X86.h b/clang/lib/Basic/Targets/X86.h
index a99ae62984c7d5..05fef8c1344853 100644
--- a/clang/lib/Basic/Targets/X86.h
+++ b/clang/lib/Basic/Targets/X86.h
@@ -130,6 +130,7 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo {
bool HasCLFLUSHOPT = false;
bool HasCLWB = false;
bool HasMOVBE = false;
+ bool HasMOVRS = false;
bool HasPREFETCHI = false;
bool HasRDPID = false;
bool HasRDPRU = false;
diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt
index ff392e7122a448..e97953d87a2ff9 100644
--- a/clang/lib/Headers/CMakeLists.txt
+++ b/clang/lib/Headers/CMakeLists.txt
@@ -221,6 +221,8 @@ set(x86_files
mm3dnow.h
mmintrin.h
movdirintrin.h
+ movrs_avx10_2_512intrin.h
+ movrs_avx10_2intrin.h
mwaitxintrin.h
nmmintrin.h
pconfigintrin.h
diff --git a/clang/lib/Headers/immintrin.h b/clang/lib/Headers/immintrin.h
index 3fbabffa98df20..5f296d0a3324d0 100644
--- a/clang/lib/Headers/immintrin.h
+++ b/clang/lib/Headers/immintrin.h
@@ -605,6 +605,16 @@ _storebe_i64(void * __P, long long __D) {
#include <movdirintrin.h>
#endif
+#if !defined(__SCE__) || __has_feature(modules) || \
+ (defined(__AVX10_2__) && defined(__MOVRS__))
+#include <movrs_avx10_2intrin.h>
+#endif
+
+#if !defined(__SCE__) || __has_feature(modules) || \
+ (defined(__AVX10_2_512__) && defined(__MOVRS__))
+#include <movrs_avx10_2_512intrin.h>
+#endif
+
#if !defined(__SCE__) || __has_feature(modules) || defined(__PCONFIG__)
#include <pconfigintrin.h>
#endif
diff --git a/clang/lib/Headers/movrs_avx10_2_512intrin.h b/clang/lib/Headers/movrs_avx10_2_512intrin.h
new file mode 100644
index 00000000000000..1d04d3122f2c5f
--- /dev/null
+++ b/clang/lib/Headers/movrs_avx10_2_512intrin.h
@@ -0,0 +1,98 @@
+/*===-------- movrs_avx10_2_512intrin.h - MOVRS AVX10.2-512 intrinsics -----===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===-----------------------------------------------------------------------===
+ */
+#ifndef __IMMINTRIN_H
+#error \
+ "Never use <movrs_avx10_2_512intrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef __MOVRS_AVX10_2_512INTRIN_H
+#define __MOVRS_AVX10_2_512INTRIN_H
+#ifdef __x86_64__
+
+/* Default attributes: every intrinsic here needs MOVRS and AVX10.2-512. */
+#define __DEFAULT_FN_ATTRS512 \
+ __attribute__((__always_inline__, __nodebug__, \
+ __target__("movrs,avx10.2-512"), __min_vector_width__(512)))
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_loadrs_epi8(void const *__A) {
+ return (__m512i)__builtin_ia32_vmovrsb512((const __v64qi *)(__A));
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_mask_loadrs_epi8(__m512i __W, __mmask64 __U, void const *__A) {
+ return (__m512i)__builtin_ia32_selectb_512(
+ (__mmask64)__U, (__v64qi)_mm512_loadrs_epi8(__A), (__v64qi)__W);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_maskz_loadrs_epi8(__mmask64 __U, void const *__A) {
+ return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
+ (__v64qi)_mm512_loadrs_epi8(__A),
+ (__v64qi)_mm512_setzero_si512());
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_loadrs_epi32(void const *__A) {
+ return (__m512i)__builtin_ia32_vmovrsd512((const __v16si *)(__A));
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_mask_loadrs_epi32(__m512i __W, __mmask16 __U, void const *__A) {
+ return (__m512i)__builtin_ia32_selectd_512(
+ (__mmask16)__U, (__v16si)_mm512_loadrs_epi32(__A), (__v16si)__W);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_maskz_loadrs_epi32(__mmask16 __U, void const *__A) {
+ return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
+ (__v16si)_mm512_loadrs_epi32(__A),
+ (__v16si)_mm512_setzero_si512());
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_loadrs_epi64(void const *__A) {
+ return (__m512i)__builtin_ia32_vmovrsq512((const __v8di *)(__A));
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_mask_loadrs_epi64(__m512i __W, __mmask8 __U, void const *__A) {
+ return (__m512i)__builtin_ia32_selectq_512(
+ (__mmask8)__U, (__v8di)_mm512_loadrs_epi64(__A), (__v8di)__W);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_maskz_loadrs_epi64(__mmask8 __U, void const *__A) {
+ return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
+ (__v8di)_mm512_loadrs_epi64(__A),
+ (__v8di)_mm512_setzero_si512());
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_loadrs_epi16(void const *__A) {
+ return (__m512i)__builtin_ia32_vmovrsw512((const __v32hi *)(__A));
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_mask_loadrs_epi16(__m512i __W, __mmask32 __U, void const *__A) {
+ return (__m512i)__builtin_ia32_selectw_512(
+ (__mmask32)__U, (__v32hi)_mm512_loadrs_epi16(__A), (__v32hi)__W);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_maskz_loadrs_epi16(__mmask32 __U, void const *__A) {
+ return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
+ (__v32hi)_mm512_loadrs_epi16(__A),
+ (__v32hi)_mm512_setzero_si512());
+}
+
+#undef __DEFAULT_FN_ATTRS512
+
+#endif /* __x86_64__ */
+#endif /* __MOVRS_AVX10_2_512INTRIN_H */
diff --git a/clang/lib/Headers/movrs_avx10_2intrin.h b/clang/lib/Headers/movrs_avx10_2intrin.h
new file mode 100644
index 00000000000000..f38c78afe2ef94
--- /dev/null
+++ b/clang/lib/Headers/movrs_avx10_2intrin.h
@@ -0,0 +1,174 @@
+/*===---------- movrs_avx10_2intrin.h - MOVRS AVX10.2 intrinsics -----------===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===-----------------------------------------------------------------------===
+ */
+#ifndef __IMMINTRIN_H
+#error \
+ "Never use <movrs_avx10_2intrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef __MOVRS_AVX10_2INTRIN_H
+#define __MOVRS_AVX10_2INTRIN_H
+#ifdef __x86_64__
+
+/* Define the default attributes for the functions in this file. */
+#define __DEFAULT_FN_ATTRS128 \
+ __attribute__((__always_inline__, __nodebug__, \
+ __target__("movrs,avx10.2-256"), __min_vector_width__(128)))
+#define __DEFAULT_FN_ATTRS256 \
+ __attribute__((__always_inline__, __nodebug__, \
+ __target__("movrs,avx10.2-256"), __min_vector_width__(256)))
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_loadrs_epi8(void const *__A) {
+ return (__m128i)__builtin_ia32_vmovrsb128((const __v16qi *)(__A));
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_mask_loadrs_epi8(__m128i __W, __mmask16 __U, void const *__A) {
+ return (__m128i)__builtin_ia32_selectb_128(
+ (__mmask16)__U, (__v16qi)_mm_loadrs_epi8(__A), (__v16qi)__W);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_maskz_loadrs_epi8(__mmask16 __U, void const *__A) {
+ return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
+ (__v16qi)_mm_loadrs_epi8(__A),
+ (__v16qi)_mm_setzero_si128());
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_loadrs_epi8(void const *__A) {
+ return (__m256i)__builtin_ia32_vmovrsb256((const __v32qi *)(__A));
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_mask_loadrs_epi8(__m256i __W, __mmask32 __U, void const *__A) {
+ return (__m256i)__builtin_ia32_selectb_256(
+ (__mmask32)__U, (__v32qi)_mm256_loadrs_epi8(__A), (__v32qi)__W);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_maskz_loadrs_epi8(__mmask32 __U, void const *__A) {
+ return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
+ (__v32qi)_mm256_loadrs_epi8(__A),
+ (__v32qi)_mm256_setzero_si256());
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_loadrs_epi32(void const *__A) {
+ return (__m128i)__builtin_ia32_vmovrsd128((const __v4si *)(__A));
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_mask_loadrs_epi32(__m128i __W, __mmask8 __U, void const *__A) {
+ return (__m128i)__builtin_ia32_selectd_128(
+ (__mmask8)__U, (__v4si)_mm_loadrs_epi32(__A), (__v4si)__W);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_maskz_loadrs_epi32(__mmask8 __U, void const *__A) {
+ return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
+ (__v4si)_mm_loadrs_epi32(__A),
+ (__v4si)_mm_setzero_si128());
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_loadrs_epi32(void const *__A) {
+ return (__m256i)__builtin_ia32_vmovrsd256((const __v8si *)(__A));
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_mask_loadrs_epi32(__m256i __W, __mmask8 __U, void const *__A) {
+ return (__m256i)__builtin_ia32_selectd_256(
+ (__mmask8)__U, (__v8si)_mm256_loadrs_epi32(__A), (__v8si)__W);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_maskz_loadrs_epi32(__mmask8 __U, void const *__A) {
+ return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
+ (__v8si)_mm256_loadrs_epi32(__A),
+ (__v8si)_mm256_setzero_si256());
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_loadrs_epi64(void const *__A) {
+ return (__m128i)__builtin_ia32_vmovrsq128((const __v2di *)(__A));
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_mask_loadrs_epi64(__m128i __W, __mmask8 __U, void const *__A) {
+ return (__m128i)__builtin_ia32_selectq_128(
+ (__mmask8)__U, (__v2di)_mm_loadrs_epi64(__A), (__v2di)__W);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_maskz_loadrs_epi64(__mmask8 __U, void const *__A) {
+ return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
+ (__v2di)_mm_loadrs_epi64(__A),
+ (__v2di)_mm_setzero_si128());
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_loadrs_epi64(void const *__A) {
+ return (__m256i)__builtin_ia32_vmovrsq256((const __v4di *)(__A));
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_mask_loadrs_epi64(__m256i __W, __mmask8 __U, void const *__A) {
+ return (__m256i)__builtin_ia32_selectq_256(
+ (__mmask8)__U, (__v4di)_mm256_loadrs_epi64(__A), (__v4di)__W);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_maskz_loadrs_epi64(__mmask8 __U, void const *__A) {
+ return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
+ (__v4di)_mm256_loadrs_epi64(__A),
+ (__v4di)_mm256_setzero_si256());
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_loadrs_epi16(void const *__A) {
+ return (__m128i)__builtin_ia32_vmovrsw128((const __v8hi *)(__A));
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_mask_loadrs_epi16(__m128i __W, __mmask8 __U, void const *__A) {
+ return (__m128i)__builtin_ia32_selectw_128(
+ (__mmask8)__U, (__v8hi)_mm_loadrs_epi16(__A), (__v8hi)__W);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_maskz_loadrs_epi16(__mmask8 __U, void const *__A) {
+ return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
+ (__v8hi)_mm_loadrs_epi16(__A),
+ (__v8hi)_mm_setzero_si128());
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_loadrs_epi16(void const *__A) {
+ return (__m256i)__builtin_ia32_vmovrsw256((const __v16hi *)(__A));
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_mask_loadrs_epi16(__m256i __W, __mmask16 __U, void const *__A) {
+ return (__m256i)__builtin_ia32_selectw_256(
+ (__mmask16)__U, (__v16hi)_mm256_loadrs_epi16(__A), (__v16hi)__W);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_maskz_loadrs_epi16(__mmask16 __U, void const *__A) {
+ return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
+ (__v16hi)_mm256_loadrs_epi16(__A),
+ (__v16hi)_mm256_setzero_si256());
+}
+
+#undef __DEFAULT_FN_ATTRS128
+#undef __DEFAULT_FN_ATTRS256
+
+#endif /* __x86_64__ */
+#endif /* __MOVRS_AVX10_2INTRIN_H */
diff --git a/clang/test/CodeGen/X86/movrs-avx10.2-512-builtins-error-32.c b/clang/test/CodeGen/X86/movrs-avx10.2-512-builtins-error-32.c
new file mode 100644
index 00000000000000..944033724a6a2b
--- /dev/null
+++ b/clang/test/CodeGen/X86/movrs-avx10.2-512-builtins-error-32.c
@@ -0,0 +1,50 @@
+// RUN: %clang_cc1 -ffreestanding %s -Wno-implicit-function-declaration -triple=i386-- -target-feature +movrs -target-feature +avx10.2-512 -emit-llvm -verify
+
+#include <immintrin.h>
+__m512i test_mm512_loadrs_epi8(const __m512i * __A) {
+ return _mm512_loadrs_epi8(__A); // expected-error {{returning 'int' from a function with incompatible result type '__m512i' (vector of 8 'long long' values)}}
+}
+
+__m512i test_mm512_mask_loadrs_epi8(__m512i __A, __mmask64 __B, const __m512i * __C) {
+ return _mm512_mask_loadrs_epi8(__A, __B, __C); // expected-error {{returning 'int' from a function with incompatible result type '__m512i' (vector of 8 'long long' values)}}
+}
+
+__m512i test_mm512_maskz_loadrs_epi8(__mmask64 __A, const __m512i * __B) {
+ return _mm512_maskz_loadrs_epi8(__A, __B); // expected-error {{returning 'int' from a function with incompatible result type '__m512i' (vector of 8 'long long' values)}}
+}
+
+__m512i test_mm512_loadrs_epi32(const __m512i * __A) {
+ return _mm512_loadrs_epi32(__A); // expected-error {{returning 'int' from a function with incompatible result type '__m512i' (vector of 8 'long long' values)}}
+}
+
+__m512i test_mm512_mask_loadrs_epi32(__m512i __A, __mmask16 __B, const __m512i * __C) {
+ return _mm512_mask_loadrs_epi32(__A, __B, __C); // expected-error {{returning 'int' from a function with incompatible result type '__m512i' (vector of 8 'long long' values)}}
+}
+
+__m512i test_mm512_maskz_loadrs_epi32(__mmask16 __A, const __m512i * __B) {
+ return _mm512_maskz_loadrs_epi32(__A, __B); // expected-error {{returning 'int' from a function with incompatible result type '__m512i' (vector of 8 'long long' values)}}
+}
+
+__m512i test_mm512_loadrs_epi64(const __m512i * __A) {
+ return _mm512_loadrs_epi64(__A); // expected-error {{returning 'int' from a function with incompatible result type '__m512i' (vector of 8 'long long' values)}}
+}
+
+__m512i test_mm512_mask_loadrs_epi64(__m512i __A, __mmask8 __B, const __m512i * __C) {
+ return _mm512_mask_loadrs_epi64(__A, __B, __C); // expected-error {{returning 'int' from a function with incompatible result type '__m512i' (vector of 8 'long long' values)}}
+}
+
+__m512i test_mm512_maskz_loadrs_epi64(__mmask8 __A, const __m512i * __B) {
+ return _mm512_maskz_loadrs_epi64(__A, __B); // expected-error {{returning 'int' from a function with incompatible result type '__m512i' (vector of 8 'long long' values)}}
+}
+
+__m512i test_mm512_loadrs_epi16(const __m512i * __A) {
+ return _mm512_loadrs_epi16(__A); // expected-error {{returning 'int' from a function with incompatible result type '__m512i' (vector of 8 'long long' values)}}
+}
+
+__m512i test_mm512_mask_loadrs_epi16(__m512i __A, __mmask32 __B, const __m512i * __C) {
+ return _mm512_mask_loadrs_epi16(__A, __B, __C); // expected-error {{returning 'int' from a function with incompatible result type '__m512i' (vector of 8 'long long' values)}}
+}
+
+__m512i test_mm512_maskz_loadrs_epi16(__mmask32 __A, const __m512i * __B) {
+ return _mm512_maskz_loadrs_epi16(__A, __B); // expected-error {{returning 'int' from a function with incompatible result type '__m512i' (vector of 8 'long long' values)}}
+}
diff --git a/clang/test/CodeGen/X86/movrs-avx10.2-512-builtins.c b/clang/test/CodeGen/X86/movrs-avx10.2-512-builtins.c
new file mode 100644
index 00000000000000..997d6dbc53a8b0
--- /dev/null
+++ b/clang/test/CodeGen/X86/movrs-avx10.2-512-builtins.c
@@ -0,0 +1,87 @@
+// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-- -target-feature +movrs -target-feature +avx10.2-512 -emit-llvm -o - -Wall -Werror | FileCheck %s
+
+#include <immintrin.h>
+
+__m512i test_mm512_loadrs_epi8(const __m512i * __A) {
+ // CHECK-LABEL: @test_mm512_loadrs_epi8(
+ // CHECK: call <64 x i8> @llvm.x86.avx10.vmovrsb512(
+ return _mm512_loadrs_epi8(__A);
+}
+
+__m512i test_mm512_mask_loadrs_epi8(__m512i __A, __mmask64 __B, const __m512i * __C) {
+ // CHECK-LABEL: @test_mm512_mask_loadrs_epi8(
+ // CHECK: call <64 x i8> @llvm.x86.avx10.vmovrsb512(
+ // CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}
+ return _mm512_mask_loadrs_epi8(__A, __B, __C);
+}
+
+__m512i test_mm512_maskz_loadrs_epi8(__mmask64 __A, const __m512i * __B) {
+ // CHECK-LABEL: @test_mm512_maskz_loadrs_epi8(
+ // CHECK: call <64 x i8> @llvm.x86.avx10.vmovrsb512(
+ // CHECK: store <8 x i64> zeroinitializer
+ // CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}
+ return _mm512_maskz_loadrs_epi8(__A, __B);
+}
+
+__m512i test_mm512_loadrs_epi32(const __m512i * __A) {
+ // CHECK-LABEL: @test_mm512_loadrs_epi32(
+ // CHECK: call <16 x i32> @llvm.x86.avx10.vmovrsd512(
+ return _mm512_loadrs_epi32(__A);
+}
+
+__m512i test_mm512_mask_loadrs_epi32(__m512i __A, __mmask16 __B, const __m512i * __C) {
+ // CHECK-LABEL: @test_mm512_mask_loadrs_epi32(
+ // CHECK: call <16 x i32> @llvm.x86.avx10.vmovrsd512(
+ // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
+ return _mm512_mask_loadrs_epi32(__A, __B, __C);
+}
+
+__m512i test_mm512_maskz_loadrs_epi32(__mmask16 __A, const __m512i * __B) {
+ // CHECK-LABEL: @test_mm512_maskz_loadrs_epi32(
+ // CHECK: call <16 x i32> @llvm.x86.avx10.vmovrsd512(
+ // CHECK: store <8 x i64> zeroinitializer
+ // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
+ return _mm512_maskz_loadrs_epi32(__A, __B);
+}
+
+__m512i test_mm512_loadrs_epi64(const __m512i * __A) {
+ // CHECK-LABEL: @test_mm512_loadrs_epi64(
+ // CHECK: call <8 x i64> @llvm.x86.avx10.vmovrsq512(
+ return _mm512_loadrs_epi64(__A);
+}
+
+__m512i test_mm512_mask_loadrs_epi64(__m512i __A, __mmask8 __B, const __m512i * __C) {
+ // CHECK-LABEL: @test_mm512_mask_loadrs_epi64(
+ // CHECK: call <8 x i64> @llvm.x86.avx10.vmovrsq512(
+ // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
+ return _mm512_mask_loadrs_epi64(__A, __B, __C);
+}
+
+__m512i test_mm512_maskz_loadrs_epi64(__mmask8 __A, const __m512i * __B) {
+ // CHECK-LABEL: @test_mm512_maskz_loadrs_epi64(
+ // CHECK: call <8 x i64> @llvm.x86.avx10.vmovrsq512(
+ // CHECK: store <8 x i64> zeroinitializer
+ // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
+ return _mm512_maskz_loadrs_epi64(__A, __B);
+}
+
+__m512i test_mm512_loadrs_epi16(const __m512i * __A) {
+ // CHECK-LABEL: @test_mm512_loadrs_epi16(
+ // CHECK: call <32 x i16> @llvm.x86.avx10.vmovrsw512(
+ return _mm512_loadrs_epi16(__A);
+}
+
+__m512i test_mm512_mask_loadrs_epi16(__m512i __A, __mmask32 __B, const __m512i * __C) {
+ // CHECK-LABEL: @test_mm512_mask_loadrs_epi16(
+ // CHECK: call <32 x i16> @llvm.x86.avx10.vmovrsw512(
+ // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
+ return _mm512_mask_loadrs_epi16(__A, __B, __C);
+}
+
+__m512i test_mm512_maskz_loadrs_epi16(__mmask32 __A, const __m512i * __B) {
+ // CHECK-LABEL: @test_mm512_maskz_loadrs_epi16(
+ // CHECK: call <32 x i16> @llvm.x86.avx10.vmovrsw512(
+ // CHECK: store <8 x i64> zeroinitializer
+ // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
+ return _mm512_maskz_loadrs_epi16(__A, __B);
+}
diff --git a/clang/test/CodeGen/X86/movrs-avx10.2-builtins-error-32.c b/clang/test/CodeGen/X86/movrs-avx10.2-builtins-error-32.c
new file mode 100644
index 00000000000000..68608b0cbff09f
--- /dev/null
+++ b/clang/test/CodeGen/X86/movrs-avx10.2-builtins-error-32.c
@@ -0,0 +1,98 @@
+// RUN: %clang_cc1 -ffreestanding %s -Wno-implicit-function-declaration -triple=i386-unknown-unknown -target-feature +movrs -target-feature +avx10.2-256 -emit-llvm -verify
+
+#include <immintrin.h>
+__m128i test_mm_loadrs_epi8(const __m128i * __A) {
+ return _mm_loadrs_epi8(__A); // expected-error {{returning 'int' from a function with incompatible result type '__m128i' (vector of 2 'long long' values)}}
+}
+
+__m128i test_mm_mask_loadrs_epi8(__m128i __A, __mmask16 __B, const __m128i * __C) {
+ return _mm_mask_loadrs_epi8(__A, __B, __C); // expected-error {{returning 'int' from a function with incompatible result type '__m128i' (vector of 2 'long long' values)}}
+}
+
+__m128i test_mm_maskz_loadrs_epi8(__mmask16 __A, const __m128i * __B) {
+ return _mm_maskz_loadrs_epi8(__A, __B); // expected-error {{returning 'int' from a function with incompatible result type '__m128i' (vector of 2 'long long' values)}}
+}
+
+__m256i test_mm256_loadrs_epi8(const __m256i * __A) {
+ return _mm256_loadrs_epi8(__A); // expected-error {{returning 'int' from a function with incompatible result type '__m256i' (vector of 4 'long long' values)}}
+}
+
+__m256i test_mm256_mask_loadrs_epi8(__m256i __A, __mmask32 __B, const __m256i * __C) {
+ return _mm256_mask_loadrs_epi8(__A, __B, __C); // expected-error {{returning 'int' from a function with incompatible result type '__m256i' (vector of 4 'long long' values)}}
+}
+
+__m256i test_mm256_maskz_loadrs_epi8(__mmask32 __A, const __m256i * __B) {
+ return _mm256_maskz_loadrs_epi8(__A, __B); // expected-error {{returning 'int' from a function with incompatible result type '__m256i' (vector of 4 'long long' values)}}
+}
+
+__m128i test_mm_loadrs_epi32(const __m128i * __A) {
+ return _mm_loadrs_epi32(__A); // expected-error {{returning 'int' from a function with incompatible result type '__m128i' (vector of 2 'long long' values)}}
+}
+
+__m128i test_mm_mask_loadrs_epi32(__m128i __A, __mmask8 __B, const __m128i * __C) {
+ return _mm_mask_loadrs_epi32(__A, __B, __C); // expected-error {{returning 'int' from a function with incompatible result type '__m128i' (vector of 2 'long long' values)}}
+}
+
+__m128i test_mm_maskz_loadrs_epi32(__mmask8 __A, const __m128i * __B) {
+ return _mm_maskz_loadrs_epi32(__A, __B); // expected-error {{returning 'int' from a function with incompatible result type '__m128i' (vector of 2 'long long' values)}}
+}
+
+__m256i test_mm256_loadrs_epi32(const __m256i * __A) {
+ return _mm256_loadrs_epi32(__A); // expected-error {{returning 'int' from a function with incompatible result type '__m256i' (vector of 4 'long long' values)}}
+}
+
+__m256i test_mm256_mask_loadrs_epi32(__m256i __A, __mmask8 __B, const __m256i * __C) {
+ return _mm256_mask_loadrs_epi32(__A, __B, __C); // expected-error {{returning 'int' from a function with incompatible result type '__m256i' (vector of 4 'long long' values)}}
+}
+
+__m256i test_mm256_maskz_loadrs_epi32(__mmask8 __A, const __m256i * __B) {
+ return _mm256_maskz_loadrs_epi32(__A, __B); // expected-error {{returning 'int' from a function with incompatible result type '__m256i' (vector of 4 'long long' values)}}
+}
+
+__m128i test_mm_loadrs_epi64(const __m128i * __A) {
+ return _mm_loadrs_epi64(__A); // expected-error {{returning 'int' from a function with incompatible result type '__m128i' (vector of 2 'long long' values)}}
+}
+
+__m128i test_mm_mask_loadrs_epi64(__m128i __A, __mmask8 __B, const __m128i * __C) {
+ return _mm_mask_loadrs_epi64(__A, __B, __C); // expected-error {{returning 'int' from a function with incompatible result type '__m128i' (vector of 2 'long long' values)}}
+}
+
+__m128i test_mm_maskz_loadrs_epi64(__mmask8 __A, const __m128i * __B) {
+ return _mm_maskz_loadrs_epi64(__A, __B); // expected-error {{returning 'int' from a function with incompatible result type '__m128i' (vector of 2 'long long' values)}}
+}
+
+__m256i test_mm256_loadrs_epi64(const __m256i * __A) {
+ return _mm256_loadrs_epi64(__A); // expected-error {{returning 'int' from a function with incompatible result type '__m256i' (vector of 4 'long long' values)}}
+}
+
+__m256i test_mm256_mask_loadrs_epi64(__m256i __A, __mmask8 __B, const __m256i * __C) {
+ return _mm256_mask_loadrs_epi64(__A, __B, __C); // expected-error {{returning 'int' from a function with incompatible result type '__m256i' (vector of 4 'long long' values)}}
+}
+
+__m256i test_mm256_maskz_loadrs_epi64(__mmask8 __A, const __m256i * __B) {
+ return _mm256_maskz_loadrs_epi64(__A, __B); // expected-error {{returning 'int' from a function with incompatible result type '__m256i' (vector of 4 'long long' values)}}
+}
+
+__m128i test_mm_loadrs_epi16(const __m128i * __A) {
+ return _mm_loadrs_epi16(__A); // expected-error {{returning 'int' from a function with incompatible result type '__m128i' (vector of 2 'long long' values)}}
+}
+
+__m128i test_mm_mask_loadrs_epi16(__m128i __A, __mmask8 __B, const __m128i * __C) {
+ return _mm_mask_loadrs_epi16(__A, __B, __C); // expected-error {{returning 'int' from a function with incompatible result type '__m128i' (vector of 2 'long long' values)}}
+}
+
+__m128i test_mm_maskz_loadrs_epi16(__mmask8 __A, const __m128i * __B) {
+ return _mm_maskz_loadrs_epi16(__A, __B); // expected-error {{returning 'int' from a function with incompatible result type '__m128i' (vector of 2 'long long' values)}}
+}
+
+__m256i test_mm256_loadrs_epi16(const __m256i * __A) {
+ return _mm256_loadrs_epi16(__A); // expected-error {{returning 'int' from a function with incompatible result type '__m256i' (vector of 4 'long long' values)}}
+}
+
+__m256i test_mm256_mask_loadrs_epi16(__m256i __A, __mmask16 __B, const __m256i * __C) {
+ return _mm256_mask_loadrs_epi16(__A, __B, __C); // expected-error {{returning 'int' from a function with incompatible result type '__m256i' (vector of 4 'long long' values)}}
+}
+
+__m256i test_mm256_maskz_loadrs_epi16(__mmask16 __A, const __m256i * __B) {
+ return _mm256_maskz_loadrs_epi16(__A, __B); // expected-error {{returning 'int' from a function with incompatible result type '__m256i' (vector of 4 'long long' values)}}
+}
diff --git a/clang/test/CodeGen/X86/movrs-avx10.2-builtins.c b/clang/test/CodeGen/X86/movrs-avx10.2-builtins.c
new file mode 100644
index 00000000000000..2011b2a8624738
--- /dev/null
+++ b/clang/test/CodeGen/X86/movrs-avx10.2-builtins.c
@@ -0,0 +1,171 @@
+// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-- -target-feature +movrs -target-feature +avx10.2-256 -emit-llvm -o - -Wall -Werror | FileCheck %s
+
+#include <immintrin.h>
+
+__m128i test_mm_loadrs_epi8(const __m128i * __A) {
+ // CHECK-LABEL: @test_mm_loadrs_epi8(
+ // CHECK: call <16 x i8> @llvm.x86.avx10.vmovrsb128(
+ return _mm_loadrs_epi8(__A);
+}
+
+__m128i test_mm_mask_loadrs_epi8(__m128i __A, __mmask16 __B, const __m128i * __C) {
+ // CHECK-LABEL: @test_mm_mask_loadrs_epi8(
+ // CHECK: call <16 x i8> @llvm.x86.avx10.vmovrsb128(
+ // CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}
+ return _mm_mask_loadrs_epi8(__A, __B, __C);
+}
+
+__m128i test_mm_maskz_loadrs_epi8(__mmask16 __A, const __m128i * __B) {
+ // CHECK-LABEL: @test_mm_maskz_loadrs_epi8(
+ // CHECK: call <16 x i8> @llvm.x86.avx10.vmovrsb128(
+ // CHECK: store <2 x i64> zeroinitializer
+ // CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}
+ return _mm_maskz_loadrs_epi8(__A, __B);
+}
+
+__m256i test_mm256_loadrs_epi8(const __m256i * __A) {
+ // CHECK-LABEL: @test_mm256_loadrs_epi8(
+ // CHECK: call <32 x i8> @llvm.x86.avx10.vmovrsb256(
+ return _mm256_loadrs_epi8(__A);
+}
+
+__m256i test_mm256_mask_loadrs_epi8(__m256i __A, __mmask32 __B, const __m256i * __C) {
+ // CHECK-LABEL: @test_mm256_mask_loadrs_epi8(
+ // CHECK: call <32 x i8> @llvm.x86.avx10.vmovrsb256(
+ // CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}
+ return _mm256_mask_loadrs_epi8(__A, __B, __C);
+}
+
+__m256i test_mm256_maskz_loadrs_epi8(__mmask32 __A, const __m256i * __B) {
+ // CHECK-LABEL: @test_mm256_maskz_loadrs_epi8(
+ // CHECK: call <32 x i8> @llvm.x86.avx10.vmovrsb256(
+ // CHECK: store <4 x i64> zeroinitializer
+ // CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}
+ return _mm256_maskz_loadrs_epi8(__A, __B);
+}
+
+__m128i test_mm_loadrs_epi32(const __m128i * __A) {
+ // CHECK-LABEL: @test_mm_loadrs_epi32(
+ // CHECK: call <4 x i32> @llvm.x86.avx10.vmovrsd128(
+ return _mm_loadrs_epi32(__A);
+}
+
+__m128i test_mm_mask_loadrs_epi32(__m128i __A, __mmask8 __B, const __m128i * __C) {
+ // CHECK-LABEL: @test_mm_mask_loadrs_epi32(
+ // CHECK: call <4 x i32> @llvm.x86.avx10.vmovrsd128(
+ // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
+ return _mm_mask_loadrs_epi32(__A, __B, __C);
+}
+
+__m128i test_mm_maskz_loadrs_epi32(__mmask8 __A, const __m128i * __B) {
+ // CHECK-LABEL: @test_mm_maskz_loadrs_epi32(
+ // CHECK: call <4 x i32> @llvm.x86.avx10.vmovrsd128(
+ // CHECK: store <2 x i64> zeroinitializer
+ // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}
+ return _mm_maskz_loadrs_epi32(__A, __B);
+}
+
+__m256i test_mm256_loadrs_epi32(const __m256i * __A) {
+ // CHECK-LABEL: @test_mm256_loadrs_epi32(
+ // CHECK: call <8 x i32> @llvm.x86.avx10.vmovrsd256(
+ return _mm256_loadrs_epi32(__A);
+}
+
+__m256i test_mm256_mask_loadrs_epi32(__m256i __A, __mmask8 __B, const __m256i * __C) {
+ // CHECK-LABEL: @test_mm256_mask_loadrs_epi32(
+ // CHECK: call <8 x i32> @llvm.x86.avx10.vmovrsd256(
+ // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
+ return _mm256_mask_loadrs_epi32(__A, __B, __C);
+}
+
+__m256i test_mm256_maskz_loadrs_epi32(__mmask8 __A, const __m256i * __B) {
+ // CHECK-LABEL: @test_mm256_maskz_loadrs_epi32(
+ // CHECK: call <8 x i32> @llvm.x86.avx10.vmovrsd256(
+ // CHECK: store <4 x i64> zeroinitializer
+ // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}}
+ return _mm256_maskz_loadrs_epi32(__A, __B);
+}
+
+__m128i test_mm_loadrs_epi64(const __m128i * __A) {
+ // CHECK-LABEL: @test_mm_loadrs_epi64(
+ // CHECK: call <2 x i64> @llvm.x86.avx10.vmovrsq128(
+ return _mm_loadrs_epi64(__A);
+}
+
+__m128i test_mm_mask_loadrs_epi64(__m128i __A, __mmask8 __B, const __m128i * __C) {
+ // CHECK-LABEL: @test_mm_mask_loadrs_epi64(
+ // CHECK: call <2 x i64> @llvm.x86.avx10.vmovrsq128(
+ // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}
+ return _mm_mask_loadrs_epi64(__A, __B, __C);
+}
+
+__m128i test_mm_maskz_loadrs_epi64(__mmask8 __A, const __m128i * __B) {
+ // CHECK-LABEL: @test_mm_maskz_loadrs_epi64(
+ // CHECK: call <2 x i64> @llvm.x86.avx10.vmovrsq128(
+ // CHECK: store <2 x i64> zeroinitializer
+ // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}
+ return _mm_maskz_loadrs_epi64(__A, __B);
+}
+
+__m256i test_mm256_loadrs_epi64(const __m256i * __A) {
+ // CHECK-LABEL: @test_mm256_loadrs_epi64(
+ // CHECK: call <4 x i64> @llvm.x86.avx10.vmovrsq256(
+ return _mm256_loadrs_epi64(__A);
+}
+
+__m256i test_mm256_mask_loadrs_epi64(__m256i __A, __mmask8 __B, const __m256i * __C) {
+ // CHECK-LABEL: @test_mm256_mask_loadrs_epi64(
+ // CHECK: call <4 x i64> @llvm.x86.avx10.vmovrsq256(
+ // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}
+ return _mm256_mask_loadrs_epi64(__A, __B, __C);
+}
+
+__m256i test_mm256_maskz_loadrs_epi64(__mmask8 __A, const __m256i * __B) {
+ // CHECK-LABEL: @test_mm256_maskz_loadrs_epi64(
+ // CHECK: call <4 x i64> @llvm.x86.avx10.vmovrsq256(
+ // CHECK: store <4 x i64> zeroinitializer
+ // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}
+ return _mm256_maskz_loadrs_epi64(__A, __B);
+}
+
+__m128i test_mm_loadrs_epi16(const __m128i * __A) {
+ // CHECK-LABEL: @test_mm_loadrs_epi16(
+ // CHECK: call <8 x i16> @llvm.x86.avx10.vmovrsw128(
+ return _mm_loadrs_epi16(__A);
+}
+
+__m128i test_mm_mask_loadrs_epi16(__m128i __A, __mmask8 __B, const __m128i * __C) {
+ // CHECK-LABEL: @test_mm_mask_loadrs_epi16(
+ // CHECK: call <8 x i16> @llvm.x86.avx10.vmovrsw128(
+ // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
+ return _mm_mask_loadrs_epi16(__A, __B, __C);
+}
+
+__m128i test_mm_maskz_loadrs_epi16(__mmask8 __A, const __m128i * __B) {
+ // CHECK-LABEL: @test_mm_maskz_loadrs_epi16(
+ // CHECK: call <8 x i16> @llvm.x86.avx10.vmovrsw128(
+ // CHECK: store <2 x i64> zeroinitializer
+ // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
+ return _mm_maskz_loadrs_epi16(__A, __B);
+}
+
+__m256i test_mm256_loadrs_epi16(const __m256i * __A) {
+ // CHECK-LABEL: @test_mm256_loadrs_epi16(
+ // CHECK: call <16 x i16> @llvm.x86.avx10.vmovrsw256(
+ return _mm256_loadrs_epi16(__A);
+}
+
+__m256i test_mm256_mask_loadrs_epi16(__m256i __A, __mmask16 __B, const __m256i * __C) {
+ // CHECK-LABEL: @test_mm256_mask_loadrs_epi16(
+ // CHECK: call <16 x i16> @llvm.x86.avx10.vmovrsw256(
+ // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
+ return _mm256_mask_loadrs_epi16(__A, __B, __C);
+}
+
+__m256i test_mm256_maskz_loadrs_epi16(__mmask16 __A, const __m256i * __B) {
+ // CHECK-LABEL: @test_mm256_maskz_loadrs_epi16(
+ // CHECK: call <16 x i16> @llvm.x86.avx10.vmovrsw256(
+ // CHECK: store <4 x i64> zeroinitializer
+ // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
+ return _mm256_maskz_loadrs_epi16(__A, __B);
+}
diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td
index 5262e3154ff721..33070fef7a7351 100644
--- a/llvm/include/llvm/IR/IntrinsicsX86.td
+++ b/llvm/include/llvm/IR/IntrinsicsX86.td
@@ -7572,3 +7572,42 @@ def int_x86_avx10_vfnmsub231nepbf16128 : ClangBuiltin<"__builtin_ia32_vfnmsub231
DefaultAttrsIntrinsic<[llvm_v8bf16_ty], [llvm_v8bf16_ty, llvm_v8bf16_ty, llvm_v8bf16_ty ],
[IntrNoMem]>;
}
+
+let TargetPrefix = "x86" in {
+def int_x86_avx10_vmovrsb128 : ClangBuiltin<"__builtin_ia32_vmovrsb128">,
+ Intrinsic<[llvm_v16i8_ty], [llvm_ptr_ty],
+ [IntrReadMem]>;
+def int_x86_avx10_vmovrsb256 : ClangBuiltin<"__builtin_ia32_vmovrsb256">,
+ Intrinsic<[llvm_v32i8_ty], [llvm_ptr_ty],
+ [IntrReadMem]>;
+def int_x86_avx10_vmovrsb512 : ClangBuiltin<"__builtin_ia32_vmovrsb512">,
+ Intrinsic<[llvm_v64i8_ty], [llvm_ptr_ty],
+ [IntrReadMem]>;
+def int_x86_avx10_vmovrsd128 : ClangBuiltin<"__builtin_ia32_vmovrsd128">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty],
+ [IntrReadMem]>;
+def int_x86_avx10_vmovrsd256 : ClangBuiltin<"__builtin_ia32_vmovrsd256">,
+ Intrinsic<[llvm_v8i32_ty], [llvm_ptr_ty],
+ [IntrReadMem]>;
+def int_x86_avx10_vmovrsd512 : ClangBuiltin<"__builtin_ia32_vmovrsd512">,
+ Intrinsic<[llvm_v16i32_ty], [llvm_ptr_ty],
+ [IntrReadMem]>;
+def int_x86_avx10_vmovrsq128 : ClangBuiltin<"__builtin_ia32_vmovrsq128">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_ptr_ty],
+ [IntrReadMem]>;
+def int_x86_avx10_vmovrsq256 : ClangBuiltin<"__builtin_ia32_vmovrsq256">,
+ Intrinsic<[llvm_v4i64_ty], [llvm_ptr_ty],
+ [IntrReadMem]>;
+def int_x86_avx10_vmovrsq512 : ClangBuiltin<"__builtin_ia32_vmovrsq512">,
+ Intrinsic<[llvm_v8i64_ty], [llvm_ptr_ty],
+ [IntrReadMem]>;
+def int_x86_avx10_vmovrsw128 : ClangBuiltin<"__builtin_ia32_vmovrsw128">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_ptr_ty],
+ [IntrReadMem]>;
+def int_x86_avx10_vmovrsw256 : ClangBuiltin<"__builtin_ia32_vmovrsw256">,
+ Intrinsic<[llvm_v16i16_ty], [llvm_ptr_ty],
+ [IntrReadMem]>;
+def int_x86_avx10_vmovrsw512 : ClangBuiltin<"__builtin_ia32_vmovrsw512">,
+ Intrinsic<[llvm_v32i16_ty], [llvm_ptr_ty],
+ [IntrReadMem]>;
+}
diff --git a/llvm/include/llvm/TargetParser/X86TargetParser.def b/llvm/include/llvm/TargetParser/X86TargetParser.def
index e5bf196559ba63..80468146a02b00 100644
--- a/llvm/include/llvm/TargetParser/X86TargetParser.def
+++ b/llvm/include/llvm/TargetParser/X86TargetParser.def
@@ -261,6 +261,7 @@ X86_FEATURE_COMPAT(AVX10_1, "avx10.1-256", 36)
X86_FEATURE_COMPAT(AVX10_1_512, "avx10.1-512", 37)
X86_FEATURE_COMPAT(AVX10_2, "avx10.2-256", 0)
X86_FEATURE_COMPAT(AVX10_2_512, "avx10.2-512", 0)
+X86_FEATURE_COMPAT(MOVRS, "movrs", 0)
X86_FEATURE (ZU, "zu")
// These features aren't really CPU features, but the frontend can set them.
X86_FEATURE (RETPOLINE_EXTERNAL_THUNK, "retpoline-external-thunk")
diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td
index d57450d91ea2dd..6bedf9e1d13ac3 100644
--- a/llvm/lib/Target/X86/X86.td
+++ b/llvm/lib/Target/X86/X86.td
@@ -351,6 +351,8 @@ def FeatureZU : SubtargetFeature<"zu", "HasZU", "true",
def FeatureUseGPR32InInlineAsm
: SubtargetFeature<"inline-asm-use-gpr32", "UseInlineAsmGPR32", "true",
"Enable use of GPR32 in inline assembly for APX">;
+def FeatureMOVRS : SubtargetFeature<"movrs", "HasMOVRS", "true",
+ "Enable MOVRS", []>;
// Ivy Bridge and newer processors have enhanced REP MOVSB and STOSB (aka
// "string operations"). See "REP String Enhancement" in the Intel Software
diff --git a/llvm/lib/Target/X86/X86InstrAVX10.td b/llvm/lib/Target/X86/X86InstrAVX10.td
index 625f2e01d47218..9ef2debb57fa00 100644
--- a/llvm/lib/Target/X86/X86InstrAVX10.td
+++ b/llvm/lib/Target/X86/X86InstrAVX10.td
@@ -1647,3 +1647,31 @@ let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
def : InstAlias<"vmovw.s\t{$src, $dst|$dst, $src}",
(VMOVZPWILo2PWIZrr2 VR128X:$dst, VR128X:$src), 0>;
}
+
+// MOVRS
+multiclass vmovrs_p<bits<8> opc, string OpStr, X86VectorVTInfo _> {
+ let ExeDomain = _.ExeDomain in {
+ defm m: AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst),
+ (ins _.MemOp:$src), OpStr, "$src", "$src",
+ (_.VT (!cast<Intrinsic>("int_x86_avx10_"#OpStr#_.Size)
+ addr:$src))>, EVEX;
+ }
+}
+
+multiclass vmovrs_p_vl<bits<8> opc, string OpStr, AVX512VLVectorVTInfo _Vec> {
+ let Predicates = [HasMOVRS, HasAVX10_2_512, In64BitMode] in
+ defm Z : vmovrs_p<opc, OpStr, _Vec.info512>, EVEX_V512;
+ let Predicates = [HasMOVRS, HasAVX10_2, In64BitMode] in {
+ defm Z128 : vmovrs_p<opc, OpStr, _Vec.info128>, EVEX_V128;
+ defm Z256 : vmovrs_p<opc, OpStr, _Vec.info256>, EVEX_V256;
+ } // Predicates = [HasMOVRS, HasAVX10_2, In64BitMode]
+}
+
+defm VMOVRSB : vmovrs_p_vl<0x6f, "vmovrsb", avx512vl_i8_info>,
+ T_MAP5, XD, EVEX_CD8<8, CD8VF>, Sched<[WriteVecLoad]>;
+defm VMOVRSW : vmovrs_p_vl<0x6f, "vmovrsw", avx512vl_i16_info>,
+ T_MAP5, XD, REX_W, EVEX_CD8<16, CD8VF>, Sched<[WriteVecLoad]>;
+defm VMOVRSD : vmovrs_p_vl<0x6f, "vmovrsd", avx512vl_i32_info>,
+ T_MAP5, XS, EVEX_CD8<32, CD8VF>, Sched<[WriteVecLoad]>;
+defm VMOVRSQ : vmovrs_p_vl<0x6f, "vmovrsq", avx512vl_i64_info>,
+ T_MAP5, XS, REX_W, EVEX_CD8<64, CD8VF>, Sched<[WriteVecLoad]>;
diff --git a/llvm/lib/Target/X86/X86InstrPredicates.td b/llvm/lib/Target/X86/X86InstrPredicates.td
index a815ddc9714f0c..7fb566fba51818 100644
--- a/llvm/lib/Target/X86/X86InstrPredicates.td
+++ b/llvm/lib/Target/X86/X86InstrPredicates.td
@@ -152,6 +152,7 @@ def HasCLZERO : Predicate<"Subtarget->hasCLZERO()">;
def HasCLDEMOTE : Predicate<"Subtarget->hasCLDEMOTE()">;
def HasMOVDIRI : Predicate<"Subtarget->hasMOVDIRI()">;
def HasMOVDIR64B : Predicate<"Subtarget->hasMOVDIR64B()">;
+def HasMOVRS : Predicate<"Subtarget->hasMOVRS()">;
def HasPTWRITE : Predicate<"Subtarget->hasPTWRITE()">;
def FPStackf32 : Predicate<"!Subtarget->hasSSE1()">;
def FPStackf64 : Predicate<"!Subtarget->hasSSE2()">;
diff --git a/llvm/lib/TargetParser/X86TargetParser.cpp b/llvm/lib/TargetParser/X86TargetParser.cpp
index 09d4312918acfe..586df5748aa822 100644
--- a/llvm/lib/TargetParser/X86TargetParser.cpp
+++ b/llvm/lib/TargetParser/X86TargetParser.cpp
@@ -639,6 +639,8 @@ constexpr FeatureBitset ImpliedFeaturesNF = {};
constexpr FeatureBitset ImpliedFeaturesCF = {};
constexpr FeatureBitset ImpliedFeaturesZU = {};
+constexpr FeatureBitset ImpliedFeaturesMOVRS = {};
+
constexpr FeatureInfo FeatureInfos[X86::CPU_FEATURE_MAX] = {
#define X86_FEATURE(ENUM, STR) {{"+" STR}, ImpliedFeatures##ENUM},
#include "llvm/TargetParser/X86TargetParser.def"
diff --git a/llvm/test/CodeGen/X86/movrs-avx10.2-512-intrinsics.ll b/llvm/test/CodeGen/X86/movrs-avx10.2-512-intrinsics.ll
new file mode 100644
index 00000000000000..a730ef519c015e
--- /dev/null
+++ b/llvm/test/CodeGen/X86/movrs-avx10.2-512-intrinsics.ll
@@ -0,0 +1,163 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+movrs,+avx10.2-512 -verify-machineinstrs --show-mc-encoding | FileCheck %s --check-prefixes=CHECK
+
+declare <64 x i8> @llvm.x86.avx10.vmovrsb512(ptr)
+declare <16 x i32> @llvm.x86.avx10.vmovrsd512(ptr)
+declare <8 x i64> @llvm.x86.avx10.vmovrsq512(ptr)
+declare <32 x i16> @llvm.x86.avx10.vmovrsw512(ptr)
+
+define <8 x i64> @test_mm512_movrsb_epi8(ptr %__A) {
+; CHECK-LABEL: test_mm512_movrsb_epi8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmovrsb (%rdi), %zmm0 # encoding: [0x62,0xf5,0x7f,0x48,0x6f,0x07]
+; CHECK-NEXT: retq # encoding: [0xc3]
+entry:
+ %0 = tail call <64 x i8> @llvm.x86.avx10.vmovrsb512(ptr %__A)
+ %1 = bitcast <64 x i8> %0 to <8 x i64>
+ ret <8 x i64> %1
+}
+
+define <8 x i64> @test_mm512_mask_movrsb_epi8(<8 x i64> %__A, i64 %__B, ptr %__C) {
+; CHECK-LABEL: test_mm512_mask_movrsb_epi8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
+; CHECK-NEXT: vmovrsb (%rsi), %zmm0 {%k1} # encoding: [0x62,0xf5,0x7f,0x49,0x6f,0x06]
+; CHECK-NEXT: retq # encoding: [0xc3]
+entry:
+ %0 = tail call <64 x i8> @llvm.x86.avx10.vmovrsb512(ptr %__C)
+ %1 = bitcast <8 x i64> %__A to <64 x i8>
+ %2 = bitcast i64 %__B to <64 x i1>
+ %3 = select <64 x i1> %2, <64 x i8> %0, <64 x i8> %1
+ %4 = bitcast <64 x i8> %3 to <8 x i64>
+ ret <8 x i64> %4
+}
+
+define <8 x i64> @test_mm512_maskz_movrsb_epi8(i64 %__A, ptr %__B) {
+; CHECK-LABEL: test_mm512_maskz_movrsb_epi8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
+; CHECK-NEXT: vmovrsb (%rsi), %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7f,0xc9,0x6f,0x06]
+; CHECK-NEXT: retq # encoding: [0xc3]
+entry:
+ %0 = tail call <64 x i8> @llvm.x86.avx10.vmovrsb512(ptr %__B)
+ %1 = bitcast i64 %__A to <64 x i1>
+ %2 = select <64 x i1> %1, <64 x i8> %0, <64 x i8> zeroinitializer
+ %3 = bitcast <64 x i8> %2 to <8 x i64>
+ ret <8 x i64> %3
+}
+
+define <8 x i64> @test_mm512_movrsd_epi32(ptr %__A) {
+; CHECK-LABEL: test_mm512_movrsd_epi32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmovrsd (%rdi), %zmm0 # encoding: [0x62,0xf5,0x7e,0x48,0x6f,0x07]
+; CHECK-NEXT: retq # encoding: [0xc3]
+entry:
+ %0 = tail call <16 x i32> @llvm.x86.avx10.vmovrsd512(ptr %__A)
+ %1 = bitcast <16 x i32> %0 to <8 x i64>
+ ret <8 x i64> %1
+}
+
+define <8 x i64> @test_mm512_mask_movrsd_epi32(<8 x i64> %__A, i16 zeroext %__B, ptr %__C) {
+; CHECK-LABEL: test_mm512_mask_movrsd_epi32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; CHECK-NEXT: vmovrsd (%rsi), %zmm0 {%k1} # encoding: [0x62,0xf5,0x7e,0x49,0x6f,0x06]
+; CHECK-NEXT: retq # encoding: [0xc3]
+entry:
+ %0 = tail call <16 x i32> @llvm.x86.avx10.vmovrsd512(ptr %__C)
+ %1 = bitcast <8 x i64> %__A to <16 x i32>
+ %2 = bitcast i16 %__B to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i32> %0, <16 x i32> %1
+ %4 = bitcast <16 x i32> %3 to <8 x i64>
+ ret <8 x i64> %4
+}
+
+define <8 x i64> @test_mm512_maskz_movrsd_epi32(i16 zeroext %__A, ptr %__B) {
+; CHECK-LABEL: test_mm512_maskz_movrsd_epi32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; CHECK-NEXT: vmovrsd (%rsi), %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7e,0xc9,0x6f,0x06]
+; CHECK-NEXT: retq # encoding: [0xc3]
+entry:
+ %0 = tail call <16 x i32> @llvm.x86.avx10.vmovrsd512(ptr %__B)
+ %1 = bitcast i16 %__A to <16 x i1>
+ %2 = select <16 x i1> %1, <16 x i32> %0, <16 x i32> zeroinitializer
+ %3 = bitcast <16 x i32> %2 to <8 x i64>
+ ret <8 x i64> %3
+}
+
+define <8 x i64> @test_mm512_movrsq_epi64(ptr %__A) {
+; CHECK-LABEL: test_mm512_movrsq_epi64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmovrsq (%rdi), %zmm0 # encoding: [0x62,0xf5,0xfe,0x48,0x6f,0x07]
+; CHECK-NEXT: retq # encoding: [0xc3]
+entry:
+ %0 = tail call <8 x i64> @llvm.x86.avx10.vmovrsq512(ptr %__A)
+ ret <8 x i64> %0
+}
+
+define <8 x i64> @test_mm512_mask_movrsq_epi64(<8 x i64> %__A, i8 zeroext %__B, ptr %__C) {
+; CHECK-LABEL: test_mm512_mask_movrsq_epi64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; CHECK-NEXT: vmovrsq (%rsi), %zmm0 {%k1} # encoding: [0x62,0xf5,0xfe,0x49,0x6f,0x06]
+; CHECK-NEXT: retq # encoding: [0xc3]
+entry:
+ %0 = tail call <8 x i64> @llvm.x86.avx10.vmovrsq512(ptr %__C)
+ %1 = bitcast i8 %__B to <8 x i1>
+ %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> %__A
+ ret <8 x i64> %2
+}
+
+define <8 x i64> @test_mm512_maskz_movrsq_epi64(i8 zeroext %__A, ptr %__B) {
+; CHECK-LABEL: test_mm512_maskz_movrsq_epi64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; CHECK-NEXT: vmovrsq (%rsi), %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0xfe,0xc9,0x6f,0x06]
+; CHECK-NEXT: retq # encoding: [0xc3]
+entry:
+ %0 = tail call <8 x i64> @llvm.x86.avx10.vmovrsq512(ptr %__B)
+ %1 = bitcast i8 %__A to <8 x i1>
+ %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> zeroinitializer
+ ret <8 x i64> %2
+}
+
+define <8 x i64> @test_mm512_movrsw_epi16(ptr %__A) {
+; CHECK-LABEL: test_mm512_movrsw_epi16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmovrsw (%rdi), %zmm0 # encoding: [0x62,0xf5,0xff,0x48,0x6f,0x07]
+; CHECK-NEXT: retq # encoding: [0xc3]
+entry:
+ %0 = tail call <32 x i16> @llvm.x86.avx10.vmovrsw512(ptr %__A)
+ %1 = bitcast <32 x i16> %0 to <8 x i64>
+ ret <8 x i64> %1
+}
+
+define <8 x i64> @test_mm512_mask_movrsw_epi16(<8 x i64> %__A, i32 %__B, ptr %__C) {
+; CHECK-LABEL: test_mm512_mask_movrsw_epi16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; CHECK-NEXT: vmovrsw (%rsi), %zmm0 {%k1} # encoding: [0x62,0xf5,0xff,0x49,0x6f,0x06]
+; CHECK-NEXT: retq # encoding: [0xc3]
+entry:
+ %0 = tail call <32 x i16> @llvm.x86.avx10.vmovrsw512(ptr %__C)
+ %1 = bitcast <8 x i64> %__A to <32 x i16>
+ %2 = bitcast i32 %__B to <32 x i1>
+ %3 = select <32 x i1> %2, <32 x i16> %0, <32 x i16> %1
+ %4 = bitcast <32 x i16> %3 to <8 x i64>
+ ret <8 x i64> %4
+}
+
+define <8 x i64> @test_mm512_maskz_movrsw_epi16(i32 %__A, ptr %__B) {
+; CHECK-LABEL: test_mm512_maskz_movrsw_epi16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; CHECK-NEXT: vmovrsw (%rsi), %zmm0 {%k1} {z} # encoding: [0x62,0xf5,0xff,0xc9,0x6f,0x06]
+; CHECK-NEXT: retq # encoding: [0xc3]
+entry:
+ %0 = tail call <32 x i16> @llvm.x86.avx10.vmovrsw512(ptr %__B)
+ %1 = bitcast i32 %__A to <32 x i1>
+ %2 = select <32 x i1> %1, <32 x i16> %0, <32 x i16> zeroinitializer
+ %3 = bitcast <32 x i16> %2 to <8 x i64>
+ ret <8 x i64> %3
+}
diff --git a/llvm/test/CodeGen/X86/movrs-avx10.2-intrinsics.ll b/llvm/test/CodeGen/X86/movrs-avx10.2-intrinsics.ll
new file mode 100644
index 00000000000000..583e16351652b2
--- /dev/null
+++ b/llvm/test/CodeGen/X86/movrs-avx10.2-intrinsics.ll
@@ -0,0 +1,329 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+movrs,+avx10.2-256 -verify-machineinstrs --show-mc-encoding | FileCheck %s --check-prefixes=CHECK
+
+define <2 x i64> @test_mm_movrsb_epu8(ptr %__A) {
+; CHECK-LABEL: test_mm_movrsb_epu8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmovrsb (%rdi), %xmm0 # encoding: [0x62,0xf5,0x7f,0x08,0x6f,0x07]
+; CHECK-NEXT: retq # encoding: [0xc3]
+entry:
+ %0 = tail call <16 x i8> @llvm.x86.avx10.vmovrsb128(ptr %__A)
+ %1 = bitcast <16 x i8> %0 to <2 x i64>
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @test_mm_mask_movrsb_epu8(<2 x i64> %__A, i16 zeroext %__B, ptr %__C) {
+; CHECK-LABEL: test_mm_mask_movrsb_epu8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; CHECK-NEXT: vmovrsb (%rsi), %xmm0 {%k1} # encoding: [0x62,0xf5,0x7f,0x09,0x6f,0x06]
+; CHECK-NEXT: retq # encoding: [0xc3]
+entry:
+ %0 = tail call <16 x i8> @llvm.x86.avx10.vmovrsb128(ptr %__C)
+ %1 = bitcast <2 x i64> %__A to <16 x i8>
+ %2 = bitcast i16 %__B to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i8> %0, <16 x i8> %1
+ %4 = bitcast <16 x i8> %3 to <2 x i64>
+ ret <2 x i64> %4
+}
+
+define <2 x i64> @test_mm_maskz_movrsb_epu8(i16 zeroext %__A, ptr %__B) {
+; CHECK-LABEL: test_mm_maskz_movrsb_epu8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; CHECK-NEXT: vmovrsb (%rsi), %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7f,0x89,0x6f,0x06]
+; CHECK-NEXT: retq # encoding: [0xc3]
+entry:
+ %0 = tail call <16 x i8> @llvm.x86.avx10.vmovrsb128(ptr %__B)
+ %1 = bitcast i16 %__A to <16 x i1>
+ %2 = select <16 x i1> %1, <16 x i8> %0, <16 x i8> zeroinitializer
+ %3 = bitcast <16 x i8> %2 to <2 x i64>
+ ret <2 x i64> %3
+}
+
+define <4 x i64> @test_mm256_movrsb_epu8(ptr %__A) {
+; CHECK-LABEL: test_mm256_movrsb_epu8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmovrsb (%rdi), %ymm0 # encoding: [0x62,0xf5,0x7f,0x28,0x6f,0x07]
+; CHECK-NEXT: retq # encoding: [0xc3]
+entry:
+ %0 = tail call <32 x i8> @llvm.x86.avx10.vmovrsb256(ptr %__A)
+ %1 = bitcast <32 x i8> %0 to <4 x i64>
+ ret <4 x i64> %1
+}
+
+define <4 x i64> @test_mm256_mask_movrsb_epu8(<4 x i64> %__A, i32 %__B, ptr %__C) {
+; CHECK-LABEL: test_mm256_mask_movrsb_epu8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; CHECK-NEXT: vmovrsb (%rsi), %ymm0 {%k1} # encoding: [0x62,0xf5,0x7f,0x29,0x6f,0x06]
+; CHECK-NEXT: retq # encoding: [0xc3]
+entry:
+ %0 = tail call <32 x i8> @llvm.x86.avx10.vmovrsb256(ptr %__C)
+ %1 = bitcast <4 x i64> %__A to <32 x i8>
+ %2 = bitcast i32 %__B to <32 x i1>
+ %3 = select <32 x i1> %2, <32 x i8> %0, <32 x i8> %1
+ %4 = bitcast <32 x i8> %3 to <4 x i64>
+ ret <4 x i64> %4
+}
+
+define <4 x i64> @test_mm256_maskz_movrsb_epu8(i32 %__A, ptr %__B) {
+; CHECK-LABEL: test_mm256_maskz_movrsb_epu8:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; CHECK-NEXT: vmovrsb (%rsi), %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x7f,0xa9,0x6f,0x06]
+; CHECK-NEXT: retq # encoding: [0xc3]
+entry:
+ %0 = tail call <32 x i8> @llvm.x86.avx10.vmovrsb256(ptr %__B)
+ %1 = bitcast i32 %__A to <32 x i1>
+ %2 = select <32 x i1> %1, <32 x i8> %0, <32 x i8> zeroinitializer
+ %3 = bitcast <32 x i8> %2 to <4 x i64>
+ ret <4 x i64> %3
+}
+
+define <2 x i64> @test_mm_movrsd_epu32(ptr %__A) {
+; CHECK-LABEL: test_mm_movrsd_epu32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmovrsd (%rdi), %xmm0 # encoding: [0x62,0xf5,0x7e,0x08,0x6f,0x07]
+; CHECK-NEXT: retq # encoding: [0xc3]
+entry:
+ %0 = tail call <4 x i32> @llvm.x86.avx10.vmovrsd128(ptr %__A)
+ %1 = bitcast <4 x i32> %0 to <2 x i64>
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @test_mm_mask_movrsd_epu32(<2 x i64> %__A, i8 zeroext %__B, ptr %__C) {
+; CHECK-LABEL: test_mm_mask_movrsd_epu32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; CHECK-NEXT: vmovrsd (%rsi), %xmm0 {%k1} # encoding: [0x62,0xf5,0x7e,0x09,0x6f,0x06]
+; CHECK-NEXT: retq # encoding: [0xc3]
+entry:
+ %0 = tail call <4 x i32> @llvm.x86.avx10.vmovrsd128(ptr %__C)
+ %1 = bitcast <2 x i64> %__A to <4 x i32>
+ %2 = bitcast i8 %__B to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %2, <8 x i1> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %3 = select <4 x i1> %extract.i, <4 x i32> %0, <4 x i32> %1
+ %4 = bitcast <4 x i32> %3 to <2 x i64>
+ ret <2 x i64> %4
+}
+
+define <2 x i64> @test_mm_maskz_movrsd_epu32(i8 zeroext %__A, ptr %__B) {
+; CHECK-LABEL: test_mm_maskz_movrsd_epu32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; CHECK-NEXT: vmovrsd (%rsi), %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0x7e,0x89,0x6f,0x06]
+; CHECK-NEXT: retq # encoding: [0xc3]
+entry:
+ %0 = tail call <4 x i32> @llvm.x86.avx10.vmovrsd128(ptr %__B)
+ %1 = bitcast i8 %__A to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %1, <8 x i1> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %2 = select <4 x i1> %extract.i, <4 x i32> %0, <4 x i32> zeroinitializer
+ %3 = bitcast <4 x i32> %2 to <2 x i64>
+ ret <2 x i64> %3
+}
+
+define <4 x i64> @test_mm256_movrsd_epu32(ptr %__A) {
+; CHECK-LABEL: test_mm256_movrsd_epu32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmovrsd (%rdi), %ymm0 # encoding: [0x62,0xf5,0x7e,0x28,0x6f,0x07]
+; CHECK-NEXT: retq # encoding: [0xc3]
+entry:
+ %0 = tail call <8 x i32> @llvm.x86.avx10.vmovrsd256(ptr %__A)
+ %1 = bitcast <8 x i32> %0 to <4 x i64>
+ ret <4 x i64> %1
+}
+
+define <4 x i64> @test_mm256_mask_movrsd_epu32(<4 x i64> %__A, i8 zeroext %__B, ptr %__C) {
+; CHECK-LABEL: test_mm256_mask_movrsd_epu32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; CHECK-NEXT: vmovrsd (%rsi), %ymm0 {%k1} # encoding: [0x62,0xf5,0x7e,0x29,0x6f,0x06]
+; CHECK-NEXT: retq # encoding: [0xc3]
+entry:
+ %0 = tail call <8 x i32> @llvm.x86.avx10.vmovrsd256(ptr %__C)
+ %1 = bitcast <4 x i64> %__A to <8 x i32>
+ %2 = bitcast i8 %__B to <8 x i1>
+ %3 = select <8 x i1> %2, <8 x i32> %0, <8 x i32> %1
+ %4 = bitcast <8 x i32> %3 to <4 x i64>
+ ret <4 x i64> %4
+}
+
+define <4 x i64> @test_mm256_maskz_movrsd_epu32(i8 zeroext %__A, ptr %__B) {
+; CHECK-LABEL: test_mm256_maskz_movrsd_epu32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; CHECK-NEXT: vmovrsd (%rsi), %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0x7e,0xa9,0x6f,0x06]
+; CHECK-NEXT: retq # encoding: [0xc3]
+entry:
+ %0 = tail call <8 x i32> @llvm.x86.avx10.vmovrsd256(ptr %__B)
+ %1 = bitcast i8 %__A to <8 x i1>
+ %2 = select <8 x i1> %1, <8 x i32> %0, <8 x i32> zeroinitializer
+ %3 = bitcast <8 x i32> %2 to <4 x i64>
+ ret <4 x i64> %3
+}
+
+define <2 x i64> @test_mm_movrsq_epu64(ptr %__A) {
+; CHECK-LABEL: test_mm_movrsq_epu64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmovrsq (%rdi), %xmm0 # encoding: [0x62,0xf5,0xfe,0x08,0x6f,0x07]
+; CHECK-NEXT: retq # encoding: [0xc3]
+entry:
+ %0 = tail call <2 x i64> @llvm.x86.avx10.vmovrsq128(ptr %__A)
+ ret <2 x i64> %0
+}
+
+define <2 x i64> @test_mm_mask_movrsq_epu64(<2 x i64> %__A, i8 zeroext %__B, ptr %__C) {
+; CHECK-LABEL: test_mm_mask_movrsq_epu64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; CHECK-NEXT: vmovrsq (%rsi), %xmm0 {%k1} # encoding: [0x62,0xf5,0xfe,0x09,0x6f,0x06]
+; CHECK-NEXT: retq # encoding: [0xc3]
+entry:
+ %0 = tail call <2 x i64> @llvm.x86.avx10.vmovrsq128(ptr %__C)
+ %1 = bitcast i8 %__B to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %1, <8 x i1> poison, <2 x i32> <i32 0, i32 1>
+ %2 = select <2 x i1> %extract.i, <2 x i64> %0, <2 x i64> %__A
+ ret <2 x i64> %2
+}
+
+define <2 x i64> @test_mm_maskz_movrsq_epu64(i8 zeroext %__A, ptr %__B) {
+; CHECK-LABEL: test_mm_maskz_movrsq_epu64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; CHECK-NEXT: vmovrsq (%rsi), %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0xfe,0x89,0x6f,0x06]
+; CHECK-NEXT: retq # encoding: [0xc3]
+entry:
+ %0 = tail call <2 x i64> @llvm.x86.avx10.vmovrsq128(ptr %__B)
+ %1 = bitcast i8 %__A to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %1, <8 x i1> poison, <2 x i32> <i32 0, i32 1>
+ %2 = select <2 x i1> %extract.i, <2 x i64> %0, <2 x i64> zeroinitializer
+ ret <2 x i64> %2
+}
+
+define <4 x i64> @test_mm256_movrsq_epu64(ptr %__A) {
+; CHECK-LABEL: test_mm256_movrsq_epu64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmovrsq (%rdi), %ymm0 # encoding: [0x62,0xf5,0xfe,0x28,0x6f,0x07]
+; CHECK-NEXT: retq # encoding: [0xc3]
+entry:
+ %0 = tail call <4 x i64> @llvm.x86.avx10.vmovrsq256(ptr %__A)
+ ret <4 x i64> %0
+}
+
+define <4 x i64> @test_mm256_mask_movrsq_epu64(<4 x i64> %__A, i8 zeroext %__B, ptr %__C) {
+; CHECK-LABEL: test_mm256_mask_movrsq_epu64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; CHECK-NEXT: vmovrsq (%rsi), %ymm0 {%k1} # encoding: [0x62,0xf5,0xfe,0x29,0x6f,0x06]
+; CHECK-NEXT: retq # encoding: [0xc3]
+entry:
+ %0 = tail call <4 x i64> @llvm.x86.avx10.vmovrsq256(ptr %__C)
+ %1 = bitcast i8 %__B to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %1, <8 x i1> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %2 = select <4 x i1> %extract.i, <4 x i64> %0, <4 x i64> %__A
+ ret <4 x i64> %2
+}
+
+define <4 x i64> @test_mm256_maskz_movrsq_epu64(i8 zeroext %__A, ptr %__B) {
+; CHECK-LABEL: test_mm256_maskz_movrsq_epu64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; CHECK-NEXT: vmovrsq (%rsi), %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0xfe,0xa9,0x6f,0x06]
+; CHECK-NEXT: retq # encoding: [0xc3]
+entry:
+ %0 = tail call <4 x i64> @llvm.x86.avx10.vmovrsq256(ptr %__B)
+ %1 = bitcast i8 %__A to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %1, <8 x i1> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %2 = select <4 x i1> %extract.i, <4 x i64> %0, <4 x i64> zeroinitializer
+ ret <4 x i64> %2
+}
+
+define <2 x i64> @test_mm_movrsw_epu16(ptr %__A) {
+; CHECK-LABEL: test_mm_movrsw_epu16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmovrsw (%rdi), %xmm0 # encoding: [0x62,0xf5,0xff,0x08,0x6f,0x07]
+; CHECK-NEXT: retq # encoding: [0xc3]
+entry:
+ %0 = tail call <8 x i16> @llvm.x86.avx10.vmovrsw128(ptr %__A)
+ %1 = bitcast <8 x i16> %0 to <2 x i64>
+ ret <2 x i64> %1
+}
+
+define <2 x i64> @test_mm_mask_movrsw_epu16(<2 x i64> %__A, i8 zeroext %__B, ptr %__C) {
+; CHECK-LABEL: test_mm_mask_movrsw_epu16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; CHECK-NEXT: vmovrsw (%rsi), %xmm0 {%k1} # encoding: [0x62,0xf5,0xff,0x09,0x6f,0x06]
+; CHECK-NEXT: retq # encoding: [0xc3]
+entry:
+ %0 = tail call <8 x i16> @llvm.x86.avx10.vmovrsw128(ptr %__C)
+ %1 = bitcast <2 x i64> %__A to <8 x i16>
+ %2 = bitcast i8 %__B to <8 x i1>
+ %3 = select <8 x i1> %2, <8 x i16> %0, <8 x i16> %1
+ %4 = bitcast <8 x i16> %3 to <2 x i64>
+ ret <2 x i64> %4
+}
+
+define <2 x i64> @test_mm_maskz_movrsw_epu16(i8 zeroext %__A, ptr %__B) {
+; CHECK-LABEL: test_mm_maskz_movrsw_epu16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; CHECK-NEXT: vmovrsw (%rsi), %xmm0 {%k1} {z} # encoding: [0x62,0xf5,0xff,0x89,0x6f,0x06]
+; CHECK-NEXT: retq # encoding: [0xc3]
+entry:
+ %0 = tail call <8 x i16> @llvm.x86.avx10.vmovrsw128(ptr %__B)
+ %1 = bitcast i8 %__A to <8 x i1>
+ %2 = select <8 x i1> %1, <8 x i16> %0, <8 x i16> zeroinitializer
+ %3 = bitcast <8 x i16> %2 to <2 x i64>
+ ret <2 x i64> %3
+}
+
+define <4 x i64> @test_mm256_movrsw_epu16(ptr %__A) {
+; CHECK-LABEL: test_mm256_movrsw_epu16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vmovrsw (%rdi), %ymm0 # encoding: [0x62,0xf5,0xff,0x28,0x6f,0x07]
+; CHECK-NEXT: retq # encoding: [0xc3]
+entry:
+ %0 = tail call <16 x i16> @llvm.x86.avx10.vmovrsw256(ptr %__A)
+ %1 = bitcast <16 x i16> %0 to <4 x i64>
+ ret <4 x i64> %1
+}
+
+define <4 x i64> @test_mm256_mask_movrsw_epu16(<4 x i64> %__A, i16 zeroext %__B, ptr %__C) {
+; CHECK-LABEL: test_mm256_mask_movrsw_epu16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; CHECK-NEXT: vmovrsw (%rsi), %ymm0 {%k1} # encoding: [0x62,0xf5,0xff,0x29,0x6f,0x06]
+; CHECK-NEXT: retq # encoding: [0xc3]
+entry:
+ %0 = tail call <16 x i16> @llvm.x86.avx10.vmovrsw256(ptr %__C)
+ %1 = bitcast <4 x i64> %__A to <16 x i16>
+ %2 = bitcast i16 %__B to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i16> %0, <16 x i16> %1
+ %4 = bitcast <16 x i16> %3 to <4 x i64>
+ ret <4 x i64> %4
+}
+
+define <4 x i64> @test_mm256_maskz_movrsw_epu16(i16 zeroext %__A, ptr %__B) {
+; CHECK-LABEL: test_mm256_maskz_movrsw_epu16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; CHECK-NEXT: vmovrsw (%rsi), %ymm0 {%k1} {z} # encoding: [0x62,0xf5,0xff,0xa9,0x6f,0x06]
+; CHECK-NEXT: retq # encoding: [0xc3]
+entry:
+ %0 = tail call <16 x i16> @llvm.x86.avx10.vmovrsw256(ptr %__B)
+ %1 = bitcast i16 %__A to <16 x i1>
+ %2 = select <16 x i1> %1, <16 x i16> %0, <16 x i16> zeroinitializer
+ %3 = bitcast <16 x i16> %2 to <4 x i64>
+ ret <4 x i64> %3
+}
+
+declare <16 x i8> @llvm.x86.avx10.vmovrsb128(ptr)
+declare <32 x i8> @llvm.x86.avx10.vmovrsb256(ptr)
+declare <4 x i32> @llvm.x86.avx10.vmovrsd128(ptr)
+declare <8 x i32> @llvm.x86.avx10.vmovrsd256(ptr)
+declare <2 x i64> @llvm.x86.avx10.vmovrsq128(ptr)
+declare <4 x i64> @llvm.x86.avx10.vmovrsq256(ptr)
+declare <8 x i16> @llvm.x86.avx10.vmovrsw128(ptr)
+declare <16 x i16> @llvm.x86.avx10.vmovrsw256(ptr)
diff --git a/llvm/test/MC/Disassembler/X86/movrs-avx10-64.txt b/llvm/test/MC/Disassembler/X86/movrs-avx10-64.txt
new file mode 100644
index 00000000000000..e25e66ae577438
--- /dev/null
+++ b/llvm/test/MC/Disassembler/X86/movrs-avx10-64.txt
@@ -0,0 +1,98 @@
+# RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding --disassemble < %s | FileCheck %s --check-prefixes=ATT
+# RUN: llvm-mc --disassemble %s -triple=x86_64 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL
+
+# ATT: vmovrsb 268435456(%rbp,%r14,8), %zmm22
+# INTEL: vmovrsb zmm22, zmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa5,0x7f,0x48,0x6f,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vmovrsb 291(%r8,%rax,4), %zmm22 {%k7}
+# INTEL: vmovrsb zmm22 {k7}, zmmword ptr [r8 + 4*rax + 291]
+0x62,0xc5,0x7f,0x4f,0x6f,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vmovrsb (%rip), %zmm22
+# INTEL: vmovrsb zmm22, zmmword ptr [rip]
+0x62,0xe5,0x7f,0x48,0x6f,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vmovrsb -2048(,%rbp,2), %zmm22
+# INTEL: vmovrsb zmm22, zmmword ptr [2*rbp - 2048]
+0x62,0xe5,0x7f,0x48,0x6f,0x34,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vmovrsb 8128(%rcx), %zmm22 {%k7} {z}
+# INTEL: vmovrsb zmm22 {k7} {z}, zmmword ptr [rcx + 8128]
+0x62,0xe5,0x7f,0xcf,0x6f,0x71,0x7f
+
+# ATT: vmovrsb -8192(%rdx), %zmm22 {%k7} {z}
+# INTEL: vmovrsb zmm22 {k7} {z}, zmmword ptr [rdx - 8192]
+0x62,0xe5,0x7f,0xcf,0x6f,0x72,0x80
+
+# ATT: vmovrsd 268435456(%rbp,%r14,8), %zmm22
+# INTEL: vmovrsd zmm22, zmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa5,0x7e,0x48,0x6f,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vmovrsd 291(%r8,%rax,4), %zmm22 {%k7}
+# INTEL: vmovrsd zmm22 {k7}, zmmword ptr [r8 + 4*rax + 291]
+0x62,0xc5,0x7e,0x4f,0x6f,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vmovrsd (%rip), %zmm22
+# INTEL: vmovrsd zmm22, zmmword ptr [rip]
+0x62,0xe5,0x7e,0x48,0x6f,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vmovrsd -2048(,%rbp,2), %zmm22
+# INTEL: vmovrsd zmm22, zmmword ptr [2*rbp - 2048]
+0x62,0xe5,0x7e,0x48,0x6f,0x34,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vmovrsd 8128(%rcx), %zmm22 {%k7} {z}
+# INTEL: vmovrsd zmm22 {k7} {z}, zmmword ptr [rcx + 8128]
+0x62,0xe5,0x7e,0xcf,0x6f,0x71,0x7f
+
+# ATT: vmovrsd -8192(%rdx), %zmm22 {%k7} {z}
+# INTEL: vmovrsd zmm22 {k7} {z}, zmmword ptr [rdx - 8192]
+0x62,0xe5,0x7e,0xcf,0x6f,0x72,0x80
+
+# ATT: vmovrsq 268435456(%rbp,%r14,8), %zmm22
+# INTEL: vmovrsq zmm22, zmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa5,0xfe,0x48,0x6f,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vmovrsq 291(%r8,%rax,4), %zmm22 {%k7}
+# INTEL: vmovrsq zmm22 {k7}, zmmword ptr [r8 + 4*rax + 291]
+0x62,0xc5,0xfe,0x4f,0x6f,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vmovrsq (%rip), %zmm22
+# INTEL: vmovrsq zmm22, zmmword ptr [rip]
+0x62,0xe5,0xfe,0x48,0x6f,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vmovrsq -2048(,%rbp,2), %zmm22
+# INTEL: vmovrsq zmm22, zmmword ptr [2*rbp - 2048]
+0x62,0xe5,0xfe,0x48,0x6f,0x34,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vmovrsq 8128(%rcx), %zmm22 {%k7} {z}
+# INTEL: vmovrsq zmm22 {k7} {z}, zmmword ptr [rcx + 8128]
+0x62,0xe5,0xfe,0xcf,0x6f,0x71,0x7f
+
+# ATT: vmovrsq -8192(%rdx), %zmm22 {%k7} {z}
+# INTEL: vmovrsq zmm22 {k7} {z}, zmmword ptr [rdx - 8192]
+0x62,0xe5,0xfe,0xcf,0x6f,0x72,0x80
+
+# ATT: vmovrsw 268435456(%rbp,%r14,8), %zmm22
+# INTEL: vmovrsw zmm22, zmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa5,0xff,0x48,0x6f,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vmovrsw 291(%r8,%rax,4), %zmm22 {%k7}
+# INTEL: vmovrsw zmm22 {k7}, zmmword ptr [r8 + 4*rax + 291]
+0x62,0xc5,0xff,0x4f,0x6f,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vmovrsw (%rip), %zmm22
+# INTEL: vmovrsw zmm22, zmmword ptr [rip]
+0x62,0xe5,0xff,0x48,0x6f,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vmovrsw -2048(,%rbp,2), %zmm22
+# INTEL: vmovrsw zmm22, zmmword ptr [2*rbp - 2048]
+0x62,0xe5,0xff,0x48,0x6f,0x34,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vmovrsw 8128(%rcx), %zmm22 {%k7} {z}
+# INTEL: vmovrsw zmm22 {k7} {z}, zmmword ptr [rcx + 8128]
+0x62,0xe5,0xff,0xcf,0x6f,0x71,0x7f
+
+# ATT: vmovrsw -8192(%rdx), %zmm22 {%k7} {z}
+# INTEL: vmovrsw zmm22 {k7} {z}, zmmword ptr [rdx - 8192]
+0x62,0xe5,0xff,0xcf,0x6f,0x72,0x80
\ No newline at end of file
diff --git a/llvm/test/MC/X86/movrs-avx10-att-64.s b/llvm/test/MC/X86/movrs-avx10-att-64.s
new file mode 100644
index 00000000000000..982b7a1d41c039
--- /dev/null
+++ b/llvm/test/MC/X86/movrs-avx10-att-64.s
@@ -0,0 +1,98 @@
+// RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding < %s | FileCheck %s
+
+// CHECK: vmovrsb 268435456(%rbp,%r14,8), %zmm22
+// CHECK: encoding: [0x62,0xa5,0x7f,0x48,0x6f,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vmovrsb 268435456(%rbp,%r14,8), %zmm22
+
+// CHECK: vmovrsb 291(%r8,%rax,4), %zmm22 {%k7}
+// CHECK: encoding: [0x62,0xc5,0x7f,0x4f,0x6f,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vmovrsb 291(%r8,%rax,4), %zmm22 {%k7}
+
+// CHECK: vmovrsb (%rip), %zmm22
+// CHECK: encoding: [0x62,0xe5,0x7f,0x48,0x6f,0x35,0x00,0x00,0x00,0x00]
+ vmovrsb (%rip), %zmm22
+
+// CHECK: vmovrsb -2048(,%rbp,2), %zmm22
+// CHECK: encoding: [0x62,0xe5,0x7f,0x48,0x6f,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vmovrsb -2048(,%rbp,2), %zmm22
+
+// CHECK: vmovrsb 8128(%rcx), %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x7f,0xcf,0x6f,0x71,0x7f]
+ vmovrsb 8128(%rcx), %zmm22 {%k7} {z}
+
+// CHECK: vmovrsb -8192(%rdx), %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x7f,0xcf,0x6f,0x72,0x80]
+ vmovrsb -8192(%rdx), %zmm22 {%k7} {z}
+
+// CHECK: vmovrsd 268435456(%rbp,%r14,8), %zmm22
+// CHECK: encoding: [0x62,0xa5,0x7e,0x48,0x6f,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vmovrsd 268435456(%rbp,%r14,8), %zmm22
+
+// CHECK: vmovrsd 291(%r8,%rax,4), %zmm22 {%k7}
+// CHECK: encoding: [0x62,0xc5,0x7e,0x4f,0x6f,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vmovrsd 291(%r8,%rax,4), %zmm22 {%k7}
+
+// CHECK: vmovrsd (%rip), %zmm22
+// CHECK: encoding: [0x62,0xe5,0x7e,0x48,0x6f,0x35,0x00,0x00,0x00,0x00]
+ vmovrsd (%rip), %zmm22
+
+// CHECK: vmovrsd -2048(,%rbp,2), %zmm22
+// CHECK: encoding: [0x62,0xe5,0x7e,0x48,0x6f,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vmovrsd -2048(,%rbp,2), %zmm22
+
+// CHECK: vmovrsd 8128(%rcx), %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x7e,0xcf,0x6f,0x71,0x7f]
+ vmovrsd 8128(%rcx), %zmm22 {%k7} {z}
+
+// CHECK: vmovrsd -8192(%rdx), %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0x7e,0xcf,0x6f,0x72,0x80]
+ vmovrsd -8192(%rdx), %zmm22 {%k7} {z}
+
+// CHECK: vmovrsq 268435456(%rbp,%r14,8), %zmm22
+// CHECK: encoding: [0x62,0xa5,0xfe,0x48,0x6f,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vmovrsq 268435456(%rbp,%r14,8), %zmm22
+
+// CHECK: vmovrsq 291(%r8,%rax,4), %zmm22 {%k7}
+// CHECK: encoding: [0x62,0xc5,0xfe,0x4f,0x6f,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vmovrsq 291(%r8,%rax,4), %zmm22 {%k7}
+
+// CHECK: vmovrsq (%rip), %zmm22
+// CHECK: encoding: [0x62,0xe5,0xfe,0x48,0x6f,0x35,0x00,0x00,0x00,0x00]
+ vmovrsq (%rip), %zmm22
+
+// CHECK: vmovrsq -2048(,%rbp,2), %zmm22
+// CHECK: encoding: [0x62,0xe5,0xfe,0x48,0x6f,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vmovrsq -2048(,%rbp,2), %zmm22
+
+// CHECK: vmovrsq 8128(%rcx), %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0xfe,0xcf,0x6f,0x71,0x7f]
+ vmovrsq 8128(%rcx), %zmm22 {%k7} {z}
+
+// CHECK: vmovrsq -8192(%rdx), %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0xfe,0xcf,0x6f,0x72,0x80]
+ vmovrsq -8192(%rdx), %zmm22 {%k7} {z}
+
+// CHECK: vmovrsw 268435456(%rbp,%r14,8), %zmm22
+// CHECK: encoding: [0x62,0xa5,0xff,0x48,0x6f,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vmovrsw 268435456(%rbp,%r14,8), %zmm22
+
+// CHECK: vmovrsw 291(%r8,%rax,4), %zmm22 {%k7}
+// CHECK: encoding: [0x62,0xc5,0xff,0x4f,0x6f,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vmovrsw 291(%r8,%rax,4), %zmm22 {%k7}
+
+// CHECK: vmovrsw (%rip), %zmm22
+// CHECK: encoding: [0x62,0xe5,0xff,0x48,0x6f,0x35,0x00,0x00,0x00,0x00]
+ vmovrsw (%rip), %zmm22
+
+// CHECK: vmovrsw -2048(,%rbp,2), %zmm22
+// CHECK: encoding: [0x62,0xe5,0xff,0x48,0x6f,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vmovrsw -2048(,%rbp,2), %zmm22
+
+// CHECK: vmovrsw 8128(%rcx), %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0xff,0xcf,0x6f,0x71,0x7f]
+ vmovrsw 8128(%rcx), %zmm22 {%k7} {z}
+
+// CHECK: vmovrsw -8192(%rdx), %zmm22 {%k7} {z}
+// CHECK: encoding: [0x62,0xe5,0xff,0xcf,0x6f,0x72,0x80]
+ vmovrsw -8192(%rdx), %zmm22 {%k7} {z}
+
diff --git a/llvm/test/MC/X86/movrs-avx10-intel-64.s b/llvm/test/MC/X86/movrs-avx10-intel-64.s
new file mode 100644
index 00000000000000..d61e41abe7d632
--- /dev/null
+++ b/llvm/test/MC/X86/movrs-avx10-intel-64.s
@@ -0,0 +1,97 @@
+// RUN: llvm-mc -triple x86_64-unknown-unknown -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s
+
+// CHECK: vmovrsb zmm22, zmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa5,0x7f,0x48,0x6f,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vmovrsb zmm22, zmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vmovrsb zmm22 {k7}, zmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc5,0x7f,0x4f,0x6f,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vmovrsb zmm22 {k7}, zmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vmovrsb zmm22, zmmword ptr [rip]
+// CHECK: encoding: [0x62,0xe5,0x7f,0x48,0x6f,0x35,0x00,0x00,0x00,0x00]
+ vmovrsb zmm22, zmmword ptr [rip]
+
+// CHECK: vmovrsb zmm22, zmmword ptr [2*rbp - 2048]
+// CHECK: encoding: [0x62,0xe5,0x7f,0x48,0x6f,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vmovrsb zmm22, zmmword ptr [2*rbp - 2048]
+
+// CHECK: vmovrsb zmm22 {k7} {z}, zmmword ptr [rcx + 8128]
+// CHECK: encoding: [0x62,0xe5,0x7f,0xcf,0x6f,0x71,0x7f]
+ vmovrsb zmm22 {k7} {z}, zmmword ptr [rcx + 8128]
+
+// CHECK: vmovrsb zmm22 {k7} {z}, zmmword ptr [rdx - 8192]
+// CHECK: encoding: [0x62,0xe5,0x7f,0xcf,0x6f,0x72,0x80]
+ vmovrsb zmm22 {k7} {z}, zmmword ptr [rdx - 8192]
+
+// CHECK: vmovrsd zmm22, zmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa5,0x7e,0x48,0x6f,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vmovrsd zmm22, zmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vmovrsd zmm22 {k7}, zmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc5,0x7e,0x4f,0x6f,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vmovrsd zmm22 {k7}, zmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vmovrsd zmm22, zmmword ptr [rip]
+// CHECK: encoding: [0x62,0xe5,0x7e,0x48,0x6f,0x35,0x00,0x00,0x00,0x00]
+ vmovrsd zmm22, zmmword ptr [rip]
+
+// CHECK: vmovrsd zmm22, zmmword ptr [2*rbp - 2048]
+// CHECK: encoding: [0x62,0xe5,0x7e,0x48,0x6f,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vmovrsd zmm22, zmmword ptr [2*rbp - 2048]
+
+// CHECK: vmovrsd zmm22 {k7} {z}, zmmword ptr [rcx + 8128]
+// CHECK: encoding: [0x62,0xe5,0x7e,0xcf,0x6f,0x71,0x7f]
+ vmovrsd zmm22 {k7} {z}, zmmword ptr [rcx + 8128]
+
+// CHECK: vmovrsd zmm22 {k7} {z}, zmmword ptr [rdx - 8192]
+// CHECK: encoding: [0x62,0xe5,0x7e,0xcf,0x6f,0x72,0x80]
+ vmovrsd zmm22 {k7} {z}, zmmword ptr [rdx - 8192]
+
+// CHECK: vmovrsq zmm22, zmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa5,0xfe,0x48,0x6f,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vmovrsq zmm22, zmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vmovrsq zmm22 {k7}, zmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc5,0xfe,0x4f,0x6f,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vmovrsq zmm22 {k7}, zmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vmovrsq zmm22, zmmword ptr [rip]
+// CHECK: encoding: [0x62,0xe5,0xfe,0x48,0x6f,0x35,0x00,0x00,0x00,0x00]
+ vmovrsq zmm22, zmmword ptr [rip]
+
+// CHECK: vmovrsq zmm22, zmmword ptr [2*rbp - 2048]
+// CHECK: encoding: [0x62,0xe5,0xfe,0x48,0x6f,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vmovrsq zmm22, zmmword ptr [2*rbp - 2048]
+
+// CHECK: vmovrsq zmm22 {k7} {z}, zmmword ptr [rcx + 8128]
+// CHECK: encoding: [0x62,0xe5,0xfe,0xcf,0x6f,0x71,0x7f]
+ vmovrsq zmm22 {k7} {z}, zmmword ptr [rcx + 8128]
+
+// CHECK: vmovrsq zmm22 {k7} {z}, zmmword ptr [rdx - 8192]
+// CHECK: encoding: [0x62,0xe5,0xfe,0xcf,0x6f,0x72,0x80]
+ vmovrsq zmm22 {k7} {z}, zmmword ptr [rdx - 8192]
+
+// CHECK: vmovrsw zmm22, zmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa5,0xff,0x48,0x6f,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vmovrsw zmm22, zmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vmovrsw zmm22 {k7}, zmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc5,0xff,0x4f,0x6f,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vmovrsw zmm22 {k7}, zmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vmovrsw zmm22, zmmword ptr [rip]
+// CHECK: encoding: [0x62,0xe5,0xff,0x48,0x6f,0x35,0x00,0x00,0x00,0x00]
+ vmovrsw zmm22, zmmword ptr [rip]
+
+// CHECK: vmovrsw zmm22, zmmword ptr [2*rbp - 2048]
+// CHECK: encoding: [0x62,0xe5,0xff,0x48,0x6f,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vmovrsw zmm22, zmmword ptr [2*rbp - 2048]
+
+// CHECK: vmovrsw zmm22 {k7} {z}, zmmword ptr [rcx + 8128]
+// CHECK: encoding: [0x62,0xe5,0xff,0xcf,0x6f,0x71,0x7f]
+ vmovrsw zmm22 {k7} {z}, zmmword ptr [rcx + 8128]
+
+// CHECK: vmovrsw zmm22 {k7} {z}, zmmword ptr [rdx - 8192]
+// CHECK: encoding: [0x62,0xe5,0xff,0xcf,0x6f,0x72,0x80]
+ vmovrsw zmm22 {k7} {z}, zmmword ptr [rdx - 8192]
>From dc9387dbf69b806180702465c406630194c27bee Mon Sep 17 00:00:00 2001
From: Freddy Ye <freddy.ye at intel.com>
Date: Wed, 23 Oct 2024 10:21:04 +0800
Subject: [PATCH 2/4] Add release notes and CFE tests for movrs.
---
clang/docs/ReleaseNotes.rst | 3 +++
clang/include/clang/Driver/Options.td | 2 ++
clang/lib/Basic/Targets/X86.cpp | 2 ++
clang/test/CodeGen/target-builtin-noerror.c | 1 +
clang/test/Driver/x86-target-features.c | 5 +++++
clang/test/Preprocessor/x86_target_features.c | 6 ++++++
compiler-rt/lib/builtins/cpu_model/x86.c | 3 +++
llvm/docs/ReleaseNotes.md | 2 ++
llvm/lib/TargetParser/Host.cpp | 1 +
9 files changed, 25 insertions(+)
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index b7a6ace8bb895d..f212cdef7864e3 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -618,6 +618,9 @@ X86 Support
- All intrinsics in tbmintrin.h can now be used in constant expressions.
+- Supported intrinsics for ``MOVRS AND AVX10.2``.
+ * Supported intrinsics of ``_mm(256|512)_(mask(z))_loadrs_epi(8|16|32|64)``.
+
Arm and AArch64 Support
^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 152c43d7908ff8..4cb2e77f8d9f61 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -6415,6 +6415,8 @@ def mmovdiri : Flag<["-"], "mmovdiri">, Group<m_x86_Features_Group>;
def mno_movdiri : Flag<["-"], "mno-movdiri">, Group<m_x86_Features_Group>;
def mmovdir64b : Flag<["-"], "mmovdir64b">, Group<m_x86_Features_Group>;
def mno_movdir64b : Flag<["-"], "mno-movdir64b">, Group<m_x86_Features_Group>;
+def mmovrs : Flag<["-"], "mmovrs">, Group<m_x86_Features_Group>;
+def mno_movrs : Flag<["-"], "mno-movrs">, Group<m_x86_Features_Group>;
def mmwaitx : Flag<["-"], "mmwaitx">, Group<m_x86_Features_Group>;
def mno_mwaitx : Flag<["-"], "mno-mwaitx">, Group<m_x86_Features_Group>;
def mpku : Flag<["-"], "mpku">, Group<m_x86_Features_Group>;
diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp
index d4d099504a6a71..d067ec218b5270 100644
--- a/clang/lib/Basic/Targets/X86.cpp
+++ b/clang/lib/Basic/Targets/X86.cpp
@@ -917,6 +917,8 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts,
Builder.defineMacro("__MOVDIRI__");
if (HasMOVDIR64B)
Builder.defineMacro("__MOVDIR64B__");
+ if (HasMOVRS)
+ Builder.defineMacro("__MOVRS__");
if (HasPCONFIG)
Builder.defineMacro("__PCONFIG__");
if (HasPTWRITE)
diff --git a/clang/test/CodeGen/target-builtin-noerror.c b/clang/test/CodeGen/target-builtin-noerror.c
index 2a05074d7c2b68..1e53621bc6b5ae 100644
--- a/clang/test/CodeGen/target-builtin-noerror.c
+++ b/clang/test/CodeGen/target-builtin-noerror.c
@@ -145,6 +145,7 @@ void verifyfeaturestrings(void) {
(void)__builtin_cpu_supports("avx10.1-512");
(void)__builtin_cpu_supports("avx10.2-256");
(void)__builtin_cpu_supports("avx10.2-512");
+ (void)__builtin_cpu_supports("movrs");
}
void verifycpustrings(void) {
diff --git a/clang/test/Driver/x86-target-features.c b/clang/test/Driver/x86-target-features.c
index ddfbb29a48f8d5..02370ef60b7feb 100644
--- a/clang/test/Driver/x86-target-features.c
+++ b/clang/test/Driver/x86-target-features.c
@@ -404,6 +404,11 @@
// USERMSR: "-target-feature" "+usermsr"
// NO-USERMSR: "-target-feature" "-usermsr"
+// RUN: %clang --target=i386 -mmovrs %s -### -o %t.o 2>&1 | FileCheck -check-prefix=MOVRS %s
+// RUN: %clang --target=i386 -mno-movrs %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-MOVRS %s
+// MOVRS: "-target-feature" "+movrs"
+// NO-MOVRS: "-target-feature" "-movrs"
+
// RUN: %clang --target=i386 -march=i386 -mcrc32 %s -### 2>&1 | FileCheck -check-prefix=CRC32 %s
// RUN: %clang --target=i386 -march=i386 -mno-crc32 %s -### 2>&1 | FileCheck -check-prefix=NO-CRC32 %s
// CRC32: "-target-feature" "+crc32"
diff --git a/clang/test/Preprocessor/x86_target_features.c b/clang/test/Preprocessor/x86_target_features.c
index 8b4e6bdc09226a..2d1d2e57bdc772 100644
--- a/clang/test/Preprocessor/x86_target_features.c
+++ b/clang/test/Preprocessor/x86_target_features.c
@@ -740,6 +740,12 @@
// RUN: %clang -target i686-unknown-linux-gnu -march=atom -mno-usermsr -x c -E -dM -o - %s | FileCheck -check-prefix=NO-USERMSR %s
// NO-USERMSR-NOT: #define __USERMSR__ 1
+// RUN: %clang -target i686-unknown-linux-gnu -march=atom -mmovrs -x c -E -dM -o - %s | FileCheck -check-prefix=MOVRS %s
+// MOVRS: #define __MOVRS__ 1
+
+// RUN: %clang -target i686-unknown-linux-gnu -march=atom -mno-movrs -x c -E -dM -o - %s | FileCheck -check-prefix=NO-MOVRS %s
+// NO-MOVRS-NOT: #define __MOVRS__ 1
+
// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mcrc32 -x c -E -dM -o - %s | FileCheck -check-prefix=CRC32 %s
// CRC32: #define __CRC32__ 1
diff --git a/compiler-rt/lib/builtins/cpu_model/x86.c b/compiler-rt/lib/builtins/cpu_model/x86.c
index 23f8fa3e1fd490..c7ed9867f3dbd9 100644
--- a/compiler-rt/lib/builtins/cpu_model/x86.c
+++ b/compiler-rt/lib/builtins/cpu_model/x86.c
@@ -229,6 +229,7 @@ enum ProcessorFeatures {
FEATURE_AVX10_1_512,
FEATURE_AVX10_2_256,
FEATURE_AVX10_2_512,
+ FEATURE_MOVRS,
CPU_FEATURE_MAX
};
@@ -970,6 +971,8 @@ static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
setFeature(FEATURE_HRESET);
if (HasLeaf7Subleaf1 && ((EAX >> 23) & 1) && HasAVXSave)
setFeature(FEATURE_AVXIFMA);
+ if (HasLeaf7Subleaf1 && ((EAX >> 31) & 1))
+ setFeature(FEATURE_MOVRS);
if (HasLeaf7Subleaf1 && ((EDX >> 4) & 1) && HasAVXSave)
setFeature(FEATURE_AVXVNNIINT8);
diff --git a/llvm/docs/ReleaseNotes.md b/llvm/docs/ReleaseNotes.md
index e5853789c78b63..7e4a8258d8058e 100644
--- a/llvm/docs/ReleaseNotes.md
+++ b/llvm/docs/ReleaseNotes.md
@@ -196,6 +196,8 @@ Changes to the X86 Backend
* Support ISA of `AVX10.2-256` and `AVX10.2-512`.
+* Supported instructions of `MOVRS AND AVX10.2`.
+
Changes to the OCaml bindings
-----------------------------
diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp
index 9834aaacba18d0..a6d1ce64765dc0 100644
--- a/llvm/lib/TargetParser/Host.cpp
+++ b/llvm/lib/TargetParser/Host.cpp
@@ -1839,6 +1839,7 @@ const StringMap<bool> sys::getHostCPUFeatures() {
Features["cmpccxadd"] = HasLeaf7Subleaf1 && ((EAX >> 7) & 1);
Features["hreset"] = HasLeaf7Subleaf1 && ((EAX >> 22) & 1);
Features["avxifma"] = HasLeaf7Subleaf1 && ((EAX >> 23) & 1) && HasAVXSave;
+ Features["movrs"] = HasLeaf7Subleaf1 && ((EAX >> 31) & 1);
Features["avxvnniint8"] = HasLeaf7Subleaf1 && ((EDX >> 4) & 1) && HasAVXSave;
Features["avxneconvert"] = HasLeaf7Subleaf1 && ((EDX >> 5) & 1) && HasAVXSave;
Features["amx-complex"] = HasLeaf7Subleaf1 && ((EDX >> 8) & 1) && HasAMXSave;
>From 38ff3cf94f8427bf946cde0819092f2707f78909 Mon Sep 17 00:00:00 2001
From: Freddy Ye <freddy.ye at intel.com>
Date: Wed, 23 Oct 2024 10:26:08 +0800
Subject: [PATCH 3/4] clang format
---
llvm/lib/TargetParser/Host.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp
index a6d1ce64765dc0..489457d010064d 100644
--- a/llvm/lib/TargetParser/Host.cpp
+++ b/llvm/lib/TargetParser/Host.cpp
@@ -1839,7 +1839,7 @@ const StringMap<bool> sys::getHostCPUFeatures() {
Features["cmpccxadd"] = HasLeaf7Subleaf1 && ((EAX >> 7) & 1);
Features["hreset"] = HasLeaf7Subleaf1 && ((EAX >> 22) & 1);
Features["avxifma"] = HasLeaf7Subleaf1 && ((EAX >> 23) & 1) && HasAVXSave;
- Features["movrs"] = HasLeaf7Subleaf1 && ((EAX >> 31) & 1);
+ Features["movrs"] = HasLeaf7Subleaf1 && ((EAX >> 31) & 1);
Features["avxvnniint8"] = HasLeaf7Subleaf1 && ((EDX >> 4) & 1) && HasAVXSave;
Features["avxneconvert"] = HasLeaf7Subleaf1 && ((EDX >> 5) & 1) && HasAVXSave;
Features["amx-complex"] = HasLeaf7Subleaf1 && ((EDX >> 8) & 1) && HasAMXSave;
>From 567131c8c265bd29dede1ad76dd1c52a4077b270 Mon Sep 17 00:00:00 2001
From: Freddy Ye <freddy.ye at intel.com>
Date: Thu, 24 Oct 2024 13:24:28 +0800
Subject: [PATCH 4/4] address comments
---
clang/lib/Headers/movrs_avx10_2_512intrin.h | 2 +-
clang/lib/Headers/movrs_avx10_2intrin.h | 2 +-
llvm/include/llvm/IR/IntrinsicsX86.td | 24 ++++++++++-----------
3 files changed, 14 insertions(+), 14 deletions(-)
diff --git a/clang/lib/Headers/movrs_avx10_2_512intrin.h b/clang/lib/Headers/movrs_avx10_2_512intrin.h
index 1d04d3122f2c5f..5cd907a5973494 100644
--- a/clang/lib/Headers/movrs_avx10_2_512intrin.h
+++ b/clang/lib/Headers/movrs_avx10_2_512intrin.h
@@ -1,4 +1,4 @@
-/*===-------- movrs_avx10_2_512intrin.h - AVX512MOVRS intrinsics -----------===
+/*===----- movrs_avx10_2_512intrin.h - AVX10.2-512-MOVRS intrinsics --------===
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
diff --git a/clang/lib/Headers/movrs_avx10_2intrin.h b/clang/lib/Headers/movrs_avx10_2intrin.h
index f38c78afe2ef94..27b625b6b43139 100644
--- a/clang/lib/Headers/movrs_avx10_2intrin.h
+++ b/clang/lib/Headers/movrs_avx10_2intrin.h
@@ -1,4 +1,4 @@
-/*===---------- movrs_avx10_2intrin.h - AVX512MOVRS intrinsics -------------===
+/*===--------- movrs_avx10_2intrin.h - AVX10.2-MOVRS intrinsics ------------===
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td
index 33070fef7a7351..d0083017fb9383 100644
--- a/llvm/include/llvm/IR/IntrinsicsX86.td
+++ b/llvm/include/llvm/IR/IntrinsicsX86.td
@@ -7575,39 +7575,39 @@ def int_x86_avx10_vfnmsub231nepbf16128 : ClangBuiltin<"__builtin_ia32_vfnmsub231
let TargetPrefix = "x86" in {
def int_x86_avx10_vmovrsb128 : ClangBuiltin<"__builtin_ia32_vmovrsb128">,
- Intrinsic<[llvm_v16i8_ty], [llvm_ptr_ty],
+ DefaultAttrsIntrinsic<[llvm_v16i8_ty], [llvm_ptr_ty],
[IntrReadMem]>;
def int_x86_avx10_vmovrsb256 : ClangBuiltin<"__builtin_ia32_vmovrsb256">,
- Intrinsic<[llvm_v32i8_ty], [llvm_ptr_ty],
+ DefaultAttrsIntrinsic<[llvm_v32i8_ty], [llvm_ptr_ty],
[IntrReadMem]>;
def int_x86_avx10_vmovrsb512 : ClangBuiltin<"__builtin_ia32_vmovrsb512">,
- Intrinsic<[llvm_v64i8_ty], [llvm_ptr_ty],
+ DefaultAttrsIntrinsic<[llvm_v64i8_ty], [llvm_ptr_ty],
[IntrReadMem]>;
def int_x86_avx10_vmovrsd128 : ClangBuiltin<"__builtin_ia32_vmovrsd128">,
- Intrinsic<[llvm_v4i32_ty], [llvm_ptr_ty],
+ DefaultAttrsIntrinsic<[llvm_v4i32_ty], [llvm_ptr_ty],
[IntrReadMem]>;
def int_x86_avx10_vmovrsd256 : ClangBuiltin<"__builtin_ia32_vmovrsd256">,
- Intrinsic<[llvm_v8i32_ty], [llvm_ptr_ty],
+ DefaultAttrsIntrinsic<[llvm_v8i32_ty], [llvm_ptr_ty],
[IntrReadMem]>;
def int_x86_avx10_vmovrsd512 : ClangBuiltin<"__builtin_ia32_vmovrsd512">,
- Intrinsic<[llvm_v16i32_ty], [llvm_ptr_ty],
+ DefaultAttrsIntrinsic<[llvm_v16i32_ty], [llvm_ptr_ty],
[IntrReadMem]>;
def int_x86_avx10_vmovrsq128 : ClangBuiltin<"__builtin_ia32_vmovrsq128">,
- Intrinsic<[llvm_v2i64_ty], [llvm_ptr_ty],
+ DefaultAttrsIntrinsic<[llvm_v2i64_ty], [llvm_ptr_ty],
[IntrReadMem]>;
def int_x86_avx10_vmovrsq256 : ClangBuiltin<"__builtin_ia32_vmovrsq256">,
- Intrinsic<[llvm_v4i64_ty], [llvm_ptr_ty],
+ DefaultAttrsIntrinsic<[llvm_v4i64_ty], [llvm_ptr_ty],
[IntrReadMem]>;
def int_x86_avx10_vmovrsq512 : ClangBuiltin<"__builtin_ia32_vmovrsq512">,
- Intrinsic<[llvm_v8i64_ty], [llvm_ptr_ty],
+ DefaultAttrsIntrinsic<[llvm_v8i64_ty], [llvm_ptr_ty],
[IntrReadMem]>;
def int_x86_avx10_vmovrsw128 : ClangBuiltin<"__builtin_ia32_vmovrsw128">,
- Intrinsic<[llvm_v8i16_ty], [llvm_ptr_ty],
+ DefaultAttrsIntrinsic<[llvm_v8i16_ty], [llvm_ptr_ty],
[IntrReadMem]>;
def int_x86_avx10_vmovrsw256 : ClangBuiltin<"__builtin_ia32_vmovrsw256">,
- Intrinsic<[llvm_v16i16_ty], [llvm_ptr_ty],
+ DefaultAttrsIntrinsic<[llvm_v16i16_ty], [llvm_ptr_ty],
[IntrReadMem]>;
def int_x86_avx10_vmovrsw512 : ClangBuiltin<"__builtin_ia32_vmovrsw512">,
- Intrinsic<[llvm_v32i16_ty], [llvm_ptr_ty],
+ DefaultAttrsIntrinsic<[llvm_v32i16_ty], [llvm_ptr_ty],
[IntrReadMem]>;
}
More information about the cfe-commits
mailing list