[clang] [llvm] [X86][AVX10.2] Support AVX10.2 MOVZXC new Instructions. (PR #108537)
via cfe-commits
cfe-commits at lists.llvm.org
Fri Sep 13 04:25:01 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-clang
Author: Mahesh-Attarde (mahesh-attarde)
<details>
<summary>Changes</summary>
Ref.: https://cdrdv2.intel.com/v1/dl/getContent/828965
Chapter 14 INTEL® AVX10 ZERO-EXTENDING PARTIAL VECTOR COPY INSTRUCTIONS
---
Full diff: https://github.com/llvm/llvm-project/pull/108537.diff
15 Files Affected:
- (modified) clang/lib/Headers/CMakeLists.txt (+1)
- (added) clang/lib/Headers/avx10_2copyintrin.h (+34)
- (modified) clang/lib/Headers/immintrin.h (+1)
- (added) clang/test/CodeGen/X86/avx512copy-builtins.c (+17)
- (modified) llvm/lib/Target/X86/X86ISelLowering.cpp (+3-2)
- (modified) llvm/lib/Target/X86/X86InstrAVX10.td (+64)
- (added) llvm/test/CodeGen/X86/avx512copy-intrinsics.ll (+35)
- (added) llvm/test/MC/Disassembler/X86/avx10.2-copy-32.txt (+34)
- (added) llvm/test/MC/Disassembler/X86/avx10.2-copy-64.txt (+34)
- (added) llvm/test/MC/X86/avx10.2-copy-32-att.s (+17)
- (added) llvm/test/MC/X86/avx10.2-copy-32-intel.s (+17)
- (added) llvm/test/MC/X86/avx10.2-copy-64-att.s (+17)
- (added) llvm/test/MC/X86/avx10.2-copy-64-intel.s (+17)
- (modified) llvm/test/TableGen/x86-fold-tables.inc (+2)
- (modified) llvm/utils/TableGen/X86ManualInstrMapping.def (+1)
``````````diff
diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt
index a21e3901f63fea..fb55dca0fda405 100644
--- a/clang/lib/Headers/CMakeLists.txt
+++ b/clang/lib/Headers/CMakeLists.txt
@@ -155,6 +155,7 @@ set(x86_files
avx10_2_512satcvtintrin.h
avx10_2bf16intrin.h
avx10_2convertintrin.h
+ avx10_2copyintrin.h
avx10_2minmaxintrin.h
avx10_2niintrin.h
avx10_2satcvtdsintrin.h
diff --git a/clang/lib/Headers/avx10_2copyintrin.h b/clang/lib/Headers/avx10_2copyintrin.h
new file mode 100644
index 00000000000000..13e76c6abe8993
--- /dev/null
+++ b/clang/lib/Headers/avx10_2copyintrin.h
@@ -0,0 +1,34 @@
+/*===---- avx10_2copyintrin.h - AVX10.2 Copy intrinsics -------------------===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===-----------------------------------------------------------------------===
+ */
+#ifndef __IMMINTRIN_H
+#error \
+ "Never use <avx10_2copyintrin.h> directly; include <immintrin.h> instead."
+#endif // __IMMINTRIN_H
+
+#ifndef __AVX10_2COPYINTRIN_H
+#define __AVX10_2COPYINTRIN_H
+
+/* Define the default attributes for the functions in this file. */
+#define __DEFAULT_FN_ATTRS128 \
+ __attribute__((__always_inline__, __nodebug__, __target__("avx10.2-256"), \
+ __min_vector_width__(128)))
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_move_epi32(__m128i __A) {
+ return (__m128i)__builtin_shufflevector(
+ (__v4si)__A, (__v4si)_mm_setzero_si128(), 0, 4, 4, 4);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_move_epi16(__m128i __A) {
+ return (__m128i)__builtin_shufflevector(
+ (__v8hi)__A, (__v8hi)_mm_setzero_si128(), 0, 8, 8, 8, 8, 8, 8, 8);
+}
+
+#undef __DEFAULT_FN_ATTRS128
+
+#endif // __AVX10_2COPYINTRIN_H
\ No newline at end of file
diff --git a/clang/lib/Headers/immintrin.h b/clang/lib/Headers/immintrin.h
index 280154f3c1026e..3fbabffa98df20 100644
--- a/clang/lib/Headers/immintrin.h
+++ b/clang/lib/Headers/immintrin.h
@@ -651,6 +651,7 @@ _storebe_i64(void * __P, long long __D) {
#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX10_2__)
#include <avx10_2bf16intrin.h>
#include <avx10_2convertintrin.h>
+#include <avx10_2copyintrin.h>
#include <avx10_2minmaxintrin.h>
#include <avx10_2niintrin.h>
#include <avx10_2satcvtdsintrin.h>
diff --git a/clang/test/CodeGen/X86/avx512copy-builtins.c b/clang/test/CodeGen/X86/avx512copy-builtins.c
new file mode 100644
index 00000000000000..06f7507bde53ed
--- /dev/null
+++ b/clang/test/CodeGen/X86/avx512copy-builtins.c
@@ -0,0 +1,17 @@
+// RUN: %clang_cc1 %s -flax-vector-conversions=none -ffreestanding -triple=x86_64-unknown-unknown -target-feature +avx10.2-512 \
+// RUN: -emit-llvm -o - -Wall -Werror -pedantic -Wno-gnu-statement-expression | FileCheck %s
+
+#include <immintrin.h>
+#include <stddef.h>
+
+__m128i test_mm_move_epi32(__m128i A) {
+ // CHECK-LABEL: test_mm_move_epi32
+ // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
+ return _mm_move_epi32(A);
+}
+
+__m128i test_mm_move_epi16(__m128i A) {
+ // CHECK-LABEL: test_mm_move_epi16
+ // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i32> <i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
+ return _mm_move_epi16(A);
+}
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 3c5b952ff62e24..38999de669c013 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -12319,7 +12319,7 @@ static SDValue lowerShuffleAsElementInsertion(
}
V2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, ExtVT, V2S);
} else if (Mask[V2Index] != (int)Mask.size() || EltVT == MVT::i8 ||
- EltVT == MVT::i16) {
+ (EltVT == MVT::i16 && !Subtarget.hasAVX10_2())) {
// Either not inserting from the low element of the input or the input
// element size is too small to use VZEXT_MOVL to clear the high bits.
return SDValue();
@@ -38197,7 +38197,8 @@ static bool matchUnaryShuffle(MVT MaskVT, ArrayRef<int> Mask,
// Match against a VZEXT_MOVL instruction, SSE1 only supports 32-bits (MOVSS).
if (((MaskEltSize == 32) || (MaskEltSize == 64 && Subtarget.hasSSE2()) ||
- (MaskEltSize == 16 && Subtarget.hasFP16())) &&
+ (MaskEltSize == 16 &&
+ (Subtarget.hasFP16() || Subtarget.hasAVX10_2()))) &&
isUndefOrEqual(Mask[0], 0) &&
isUndefOrZeroInRange(Mask, 1, NumMaskElts - 1)) {
Shuffle = X86ISD::VZEXT_MOVL;
diff --git a/llvm/lib/Target/X86/X86InstrAVX10.td b/llvm/lib/Target/X86/X86InstrAVX10.td
index ada2bbaffd6645..f66705a5a3de35 100644
--- a/llvm/lib/Target/X86/X86InstrAVX10.td
+++ b/llvm/lib/Target/X86/X86InstrAVX10.td
@@ -1537,3 +1537,67 @@ defm VFNMADD132NEPBF16 : avx10_fma3p_132_bf16<0x9C, "vfnmadd132nepbf16", X86any_
defm VFNMSUB132NEPBF16 : avx10_fma3p_132_bf16<0x9E, "vfnmsub132nepbf16", X86any_Fnmsub,
X86Fnmsub, SchedWriteFMA>;
}
+
+//-------------------------------------------------
+// AVX10 MOVZXC (COPY) instructions
+//-------------------------------------------------
+let Predicates = [HasAVX10_2] in {
+ def VMOVZPDILo2PDIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst),
+ (ins VR128X:$src),
+ "vmovd\t{$src, $dst|$dst, $src}",
+ [(set VR128X:$dst, (v4i32 (X86vzmovl
+ (v4i32 VR128X:$src))))]>, EVEX,
+ Sched<[WriteVecMoveFromGpr]>;
+
+let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
+ def VMOVZPDILo2PDIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst),
+ (ins i32mem:$src),
+ "vmovd\t{$src, $dst|$dst, $src}", []>, EVEX,
+ EVEX_CD8<32, CD8VT1>,
+ Sched<[WriteVecLoad]>;
+
+let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
+ def VMOVZPDILo2PDIZmr : AVX512PDI<0xD6, MRMDestMem, (outs),
+ (ins i32mem:$dst, VR128X:$src),
+ "vmovd\t{$src, $dst|$dst, $src}", []>, EVEX,
+ EVEX_CD8<32, CD8VT1>,
+ Sched<[WriteVecStore]>;
+
+let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
+ def VMOVZPDILo2PDIZrr2 : AVX512PDI<0xD6, MRMSrcReg, (outs VR128X:$dst),
+ (ins VR128X:$src),
+ "vmovd\t{$src, $dst|$dst, $src}", []>, EVEX,
+ Sched<[WriteVecMoveFromGpr]>;
+ def : InstAlias<"vmovd.s\t{$src, $dst|$dst, $src}",
+ (VMOVZPDILo2PDIZrr2 VR128X:$dst, VR128X:$src), 0>;
+
+def VMOVZPWILo2PWIZrr : AVX512XSI<0x6E, MRMSrcReg, (outs VR128X:$dst),
+ (ins VR128X:$src),
+ "vmovw\t{$src, $dst|$dst, $src}",
+ [(set VR128X:$dst, (v8i16 (X86vzmovl
+ (v8i16 VR128X:$src))))]>, EVEX, T_MAP5,
+ Sched<[WriteVecMoveFromGpr]>;
+
+let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
+ def VMOVZPWILo2PWIZrm : AVX512XSI<0x6E, MRMSrcMem, (outs VR128X:$dst),
+ (ins i16mem:$src),
+ "vmovw\t{$src, $dst|$dst, $src}", []>, EVEX,
+ EVEX_CD8<16, CD8VT1>, T_MAP5,
+ Sched<[WriteVecLoad]>;
+
+let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
+ def VMOVZPWILo2PWIZmr : AVX512XSI<0x7E, MRMDestMem, (outs),
+ (ins i32mem:$dst, VR128X:$src),
+ "vmovw\t{$src, $dst|$dst, $src}", []>, EVEX,
+ EVEX_CD8<16, CD8VT1>, T_MAP5,
+ Sched<[WriteVecStore]>;
+
+let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
+ def VMOVZPWILo2PWIZrr2 : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst),
+ (ins VR128X:$src),
+ "vmovw\t{$src, $dst|$dst, $src}",
+ []>, EVEX, T_MAP5,
+ Sched<[WriteVecMoveFromGpr]>;
+ def : InstAlias<"vmovw.s\t{$src, $dst|$dst, $src}",
+ (VMOVZPWILo2PWIZrr2 VR128X:$dst, VR128X:$src), 0>;
+}
\ No newline at end of file
diff --git a/llvm/test/CodeGen/X86/avx512copy-intrinsics.ll b/llvm/test/CodeGen/X86/avx512copy-intrinsics.ll
new file mode 100644
index 00000000000000..a7ca23792e6feb
--- /dev/null
+++ b/llvm/test/CodeGen/X86/avx512copy-intrinsics.ll
@@ -0,0 +1,35 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+avx10.2-256 | FileCheck %s --check-prefixes=AVX102
+; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+avx512f | FileCheck %s --check-prefixes=NOAVX512MOVZXC
+
+define <4 x i32> @test_mm_move_epi32(<4 x i32> %a0) nounwind {
+; AVX102-LABEL: test_mm_move_epi32:
+; AVX102: # %bb.0:
+; AVX102-NEXT: vmovd %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7e,0x08,0x7e,0xc0]
+; AVX102-NEXT: retq # encoding: [0xc3]
+;
+; NOAVX512MOVZXC-LABEL: test_mm_move_epi32:
+; NOAVX512MOVZXC: # %bb.0:
+; NOAVX512MOVZXC-NEXT: vxorps %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf0,0x57,0xc9]
+; NOAVX512MOVZXC-NEXT: vblendps $1, %xmm0, %xmm1, %xmm0 # encoding: [0xc4,0xe3,0x71,0x0c,0xc0,0x01]
+; NOAVX512MOVZXC-NEXT: # xmm0 = xmm0[0],xmm1[1,2,3]
+; NOAVX512MOVZXC-NEXT: retq # encoding: [0xc3]
+ %res = shufflevector <4 x i32> %a0, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
+ ret <4 x i32> %res
+}
+
+define <8 x i16> @test_mm_move_epi16(<8 x i16> %a0) nounwind {
+; AVX102-LABEL: test_mm_move_epi16:
+; AVX102: # %bb.0:
+; AVX102-NEXT: vmovw %xmm0, %xmm0 # encoding: [0x62,0xf5,0x7e,0x08,0x6e,0xc0]
+; AVX102-NEXT: retq # encoding: [0xc3]
+;
+; NOAVX512MOVZXC-LABEL: test_mm_move_epi16:
+; NOAVX512MOVZXC: # %bb.0:
+; NOAVX512MOVZXC-NEXT: vpxor %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xef,0xc9]
+; NOAVX512MOVZXC-NEXT: vpblendw $1, %xmm0, %xmm1, %xmm0 # encoding: [0xc4,0xe3,0x71,0x0e,0xc0,0x01]
+; NOAVX512MOVZXC-NEXT: # xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7]
+; NOAVX512MOVZXC-NEXT: retq # encoding: [0xc3]
+ %res = shufflevector <8 x i16> %a0, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
+ ret <8 x i16> %res
+}
diff --git a/llvm/test/MC/Disassembler/X86/avx10.2-copy-32.txt b/llvm/test/MC/Disassembler/X86/avx10.2-copy-32.txt
new file mode 100644
index 00000000000000..e86c2340a486c5
--- /dev/null
+++ b/llvm/test/MC/Disassembler/X86/avx10.2-copy-32.txt
@@ -0,0 +1,34 @@
+# RUN: llvm-mc --disassemble %s -triple=i386 | FileCheck %s --check-prefixes=ATT
+# RUN: llvm-mc --disassemble %s -triple=i386 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL
+
+# ATT: vmovd (%ecx), %xmm5
+# INTEL: vmovd xmm5, dword ptr [ecx]
+0x62 0xf1 0x7e 0x08 0x7e 0x29
+
+# ATT: vmovd %xmm5, (%ecx)
+# INTEL: vmovd dword ptr [ecx], xmm5
+0x62 0xf1 0x7d 0x08 0xd6 0x29
+
+# ATT: vmovd %xmm2, %xmm1
+# INTEL: vmovd xmm1, xmm2
+0x62 0xf1 0x7e 0x08 0x7e 0xca
+
+# ATT: vmovd %xmm2, %xmm1
+# INTEL: vmovd xmm1, xmm2
+0x62 0xf1 0x7d 0x08 0xd6 0xca
+
+# ATT: vmovw %xmm5, (%ecx)
+# INTEL: vmovw dword ptr [ecx], xmm5
+0x62 0xf5 0x7e 0x08 0x7e 0x29
+
+# ATT: vmovw (%ecx), %xmm5
+# INTEL: vmovw xmm5, word ptr [ecx]
+0x62 0xf5 0x7e 0x08 0x6e 0x29
+
+# ATT: vmovw %xmm2, %xmm1
+# INTEL: vmovw xmm1, xmm2
+0x62 0xf5 0x7e 0x08 0x6e 0xca
+
+# ATT: vmovw %xmm2, %xmm1
+# INTEL: vmovw xmm1, xmm2
+0x62 0xf5 0x7e 0x08 0x7e 0xca
diff --git a/llvm/test/MC/Disassembler/X86/avx10.2-copy-64.txt b/llvm/test/MC/Disassembler/X86/avx10.2-copy-64.txt
new file mode 100644
index 00000000000000..36ddd75a77ad39
--- /dev/null
+++ b/llvm/test/MC/Disassembler/X86/avx10.2-copy-64.txt
@@ -0,0 +1,34 @@
+# RUN: llvm-mc --disassemble %s -triple=x86_64 | FileCheck %s --check-prefixes=ATT
+# RUN: llvm-mc --disassemble %s -triple=x86_64 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL
+
+# ATT: vmovd (%rcx), %xmm29
+# INTEL: vmovd xmm29, dword ptr [rcx]
+0x62 0x61 0x7e 0x08 0x7e 0x29
+
+# ATT: vmovd %xmm29, (%rcx)
+# INTEL: vmovd dword ptr [rcx], xmm29
+0x62 0x61 0x7d 0x08 0xd6 0x29
+
+# ATT: vmovd %xmm22, %xmm21
+# INTEL: vmovd xmm21, xmm22
+0x62 0xa1 0x7e 0x08 0x7e 0xee
+
+# ATT: vmovd %xmm22, %xmm21
+# INTEL: vmovd xmm21, xmm22
+0x62 0xa1 0x7d 0x08 0xd6 0xee
+
+# ATT: vmovw %xmm29, (%rcx)
+# INTEL: vmovw dword ptr [rcx], xmm29
+0x62 0x65 0x7e 0x08 0x7e 0x29
+
+# ATT: vmovw (%rcx), %xmm29
+# INTEL: vmovw xmm29, word ptr [rcx]
+0x62 0x65 0x7e 0x08 0x6e 0x29
+
+# ATT: vmovw %xmm22, %xmm21
+# INTEL: vmovw xmm21, xmm22
+0x62 0xa5 0x7e 0x08 0x6e 0xee
+
+# ATT: vmovw %xmm22, %xmm21
+# INTEL: vmovw xmm21, xmm22
+0x62 0xa5 0x7e 0x08 0x7e 0xee
diff --git a/llvm/test/MC/X86/avx10.2-copy-32-att.s b/llvm/test/MC/X86/avx10.2-copy-32-att.s
new file mode 100644
index 00000000000000..a77f19a5dce542
--- /dev/null
+++ b/llvm/test/MC/X86/avx10.2-copy-32-att.s
@@ -0,0 +1,17 @@
+// RUN: llvm-mc -triple i386 --show-encoding %s | FileCheck %s
+
+// CHECK: vmovd %xmm2, %xmm1
+// CHECK: encoding: [0x62,0xf1,0x7e,0x08,0x7e,0xca]
+ vmovd %xmm2, %xmm1
+
+// CHECK: vmovd %xmm2, %xmm1
+// CHECK: encoding: [0x62,0xf1,0x7d,0x08,0xd6,0xca]
+ vmovd.s %xmm2, %xmm1
+
+// CHECK: vmovw %xmm2, %xmm1
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x6e,0xca]
+ vmovw %xmm2, %xmm1
+
+// CHECK: vmovw %xmm2, %xmm1
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x7e,0xca]
+ vmovw.s %xmm2, %xmm1
diff --git a/llvm/test/MC/X86/avx10.2-copy-32-intel.s b/llvm/test/MC/X86/avx10.2-copy-32-intel.s
new file mode 100644
index 00000000000000..222dc2f939c77a
--- /dev/null
+++ b/llvm/test/MC/X86/avx10.2-copy-32-intel.s
@@ -0,0 +1,17 @@
+// RUN: llvm-mc -triple i386 -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s
+
+// CHECK: vmovd xmm1, xmm2
+// CHECK: encoding: [0x62,0xf1,0x7e,0x08,0x7e,0xca]
+ vmovd xmm1, xmm2
+
+// CHECK: vmovd xmm1, xmm2
+// CHECK: encoding: [0x62,0xf1,0x7d,0x08,0xd6,0xca]
+ vmovd.s xmm1, xmm2
+
+// CHECK: vmovw xmm1, xmm2
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x6e,0xca]
+ vmovw xmm1, xmm2
+
+// CHECK: vmovw xmm1, xmm2
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x7e,0xca]
+ vmovw.s xmm1, xmm2
diff --git a/llvm/test/MC/X86/avx10.2-copy-64-att.s b/llvm/test/MC/X86/avx10.2-copy-64-att.s
new file mode 100644
index 00000000000000..e27d333222a38a
--- /dev/null
+++ b/llvm/test/MC/X86/avx10.2-copy-64-att.s
@@ -0,0 +1,17 @@
+// RUN: llvm-mc -triple x86_64 --show-encoding %s | FileCheck %s
+
+// CHECK: vmovd %xmm22, %xmm21
+// CHECK: encoding: [0x62,0xa1,0x7e,0x08,0x7e,0xee]
+ vmovd %xmm22, %xmm21
+
+// CHECK: vmovd %xmm22, %xmm21
+// CHECK: encoding: [0x62,0xa1,0x7d,0x08,0xd6,0xee]
+ vmovd.s %xmm22, %xmm21
+
+// CHECK: vmovw %xmm22, %xmm21
+// CHECK: encoding: [0x62,0xa5,0x7e,0x08,0x6e,0xee]
+ vmovw %xmm22, %xmm21
+
+// CHECK: vmovw %xmm22, %xmm21
+// CHECK: encoding: [0x62,0xa5,0x7e,0x08,0x7e,0xee]
+ vmovw.s %xmm22, %xmm21
diff --git a/llvm/test/MC/X86/avx10.2-copy-64-intel.s b/llvm/test/MC/X86/avx10.2-copy-64-intel.s
new file mode 100644
index 00000000000000..ed364d4402313d
--- /dev/null
+++ b/llvm/test/MC/X86/avx10.2-copy-64-intel.s
@@ -0,0 +1,17 @@
+// RUN: llvm-mc -triple x86_64 -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s
+
+// CHECK: vmovd xmm21, xmm22
+// CHECK: encoding: [0x62,0xa1,0x7e,0x08,0x7e,0xee]
+ vmovd xmm21, xmm22
+
+// CHECK: vmovd xmm21, xmm22
+// CHECK: encoding: [0x62,0xa1,0x7d,0x08,0xd6,0xee]
+ vmovd.s xmm21, xmm22
+
+// CHECK: vmovw xmm21, xmm22
+// CHECK: encoding: [0x62,0xa5,0x7e,0x08,0x6e,0xee]
+ vmovw xmm21, xmm22
+
+// CHECK: vmovw xmm21, xmm22
+// CHECK: encoding: [0x62,0xa5,0x7e,0x08,0x7e,0xee]
+ vmovw.s xmm21, xmm22
diff --git a/llvm/test/TableGen/x86-fold-tables.inc b/llvm/test/TableGen/x86-fold-tables.inc
index be1b59eb50c91c..a993cce57696a8 100644
--- a/llvm/test/TableGen/x86-fold-tables.inc
+++ b/llvm/test/TableGen/x86-fold-tables.inc
@@ -1614,8 +1614,10 @@ static const X86FoldTableEntry Table1[] = {
{X86::VMOVUPSZrr, X86::VMOVUPSZrm, 0},
{X86::VMOVUPSrr, X86::VMOVUPSrm, 0},
{X86::VMOVW2SHrr, X86::VMOVWrm, TB_NO_REVERSE},
+ {X86::VMOVZPDILo2PDIZrr, X86::VMOVZPDILo2PDIZrm, TB_NO_REVERSE},
{X86::VMOVZPQILo2PQIZrr, X86::VMOVQI2PQIZrm, TB_NO_REVERSE},
{X86::VMOVZPQILo2PQIrr, X86::VMOVQI2PQIrm, TB_NO_REVERSE},
+ {X86::VMOVZPWILo2PWIZrr, X86::VMOVZPWILo2PWIZrm, TB_NO_REVERSE},
{X86::VPABSBYrr, X86::VPABSBYrm, 0},
{X86::VPABSBZ128rr, X86::VPABSBZ128rm, 0},
{X86::VPABSBZ256rr, X86::VPABSBZ256rm, 0},
diff --git a/llvm/utils/TableGen/X86ManualInstrMapping.def b/llvm/utils/TableGen/X86ManualInstrMapping.def
index f0154b80a80dbe..53a276a9343f54 100644
--- a/llvm/utils/TableGen/X86ManualInstrMapping.def
+++ b/llvm/utils/TableGen/X86ManualInstrMapping.def
@@ -32,6 +32,7 @@ NOCOMP(VPSRAQZ128ri)
NOCOMP(VPSRAQZ128rm)
NOCOMP(VPSRAQZ128rr)
NOCOMP(VSCALEFPSZ128rm)
+NOCOMP(VMOVZPDILo2PDIZrr)
NOCOMP(VDBPSADBWZ256rmi)
NOCOMP(VDBPSADBWZ256rri)
NOCOMP(VPMAXSQZ256rm)
``````````
</details>
https://github.com/llvm/llvm-project/pull/108537
More information about the cfe-commits
mailing list