[clang] [llvm] [X86][AVX10.2] Support AVX10.2 MOVZXC new Instructions. (PR #108537)

via cfe-commits cfe-commits at lists.llvm.org
Fri Sep 13 10:46:30 PDT 2024


https://github.com/mahesh-attarde updated https://github.com/llvm/llvm-project/pull/108537

>From d8e76ea27679df40d0c796a5e8e5bc31433b6bff Mon Sep 17 00:00:00 2001
From: mattarde <mattarde at intel.com>
Date: Fri, 13 Sep 2024 03:26:14 -0700
Subject: [PATCH 1/3] update clr

---
 clang/lib/Headers/CMakeLists.txt              |  1 +
 clang/lib/Headers/avx10_2copyintrin.h         | 34 ++++++++++
 clang/lib/Headers/immintrin.h                 |  1 +
 clang/test/CodeGen/X86/avx512copy-builtins.c  | 17 +++++
 llvm/lib/Target/X86/X86ISelLowering.cpp       |  5 +-
 llvm/lib/Target/X86/X86InstrAVX10.td          | 64 +++++++++++++++++++
 .../test/CodeGen/X86/avx512copy-intrinsics.ll | 35 ++++++++++
 .../MC/Disassembler/X86/avx10.2-copy-32.txt   | 34 ++++++++++
 .../MC/Disassembler/X86/avx10.2-copy-64.txt   | 34 ++++++++++
 llvm/test/MC/X86/avx10.2-copy-32-att.s        | 17 +++++
 llvm/test/MC/X86/avx10.2-copy-32-intel.s      | 17 +++++
 llvm/test/MC/X86/avx10.2-copy-64-att.s        | 17 +++++
 llvm/test/MC/X86/avx10.2-copy-64-intel.s      | 17 +++++
 llvm/test/TableGen/x86-fold-tables.inc        |  2 +
 llvm/utils/TableGen/X86ManualInstrMapping.def |  1 +
 15 files changed, 294 insertions(+), 2 deletions(-)
 create mode 100644 clang/lib/Headers/avx10_2copyintrin.h
 create mode 100644 clang/test/CodeGen/X86/avx512copy-builtins.c
 create mode 100644 llvm/test/CodeGen/X86/avx512copy-intrinsics.ll
 create mode 100644 llvm/test/MC/Disassembler/X86/avx10.2-copy-32.txt
 create mode 100644 llvm/test/MC/Disassembler/X86/avx10.2-copy-64.txt
 create mode 100644 llvm/test/MC/X86/avx10.2-copy-32-att.s
 create mode 100644 llvm/test/MC/X86/avx10.2-copy-32-intel.s
 create mode 100644 llvm/test/MC/X86/avx10.2-copy-64-att.s
 create mode 100644 llvm/test/MC/X86/avx10.2-copy-64-intel.s

diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt
index a21e3901f63fea..fb55dca0fda405 100644
--- a/clang/lib/Headers/CMakeLists.txt
+++ b/clang/lib/Headers/CMakeLists.txt
@@ -155,6 +155,7 @@ set(x86_files
   avx10_2_512satcvtintrin.h
   avx10_2bf16intrin.h
   avx10_2convertintrin.h
+  avx10_2copyintrin.h
   avx10_2minmaxintrin.h
   avx10_2niintrin.h
   avx10_2satcvtdsintrin.h
diff --git a/clang/lib/Headers/avx10_2copyintrin.h b/clang/lib/Headers/avx10_2copyintrin.h
new file mode 100644
index 00000000000000..13e76c6abe8993
--- /dev/null
+++ b/clang/lib/Headers/avx10_2copyintrin.h
@@ -0,0 +1,34 @@
+/*===---- avx10_2copyintrin.h - AVX10.2 Copy intrinsics -------------------===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===-----------------------------------------------------------------------===
+ */
+#ifndef __IMMINTRIN_H
+#error                                                                         \
+    "Never use <avx10_2copyintrin.h> directly; include <immintrin.h> instead."
+#endif // __IMMINTRIN_H
+
+#ifndef __AVX10_2COPYINTRIN_H
+#define __AVX10_2COPYINTRIN_H
+
+/* Define the default attributes for the functions in this file. */
+#define __DEFAULT_FN_ATTRS128                                                  \
+  __attribute__((__always_inline__, __nodebug__, __target__("avx10.2-256"),    \
+                 __min_vector_width__(128)))
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_move_epi32(__m128i __A) {
+  return (__m128i)__builtin_shufflevector(
+      (__v4si)__A, (__v4si)_mm_setzero_si128(), 0, 4, 4, 4);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_move_epi16(__m128i __A) {
+  return (__m128i)__builtin_shufflevector(
+      (__v8hi)__A, (__v8hi)_mm_setzero_si128(), 0, 8, 8, 8, 8, 8, 8, 8);
+}
+
+#undef __DEFAULT_FN_ATTRS128
+
+#endif // __AVX10_2COPYINTRIN_H
\ No newline at end of file
diff --git a/clang/lib/Headers/immintrin.h b/clang/lib/Headers/immintrin.h
index 280154f3c1026e..3fbabffa98df20 100644
--- a/clang/lib/Headers/immintrin.h
+++ b/clang/lib/Headers/immintrin.h
@@ -651,6 +651,7 @@ _storebe_i64(void * __P, long long __D) {
 #if !defined(__SCE__) || __has_feature(modules) || defined(__AVX10_2__)
 #include <avx10_2bf16intrin.h>
 #include <avx10_2convertintrin.h>
+#include <avx10_2copyintrin.h>
 #include <avx10_2minmaxintrin.h>
 #include <avx10_2niintrin.h>
 #include <avx10_2satcvtdsintrin.h>
diff --git a/clang/test/CodeGen/X86/avx512copy-builtins.c b/clang/test/CodeGen/X86/avx512copy-builtins.c
new file mode 100644
index 00000000000000..06f7507bde53ed
--- /dev/null
+++ b/clang/test/CodeGen/X86/avx512copy-builtins.c
@@ -0,0 +1,17 @@
+// RUN: %clang_cc1 %s -flax-vector-conversions=none -ffreestanding -triple=x86_64-unknown-unknown -target-feature +avx10.2-512 \
+// RUN: -emit-llvm -o - -Wall -Werror -pedantic -Wno-gnu-statement-expression | FileCheck %s
+
+#include <immintrin.h>
+#include <stddef.h>
+
+__m128i test_mm_move_epi32(__m128i A) {
+  // CHECK-LABEL: test_mm_move_epi32
+  // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
+  return _mm_move_epi32(A);
+}
+
+__m128i test_mm_move_epi16(__m128i A) {
+  // CHECK-LABEL: test_mm_move_epi16
+  // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i32> <i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
+  return _mm_move_epi16(A);
+}
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 3c5b952ff62e24..38999de669c013 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -12319,7 +12319,7 @@ static SDValue lowerShuffleAsElementInsertion(
     }
     V2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, ExtVT, V2S);
   } else if (Mask[V2Index] != (int)Mask.size() || EltVT == MVT::i8 ||
-             EltVT == MVT::i16) {
+             (EltVT == MVT::i16 && !Subtarget.hasAVX10_2())) {
     // Either not inserting from the low element of the input or the input
     // element size is too small to use VZEXT_MOVL to clear the high bits.
     return SDValue();
@@ -38197,7 +38197,8 @@ static bool matchUnaryShuffle(MVT MaskVT, ArrayRef<int> Mask,
 
   // Match against a VZEXT_MOVL instruction, SSE1 only supports 32-bits (MOVSS).
   if (((MaskEltSize == 32) || (MaskEltSize == 64 && Subtarget.hasSSE2()) ||
-       (MaskEltSize == 16 && Subtarget.hasFP16())) &&
+       (MaskEltSize == 16 &&
+        (Subtarget.hasFP16() || Subtarget.hasAVX10_2()))) &&
       isUndefOrEqual(Mask[0], 0) &&
       isUndefOrZeroInRange(Mask, 1, NumMaskElts - 1)) {
     Shuffle = X86ISD::VZEXT_MOVL;
diff --git a/llvm/lib/Target/X86/X86InstrAVX10.td b/llvm/lib/Target/X86/X86InstrAVX10.td
index ada2bbaffd6645..f66705a5a3de35 100644
--- a/llvm/lib/Target/X86/X86InstrAVX10.td
+++ b/llvm/lib/Target/X86/X86InstrAVX10.td
@@ -1537,3 +1537,67 @@ defm VFNMADD132NEPBF16 : avx10_fma3p_132_bf16<0x9C, "vfnmadd132nepbf16", X86any_
 defm VFNMSUB132NEPBF16 : avx10_fma3p_132_bf16<0x9E, "vfnmsub132nepbf16", X86any_Fnmsub,
                                               X86Fnmsub, SchedWriteFMA>;
 }
+
+//-------------------------------------------------
+// AVX10 MOVZXC (COPY) instructions
+//-------------------------------------------------
+let Predicates = [HasAVX10_2] in {
+  def VMOVZPDILo2PDIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst),
+                                    (ins VR128X:$src),
+                                    "vmovd\t{$src, $dst|$dst, $src}",
+                                    [(set VR128X:$dst, (v4i32 (X86vzmovl
+                                    (v4i32 VR128X:$src))))]>, EVEX,
+                                    Sched<[WriteVecMoveFromGpr]>;
+
+let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
+  def VMOVZPDILo2PDIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst),
+                                    (ins i32mem:$src),
+                                    "vmovd\t{$src, $dst|$dst, $src}", []>, EVEX,
+                                     EVEX_CD8<32, CD8VT1>,
+                                     Sched<[WriteVecLoad]>;
+
+let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
+  def VMOVZPDILo2PDIZmr : AVX512PDI<0xD6, MRMDestMem, (outs),
+                                    (ins i32mem:$dst, VR128X:$src),
+                                    "vmovd\t{$src, $dst|$dst, $src}", []>, EVEX,
+                                    EVEX_CD8<32, CD8VT1>,
+                                    Sched<[WriteVecStore]>;
+
+let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
+  def VMOVZPDILo2PDIZrr2 : AVX512PDI<0xD6, MRMSrcReg, (outs VR128X:$dst),
+                                     (ins VR128X:$src),
+                                     "vmovd\t{$src, $dst|$dst, $src}", []>, EVEX,
+                                     Sched<[WriteVecMoveFromGpr]>;
+  def : InstAlias<"vmovd.s\t{$src, $dst|$dst, $src}",
+                  (VMOVZPDILo2PDIZrr2 VR128X:$dst, VR128X:$src), 0>;
+
+def VMOVZPWILo2PWIZrr : AVX512XSI<0x6E, MRMSrcReg, (outs VR128X:$dst),
+                                  (ins VR128X:$src),
+                                  "vmovw\t{$src, $dst|$dst, $src}",
+                                  [(set VR128X:$dst, (v8i16 (X86vzmovl
+                                  (v8i16 VR128X:$src))))]>, EVEX, T_MAP5,
+                                  Sched<[WriteVecMoveFromGpr]>;
+
+let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
+  def VMOVZPWILo2PWIZrm : AVX512XSI<0x6E, MRMSrcMem, (outs VR128X:$dst),
+                                    (ins i16mem:$src),
+                                    "vmovw\t{$src, $dst|$dst, $src}", []>, EVEX,
+                                    EVEX_CD8<16, CD8VT1>, T_MAP5,
+                                    Sched<[WriteVecLoad]>;
+
+let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
+  def VMOVZPWILo2PWIZmr : AVX512XSI<0x7E, MRMDestMem, (outs),
+                                    (ins i32mem:$dst, VR128X:$src),
+                                    "vmovw\t{$src, $dst|$dst, $src}", []>, EVEX,
+                                    EVEX_CD8<16, CD8VT1>, T_MAP5,
+                                    Sched<[WriteVecStore]>;
+
+let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
+  def VMOVZPWILo2PWIZrr2 : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst),
+                                     (ins VR128X:$src),
+                                     "vmovw\t{$src, $dst|$dst, $src}",
+                                     []>, EVEX, T_MAP5,
+                                     Sched<[WriteVecMoveFromGpr]>;
+  def : InstAlias<"vmovw.s\t{$src, $dst|$dst, $src}",
+                  (VMOVZPWILo2PWIZrr2 VR128X:$dst, VR128X:$src), 0>;
+}
\ No newline at end of file
diff --git a/llvm/test/CodeGen/X86/avx512copy-intrinsics.ll b/llvm/test/CodeGen/X86/avx512copy-intrinsics.ll
new file mode 100644
index 00000000000000..a7ca23792e6feb
--- /dev/null
+++ b/llvm/test/CodeGen/X86/avx512copy-intrinsics.ll
@@ -0,0 +1,35 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+avx10.2-256 | FileCheck %s --check-prefixes=AVX102
+; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+avx512f | FileCheck %s --check-prefixes=NOAVX512MOVZXC
+
+define <4 x i32> @test_mm_move_epi32(<4 x i32> %a0) nounwind {
+; AVX102-LABEL: test_mm_move_epi32:
+; AVX102:       # %bb.0:
+; AVX102-NEXT:    vmovd %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7e,0x08,0x7e,0xc0]
+; AVX102-NEXT:    retq # encoding: [0xc3]
+;
+; NOAVX512MOVZXC-LABEL: test_mm_move_epi32:
+; NOAVX512MOVZXC:       # %bb.0:
+; NOAVX512MOVZXC-NEXT:    vxorps %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf0,0x57,0xc9]
+; NOAVX512MOVZXC-NEXT:    vblendps $1, %xmm0, %xmm1, %xmm0 # encoding: [0xc4,0xe3,0x71,0x0c,0xc0,0x01]
+; NOAVX512MOVZXC-NEXT:    # xmm0 = xmm0[0],xmm1[1,2,3]
+; NOAVX512MOVZXC-NEXT:    retq # encoding: [0xc3]
+  %res = shufflevector <4 x i32> %a0, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
+  ret <4 x i32> %res
+}
+
+define <8 x i16> @test_mm_move_epi16(<8 x i16> %a0) nounwind {
+; AVX102-LABEL: test_mm_move_epi16:
+; AVX102:       # %bb.0:
+; AVX102-NEXT:    vmovw %xmm0, %xmm0 # encoding: [0x62,0xf5,0x7e,0x08,0x6e,0xc0]
+; AVX102-NEXT:    retq # encoding: [0xc3]
+;
+; NOAVX512MOVZXC-LABEL: test_mm_move_epi16:
+; NOAVX512MOVZXC:       # %bb.0:
+; NOAVX512MOVZXC-NEXT:    vpxor %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xef,0xc9]
+; NOAVX512MOVZXC-NEXT:    vpblendw $1, %xmm0, %xmm1, %xmm0 # encoding: [0xc4,0xe3,0x71,0x0e,0xc0,0x01]
+; NOAVX512MOVZXC-NEXT:    # xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7]
+; NOAVX512MOVZXC-NEXT:    retq # encoding: [0xc3]
+  %res = shufflevector <8 x i16> %a0, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
+  ret <8 x i16> %res
+}
diff --git a/llvm/test/MC/Disassembler/X86/avx10.2-copy-32.txt b/llvm/test/MC/Disassembler/X86/avx10.2-copy-32.txt
new file mode 100644
index 00000000000000..e86c2340a486c5
--- /dev/null
+++ b/llvm/test/MC/Disassembler/X86/avx10.2-copy-32.txt
@@ -0,0 +1,34 @@
+# RUN: llvm-mc --disassemble %s -triple=i386 | FileCheck %s --check-prefixes=ATT
+# RUN: llvm-mc --disassemble %s -triple=i386 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL
+
+# ATT:   vmovd   (%ecx), %xmm5
+# INTEL: vmovd   xmm5, dword ptr [ecx]
+0x62 0xf1 0x7e 0x08 0x7e 0x29
+
+# ATT:   vmovd   %xmm5, (%ecx)
+# INTEL: vmovd   dword ptr [ecx], xmm5
+0x62 0xf1 0x7d 0x08 0xd6 0x29
+
+# ATT:   vmovd   %xmm2, %xmm1
+# INTEL: vmovd   xmm1, xmm2
+0x62 0xf1 0x7e 0x08 0x7e 0xca
+
+# ATT:   vmovd   %xmm2, %xmm1
+# INTEL: vmovd   xmm1, xmm2
+0x62 0xf1 0x7d 0x08 0xd6 0xca
+
+# ATT:   vmovw   %xmm5, (%ecx)
+# INTEL: vmovw   dword ptr [ecx], xmm5
+0x62 0xf5 0x7e 0x08 0x7e 0x29
+
+# ATT:   vmovw   (%ecx), %xmm5
+# INTEL: vmovw   xmm5, word ptr [ecx]
+0x62 0xf5 0x7e 0x08 0x6e 0x29
+
+# ATT:   vmovw   %xmm2, %xmm1
+# INTEL: vmovw   xmm1, xmm2
+0x62 0xf5 0x7e 0x08 0x6e 0xca
+
+# ATT:   vmovw   %xmm2, %xmm1
+# INTEL: vmovw   xmm1, xmm2
+0x62 0xf5 0x7e 0x08 0x7e 0xca
diff --git a/llvm/test/MC/Disassembler/X86/avx10.2-copy-64.txt b/llvm/test/MC/Disassembler/X86/avx10.2-copy-64.txt
new file mode 100644
index 00000000000000..36ddd75a77ad39
--- /dev/null
+++ b/llvm/test/MC/Disassembler/X86/avx10.2-copy-64.txt
@@ -0,0 +1,34 @@
+# RUN: llvm-mc --disassemble %s -triple=x86_64 | FileCheck %s --check-prefixes=ATT
+# RUN: llvm-mc --disassemble %s -triple=x86_64 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL
+
+# ATT:   vmovd   (%rcx), %xmm29
+# INTEL: vmovd   xmm29, dword ptr [rcx]
+0x62 0x61 0x7e 0x08 0x7e 0x29
+
+# ATT:   vmovd   %xmm29, (%rcx)
+# INTEL: vmovd   dword ptr [rcx], xmm29
+0x62 0x61 0x7d 0x08 0xd6 0x29
+
+# ATT:   vmovd   %xmm22, %xmm21
+# INTEL: vmovd   xmm21, xmm22
+0x62 0xa1 0x7e 0x08 0x7e 0xee
+
+# ATT:   vmovd   %xmm22, %xmm21
+# INTEL: vmovd   xmm21, xmm22
+0x62 0xa1 0x7d 0x08 0xd6 0xee
+
+# ATT:   vmovw   %xmm29, (%rcx)
+# INTEL: vmovw   dword ptr [rcx], xmm29
+0x62 0x65 0x7e 0x08 0x7e 0x29
+
+# ATT:   vmovw   (%rcx), %xmm29
+# INTEL: vmovw   xmm29, word ptr [rcx]
+0x62 0x65 0x7e 0x08 0x6e 0x29
+
+# ATT:   vmovw   %xmm22, %xmm21
+# INTEL: vmovw   xmm21, xmm22
+0x62 0xa5 0x7e 0x08 0x6e 0xee
+
+# ATT:   vmovw   %xmm22, %xmm21
+# INTEL: vmovw   xmm21, xmm22
+0x62 0xa5 0x7e 0x08 0x7e 0xee
diff --git a/llvm/test/MC/X86/avx10.2-copy-32-att.s b/llvm/test/MC/X86/avx10.2-copy-32-att.s
new file mode 100644
index 00000000000000..a77f19a5dce542
--- /dev/null
+++ b/llvm/test/MC/X86/avx10.2-copy-32-att.s
@@ -0,0 +1,17 @@
+// RUN: llvm-mc -triple i386 --show-encoding %s | FileCheck %s
+
+// CHECK: vmovd   %xmm2, %xmm1
+// CHECK: encoding: [0x62,0xf1,0x7e,0x08,0x7e,0xca]
+          vmovd   %xmm2, %xmm1
+
+// CHECK: vmovd   %xmm2, %xmm1
+// CHECK: encoding: [0x62,0xf1,0x7d,0x08,0xd6,0xca]
+          vmovd.s   %xmm2, %xmm1
+
+// CHECK: vmovw   %xmm2, %xmm1
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x6e,0xca]
+          vmovw   %xmm2, %xmm1
+
+// CHECK: vmovw   %xmm2, %xmm1
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x7e,0xca]
+          vmovw.s   %xmm2, %xmm1
diff --git a/llvm/test/MC/X86/avx10.2-copy-32-intel.s b/llvm/test/MC/X86/avx10.2-copy-32-intel.s
new file mode 100644
index 00000000000000..222dc2f939c77a
--- /dev/null
+++ b/llvm/test/MC/X86/avx10.2-copy-32-intel.s
@@ -0,0 +1,17 @@
+// RUN: llvm-mc -triple i386 -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s
+
+// CHECK: vmovd   xmm1, xmm2
+// CHECK: encoding: [0x62,0xf1,0x7e,0x08,0x7e,0xca]
+          vmovd   xmm1, xmm2
+
+// CHECK: vmovd   xmm1, xmm2
+// CHECK: encoding: [0x62,0xf1,0x7d,0x08,0xd6,0xca]
+          vmovd.s   xmm1, xmm2
+
+// CHECK: vmovw   xmm1, xmm2
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x6e,0xca]
+          vmovw   xmm1, xmm2
+
+// CHECK: vmovw   xmm1, xmm2
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x7e,0xca]
+          vmovw.s   xmm1, xmm2
diff --git a/llvm/test/MC/X86/avx10.2-copy-64-att.s b/llvm/test/MC/X86/avx10.2-copy-64-att.s
new file mode 100644
index 00000000000000..e27d333222a38a
--- /dev/null
+++ b/llvm/test/MC/X86/avx10.2-copy-64-att.s
@@ -0,0 +1,17 @@
+// RUN: llvm-mc -triple x86_64 --show-encoding %s | FileCheck %s
+
+// CHECK: vmovd   %xmm22, %xmm21
+// CHECK: encoding: [0x62,0xa1,0x7e,0x08,0x7e,0xee]
+          vmovd   %xmm22, %xmm21
+
+// CHECK: vmovd   %xmm22, %xmm21
+// CHECK: encoding: [0x62,0xa1,0x7d,0x08,0xd6,0xee]
+          vmovd.s   %xmm22, %xmm21
+
+// CHECK: vmovw   %xmm22, %xmm21
+// CHECK: encoding: [0x62,0xa5,0x7e,0x08,0x6e,0xee]
+          vmovw   %xmm22, %xmm21
+
+// CHECK: vmovw   %xmm22, %xmm21
+// CHECK: encoding: [0x62,0xa5,0x7e,0x08,0x7e,0xee]
+          vmovw.s   %xmm22, %xmm21
diff --git a/llvm/test/MC/X86/avx10.2-copy-64-intel.s b/llvm/test/MC/X86/avx10.2-copy-64-intel.s
new file mode 100644
index 00000000000000..ed364d4402313d
--- /dev/null
+++ b/llvm/test/MC/X86/avx10.2-copy-64-intel.s
@@ -0,0 +1,17 @@
+// RUN: llvm-mc -triple x86_64 -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s
+
+// CHECK: vmovd   xmm21, xmm22
+// CHECK: encoding: [0x62,0xa1,0x7e,0x08,0x7e,0xee]
+          vmovd   xmm21, xmm22
+
+// CHECK: vmovd   xmm21, xmm22
+// CHECK: encoding: [0x62,0xa1,0x7d,0x08,0xd6,0xee]
+          vmovd.s   xmm21, xmm22
+
+// CHECK: vmovw   xmm21, xmm22
+// CHECK: encoding: [0x62,0xa5,0x7e,0x08,0x6e,0xee]
+          vmovw   xmm21, xmm22
+
+// CHECK: vmovw   xmm21, xmm22
+// CHECK: encoding: [0x62,0xa5,0x7e,0x08,0x7e,0xee]
+          vmovw.s   xmm21, xmm22
diff --git a/llvm/test/TableGen/x86-fold-tables.inc b/llvm/test/TableGen/x86-fold-tables.inc
index be1b59eb50c91c..a993cce57696a8 100644
--- a/llvm/test/TableGen/x86-fold-tables.inc
+++ b/llvm/test/TableGen/x86-fold-tables.inc
@@ -1614,8 +1614,10 @@ static const X86FoldTableEntry Table1[] = {
   {X86::VMOVUPSZrr, X86::VMOVUPSZrm, 0},
   {X86::VMOVUPSrr, X86::VMOVUPSrm, 0},
   {X86::VMOVW2SHrr, X86::VMOVWrm, TB_NO_REVERSE},
+  {X86::VMOVZPDILo2PDIZrr, X86::VMOVZPDILo2PDIZrm, TB_NO_REVERSE},
   {X86::VMOVZPQILo2PQIZrr, X86::VMOVQI2PQIZrm, TB_NO_REVERSE},
   {X86::VMOVZPQILo2PQIrr, X86::VMOVQI2PQIrm, TB_NO_REVERSE},
+  {X86::VMOVZPWILo2PWIZrr, X86::VMOVZPWILo2PWIZrm, TB_NO_REVERSE},
   {X86::VPABSBYrr, X86::VPABSBYrm, 0},
   {X86::VPABSBZ128rr, X86::VPABSBZ128rm, 0},
   {X86::VPABSBZ256rr, X86::VPABSBZ256rm, 0},
diff --git a/llvm/utils/TableGen/X86ManualInstrMapping.def b/llvm/utils/TableGen/X86ManualInstrMapping.def
index f0154b80a80dbe..53a276a9343f54 100644
--- a/llvm/utils/TableGen/X86ManualInstrMapping.def
+++ b/llvm/utils/TableGen/X86ManualInstrMapping.def
@@ -32,6 +32,7 @@ NOCOMP(VPSRAQZ128ri)
 NOCOMP(VPSRAQZ128rm)
 NOCOMP(VPSRAQZ128rr)
 NOCOMP(VSCALEFPSZ128rm)
+NOCOMP(VMOVZPDILo2PDIZrr)
 NOCOMP(VDBPSADBWZ256rmi)
 NOCOMP(VDBPSADBWZ256rri)
 NOCOMP(VPMAXSQZ256rm)

>From 3cb612b1332bbc26a7bd2387fb4fbc2ad8bac4ad Mon Sep 17 00:00:00 2001
From: mattarde <mattarde at intel.com>
Date: Fri, 13 Sep 2024 10:37:39 -0700
Subject: [PATCH 2/3] update test

---
 llvm/test/MC/X86/avx10.2-copy-32-att.s   |  89 ++++++++++++++++---
 llvm/test/MC/X86/avx10.2-copy-32-intel.s |  88 ++++++++++++++++---
 llvm/test/MC/X86/avx10.2-copy-64-att.s   | 104 ++++++++++++++++++++---
 llvm/test/MC/X86/avx10.2-copy-64-intel.s | 104 ++++++++++++++++++++---
 4 files changed, 337 insertions(+), 48 deletions(-)

diff --git a/llvm/test/MC/X86/avx10.2-copy-32-att.s b/llvm/test/MC/X86/avx10.2-copy-32-att.s
index a77f19a5dce542..2bc498720849c9 100644
--- a/llvm/test/MC/X86/avx10.2-copy-32-att.s
+++ b/llvm/test/MC/X86/avx10.2-copy-32-att.s
@@ -1,17 +1,82 @@
 // RUN: llvm-mc -triple i386 --show-encoding %s | FileCheck %s
 
-// CHECK: vmovd   %xmm2, %xmm1
-// CHECK: encoding: [0x62,0xf1,0x7e,0x08,0x7e,0xca]
-          vmovd   %xmm2, %xmm1
+// CHECK: vmovd  268435456(%esp,%esi,8), %xmm2
+// CHECK: encoding: [0xc5,0xf9,0x6e,0x94,0xf4,0x00,0x00,0x00,0x10]
+          vmovd  268435456(%esp,%esi,8), %xmm2
 
-// CHECK: vmovd   %xmm2, %xmm1
-// CHECK: encoding: [0x62,0xf1,0x7d,0x08,0xd6,0xca]
-          vmovd.s   %xmm2, %xmm1
+// CHECK: vmovd  291(%edi,%eax,4), %xmm2
+// CHECK: encoding: [0xc5,0xf9,0x6e,0x94,0x87,0x23,0x01,0x00,0x00]
+          vmovd  291(%edi,%eax,4), %xmm2
 
-// CHECK: vmovw   %xmm2, %xmm1
-// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x6e,0xca]
-          vmovw   %xmm2, %xmm1
+// CHECK: vmovd  (%eax), %xmm2
+// CHECK: encoding: [0xc5,0xf9,0x6e,0x10]
+          vmovd  (%eax), %xmm2
+
+// CHECK: vmovd  -128(,%ebp,2), %xmm2
+// CHECK: encoding: [0xc5,0xf9,0x6e,0x14,0x6d,0x80,0xff,0xff,0xff]
+          vmovd  -128(,%ebp,2), %xmm2
+
+// CHECK: vmovd  %xmm3, 268435456(%esp,%esi,8)
+// CHECK: encoding: [0xc5,0xf9,0x7e,0x9c,0xf4,0x00,0x00,0x00,0x10]
+          vmovd  %xmm3, 268435456(%esp,%esi,8)
+
+// CHECK: vmovd  %xmm3, 291(%edi,%eax,4)
+// CHECK: encoding: [0xc5,0xf9,0x7e,0x9c,0x87,0x23,0x01,0x00,0x00]
+          vmovd  %xmm3, 291(%edi,%eax,4)
+
+// CHECK: vmovd  %xmm3, (%eax)
+// CHECK: encoding: [0xc5,0xf9,0x7e,0x18]
+          vmovd  %xmm3, (%eax)
+
+// CHECK: vmovd  %xmm3, -128(,%ebp,2)
+// CHECK: encoding: [0xc5,0xf9,0x7e,0x1c,0x6d,0x80,0xff,0xff,0xff]
+          vmovd  %xmm3, -128(,%ebp,2)
+
+// CHECK: vmovw  268435456(%esp,%esi,8), %xmm2
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x6e,0x94,0xf4,0x00,0x00,0x00,0x10]
+          vmovw  268435456(%esp,%esi,8), %xmm2
+
+// CHECK: vmovw  291(%edi,%eax,4), %xmm2
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x6e,0x94,0x87,0x23,0x01,0x00,0x00]
+          vmovw  291(%edi,%eax,4), %xmm2
+
+// CHECK: vmovw  (%eax), %xmm2
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x6e,0x10]
+          vmovw  (%eax), %xmm2
+
+// CHECK: vmovw  -64(,%ebp,2), %xmm2
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x6e,0x14,0x6d,0xc0,0xff,0xff,0xff]
+          vmovw  -64(,%ebp,2), %xmm2
+
+// CHECK: vmovw  254(%ecx), %xmm2
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x6e,0x51,0x7f]
+          vmovw  254(%ecx), %xmm2
+
+// CHECK: vmovw  -256(%edx), %xmm2
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x6e,0x52,0x80]
+          vmovw  -256(%edx), %xmm2
+
+// CHECK: vmovw  %xmm3, 268435456(%esp,%esi,8)
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x7e,0x9c,0xf4,0x00,0x00,0x00,0x10]
+          vmovw  %xmm3, 268435456(%esp,%esi,8)
+
+// CHECK: vmovw  %xmm3, 291(%edi,%eax,4)
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x7e,0x9c,0x87,0x23,0x01,0x00,0x00]
+          vmovw  %xmm3, 291(%edi,%eax,4)
+
+// CHECK: vmovw  %xmm3, (%eax)
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x7e,0x18]
+          vmovw  %xmm3, (%eax)
+
+// CHECK: vmovw  %xmm3, -64(,%ebp,2)
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x7e,0x1c,0x6d,0xc0,0xff,0xff,0xff]
+          vmovw  %xmm3, -64(,%ebp,2)
+
+// CHECK: vmovw  %xmm3, 254(%ecx)
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x7e,0x59,0x7f]
+          vmovw  %xmm3, 254(%ecx)
+
+// CHECK: vmovw  %xmm3, -256(%edx)
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x7e,0x5a,0x80]
+          vmovw  %xmm3, -256(%edx)
 
-// CHECK: vmovw   %xmm2, %xmm1
-// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x7e,0xca]
-          vmovw.s   %xmm2, %xmm1
diff --git a/llvm/test/MC/X86/avx10.2-copy-32-intel.s b/llvm/test/MC/X86/avx10.2-copy-32-intel.s
index 222dc2f939c77a..aa84548e5f75dd 100644
--- a/llvm/test/MC/X86/avx10.2-copy-32-intel.s
+++ b/llvm/test/MC/X86/avx10.2-copy-32-intel.s
@@ -1,17 +1,81 @@
 // RUN: llvm-mc -triple i386 -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s
 
-// CHECK: vmovd   xmm1, xmm2
-// CHECK: encoding: [0x62,0xf1,0x7e,0x08,0x7e,0xca]
-          vmovd   xmm1, xmm2
+// CHECK: vmovd xmm2, dword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0xc5,0xf9,0x6e,0x94,0xf4,0x00,0x00,0x00,0x10]
+          vmovd xmm2, dword ptr [esp + 8*esi + 268435456]
 
-// CHECK: vmovd   xmm1, xmm2
-// CHECK: encoding: [0x62,0xf1,0x7d,0x08,0xd6,0xca]
-          vmovd.s   xmm1, xmm2
+// CHECK: vmovd xmm2, dword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0xc5,0xf9,0x6e,0x94,0x87,0x23,0x01,0x00,0x00]
+          vmovd xmm2, dword ptr [edi + 4*eax + 291]
 
-// CHECK: vmovw   xmm1, xmm2
-// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x6e,0xca]
-          vmovw   xmm1, xmm2
+// CHECK: vmovd xmm2, dword ptr [eax]
+// CHECK: encoding: [0xc5,0xf9,0x6e,0x10]
+          vmovd xmm2, dword ptr [eax]
 
-// CHECK: vmovw   xmm1, xmm2
-// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x7e,0xca]
-          vmovw.s   xmm1, xmm2
+// CHECK: vmovd xmm2, dword ptr [2*ebp - 128]
+// CHECK: encoding: [0xc5,0xf9,0x6e,0x14,0x6d,0x80,0xff,0xff,0xff]
+          vmovd xmm2, dword ptr [2*ebp - 128]
+
+// CHECK: vmovd dword ptr [esp + 8*esi + 268435456], xmm3
+// CHECK: encoding: [0xc5,0xf9,0x7e,0x9c,0xf4,0x00,0x00,0x00,0x10]
+          vmovd dword ptr [esp + 8*esi + 268435456], xmm3
+
+// CHECK: vmovd dword ptr [edi + 4*eax + 291], xmm3
+// CHECK: encoding: [0xc5,0xf9,0x7e,0x9c,0x87,0x23,0x01,0x00,0x00]
+          vmovd dword ptr [edi + 4*eax + 291], xmm3
+
+// CHECK: vmovd dword ptr [eax], xmm3
+// CHECK: encoding: [0xc5,0xf9,0x7e,0x18]
+          vmovd dword ptr [eax], xmm3
+
+// CHECK: vmovd dword ptr [2*ebp - 128], xmm3
+// CHECK: encoding: [0xc5,0xf9,0x7e,0x1c,0x6d,0x80,0xff,0xff,0xff]
+          vmovd dword ptr [2*ebp - 128], xmm3
+
+// CHECK: vmovw xmm2, word ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x6e,0x94,0xf4,0x00,0x00,0x00,0x10]
+          vmovw xmm2, word ptr [esp + 8*esi + 268435456]
+
+// CHECK: vmovw xmm2, word ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x6e,0x94,0x87,0x23,0x01,0x00,0x00]
+          vmovw xmm2, word ptr [edi + 4*eax + 291]
+
+// CHECK: vmovw xmm2, word ptr [eax]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x6e,0x10]
+          vmovw xmm2, word ptr [eax]
+
+// CHECK: vmovw xmm2, word ptr [2*ebp - 64]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x6e,0x14,0x6d,0xc0,0xff,0xff,0xff]
+          vmovw xmm2, word ptr [2*ebp - 64]
+
+// CHECK: vmovw xmm2, word ptr [ecx + 254]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x6e,0x51,0x7f]
+          vmovw xmm2, word ptr [ecx + 254]
+
+// CHECK: vmovw xmm2, word ptr [edx - 256]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x6e,0x52,0x80]
+          vmovw xmm2, word ptr [edx - 256]
+
+// CHECK: vmovw word ptr [esp + 8*esi + 268435456], xmm3
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x7e,0x9c,0xf4,0x00,0x00,0x00,0x10]
+          vmovw word ptr [esp + 8*esi + 268435456], xmm3
+
+// CHECK: vmovw word ptr [edi + 4*eax + 291], xmm3
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x7e,0x9c,0x87,0x23,0x01,0x00,0x00]
+          vmovw word ptr [edi + 4*eax + 291], xmm3
+
+// CHECK: vmovw word ptr [eax], xmm3
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x7e,0x18]
+          vmovw word ptr [eax], xmm3
+
+// CHECK: vmovw word ptr [2*ebp - 64], xmm3
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x7e,0x1c,0x6d,0xc0,0xff,0xff,0xff]
+          vmovw word ptr [2*ebp - 64], xmm3
+
+// CHECK: vmovw word ptr [ecx + 254], xmm3
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x7e,0x59,0x7f]
+          vmovw word ptr [ecx + 254], xmm3
+
+// CHECK: vmovw word ptr [edx - 256], xmm3
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x7e,0x5a,0x80]
+          vmovw word ptr [edx - 256], xmm3
diff --git a/llvm/test/MC/X86/avx10.2-copy-64-att.s b/llvm/test/MC/X86/avx10.2-copy-64-att.s
index e27d333222a38a..a672b2d842240c 100644
--- a/llvm/test/MC/X86/avx10.2-copy-64-att.s
+++ b/llvm/test/MC/X86/avx10.2-copy-64-att.s
@@ -1,17 +1,97 @@
 // RUN: llvm-mc -triple x86_64 --show-encoding %s | FileCheck %s
 
-// CHECK: vmovd   %xmm22, %xmm21
-// CHECK: encoding: [0x62,0xa1,0x7e,0x08,0x7e,0xee]
-          vmovd   %xmm22, %xmm21
+// CHECK: vmovd  268435456(%rbp,%r14,8), %xmm22
+// CHECK: encoding: [0x62,0xa1,0x7d,0x08,0x6e,0xb4,0xf5,0x00,0x00,0x00,0x10]
+          vmovd  268435456(%rbp,%r14,8), %xmm22
 
-// CHECK: vmovd   %xmm22, %xmm21
-// CHECK: encoding: [0x62,0xa1,0x7d,0x08,0xd6,0xee]
-          vmovd.s   %xmm22, %xmm21
+// CHECK: vmovd  291(%r8,%rax,4), %xmm22
+// CHECK: encoding: [0x62,0xc1,0x7d,0x08,0x6e,0xb4,0x80,0x23,0x01,0x00,0x00]
+          vmovd  291(%r8,%rax,4), %xmm22
 
-// CHECK: vmovw   %xmm22, %xmm21
-// CHECK: encoding: [0x62,0xa5,0x7e,0x08,0x6e,0xee]
-          vmovw   %xmm22, %xmm21
+// CHECK: vmovd  (%rip), %xmm22
+// CHECK: encoding: [0x62,0xe1,0x7d,0x08,0x6e,0x35,0x00,0x00,0x00,0x00]
+          vmovd  (%rip), %xmm22
 
-// CHECK: vmovw   %xmm22, %xmm21
-// CHECK: encoding: [0x62,0xa5,0x7e,0x08,0x7e,0xee]
-          vmovw.s   %xmm22, %xmm21
+// CHECK: vmovd  -128(,%rbp,2), %xmm22
+// CHECK: encoding: [0x62,0xe1,0x7d,0x08,0x6e,0x34,0x6d,0x80,0xff,0xff,0xff]
+          vmovd  -128(,%rbp,2), %xmm22
+
+// CHECK: vmovd  508(%rcx), %xmm22
+// CHECK: encoding: [0x62,0xe1,0x7d,0x08,0x6e,0x71,0x7f]
+          vmovd  508(%rcx), %xmm22
+
+// CHECK: vmovd  -512(%rdx), %xmm22
+// CHECK: encoding: [0x62,0xe1,0x7d,0x08,0x6e,0x72,0x80]
+          vmovd  -512(%rdx), %xmm22
+
+// CHECK: vmovd  %xmm23, 268435456(%rbp,%r14,8)
+// CHECK: encoding: [0x62,0xa1,0x7d,0x08,0x7e,0xbc,0xf5,0x00,0x00,0x00,0x10]
+          vmovd  %xmm23, 268435456(%rbp,%r14,8)
+
+// CHECK: vmovd  %xmm23, 291(%r8,%rax,4)
+// CHECK: encoding: [0x62,0xc1,0x7d,0x08,0x7e,0xbc,0x80,0x23,0x01,0x00,0x00]
+          vmovd  %xmm23, 291(%r8,%rax,4)
+
+// CHECK: vmovd  %xmm23, (%rip)
+// CHECK: encoding: [0x62,0xe1,0x7d,0x08,0x7e,0x3d,0x00,0x00,0x00,0x00]
+          vmovd  %xmm23, (%rip)
+
+// CHECK: vmovd  %xmm23, -128(,%rbp,2)
+// CHECK: encoding: [0x62,0xe1,0x7d,0x08,0x7e,0x3c,0x6d,0x80,0xff,0xff,0xff]
+          vmovd  %xmm23, -128(,%rbp,2)
+
+// CHECK: vmovd  %xmm23, 508(%rcx)
+// CHECK: encoding: [0x62,0xe1,0x7d,0x08,0x7e,0x79,0x7f]
+          vmovd  %xmm23, 508(%rcx)
+
+// CHECK: vmovd  %xmm23, -512(%rdx)
+// CHECK: encoding: [0x62,0xe1,0x7d,0x08,0x7e,0x7a,0x80]
+          vmovd  %xmm23, -512(%rdx)
+
+// CHECK: vmovw  268435456(%rbp,%r14,8), %xmm22
+// CHECK: encoding: [0x62,0xa5,0x7d,0x08,0x6e,0xb4,0xf5,0x00,0x00,0x00,0x10]
+          vmovw  268435456(%rbp,%r14,8), %xmm22
+
+// CHECK: vmovw  291(%r8,%rax,4), %xmm22
+// CHECK: encoding: [0x62,0xc5,0x7d,0x08,0x6e,0xb4,0x80,0x23,0x01,0x00,0x00]
+          vmovw  291(%r8,%rax,4), %xmm22
+
+// CHECK: vmovw  (%rip), %xmm22
+// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x6e,0x35,0x00,0x00,0x00,0x00]
+          vmovw  (%rip), %xmm22
+
+// CHECK: vmovw  -64(,%rbp,2), %xmm22
+// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x6e,0x34,0x6d,0xc0,0xff,0xff,0xff]
+          vmovw  -64(,%rbp,2), %xmm22
+
+// CHECK: vmovw  254(%rcx), %xmm22
+// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x6e,0x71,0x7f]
+          vmovw  254(%rcx), %xmm22
+
+// CHECK: vmovw  -256(%rdx), %xmm22
+// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x6e,0x72,0x80]
+          vmovw  -256(%rdx), %xmm22
+
+// CHECK: vmovw  %xmm23, 268435456(%rbp,%r14,8)
+// CHECK: encoding: [0x62,0xa5,0x7d,0x08,0x7e,0xbc,0xf5,0x00,0x00,0x00,0x10]
+          vmovw  %xmm23, 268435456(%rbp,%r14,8)
+
+// CHECK: vmovw  %xmm23, 291(%r8,%rax,4)
+// CHECK: encoding: [0x62,0xc5,0x7d,0x08,0x7e,0xbc,0x80,0x23,0x01,0x00,0x00]
+          vmovw  %xmm23, 291(%r8,%rax,4)
+
+// CHECK: vmovw  %xmm23, (%rip)
+// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x7e,0x3d,0x00,0x00,0x00,0x00]
+          vmovw  %xmm23, (%rip)
+
+// CHECK: vmovw  %xmm23, -64(,%rbp,2)
+// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x7e,0x3c,0x6d,0xc0,0xff,0xff,0xff]
+          vmovw  %xmm23, -64(,%rbp,2)
+
+// CHECK: vmovw  %xmm23, 254(%rcx)
+// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x7e,0x79,0x7f]
+          vmovw  %xmm23, 254(%rcx)
+
+// CHECK: vmovw  %xmm23, -256(%rdx)
+// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x7e,0x7a,0x80]
+          vmovw  %xmm23, -256(%rdx)
diff --git a/llvm/test/MC/X86/avx10.2-copy-64-intel.s b/llvm/test/MC/X86/avx10.2-copy-64-intel.s
index ed364d4402313d..4fd7b67dfa5db5 100644
--- a/llvm/test/MC/X86/avx10.2-copy-64-intel.s
+++ b/llvm/test/MC/X86/avx10.2-copy-64-intel.s
@@ -1,17 +1,97 @@
 // RUN: llvm-mc -triple x86_64 -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s
 
-// CHECK: vmovd   xmm21, xmm22
-// CHECK: encoding: [0x62,0xa1,0x7e,0x08,0x7e,0xee]
-          vmovd   xmm21, xmm22
+// CHECK: vmovd xmm22, dword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa1,0x7d,0x08,0x6e,0xb4,0xf5,0x00,0x00,0x00,0x10]
+          vmovd xmm22, dword ptr [rbp + 8*r14 + 268435456]
 
-// CHECK: vmovd   xmm21, xmm22
-// CHECK: encoding: [0x62,0xa1,0x7d,0x08,0xd6,0xee]
-          vmovd.s   xmm21, xmm22
+// CHECK: vmovd xmm22, dword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc1,0x7d,0x08,0x6e,0xb4,0x80,0x23,0x01,0x00,0x00]
+          vmovd xmm22, dword ptr [r8 + 4*rax + 291]
 
-// CHECK: vmovw   xmm21, xmm22
-// CHECK: encoding: [0x62,0xa5,0x7e,0x08,0x6e,0xee]
-          vmovw   xmm21, xmm22
+// CHECK: vmovd xmm22, dword ptr [rip]
+// CHECK: encoding: [0x62,0xe1,0x7d,0x08,0x6e,0x35,0x00,0x00,0x00,0x00]
+          vmovd xmm22, dword ptr [rip]
 
-// CHECK: vmovw   xmm21, xmm22
-// CHECK: encoding: [0x62,0xa5,0x7e,0x08,0x7e,0xee]
-          vmovw.s   xmm21, xmm22
+// CHECK: vmovd xmm22, dword ptr [2*rbp - 128]
+// CHECK: encoding: [0x62,0xe1,0x7d,0x08,0x6e,0x34,0x6d,0x80,0xff,0xff,0xff]
+          vmovd xmm22, dword ptr [2*rbp - 128]
+
+// CHECK: vmovd xmm22, dword ptr [rcx + 508]
+// CHECK: encoding: [0x62,0xe1,0x7d,0x08,0x6e,0x71,0x7f]
+          vmovd xmm22, dword ptr [rcx + 508]
+
+// CHECK: vmovd xmm22, dword ptr [rdx - 512]
+// CHECK: encoding: [0x62,0xe1,0x7d,0x08,0x6e,0x72,0x80]
+          vmovd xmm22, dword ptr [rdx - 512]
+
+// CHECK: vmovd dword ptr [rbp + 8*r14 + 268435456], xmm23
+// CHECK: encoding: [0x62,0xa1,0x7d,0x08,0x7e,0xbc,0xf5,0x00,0x00,0x00,0x10]
+          vmovd dword ptr [rbp + 8*r14 + 268435456], xmm23
+
+// CHECK: vmovd dword ptr [r8 + 4*rax + 291], xmm23
+// CHECK: encoding: [0x62,0xc1,0x7d,0x08,0x7e,0xbc,0x80,0x23,0x01,0x00,0x00]
+          vmovd dword ptr [r8 + 4*rax + 291], xmm23
+
+// CHECK: vmovd dword ptr [rip], xmm23
+// CHECK: encoding: [0x62,0xe1,0x7d,0x08,0x7e,0x3d,0x00,0x00,0x00,0x00]
+          vmovd dword ptr [rip], xmm23
+
+// CHECK: vmovd dword ptr [2*rbp - 128], xmm23
+// CHECK: encoding: [0x62,0xe1,0x7d,0x08,0x7e,0x3c,0x6d,0x80,0xff,0xff,0xff]
+          vmovd dword ptr [2*rbp - 128], xmm23
+
+// CHECK: vmovd dword ptr [rcx + 508], xmm23
+// CHECK: encoding: [0x62,0xe1,0x7d,0x08,0x7e,0x79,0x7f]
+          vmovd dword ptr [rcx + 508], xmm23
+
+// CHECK: vmovd dword ptr [rdx - 512], xmm23
+// CHECK: encoding: [0x62,0xe1,0x7d,0x08,0x7e,0x7a,0x80]
+          vmovd dword ptr [rdx - 512], xmm23
+
+// CHECK: vmovw xmm22, word ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa5,0x7d,0x08,0x6e,0xb4,0xf5,0x00,0x00,0x00,0x10]
+          vmovw xmm22, word ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vmovw xmm22, word ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc5,0x7d,0x08,0x6e,0xb4,0x80,0x23,0x01,0x00,0x00]
+          vmovw xmm22, word ptr [r8 + 4*rax + 291]
+
+// CHECK: vmovw xmm22, word ptr [rip]
+// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x6e,0x35,0x00,0x00,0x00,0x00]
+          vmovw xmm22, word ptr [rip]
+
+// CHECK: vmovw xmm22, word ptr [2*rbp - 64]
+// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x6e,0x34,0x6d,0xc0,0xff,0xff,0xff]
+          vmovw xmm22, word ptr [2*rbp - 64]
+
+// CHECK: vmovw xmm22, word ptr [rcx + 254]
+// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x6e,0x71,0x7f]
+          vmovw xmm22, word ptr [rcx + 254]
+
+// CHECK: vmovw xmm22, word ptr [rdx - 256]
+// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x6e,0x72,0x80]
+          vmovw xmm22, word ptr [rdx - 256]
+
+// CHECK: vmovw word ptr [rbp + 8*r14 + 268435456], xmm23
+// CHECK: encoding: [0x62,0xa5,0x7d,0x08,0x7e,0xbc,0xf5,0x00,0x00,0x00,0x10]
+          vmovw word ptr [rbp + 8*r14 + 268435456], xmm23
+
+// CHECK: vmovw word ptr [r8 + 4*rax + 291], xmm23
+// CHECK: encoding: [0x62,0xc5,0x7d,0x08,0x7e,0xbc,0x80,0x23,0x01,0x00,0x00]
+          vmovw word ptr [r8 + 4*rax + 291], xmm23
+
+// CHECK: vmovw word ptr [rip], xmm23
+// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x7e,0x3d,0x00,0x00,0x00,0x00]
+          vmovw word ptr [rip], xmm23
+
+// CHECK: vmovw word ptr [2*rbp - 64], xmm23
+// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x7e,0x3c,0x6d,0xc0,0xff,0xff,0xff]
+          vmovw word ptr [2*rbp - 64], xmm23
+
+// CHECK: vmovw word ptr [rcx + 254], xmm23
+// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x7e,0x79,0x7f]
+          vmovw word ptr [rcx + 254], xmm23
+
+// CHECK: vmovw word ptr [rdx - 256], xmm23
+// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x7e,0x7a,0x80]
+          vmovw word ptr [rdx - 256], xmm23

>From 2a1c7f5f23185b2884452b566b448380ac3a6a88 Mon Sep 17 00:00:00 2001
From: mattarde <mattarde at intel.com>
Date: Fri, 13 Sep 2024 10:43:22 -0700
Subject: [PATCH 3/3] remove linebreak warning

---
 clang/lib/Headers/avx10_2copyintrin.h | 2 +-
 llvm/lib/Target/X86/X86InstrAVX10.td  | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/clang/lib/Headers/avx10_2copyintrin.h b/clang/lib/Headers/avx10_2copyintrin.h
index 13e76c6abe8993..7fc31190781d91 100644
--- a/clang/lib/Headers/avx10_2copyintrin.h
+++ b/clang/lib/Headers/avx10_2copyintrin.h
@@ -31,4 +31,4 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_move_epi16(__m128i __A) {
 
 #undef __DEFAULT_FN_ATTRS128
 
-#endif // __AVX10_2COPYINTRIN_H
\ No newline at end of file
+#endif // __AVX10_2COPYINTRIN_H
diff --git a/llvm/lib/Target/X86/X86InstrAVX10.td b/llvm/lib/Target/X86/X86InstrAVX10.td
index f66705a5a3de35..2dc65e792f83e2 100644
--- a/llvm/lib/Target/X86/X86InstrAVX10.td
+++ b/llvm/lib/Target/X86/X86InstrAVX10.td
@@ -1600,4 +1600,4 @@ let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
                                      Sched<[WriteVecMoveFromGpr]>;
   def : InstAlias<"vmovw.s\t{$src, $dst|$dst, $src}",
                   (VMOVZPWILo2PWIZrr2 VR128X:$dst, VR128X:$src), 0>;
-}
\ No newline at end of file
+}



More information about the cfe-commits mailing list