[clang] [llvm] [X86][AVX10.2] Support new AVX10.2 MOVZXC instructions. (PR #108537)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 18 01:40:01 PDT 2024
https://github.com/mahesh-attarde updated https://github.com/llvm/llvm-project/pull/108537
>From 71bbd1b23ed363517327a32f9aa92264866f143a Mon Sep 17 00:00:00 2001
From: mattarde <mattarde at intel.com>
Date: Fri, 13 Sep 2024 03:26:14 -0700
Subject: [PATCH 1/6] update clr
---
clang/lib/Headers/CMakeLists.txt | 1 +
clang/lib/Headers/avx10_2copyintrin.h | 34 ++++++++++
clang/lib/Headers/immintrin.h | 1 +
clang/test/CodeGen/X86/avx512copy-builtins.c | 17 +++++
llvm/lib/Target/X86/X86ISelLowering.cpp | 5 +-
llvm/lib/Target/X86/X86InstrAVX10.td | 64 +++++++++++++++++++
.../test/CodeGen/X86/avx512copy-intrinsics.ll | 35 ++++++++++
.../MC/Disassembler/X86/avx10.2-copy-32.txt | 34 ++++++++++
.../MC/Disassembler/X86/avx10.2-copy-64.txt | 34 ++++++++++
llvm/test/MC/X86/avx10.2-copy-32-att.s | 17 +++++
llvm/test/MC/X86/avx10.2-copy-32-intel.s | 17 +++++
llvm/test/MC/X86/avx10.2-copy-64-att.s | 17 +++++
llvm/test/MC/X86/avx10.2-copy-64-intel.s | 17 +++++
llvm/test/TableGen/x86-fold-tables.inc | 2 +
llvm/utils/TableGen/X86ManualInstrMapping.def | 1 +
15 files changed, 294 insertions(+), 2 deletions(-)
create mode 100644 clang/lib/Headers/avx10_2copyintrin.h
create mode 100644 clang/test/CodeGen/X86/avx512copy-builtins.c
create mode 100644 llvm/test/CodeGen/X86/avx512copy-intrinsics.ll
create mode 100644 llvm/test/MC/Disassembler/X86/avx10.2-copy-32.txt
create mode 100644 llvm/test/MC/Disassembler/X86/avx10.2-copy-64.txt
create mode 100644 llvm/test/MC/X86/avx10.2-copy-32-att.s
create mode 100644 llvm/test/MC/X86/avx10.2-copy-32-intel.s
create mode 100644 llvm/test/MC/X86/avx10.2-copy-64-att.s
create mode 100644 llvm/test/MC/X86/avx10.2-copy-64-intel.s
diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt
index 4c75c638b41bae..f5cc07c303f9eb 100644
--- a/clang/lib/Headers/CMakeLists.txt
+++ b/clang/lib/Headers/CMakeLists.txt
@@ -156,6 +156,7 @@ set(x86_files
avx10_2_512satcvtintrin.h
avx10_2bf16intrin.h
avx10_2convertintrin.h
+ avx10_2copyintrin.h
avx10_2minmaxintrin.h
avx10_2niintrin.h
avx10_2satcvtdsintrin.h
diff --git a/clang/lib/Headers/avx10_2copyintrin.h b/clang/lib/Headers/avx10_2copyintrin.h
new file mode 100644
index 00000000000000..13e76c6abe8993
--- /dev/null
+++ b/clang/lib/Headers/avx10_2copyintrin.h
@@ -0,0 +1,34 @@
+/*===---- avx10_2copyintrin.h - AVX10.2 Copy intrinsics -------------------===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===-----------------------------------------------------------------------===
+ */
+#ifndef __IMMINTRIN_H
+#error \
+ "Never use <avx10_2copyintrin.h> directly; include <immintrin.h> instead."
+#endif // __IMMINTRIN_H
+
+#ifndef __AVX10_2COPYINTRIN_H
+#define __AVX10_2COPYINTRIN_H
+
+/* Define the default attributes for the functions in this file. */
+#define __DEFAULT_FN_ATTRS128 \
+ __attribute__((__always_inline__, __nodebug__, __target__("avx10.2-256"), \
+ __min_vector_width__(128)))
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_move_epi32(__m128i __A) {
+ return (__m128i)__builtin_shufflevector(
+ (__v4si)__A, (__v4si)_mm_setzero_si128(), 0, 4, 4, 4);
+}
+
+static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_move_epi16(__m128i __A) {
+ return (__m128i)__builtin_shufflevector(
+ (__v8hi)__A, (__v8hi)_mm_setzero_si128(), 0, 8, 8, 8, 8, 8, 8, 8);
+}
+
+#undef __DEFAULT_FN_ATTRS128
+
+#endif // __AVX10_2COPYINTRIN_H
\ No newline at end of file
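For readers of the patch, here is a minimal usage sketch of the two new intrinsics (not part of the change itself; the helper names are made up, and it assumes a compiler built with this patch and -mavx10.2-256). Each intrinsic keeps element 0 of its source and zeroes every higher element, which the backend can then select as a single register-to-register vmovd/vmovw.

  #include <immintrin.h>

  /* Sketch: keep the low 32-bit element of v, zero elements 1..3.   */
  /* Expected to compile to "vmovd %xmm0, %xmm0" with -mavx10.2-256. */
  static inline __m128i keep_low_dword(__m128i v) {
    return _mm_move_epi32(v);
  }

  /* Sketch: keep the low 16-bit element of v, zero elements 1..7.   */
  /* Expected to compile to "vmovw %xmm0, %xmm0" with -mavx10.2-256. */
  static inline __m128i keep_low_word(__m128i v) {
    return _mm_move_epi16(v);
  }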
diff --git a/clang/lib/Headers/immintrin.h b/clang/lib/Headers/immintrin.h
index 280154f3c1026e..3fbabffa98df20 100644
--- a/clang/lib/Headers/immintrin.h
+++ b/clang/lib/Headers/immintrin.h
@@ -651,6 +651,7 @@ _storebe_i64(void * __P, long long __D) {
#if !defined(__SCE__) || __has_feature(modules) || defined(__AVX10_2__)
#include <avx10_2bf16intrin.h>
#include <avx10_2convertintrin.h>
+#include <avx10_2copyintrin.h>
#include <avx10_2minmaxintrin.h>
#include <avx10_2niintrin.h>
#include <avx10_2satcvtdsintrin.h>
diff --git a/clang/test/CodeGen/X86/avx512copy-builtins.c b/clang/test/CodeGen/X86/avx512copy-builtins.c
new file mode 100644
index 00000000000000..06f7507bde53ed
--- /dev/null
+++ b/clang/test/CodeGen/X86/avx512copy-builtins.c
@@ -0,0 +1,17 @@
+// RUN: %clang_cc1 %s -flax-vector-conversions=none -ffreestanding -triple=x86_64-unknown-unknown -target-feature +avx10.2-512 \
+// RUN: -emit-llvm -o - -Wall -Werror -pedantic -Wno-gnu-statement-expression | FileCheck %s
+
+#include <immintrin.h>
+#include <stddef.h>
+
+__m128i test_mm_move_epi32(__m128i A) {
+ // CHECK-LABEL: test_mm_move_epi32
+ // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
+ return _mm_move_epi32(A);
+}
+
+__m128i test_mm_move_epi16(__m128i A) {
+ // CHECK-LABEL: test_mm_move_epi16
+ // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i32> <i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
+ return _mm_move_epi16(A);
+}
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index f6d42ade600885..6e8c8ca3c44d0a 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -12348,7 +12348,7 @@ static SDValue lowerShuffleAsElementInsertion(
}
V2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, ExtVT, V2S);
} else if (Mask[V2Index] != (int)Mask.size() || EltVT == MVT::i8 ||
- EltVT == MVT::i16) {
+ (EltVT == MVT::i16 && !Subtarget.hasAVX10_2())) {
// Either not inserting from the low element of the input or the input
// element size is too small to use VZEXT_MOVL to clear the high bits.
return SDValue();
@@ -38342,7 +38342,8 @@ static bool matchUnaryShuffle(MVT MaskVT, ArrayRef<int> Mask,
// Match against a VZEXT_MOVL instruction, SSE1 only supports 32-bits (MOVSS).
if (((MaskEltSize == 32) || (MaskEltSize == 64 && Subtarget.hasSSE2()) ||
- (MaskEltSize == 16 && Subtarget.hasFP16())) &&
+ (MaskEltSize == 16 &&
+ (Subtarget.hasFP16() || Subtarget.hasAVX10_2()))) &&
isUndefOrEqual(Mask[0], 0) &&
isUndefOrZeroInRange(Mask, 1, NumMaskElts - 1)) {
Shuffle = X86ISD::VZEXT_MOVL;
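For context, both hunks target the same zero-extending move pattern: lane 0 is preserved and every higher lane is cleared. Below is a sketch of that pattern at the source level (a hypothetical helper written with clang's __builtin_shufflevector, not part of the patch). With +avx10.2-256 it can now be matched as X86ISD::VZEXT_MOVL and emitted as one vmovw, while the avx512f-only fallback in the accompanying avx512copy-intrinsics.ll test uses vpxor + vpblendw.

  #include <immintrin.h>

  /* Sketch: the v8i16 zero-extend-low-lane shuffle the new lowering accepts. */
  static inline __m128i zext_low_word(__m128i v) {
    return (__m128i)__builtin_shufflevector(
        (__v8hi)v, (__v8hi)_mm_setzero_si128(), 0, 8, 8, 8, 8, 8, 8, 8);
  }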
diff --git a/llvm/lib/Target/X86/X86InstrAVX10.td b/llvm/lib/Target/X86/X86InstrAVX10.td
index ada2bbaffd6645..f66705a5a3de35 100644
--- a/llvm/lib/Target/X86/X86InstrAVX10.td
+++ b/llvm/lib/Target/X86/X86InstrAVX10.td
@@ -1537,3 +1537,67 @@ defm VFNMADD132NEPBF16 : avx10_fma3p_132_bf16<0x9C, "vfnmadd132nepbf16", X86any_
defm VFNMSUB132NEPBF16 : avx10_fma3p_132_bf16<0x9E, "vfnmsub132nepbf16", X86any_Fnmsub,
X86Fnmsub, SchedWriteFMA>;
}
+
+//-------------------------------------------------
+// AVX10 MOVZXC (COPY) instructions
+//-------------------------------------------------
+let Predicates = [HasAVX10_2] in {
+ def VMOVZPDILo2PDIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst),
+ (ins VR128X:$src),
+ "vmovd\t{$src, $dst|$dst, $src}",
+ [(set VR128X:$dst, (v4i32 (X86vzmovl
+ (v4i32 VR128X:$src))))]>, EVEX,
+ Sched<[WriteVecMoveFromGpr]>;
+
+let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
+ def VMOVZPDILo2PDIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst),
+ (ins i32mem:$src),
+ "vmovd\t{$src, $dst|$dst, $src}", []>, EVEX,
+ EVEX_CD8<32, CD8VT1>,
+ Sched<[WriteVecLoad]>;
+
+let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
+ def VMOVZPDILo2PDIZmr : AVX512PDI<0xD6, MRMDestMem, (outs),
+ (ins i32mem:$dst, VR128X:$src),
+ "vmovd\t{$src, $dst|$dst, $src}", []>, EVEX,
+ EVEX_CD8<32, CD8VT1>,
+ Sched<[WriteVecStore]>;
+
+let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
+ def VMOVZPDILo2PDIZrr2 : AVX512PDI<0xD6, MRMSrcReg, (outs VR128X:$dst),
+ (ins VR128X:$src),
+ "vmovd\t{$src, $dst|$dst, $src}", []>, EVEX,
+ Sched<[WriteVecMoveFromGpr]>;
+ def : InstAlias<"vmovd.s\t{$src, $dst|$dst, $src}",
+ (VMOVZPDILo2PDIZrr2 VR128X:$dst, VR128X:$src), 0>;
+
+def VMOVZPWILo2PWIZrr : AVX512XSI<0x6E, MRMSrcReg, (outs VR128X:$dst),
+ (ins VR128X:$src),
+ "vmovw\t{$src, $dst|$dst, $src}",
+ [(set VR128X:$dst, (v8i16 (X86vzmovl
+ (v8i16 VR128X:$src))))]>, EVEX, T_MAP5,
+ Sched<[WriteVecMoveFromGpr]>;
+
+let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in
+ def VMOVZPWILo2PWIZrm : AVX512XSI<0x6E, MRMSrcMem, (outs VR128X:$dst),
+ (ins i16mem:$src),
+ "vmovw\t{$src, $dst|$dst, $src}", []>, EVEX,
+ EVEX_CD8<16, CD8VT1>, T_MAP5,
+ Sched<[WriteVecLoad]>;
+
+let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in
+ def VMOVZPWILo2PWIZmr : AVX512XSI<0x7E, MRMDestMem, (outs),
+ (ins i32mem:$dst, VR128X:$src),
+ "vmovw\t{$src, $dst|$dst, $src}", []>, EVEX,
+ EVEX_CD8<16, CD8VT1>, T_MAP5,
+ Sched<[WriteVecStore]>;
+
+let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
+ def VMOVZPWILo2PWIZrr2 : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst),
+ (ins VR128X:$src),
+ "vmovw\t{$src, $dst|$dst, $src}",
+ []>, EVEX, T_MAP5,
+ Sched<[WriteVecMoveFromGpr]>;
+ def : InstAlias<"vmovw.s\t{$src, $dst|$dst, $src}",
+ (VMOVZPWILo2PWIZrr2 VR128X:$dst, VR128X:$src), 0>;
+}
\ No newline at end of file
diff --git a/llvm/test/CodeGen/X86/avx512copy-intrinsics.ll b/llvm/test/CodeGen/X86/avx512copy-intrinsics.ll
new file mode 100644
index 00000000000000..a7ca23792e6feb
--- /dev/null
+++ b/llvm/test/CodeGen/X86/avx512copy-intrinsics.ll
@@ -0,0 +1,35 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+avx10.2-256 | FileCheck %s --check-prefixes=AVX102
+; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+avx512f | FileCheck %s --check-prefixes=NOAVX512MOVZXC
+
+define <4 x i32> @test_mm_move_epi32(<4 x i32> %a0) nounwind {
+; AVX102-LABEL: test_mm_move_epi32:
+; AVX102: # %bb.0:
+; AVX102-NEXT: vmovd %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7e,0x08,0x7e,0xc0]
+; AVX102-NEXT: retq # encoding: [0xc3]
+;
+; NOAVX512MOVZXC-LABEL: test_mm_move_epi32:
+; NOAVX512MOVZXC: # %bb.0:
+; NOAVX512MOVZXC-NEXT: vxorps %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf0,0x57,0xc9]
+; NOAVX512MOVZXC-NEXT: vblendps $1, %xmm0, %xmm1, %xmm0 # encoding: [0xc4,0xe3,0x71,0x0c,0xc0,0x01]
+; NOAVX512MOVZXC-NEXT: # xmm0 = xmm0[0],xmm1[1,2,3]
+; NOAVX512MOVZXC-NEXT: retq # encoding: [0xc3]
+ %res = shufflevector <4 x i32> %a0, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 4, i32 4, i32 4>
+ ret <4 x i32> %res
+}
+
+define <8 x i16> @test_mm_move_epi16(<8 x i16> %a0) nounwind {
+; AVX102-LABEL: test_mm_move_epi16:
+; AVX102: # %bb.0:
+; AVX102-NEXT: vmovw %xmm0, %xmm0 # encoding: [0x62,0xf5,0x7e,0x08,0x6e,0xc0]
+; AVX102-NEXT: retq # encoding: [0xc3]
+;
+; NOAVX512MOVZXC-LABEL: test_mm_move_epi16:
+; NOAVX512MOVZXC: # %bb.0:
+; NOAVX512MOVZXC-NEXT: vpxor %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xef,0xc9]
+; NOAVX512MOVZXC-NEXT: vpblendw $1, %xmm0, %xmm1, %xmm0 # encoding: [0xc4,0xe3,0x71,0x0e,0xc0,0x01]
+; NOAVX512MOVZXC-NEXT: # xmm0 = xmm0[0],xmm1[1,2,3,4,5,6,7]
+; NOAVX512MOVZXC-NEXT: retq # encoding: [0xc3]
+ %res = shufflevector <8 x i16> %a0, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
+ ret <8 x i16> %res
+}
diff --git a/llvm/test/MC/Disassembler/X86/avx10.2-copy-32.txt b/llvm/test/MC/Disassembler/X86/avx10.2-copy-32.txt
new file mode 100644
index 00000000000000..e86c2340a486c5
--- /dev/null
+++ b/llvm/test/MC/Disassembler/X86/avx10.2-copy-32.txt
@@ -0,0 +1,34 @@
+# RUN: llvm-mc --disassemble %s -triple=i386 | FileCheck %s --check-prefixes=ATT
+# RUN: llvm-mc --disassemble %s -triple=i386 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL
+
+# ATT: vmovd (%ecx), %xmm5
+# INTEL: vmovd xmm5, dword ptr [ecx]
+0x62 0xf1 0x7e 0x08 0x7e 0x29
+
+# ATT: vmovd %xmm5, (%ecx)
+# INTEL: vmovd dword ptr [ecx], xmm5
+0x62 0xf1 0x7d 0x08 0xd6 0x29
+
+# ATT: vmovd %xmm2, %xmm1
+# INTEL: vmovd xmm1, xmm2
+0x62 0xf1 0x7e 0x08 0x7e 0xca
+
+# ATT: vmovd %xmm2, %xmm1
+# INTEL: vmovd xmm1, xmm2
+0x62 0xf1 0x7d 0x08 0xd6 0xca
+
+# ATT: vmovw %xmm5, (%ecx)
+# INTEL: vmovw dword ptr [ecx], xmm5
+0x62 0xf5 0x7e 0x08 0x7e 0x29
+
+# ATT: vmovw (%ecx), %xmm5
+# INTEL: vmovw xmm5, word ptr [ecx]
+0x62 0xf5 0x7e 0x08 0x6e 0x29
+
+# ATT: vmovw %xmm2, %xmm1
+# INTEL: vmovw xmm1, xmm2
+0x62 0xf5 0x7e 0x08 0x6e 0xca
+
+# ATT: vmovw %xmm2, %xmm1
+# INTEL: vmovw xmm1, xmm2
+0x62 0xf5 0x7e 0x08 0x7e 0xca
diff --git a/llvm/test/MC/Disassembler/X86/avx10.2-copy-64.txt b/llvm/test/MC/Disassembler/X86/avx10.2-copy-64.txt
new file mode 100644
index 00000000000000..36ddd75a77ad39
--- /dev/null
+++ b/llvm/test/MC/Disassembler/X86/avx10.2-copy-64.txt
@@ -0,0 +1,34 @@
+# RUN: llvm-mc --disassemble %s -triple=x86_64 | FileCheck %s --check-prefixes=ATT
+# RUN: llvm-mc --disassemble %s -triple=x86_64 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL
+
+# ATT: vmovd (%rcx), %xmm29
+# INTEL: vmovd xmm29, dword ptr [rcx]
+0x62 0x61 0x7e 0x08 0x7e 0x29
+
+# ATT: vmovd %xmm29, (%rcx)
+# INTEL: vmovd dword ptr [rcx], xmm29
+0x62 0x61 0x7d 0x08 0xd6 0x29
+
+# ATT: vmovd %xmm22, %xmm21
+# INTEL: vmovd xmm21, xmm22
+0x62 0xa1 0x7e 0x08 0x7e 0xee
+
+# ATT: vmovd %xmm22, %xmm21
+# INTEL: vmovd xmm21, xmm22
+0x62 0xa1 0x7d 0x08 0xd6 0xee
+
+# ATT: vmovw %xmm29, (%rcx)
+# INTEL: vmovw dword ptr [rcx], xmm29
+0x62 0x65 0x7e 0x08 0x7e 0x29
+
+# ATT: vmovw (%rcx), %xmm29
+# INTEL: vmovw xmm29, word ptr [rcx]
+0x62 0x65 0x7e 0x08 0x6e 0x29
+
+# ATT: vmovw %xmm22, %xmm21
+# INTEL: vmovw xmm21, xmm22
+0x62 0xa5 0x7e 0x08 0x6e 0xee
+
+# ATT: vmovw %xmm22, %xmm21
+# INTEL: vmovw xmm21, xmm22
+0x62 0xa5 0x7e 0x08 0x7e 0xee
diff --git a/llvm/test/MC/X86/avx10.2-copy-32-att.s b/llvm/test/MC/X86/avx10.2-copy-32-att.s
new file mode 100644
index 00000000000000..a77f19a5dce542
--- /dev/null
+++ b/llvm/test/MC/X86/avx10.2-copy-32-att.s
@@ -0,0 +1,17 @@
+// RUN: llvm-mc -triple i386 --show-encoding %s | FileCheck %s
+
+// CHECK: vmovd %xmm2, %xmm1
+// CHECK: encoding: [0x62,0xf1,0x7e,0x08,0x7e,0xca]
+ vmovd %xmm2, %xmm1
+
+// CHECK: vmovd %xmm2, %xmm1
+// CHECK: encoding: [0x62,0xf1,0x7d,0x08,0xd6,0xca]
+ vmovd.s %xmm2, %xmm1
+
+// CHECK: vmovw %xmm2, %xmm1
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x6e,0xca]
+ vmovw %xmm2, %xmm1
+
+// CHECK: vmovw %xmm2, %xmm1
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x7e,0xca]
+ vmovw.s %xmm2, %xmm1
diff --git a/llvm/test/MC/X86/avx10.2-copy-32-intel.s b/llvm/test/MC/X86/avx10.2-copy-32-intel.s
new file mode 100644
index 00000000000000..222dc2f939c77a
--- /dev/null
+++ b/llvm/test/MC/X86/avx10.2-copy-32-intel.s
@@ -0,0 +1,17 @@
+// RUN: llvm-mc -triple i386 -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s
+
+// CHECK: vmovd xmm1, xmm2
+// CHECK: encoding: [0x62,0xf1,0x7e,0x08,0x7e,0xca]
+ vmovd xmm1, xmm2
+
+// CHECK: vmovd xmm1, xmm2
+// CHECK: encoding: [0x62,0xf1,0x7d,0x08,0xd6,0xca]
+ vmovd.s xmm1, xmm2
+
+// CHECK: vmovw xmm1, xmm2
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x6e,0xca]
+ vmovw xmm1, xmm2
+
+// CHECK: vmovw xmm1, xmm2
+// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x7e,0xca]
+ vmovw.s xmm1, xmm2
diff --git a/llvm/test/MC/X86/avx10.2-copy-64-att.s b/llvm/test/MC/X86/avx10.2-copy-64-att.s
new file mode 100644
index 00000000000000..e27d333222a38a
--- /dev/null
+++ b/llvm/test/MC/X86/avx10.2-copy-64-att.s
@@ -0,0 +1,17 @@
+// RUN: llvm-mc -triple x86_64 --show-encoding %s | FileCheck %s
+
+// CHECK: vmovd %xmm22, %xmm21
+// CHECK: encoding: [0x62,0xa1,0x7e,0x08,0x7e,0xee]
+ vmovd %xmm22, %xmm21
+
+// CHECK: vmovd %xmm22, %xmm21
+// CHECK: encoding: [0x62,0xa1,0x7d,0x08,0xd6,0xee]
+ vmovd.s %xmm22, %xmm21
+
+// CHECK: vmovw %xmm22, %xmm21
+// CHECK: encoding: [0x62,0xa5,0x7e,0x08,0x6e,0xee]
+ vmovw %xmm22, %xmm21
+
+// CHECK: vmovw %xmm22, %xmm21
+// CHECK: encoding: [0x62,0xa5,0x7e,0x08,0x7e,0xee]
+ vmovw.s %xmm22, %xmm21
diff --git a/llvm/test/MC/X86/avx10.2-copy-64-intel.s b/llvm/test/MC/X86/avx10.2-copy-64-intel.s
new file mode 100644
index 00000000000000..ed364d4402313d
--- /dev/null
+++ b/llvm/test/MC/X86/avx10.2-copy-64-intel.s
@@ -0,0 +1,17 @@
+// RUN: llvm-mc -triple x86_64 -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s
+
+// CHECK: vmovd xmm21, xmm22
+// CHECK: encoding: [0x62,0xa1,0x7e,0x08,0x7e,0xee]
+ vmovd xmm21, xmm22
+
+// CHECK: vmovd xmm21, xmm22
+// CHECK: encoding: [0x62,0xa1,0x7d,0x08,0xd6,0xee]
+ vmovd.s xmm21, xmm22
+
+// CHECK: vmovw xmm21, xmm22
+// CHECK: encoding: [0x62,0xa5,0x7e,0x08,0x6e,0xee]
+ vmovw xmm21, xmm22
+
+// CHECK: vmovw xmm21, xmm22
+// CHECK: encoding: [0x62,0xa5,0x7e,0x08,0x7e,0xee]
+ vmovw.s xmm21, xmm22
diff --git a/llvm/test/TableGen/x86-fold-tables.inc b/llvm/test/TableGen/x86-fold-tables.inc
index e85708ac1cc458..412c568677d986 100644
--- a/llvm/test/TableGen/x86-fold-tables.inc
+++ b/llvm/test/TableGen/x86-fold-tables.inc
@@ -1614,8 +1614,10 @@ static const X86FoldTableEntry Table1[] = {
{X86::VMOVUPSZrr, X86::VMOVUPSZrm, 0},
{X86::VMOVUPSrr, X86::VMOVUPSrm, 0},
{X86::VMOVW2SHrr, X86::VMOVWrm, TB_NO_REVERSE},
+ {X86::VMOVZPDILo2PDIZrr, X86::VMOVZPDILo2PDIZrm, TB_NO_REVERSE},
{X86::VMOVZPQILo2PQIZrr, X86::VMOVQI2PQIZrm, TB_NO_REVERSE},
{X86::VMOVZPQILo2PQIrr, X86::VMOVQI2PQIrm, TB_NO_REVERSE},
+ {X86::VMOVZPWILo2PWIZrr, X86::VMOVZPWILo2PWIZrm, TB_NO_REVERSE},
{X86::VPABSBYrr, X86::VPABSBYrm, 0},
{X86::VPABSBZ128rr, X86::VPABSBZ128rm, 0},
{X86::VPABSBZ256rr, X86::VPABSBZ256rm, 0},
diff --git a/llvm/utils/TableGen/X86ManualInstrMapping.def b/llvm/utils/TableGen/X86ManualInstrMapping.def
index d76c404722b0ac..bc539d792f38df 100644
--- a/llvm/utils/TableGen/X86ManualInstrMapping.def
+++ b/llvm/utils/TableGen/X86ManualInstrMapping.def
@@ -32,6 +32,7 @@ NOCOMP(VPSRAQZ128ri)
NOCOMP(VPSRAQZ128rm)
NOCOMP(VPSRAQZ128rr)
NOCOMP(VSCALEFPSZ128rm)
+NOCOMP(VMOVZPDILo2PDIZrr)
NOCOMP(VDBPSADBWZ256rmi)
NOCOMP(VDBPSADBWZ256rri)
NOCOMP(VPMAXSQZ256rm)
>From f3774cdcbe3f97c7652cb606884d4077d905e330 Mon Sep 17 00:00:00 2001
From: mattarde <mattarde at intel.com>
Date: Fri, 13 Sep 2024 10:37:39 -0700
Subject: [PATCH 2/6] update test
---
llvm/test/MC/X86/avx10.2-copy-32-att.s | 89 ++++++++++++++++---
llvm/test/MC/X86/avx10.2-copy-32-intel.s | 88 ++++++++++++++++---
llvm/test/MC/X86/avx10.2-copy-64-att.s | 104 ++++++++++++++++++++---
llvm/test/MC/X86/avx10.2-copy-64-intel.s | 104 ++++++++++++++++++++---
4 files changed, 337 insertions(+), 48 deletions(-)
diff --git a/llvm/test/MC/X86/avx10.2-copy-32-att.s b/llvm/test/MC/X86/avx10.2-copy-32-att.s
index a77f19a5dce542..2bc498720849c9 100644
--- a/llvm/test/MC/X86/avx10.2-copy-32-att.s
+++ b/llvm/test/MC/X86/avx10.2-copy-32-att.s
@@ -1,17 +1,82 @@
// RUN: llvm-mc -triple i386 --show-encoding %s | FileCheck %s
-// CHECK: vmovd %xmm2, %xmm1
-// CHECK: encoding: [0x62,0xf1,0x7e,0x08,0x7e,0xca]
- vmovd %xmm2, %xmm1
+// CHECK: vmovd 268435456(%esp,%esi,8), %xmm2
+// CHECK: encoding: [0xc5,0xf9,0x6e,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vmovd 268435456(%esp,%esi,8), %xmm2
-// CHECK: vmovd %xmm2, %xmm1
-// CHECK: encoding: [0x62,0xf1,0x7d,0x08,0xd6,0xca]
- vmovd.s %xmm2, %xmm1
+// CHECK: vmovd 291(%edi,%eax,4), %xmm2
+// CHECK: encoding: [0xc5,0xf9,0x6e,0x94,0x87,0x23,0x01,0x00,0x00]
+ vmovd 291(%edi,%eax,4), %xmm2
-// CHECK: vmovw %xmm2, %xmm1
-// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x6e,0xca]
- vmovw %xmm2, %xmm1
+// CHECK: vmovd (%eax), %xmm2
+// CHECK: encoding: [0xc5,0xf9,0x6e,0x10]
+ vmovd (%eax), %xmm2
+
+// CHECK: vmovd -128(,%ebp,2), %xmm2
+// CHECK: encoding: [0xc5,0xf9,0x6e,0x14,0x6d,0x80,0xff,0xff,0xff]
+ vmovd -128(,%ebp,2), %xmm2
+
+// CHECK: vmovd %xmm3, 268435456(%esp,%esi,8)
+// CHECK: encoding: [0xc5,0xf9,0x7e,0x9c,0xf4,0x00,0x00,0x00,0x10]
+ vmovd %xmm3, 268435456(%esp,%esi,8)
+
+// CHECK: vmovd %xmm3, 291(%edi,%eax,4)
+// CHECK: encoding: [0xc5,0xf9,0x7e,0x9c,0x87,0x23,0x01,0x00,0x00]
+ vmovd %xmm3, 291(%edi,%eax,4)
+
+// CHECK: vmovd %xmm3, (%eax)
+// CHECK: encoding: [0xc5,0xf9,0x7e,0x18]
+ vmovd %xmm3, (%eax)
+
+// CHECK: vmovd %xmm3, -128(,%ebp,2)
+// CHECK: encoding: [0xc5,0xf9,0x7e,0x1c,0x6d,0x80,0xff,0xff,0xff]
+ vmovd %xmm3, -128(,%ebp,2)
+
+// CHECK: vmovw 268435456(%esp,%esi,8), %xmm2
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x6e,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vmovw 268435456(%esp,%esi,8), %xmm2
+
+// CHECK: vmovw 291(%edi,%eax,4), %xmm2
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x6e,0x94,0x87,0x23,0x01,0x00,0x00]
+ vmovw 291(%edi,%eax,4), %xmm2
+
+// CHECK: vmovw (%eax), %xmm2
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x6e,0x10]
+ vmovw (%eax), %xmm2
+
+// CHECK: vmovw -64(,%ebp,2), %xmm2
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x6e,0x14,0x6d,0xc0,0xff,0xff,0xff]
+ vmovw -64(,%ebp,2), %xmm2
+
+// CHECK: vmovw 254(%ecx), %xmm2
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x6e,0x51,0x7f]
+ vmovw 254(%ecx), %xmm2
+
+// CHECK: vmovw -256(%edx), %xmm2
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x6e,0x52,0x80]
+ vmovw -256(%edx), %xmm2
+
+// CHECK: vmovw %xmm3, 268435456(%esp,%esi,8)
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x7e,0x9c,0xf4,0x00,0x00,0x00,0x10]
+ vmovw %xmm3, 268435456(%esp,%esi,8)
+
+// CHECK: vmovw %xmm3, 291(%edi,%eax,4)
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x7e,0x9c,0x87,0x23,0x01,0x00,0x00]
+ vmovw %xmm3, 291(%edi,%eax,4)
+
+// CHECK: vmovw %xmm3, (%eax)
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x7e,0x18]
+ vmovw %xmm3, (%eax)
+
+// CHECK: vmovw %xmm3, -64(,%ebp,2)
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x7e,0x1c,0x6d,0xc0,0xff,0xff,0xff]
+ vmovw %xmm3, -64(,%ebp,2)
+
+// CHECK: vmovw %xmm3, 254(%ecx)
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x7e,0x59,0x7f]
+ vmovw %xmm3, 254(%ecx)
+
+// CHECK: vmovw %xmm3, -256(%edx)
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x7e,0x5a,0x80]
+ vmovw %xmm3, -256(%edx)
-// CHECK: vmovw %xmm2, %xmm1
-// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x7e,0xca]
- vmovw.s %xmm2, %xmm1
diff --git a/llvm/test/MC/X86/avx10.2-copy-32-intel.s b/llvm/test/MC/X86/avx10.2-copy-32-intel.s
index 222dc2f939c77a..aa84548e5f75dd 100644
--- a/llvm/test/MC/X86/avx10.2-copy-32-intel.s
+++ b/llvm/test/MC/X86/avx10.2-copy-32-intel.s
@@ -1,17 +1,81 @@
// RUN: llvm-mc -triple i386 -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s
-// CHECK: vmovd xmm1, xmm2
-// CHECK: encoding: [0x62,0xf1,0x7e,0x08,0x7e,0xca]
- vmovd xmm1, xmm2
+// CHECK: vmovd xmm2, dword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0xc5,0xf9,0x6e,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vmovd xmm2, dword ptr [esp + 8*esi + 268435456]
-// CHECK: vmovd xmm1, xmm2
-// CHECK: encoding: [0x62,0xf1,0x7d,0x08,0xd6,0xca]
- vmovd.s xmm1, xmm2
+// CHECK: vmovd xmm2, dword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0xc5,0xf9,0x6e,0x94,0x87,0x23,0x01,0x00,0x00]
+ vmovd xmm2, dword ptr [edi + 4*eax + 291]
-// CHECK: vmovw xmm1, xmm2
-// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x6e,0xca]
- vmovw xmm1, xmm2
+// CHECK: vmovd xmm2, dword ptr [eax]
+// CHECK: encoding: [0xc5,0xf9,0x6e,0x10]
+ vmovd xmm2, dword ptr [eax]
-// CHECK: vmovw xmm1, xmm2
-// CHECK: encoding: [0x62,0xf5,0x7e,0x08,0x7e,0xca]
- vmovw.s xmm1, xmm2
+// CHECK: vmovd xmm2, dword ptr [2*ebp - 128]
+// CHECK: encoding: [0xc5,0xf9,0x6e,0x14,0x6d,0x80,0xff,0xff,0xff]
+ vmovd xmm2, dword ptr [2*ebp - 128]
+
+// CHECK: vmovd dword ptr [esp + 8*esi + 268435456], xmm3
+// CHECK: encoding: [0xc5,0xf9,0x7e,0x9c,0xf4,0x00,0x00,0x00,0x10]
+ vmovd dword ptr [esp + 8*esi + 268435456], xmm3
+
+// CHECK: vmovd dword ptr [edi + 4*eax + 291], xmm3
+// CHECK: encoding: [0xc5,0xf9,0x7e,0x9c,0x87,0x23,0x01,0x00,0x00]
+ vmovd dword ptr [edi + 4*eax + 291], xmm3
+
+// CHECK: vmovd dword ptr [eax], xmm3
+// CHECK: encoding: [0xc5,0xf9,0x7e,0x18]
+ vmovd dword ptr [eax], xmm3
+
+// CHECK: vmovd dword ptr [2*ebp - 128], xmm3
+// CHECK: encoding: [0xc5,0xf9,0x7e,0x1c,0x6d,0x80,0xff,0xff,0xff]
+ vmovd dword ptr [2*ebp - 128], xmm3
+
+// CHECK: vmovw xmm2, word ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x6e,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vmovw xmm2, word ptr [esp + 8*esi + 268435456]
+
+// CHECK: vmovw xmm2, word ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x6e,0x94,0x87,0x23,0x01,0x00,0x00]
+ vmovw xmm2, word ptr [edi + 4*eax + 291]
+
+// CHECK: vmovw xmm2, word ptr [eax]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x6e,0x10]
+ vmovw xmm2, word ptr [eax]
+
+// CHECK: vmovw xmm2, word ptr [2*ebp - 64]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x6e,0x14,0x6d,0xc0,0xff,0xff,0xff]
+ vmovw xmm2, word ptr [2*ebp - 64]
+
+// CHECK: vmovw xmm2, word ptr [ecx + 254]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x6e,0x51,0x7f]
+ vmovw xmm2, word ptr [ecx + 254]
+
+// CHECK: vmovw xmm2, word ptr [edx - 256]
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x6e,0x52,0x80]
+ vmovw xmm2, word ptr [edx - 256]
+
+// CHECK: vmovw word ptr [esp + 8*esi + 268435456], xmm3
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x7e,0x9c,0xf4,0x00,0x00,0x00,0x10]
+ vmovw word ptr [esp + 8*esi + 268435456], xmm3
+
+// CHECK: vmovw word ptr [edi + 4*eax + 291], xmm3
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x7e,0x9c,0x87,0x23,0x01,0x00,0x00]
+ vmovw word ptr [edi + 4*eax + 291], xmm3
+
+// CHECK: vmovw word ptr [eax], xmm3
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x7e,0x18]
+ vmovw word ptr [eax], xmm3
+
+// CHECK: vmovw word ptr [2*ebp - 64], xmm3
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x7e,0x1c,0x6d,0xc0,0xff,0xff,0xff]
+ vmovw word ptr [2*ebp - 64], xmm3
+
+// CHECK: vmovw word ptr [ecx + 254], xmm3
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x7e,0x59,0x7f]
+ vmovw word ptr [ecx + 254], xmm3
+
+// CHECK: vmovw word ptr [edx - 256], xmm3
+// CHECK: encoding: [0x62,0xf5,0x7d,0x08,0x7e,0x5a,0x80]
+ vmovw word ptr [edx - 256], xmm3
diff --git a/llvm/test/MC/X86/avx10.2-copy-64-att.s b/llvm/test/MC/X86/avx10.2-copy-64-att.s
index e27d333222a38a..a672b2d842240c 100644
--- a/llvm/test/MC/X86/avx10.2-copy-64-att.s
+++ b/llvm/test/MC/X86/avx10.2-copy-64-att.s
@@ -1,17 +1,97 @@
// RUN: llvm-mc -triple x86_64 --show-encoding %s | FileCheck %s
-// CHECK: vmovd %xmm22, %xmm21
-// CHECK: encoding: [0x62,0xa1,0x7e,0x08,0x7e,0xee]
- vmovd %xmm22, %xmm21
+// CHECK: vmovd 268435456(%rbp,%r14,8), %xmm22
+// CHECK: encoding: [0x62,0xa1,0x7d,0x08,0x6e,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vmovd 268435456(%rbp,%r14,8), %xmm22
-// CHECK: vmovd %xmm22, %xmm21
-// CHECK: encoding: [0x62,0xa1,0x7d,0x08,0xd6,0xee]
- vmovd.s %xmm22, %xmm21
+// CHECK: vmovd 291(%r8,%rax,4), %xmm22
+// CHECK: encoding: [0x62,0xc1,0x7d,0x08,0x6e,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vmovd 291(%r8,%rax,4), %xmm22
-// CHECK: vmovw %xmm22, %xmm21
-// CHECK: encoding: [0x62,0xa5,0x7e,0x08,0x6e,0xee]
- vmovw %xmm22, %xmm21
+// CHECK: vmovd (%rip), %xmm22
+// CHECK: encoding: [0x62,0xe1,0x7d,0x08,0x6e,0x35,0x00,0x00,0x00,0x00]
+ vmovd (%rip), %xmm22
-// CHECK: vmovw %xmm22, %xmm21
-// CHECK: encoding: [0x62,0xa5,0x7e,0x08,0x7e,0xee]
- vmovw.s %xmm22, %xmm21
+// CHECK: vmovd -128(,%rbp,2), %xmm22
+// CHECK: encoding: [0x62,0xe1,0x7d,0x08,0x6e,0x34,0x6d,0x80,0xff,0xff,0xff]
+ vmovd -128(,%rbp,2), %xmm22
+
+// CHECK: vmovd 508(%rcx), %xmm22
+// CHECK: encoding: [0x62,0xe1,0x7d,0x08,0x6e,0x71,0x7f]
+ vmovd 508(%rcx), %xmm22
+
+// CHECK: vmovd -512(%rdx), %xmm22
+// CHECK: encoding: [0x62,0xe1,0x7d,0x08,0x6e,0x72,0x80]
+ vmovd -512(%rdx), %xmm22
+
+// CHECK: vmovd %xmm23, 268435456(%rbp,%r14,8)
+// CHECK: encoding: [0x62,0xa1,0x7d,0x08,0x7e,0xbc,0xf5,0x00,0x00,0x00,0x10]
+ vmovd %xmm23, 268435456(%rbp,%r14,8)
+
+// CHECK: vmovd %xmm23, 291(%r8,%rax,4)
+// CHECK: encoding: [0x62,0xc1,0x7d,0x08,0x7e,0xbc,0x80,0x23,0x01,0x00,0x00]
+ vmovd %xmm23, 291(%r8,%rax,4)
+
+// CHECK: vmovd %xmm23, (%rip)
+// CHECK: encoding: [0x62,0xe1,0x7d,0x08,0x7e,0x3d,0x00,0x00,0x00,0x00]
+ vmovd %xmm23, (%rip)
+
+// CHECK: vmovd %xmm23, -128(,%rbp,2)
+// CHECK: encoding: [0x62,0xe1,0x7d,0x08,0x7e,0x3c,0x6d,0x80,0xff,0xff,0xff]
+ vmovd %xmm23, -128(,%rbp,2)
+
+// CHECK: vmovd %xmm23, 508(%rcx)
+// CHECK: encoding: [0x62,0xe1,0x7d,0x08,0x7e,0x79,0x7f]
+ vmovd %xmm23, 508(%rcx)
+
+// CHECK: vmovd %xmm23, -512(%rdx)
+// CHECK: encoding: [0x62,0xe1,0x7d,0x08,0x7e,0x7a,0x80]
+ vmovd %xmm23, -512(%rdx)
+
+// CHECK: vmovw 268435456(%rbp,%r14,8), %xmm22
+// CHECK: encoding: [0x62,0xa5,0x7d,0x08,0x6e,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vmovw 268435456(%rbp,%r14,8), %xmm22
+
+// CHECK: vmovw 291(%r8,%rax,4), %xmm22
+// CHECK: encoding: [0x62,0xc5,0x7d,0x08,0x6e,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vmovw 291(%r8,%rax,4), %xmm22
+
+// CHECK: vmovw (%rip), %xmm22
+// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x6e,0x35,0x00,0x00,0x00,0x00]
+ vmovw (%rip), %xmm22
+
+// CHECK: vmovw -64(,%rbp,2), %xmm22
+// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x6e,0x34,0x6d,0xc0,0xff,0xff,0xff]
+ vmovw -64(,%rbp,2), %xmm22
+
+// CHECK: vmovw 254(%rcx), %xmm22
+// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x6e,0x71,0x7f]
+ vmovw 254(%rcx), %xmm22
+
+// CHECK: vmovw -256(%rdx), %xmm22
+// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x6e,0x72,0x80]
+ vmovw -256(%rdx), %xmm22
+
+// CHECK: vmovw %xmm23, 268435456(%rbp,%r14,8)
+// CHECK: encoding: [0x62,0xa5,0x7d,0x08,0x7e,0xbc,0xf5,0x00,0x00,0x00,0x10]
+ vmovw %xmm23, 268435456(%rbp,%r14,8)
+
+// CHECK: vmovw %xmm23, 291(%r8,%rax,4)
+// CHECK: encoding: [0x62,0xc5,0x7d,0x08,0x7e,0xbc,0x80,0x23,0x01,0x00,0x00]
+ vmovw %xmm23, 291(%r8,%rax,4)
+
+// CHECK: vmovw %xmm23, (%rip)
+// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x7e,0x3d,0x00,0x00,0x00,0x00]
+ vmovw %xmm23, (%rip)
+
+// CHECK: vmovw %xmm23, -64(,%rbp,2)
+// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x7e,0x3c,0x6d,0xc0,0xff,0xff,0xff]
+ vmovw %xmm23, -64(,%rbp,2)
+
+// CHECK: vmovw %xmm23, 254(%rcx)
+// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x7e,0x79,0x7f]
+ vmovw %xmm23, 254(%rcx)
+
+// CHECK: vmovw %xmm23, -256(%rdx)
+// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x7e,0x7a,0x80]
+ vmovw %xmm23, -256(%rdx)
diff --git a/llvm/test/MC/X86/avx10.2-copy-64-intel.s b/llvm/test/MC/X86/avx10.2-copy-64-intel.s
index ed364d4402313d..4fd7b67dfa5db5 100644
--- a/llvm/test/MC/X86/avx10.2-copy-64-intel.s
+++ b/llvm/test/MC/X86/avx10.2-copy-64-intel.s
@@ -1,17 +1,97 @@
// RUN: llvm-mc -triple x86_64 -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s
-// CHECK: vmovd xmm21, xmm22
-// CHECK: encoding: [0x62,0xa1,0x7e,0x08,0x7e,0xee]
- vmovd xmm21, xmm22
+// CHECK: vmovd xmm22, dword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa1,0x7d,0x08,0x6e,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vmovd xmm22, dword ptr [rbp + 8*r14 + 268435456]
-// CHECK: vmovd xmm21, xmm22
-// CHECK: encoding: [0x62,0xa1,0x7d,0x08,0xd6,0xee]
- vmovd.s xmm21, xmm22
+// CHECK: vmovd xmm22, dword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc1,0x7d,0x08,0x6e,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vmovd xmm22, dword ptr [r8 + 4*rax + 291]
-// CHECK: vmovw xmm21, xmm22
-// CHECK: encoding: [0x62,0xa5,0x7e,0x08,0x6e,0xee]
- vmovw xmm21, xmm22
+// CHECK: vmovd xmm22, dword ptr [rip]
+// CHECK: encoding: [0x62,0xe1,0x7d,0x08,0x6e,0x35,0x00,0x00,0x00,0x00]
+ vmovd xmm22, dword ptr [rip]
-// CHECK: vmovw xmm21, xmm22
-// CHECK: encoding: [0x62,0xa5,0x7e,0x08,0x7e,0xee]
- vmovw.s xmm21, xmm22
+// CHECK: vmovd xmm22, dword ptr [2*rbp - 128]
+// CHECK: encoding: [0x62,0xe1,0x7d,0x08,0x6e,0x34,0x6d,0x80,0xff,0xff,0xff]
+ vmovd xmm22, dword ptr [2*rbp - 128]
+
+// CHECK: vmovd xmm22, dword ptr [rcx + 508]
+// CHECK: encoding: [0x62,0xe1,0x7d,0x08,0x6e,0x71,0x7f]
+ vmovd xmm22, dword ptr [rcx + 508]
+
+// CHECK: vmovd xmm22, dword ptr [rdx - 512]
+// CHECK: encoding: [0x62,0xe1,0x7d,0x08,0x6e,0x72,0x80]
+ vmovd xmm22, dword ptr [rdx - 512]
+
+// CHECK: vmovd dword ptr [rbp + 8*r14 + 268435456], xmm23
+// CHECK: encoding: [0x62,0xa1,0x7d,0x08,0x7e,0xbc,0xf5,0x00,0x00,0x00,0x10]
+ vmovd dword ptr [rbp + 8*r14 + 268435456], xmm23
+
+// CHECK: vmovd dword ptr [r8 + 4*rax + 291], xmm23
+// CHECK: encoding: [0x62,0xc1,0x7d,0x08,0x7e,0xbc,0x80,0x23,0x01,0x00,0x00]
+ vmovd dword ptr [r8 + 4*rax + 291], xmm23
+
+// CHECK: vmovd dword ptr [rip], xmm23
+// CHECK: encoding: [0x62,0xe1,0x7d,0x08,0x7e,0x3d,0x00,0x00,0x00,0x00]
+ vmovd dword ptr [rip], xmm23
+
+// CHECK: vmovd dword ptr [2*rbp - 128], xmm23
+// CHECK: encoding: [0x62,0xe1,0x7d,0x08,0x7e,0x3c,0x6d,0x80,0xff,0xff,0xff]
+ vmovd dword ptr [2*rbp - 128], xmm23
+
+// CHECK: vmovd dword ptr [rcx + 508], xmm23
+// CHECK: encoding: [0x62,0xe1,0x7d,0x08,0x7e,0x79,0x7f]
+ vmovd dword ptr [rcx + 508], xmm23
+
+// CHECK: vmovd dword ptr [rdx - 512], xmm23
+// CHECK: encoding: [0x62,0xe1,0x7d,0x08,0x7e,0x7a,0x80]
+ vmovd dword ptr [rdx - 512], xmm23
+
+// CHECK: vmovw xmm22, word ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa5,0x7d,0x08,0x6e,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vmovw xmm22, word ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vmovw xmm22, word ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc5,0x7d,0x08,0x6e,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vmovw xmm22, word ptr [r8 + 4*rax + 291]
+
+// CHECK: vmovw xmm22, word ptr [rip]
+// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x6e,0x35,0x00,0x00,0x00,0x00]
+ vmovw xmm22, word ptr [rip]
+
+// CHECK: vmovw xmm22, word ptr [2*rbp - 64]
+// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x6e,0x34,0x6d,0xc0,0xff,0xff,0xff]
+ vmovw xmm22, word ptr [2*rbp - 64]
+
+// CHECK: vmovw xmm22, word ptr [rcx + 254]
+// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x6e,0x71,0x7f]
+ vmovw xmm22, word ptr [rcx + 254]
+
+// CHECK: vmovw xmm22, word ptr [rdx - 256]
+// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x6e,0x72,0x80]
+ vmovw xmm22, word ptr [rdx - 256]
+
+// CHECK: vmovw word ptr [rbp + 8*r14 + 268435456], xmm23
+// CHECK: encoding: [0x62,0xa5,0x7d,0x08,0x7e,0xbc,0xf5,0x00,0x00,0x00,0x10]
+ vmovw word ptr [rbp + 8*r14 + 268435456], xmm23
+
+// CHECK: vmovw word ptr [r8 + 4*rax + 291], xmm23
+// CHECK: encoding: [0x62,0xc5,0x7d,0x08,0x7e,0xbc,0x80,0x23,0x01,0x00,0x00]
+ vmovw word ptr [r8 + 4*rax + 291], xmm23
+
+// CHECK: vmovw word ptr [rip], xmm23
+// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x7e,0x3d,0x00,0x00,0x00,0x00]
+ vmovw word ptr [rip], xmm23
+
+// CHECK: vmovw word ptr [2*rbp - 64], xmm23
+// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x7e,0x3c,0x6d,0xc0,0xff,0xff,0xff]
+ vmovw word ptr [2*rbp - 64], xmm23
+
+// CHECK: vmovw word ptr [rcx + 254], xmm23
+// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x7e,0x79,0x7f]
+ vmovw word ptr [rcx + 254], xmm23
+
+// CHECK: vmovw word ptr [rdx - 256], xmm23
+// CHECK: encoding: [0x62,0xe5,0x7d,0x08,0x7e,0x7a,0x80]
+ vmovw word ptr [rdx - 256], xmm23
>From 64254780214d5d96ac12b220d421652f79ef78b4 Mon Sep 17 00:00:00 2001
From: mattarde <mattarde at intel.com>
Date: Fri, 13 Sep 2024 10:43:22 -0700
Subject: [PATCH 3/6] remove linebreak warning
---
clang/lib/Headers/avx10_2copyintrin.h | 2 +-
llvm/lib/Target/X86/X86InstrAVX10.td | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/clang/lib/Headers/avx10_2copyintrin.h b/clang/lib/Headers/avx10_2copyintrin.h
index 13e76c6abe8993..7fc31190781d91 100644
--- a/clang/lib/Headers/avx10_2copyintrin.h
+++ b/clang/lib/Headers/avx10_2copyintrin.h
@@ -31,4 +31,4 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_move_epi16(__m128i __A) {
#undef __DEFAULT_FN_ATTRS128
-#endif // __AVX10_2COPYINTRIN_H
\ No newline at end of file
+#endif // __AVX10_2COPYINTRIN_H
diff --git a/llvm/lib/Target/X86/X86InstrAVX10.td b/llvm/lib/Target/X86/X86InstrAVX10.td
index f66705a5a3de35..2dc65e792f83e2 100644
--- a/llvm/lib/Target/X86/X86InstrAVX10.td
+++ b/llvm/lib/Target/X86/X86InstrAVX10.td
@@ -1600,4 +1600,4 @@ let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
Sched<[WriteVecMoveFromGpr]>;
def : InstAlias<"vmovw.s\t{$src, $dst|$dst, $src}",
(VMOVZPWILo2PWIZrr2 VR128X:$dst, VR128X:$src), 0>;
-}
\ No newline at end of file
+}
>From a7992f8a21de207c6bb8632a9962e43b6fad2f06 Mon Sep 17 00:00:00 2001
From: mattarde <mattarde at intel.com>
Date: Mon, 16 Sep 2024 22:19:27 -0700
Subject: [PATCH 4/6] remove f16 check
---
llvm/lib/Target/X86/X86ISelLowering.cpp | 3 +--
llvm/test/CodeGen/X86/avx512fp16-mov.ll | 18 +++++++++---------
2 files changed, 10 insertions(+), 11 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 6e8c8ca3c44d0a..69defcb2f5ab0f 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -38342,8 +38342,7 @@ static bool matchUnaryShuffle(MVT MaskVT, ArrayRef<int> Mask,
// Match against a VZEXT_MOVL instruction, SSE1 only supports 32-bits (MOVSS).
if (((MaskEltSize == 32) || (MaskEltSize == 64 && Subtarget.hasSSE2()) ||
- (MaskEltSize == 16 &&
- (Subtarget.hasFP16() || Subtarget.hasAVX10_2()))) &&
+ (MaskEltSize == 16 && Subtarget.hasAVX10_2())) &&
isUndefOrEqual(Mask[0], 0) &&
isUndefOrZeroInRange(Mask, 1, NumMaskElts - 1)) {
Shuffle = X86ISD::VZEXT_MOVL;
diff --git a/llvm/test/CodeGen/X86/avx512fp16-mov.ll b/llvm/test/CodeGen/X86/avx512fp16-mov.ll
index f4eb5b952ae436..f0b520ed095e98 100644
--- a/llvm/test/CodeGen/X86/avx512fp16-mov.ll
+++ b/llvm/test/CodeGen/X86/avx512fp16-mov.ll
@@ -2094,14 +2094,14 @@ for.end: ; preds = %for.body.preheader,
define <16 x i32> @pr52561(<16 x i32> %a, <16 x i32> %b) "min-legal-vector-width"="256" "prefer-vector-width"="256" nounwind {
; X64-LABEL: pr52561:
; X64: # %bb.0:
-; X64-NEXT: vpaddd %ymm3, %ymm1, %ymm1
; X64-NEXT: vpaddd %ymm2, %ymm0, %ymm0
+; X64-NEXT: vpaddd %ymm3, %ymm1, %ymm1
; X64-NEXT: vpbroadcastd {{.*#+}} ymm2 = [112,112,112,112,112,112,112,112]
-; X64-NEXT: vpaddd %ymm2, %ymm0, %ymm0
; X64-NEXT: vpaddd %ymm2, %ymm1, %ymm1
+; X64-NEXT: vpaddd %ymm2, %ymm0, %ymm0
+; X64-NEXT: vmovd {{.*#+}} xmm2 = [65535,0,0,0,0,0,0,0]
+; X64-NEXT: vpand %ymm2, %ymm0, %ymm0
; X64-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
-; X64-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; X64-NEXT: vmovsh %xmm0, %xmm2, %xmm0
; X64-NEXT: retq
;
; X86-LABEL: pr52561:
@@ -2113,11 +2113,11 @@ define <16 x i32> @pr52561(<16 x i32> %a, <16 x i32> %b) "min-legal-vector-width
; X86-NEXT: vpaddd %ymm2, %ymm0, %ymm0
; X86-NEXT: vpaddd 8(%ebp), %ymm1, %ymm1
; X86-NEXT: vpbroadcastd {{.*#+}} ymm2 = [112,112,112,112,112,112,112,112]
-; X86-NEXT: vpaddd %ymm2, %ymm0, %ymm0
; X86-NEXT: vpaddd %ymm2, %ymm1, %ymm1
+; X86-NEXT: vpaddd %ymm2, %ymm0, %ymm0
+; X86-NEXT: vmovd {{.*#+}} xmm2 = [65535,0,0,0,0,0,0,0]
+; X86-NEXT: vpand %ymm2, %ymm0, %ymm0
; X86-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %ymm1, %ymm1
-; X86-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; X86-NEXT: vmovsh %xmm0, %xmm2, %xmm0
; X86-NEXT: movl %ebp, %esp
; X86-NEXT: popl %ebp
; X86-NEXT: retl
@@ -2139,9 +2139,9 @@ define <8 x i16> @pr59628_xmm(i16 %arg) {
; X86-LABEL: pr59628_xmm:
; X86: # %bb.0:
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; X86-NEXT: vpxor %xmm0, %xmm0, %xmm0
; X86-NEXT: vpbroadcastw %eax, %xmm1
-; X86-NEXT: vmovsh %xmm1, %xmm0, %xmm0
+; X86-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3,4,5,6,7]
; X86-NEXT: vpcmpneqw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %k1
; X86-NEXT: vmovdqu16 %xmm0, %xmm0 {%k1} {z}
; X86-NEXT: retl
>From 37513f26f17172fbdfe68867018bf0755cea0edc Mon Sep 17 00:00:00 2001
From: mattarde <mattarde at intel.com>
Date: Tue, 17 Sep 2024 02:42:11 -0700
Subject: [PATCH 5/6] Revert "remove f16 check"
This reverts commit be0013472904aaa960ff1b5fe1add5b5be79973d.
---
llvm/lib/Target/X86/X86ISelLowering.cpp | 3 ++-
llvm/test/CodeGen/X86/avx512fp16-mov.ll | 18 +++++++++---------
2 files changed, 11 insertions(+), 10 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 69defcb2f5ab0f..6e8c8ca3c44d0a 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -38342,7 +38342,8 @@ static bool matchUnaryShuffle(MVT MaskVT, ArrayRef<int> Mask,
// Match against a VZEXT_MOVL instruction, SSE1 only supports 32-bits (MOVSS).
if (((MaskEltSize == 32) || (MaskEltSize == 64 && Subtarget.hasSSE2()) ||
- (MaskEltSize == 16 && Subtarget.hasAVX10_2())) &&
+ (MaskEltSize == 16 &&
+ (Subtarget.hasFP16() || Subtarget.hasAVX10_2()))) &&
isUndefOrEqual(Mask[0], 0) &&
isUndefOrZeroInRange(Mask, 1, NumMaskElts - 1)) {
Shuffle = X86ISD::VZEXT_MOVL;
diff --git a/llvm/test/CodeGen/X86/avx512fp16-mov.ll b/llvm/test/CodeGen/X86/avx512fp16-mov.ll
index f0b520ed095e98..f4eb5b952ae436 100644
--- a/llvm/test/CodeGen/X86/avx512fp16-mov.ll
+++ b/llvm/test/CodeGen/X86/avx512fp16-mov.ll
@@ -2094,14 +2094,14 @@ for.end: ; preds = %for.body.preheader,
define <16 x i32> @pr52561(<16 x i32> %a, <16 x i32> %b) "min-legal-vector-width"="256" "prefer-vector-width"="256" nounwind {
; X64-LABEL: pr52561:
; X64: # %bb.0:
-; X64-NEXT: vpaddd %ymm2, %ymm0, %ymm0
; X64-NEXT: vpaddd %ymm3, %ymm1, %ymm1
+; X64-NEXT: vpaddd %ymm2, %ymm0, %ymm0
; X64-NEXT: vpbroadcastd {{.*#+}} ymm2 = [112,112,112,112,112,112,112,112]
-; X64-NEXT: vpaddd %ymm2, %ymm1, %ymm1
; X64-NEXT: vpaddd %ymm2, %ymm0, %ymm0
-; X64-NEXT: vmovd {{.*#+}} xmm2 = [65535,0,0,0,0,0,0,0]
-; X64-NEXT: vpand %ymm2, %ymm0, %ymm0
+; X64-NEXT: vpaddd %ymm2, %ymm1, %ymm1
; X64-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm1
+; X64-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; X64-NEXT: vmovsh %xmm0, %xmm2, %xmm0
; X64-NEXT: retq
;
; X86-LABEL: pr52561:
@@ -2113,11 +2113,11 @@ define <16 x i32> @pr52561(<16 x i32> %a, <16 x i32> %b) "min-legal-vector-width
; X86-NEXT: vpaddd %ymm2, %ymm0, %ymm0
; X86-NEXT: vpaddd 8(%ebp), %ymm1, %ymm1
; X86-NEXT: vpbroadcastd {{.*#+}} ymm2 = [112,112,112,112,112,112,112,112]
-; X86-NEXT: vpaddd %ymm2, %ymm1, %ymm1
; X86-NEXT: vpaddd %ymm2, %ymm0, %ymm0
-; X86-NEXT: vmovd {{.*#+}} xmm2 = [65535,0,0,0,0,0,0,0]
-; X86-NEXT: vpand %ymm2, %ymm0, %ymm0
+; X86-NEXT: vpaddd %ymm2, %ymm1, %ymm1
; X86-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}, %ymm1, %ymm1
+; X86-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; X86-NEXT: vmovsh %xmm0, %xmm2, %xmm0
; X86-NEXT: movl %ebp, %esp
; X86-NEXT: popl %ebp
; X86-NEXT: retl
@@ -2139,9 +2139,9 @@ define <8 x i16> @pr59628_xmm(i16 %arg) {
; X86-LABEL: pr59628_xmm:
; X86: # %bb.0:
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; X86-NEXT: vxorps %xmm0, %xmm0, %xmm0
; X86-NEXT: vpbroadcastw %eax, %xmm1
-; X86-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3,4,5,6,7]
+; X86-NEXT: vmovsh %xmm1, %xmm0, %xmm0
; X86-NEXT: vpcmpneqw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %k1
; X86-NEXT: vmovdqu16 %xmm0, %xmm0 {%k1} {z}
; X86-NEXT: retl
>From e4ab22fb3123e00f9bd2aff255d5e8baa5b74b1a Mon Sep 17 00:00:00 2001
From: mattarde <mattarde at intel.com>
Date: Tue, 17 Sep 2024 02:59:41 -0700
Subject: [PATCH 6/6] revert fp16 or avx102
---
llvm/lib/Target/X86/X86ISelLowering.cpp | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 6e8c8ca3c44d0a..5c2c3dfb232e45 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -38342,8 +38342,7 @@ static bool matchUnaryShuffle(MVT MaskVT, ArrayRef<int> Mask,
// Match against a VZEXT_MOVL instruction, SSE1 only supports 32-bits (MOVSS).
if (((MaskEltSize == 32) || (MaskEltSize == 64 && Subtarget.hasSSE2()) ||
- (MaskEltSize == 16 &&
- (Subtarget.hasFP16() || Subtarget.hasAVX10_2()))) &&
+ (MaskEltSize == 16 && Subtarget.hasFP16())) &&
isUndefOrEqual(Mask[0], 0) &&
isUndefOrZeroInRange(Mask, 1, NumMaskElts - 1)) {
Shuffle = X86ISD::VZEXT_MOVL;