[clang] [llvm] [X86] Support SM4 EVEX version intrinsics/instructions. (PR #113402)
Freddy Ye via llvm-commits
llvm-commits at lists.llvm.org
Sun Oct 27 18:59:51 PDT 2024
https://github.com/FreddyLeaf updated https://github.com/llvm/llvm-project/pull/113402
>From 83a2ef421831dec1790c3c6adf3141ed1ac9a0d5 Mon Sep 17 00:00:00 2001
From: Freddy Ye <freddy.ye at intel.com>
Date: Mon, 21 Oct 2024 09:30:26 +0800
Subject: [PATCH 1/5] [X86] Support SM4 EVEX version intrinsics/instructions.
Ref.: https://cdrdv2.intel.com/v1/dl/getContent/671368
---
clang/include/clang/Basic/BuiltinsX86.def | 4 +
clang/lib/Headers/CMakeLists.txt | 1 +
clang/lib/Headers/immintrin.h | 3 +
clang/lib/Headers/sm4evexintrin.h | 32 +++
clang/test/CodeGen/X86/sm4-evex-builtins.c | 19 ++
llvm/docs/ReleaseNotes.md | 2 +
llvm/include/llvm/IR/IntrinsicsX86.td | 6 +
llvm/lib/Target/X86/X86InstrAVX10.td | 20 ++
llvm/test/CodeGen/X86/sm4-evex-intrinsics.ll | 64 +++++
llvm/test/MC/Disassembler/X86/sm4-evex-32.txt | 170 +++++++++++++
llvm/test/MC/Disassembler/X86/sm4-evex-64.txt | 170 +++++++++++++
llvm/test/MC/X86/sm4-evex-32-att.s | 224 ++++++++++++++++++
llvm/test/MC/X86/sm4-evex-32-intel.s | 169 +++++++++++++
llvm/test/MC/X86/sm4-evex-64-att.s | 169 +++++++++++++
llvm/test/MC/X86/sm4-evex-64-intel.s | 169 +++++++++++++
llvm/test/TableGen/x86-fold-tables.inc | 6 +
16 files changed, 1228 insertions(+)
create mode 100644 clang/lib/Headers/sm4evexintrin.h
create mode 100644 clang/test/CodeGen/X86/sm4-evex-builtins.c
create mode 100644 llvm/test/CodeGen/X86/sm4-evex-intrinsics.ll
create mode 100644 llvm/test/MC/Disassembler/X86/sm4-evex-32.txt
create mode 100644 llvm/test/MC/Disassembler/X86/sm4-evex-64.txt
create mode 100644 llvm/test/MC/X86/sm4-evex-32-att.s
create mode 100644 llvm/test/MC/X86/sm4-evex-32-intel.s
create mode 100644 llvm/test/MC/X86/sm4-evex-64-att.s
create mode 100644 llvm/test/MC/X86/sm4-evex-64-intel.s
diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def
index 4c6b22cca421ca..4486eb73a11fa6 100644
--- a/clang/include/clang/Basic/BuiltinsX86.def
+++ b/clang/include/clang/Basic/BuiltinsX86.def
@@ -2179,6 +2179,10 @@ TARGET_BUILTIN(__builtin_ia32_vsm4key4256, "V8UiV8UiV8Ui", "nV:256:", "sm4")
TARGET_BUILTIN(__builtin_ia32_vsm4rnds4128, "V4UiV4UiV4Ui", "nV:128:", "sm4")
TARGET_BUILTIN(__builtin_ia32_vsm4rnds4256, "V8UiV8UiV8Ui", "nV:256:", "sm4")
+// SM4_EVEX
+TARGET_BUILTIN(__builtin_ia32_vsm4key4512, "V16UiV16UiV16Ui", "nV:512:", "avx10.2-512,sm4")
+TARGET_BUILTIN(__builtin_ia32_vsm4rnds4512, "V16UiV16UiV16Ui", "nV:512:", "avx10.2-512,sm4")
+
// AVX10 MINMAX
TARGET_BUILTIN(__builtin_ia32_vminmaxnepbf16128, "V8yV8yV8yIi", "nV:128:", "avx10.2-256")
TARGET_BUILTIN(__builtin_ia32_vminmaxnepbf16256, "V16yV16yV16yIi", "nV:256:", "avx10.2-256")
diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt
index ff392e7122a448..6a594dad0b67d2 100644
--- a/clang/lib/Headers/CMakeLists.txt
+++ b/clang/lib/Headers/CMakeLists.txt
@@ -241,6 +241,7 @@ set(x86_files
shaintrin.h
sm3intrin.h
sm4intrin.h
+ sm4evexintrin.h
smmintrin.h
tbmintrin.h
tmmintrin.h
diff --git a/clang/lib/Headers/immintrin.h b/clang/lib/Headers/immintrin.h
index 3fbabffa98df20..1b83dd2162707c 100644
--- a/clang/lib/Headers/immintrin.h
+++ b/clang/lib/Headers/immintrin.h
@@ -665,6 +665,9 @@ _storebe_i64(void * __P, long long __D) {
#include <avx10_2_512niintrin.h>
#include <avx10_2_512satcvtdsintrin.h>
#include <avx10_2_512satcvtintrin.h>
+#if !defined(__SCE__) || __has_feature(modules) || defined(__SM4__)
+#include <sm4evexintrin.h>
+#endif
#endif
#if !defined(__SCE__) || __has_feature(modules) || defined(__ENQCMD__)
diff --git a/clang/lib/Headers/sm4evexintrin.h b/clang/lib/Headers/sm4evexintrin.h
new file mode 100644
index 00000000000000..f6ae0037baea03
--- /dev/null
+++ b/clang/lib/Headers/sm4evexintrin.h
@@ -0,0 +1,32 @@
+/*===--------------- sm4evexintrin.h - SM4 EVEX intrinsics -----------------===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===----------------------------------------------------------------------===
+ */
+#ifndef __IMMINTRIN_H
+#error "Never use <sm4evexintrin.h> directly; include <immintrin.h> instead."
+#endif // __IMMINTRIN_H
+
+#ifndef __SM4EVEXINTRIN_H
+#define __SM4EVEXINTRIN_H
+
+#define __DEFAULT_FN_ATTRS512 \
+ __attribute__((__always_inline__, __nodebug__, \
+ __target__("sm4,avx10.2-512"), __min_vector_width__(512)))
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_sm4key4_epi32(__m512i __A, __m512i __B) {
+ return (__m512i)__builtin_ia32_vsm4key4512((__v16su)__A, (__v16su)__B);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_sm4rnds4_epi32(__m512i __A, __m512i __B) {
+ return (__m512i)__builtin_ia32_vsm4rnds4512((__v16su)__A, (__v16su)__B);
+}
+
+#undef __DEFAULT_FN_ATTRS512
+
+#endif // __SM4EVEXINTRIN_H
diff --git a/clang/test/CodeGen/X86/sm4-evex-builtins.c b/clang/test/CodeGen/X86/sm4-evex-builtins.c
new file mode 100644
index 00000000000000..0e54bd008d4fb0
--- /dev/null
+++ b/clang/test/CodeGen/X86/sm4-evex-builtins.c
@@ -0,0 +1,19 @@
+// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-- -target-feature +sm4 \
+// RUN: -target-feature +avx10.2-512 -emit-llvm -o - -Wall -Werror | FileCheck %s
+// RUN: %clang_cc1 %s -ffreestanding -triple=i386-- -target-feature +sm4 \
+// RUN: -target-feature +avx10.2-512 -emit-llvm -o - -Wall -Werror | FileCheck %s
+
+#include <immintrin.h>
+#include <stddef.h>
+
+__m512i test_mm512_sm4key4_epi32(__m512i __A, __m512i __B) {
+ // CHECK-LABEL: @test_mm512_sm4key4_epi32(
+ // CHECK: call <16 x i32> @llvm.x86.vsm4key4512(<16 x i32> %{{.*}}, <16 x i32> %{{.*}})
+ return _mm512_sm4key4_epi32(__A, __B);
+}
+
+__m512i test_mm512_sm4rnds4_epi32(__m512i __A, __m512i __B) {
+ // CHECK-LABEL: @test_mm512_sm4rnds4_epi32(
+ // CHECK: call <16 x i32> @llvm.x86.vsm4rnds4512(<16 x i32> %{{.*}}, <16 x i32> %{{.*}})
+ return _mm512_sm4rnds4_epi32(__A, __B);
+}
diff --git a/llvm/docs/ReleaseNotes.md b/llvm/docs/ReleaseNotes.md
index e5853789c78b63..16764210537689 100644
--- a/llvm/docs/ReleaseNotes.md
+++ b/llvm/docs/ReleaseNotes.md
@@ -196,6 +196,8 @@ Changes to the X86 Backend
* Support ISA of `AVX10.2-256` and `AVX10.2-512`.
+* Support ISA of `SM4(EVEX)`.
+
Changes to the OCaml bindings
-----------------------------
diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td
index 5262e3154ff721..7725bda1f4f598 100644
--- a/llvm/include/llvm/IR/IntrinsicsX86.td
+++ b/llvm/include/llvm/IR/IntrinsicsX86.td
@@ -6109,6 +6109,12 @@ let TargetPrefix = "x86" in {
DefaultAttrsIntrinsic<[llvm_v8i32_ty],
[llvm_v8i32_ty, llvm_v8i32_ty],
[IntrNoMem]>;
+def int_x86_vsm4key4512 : ClangBuiltin<"__builtin_ia32_vsm4key4512">,
+ DefaultAttrsIntrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty],
+ [IntrNoMem]>;
+def int_x86_vsm4rnds4512 : ClangBuiltin<"__builtin_ia32_vsm4rnds4512">,
+ DefaultAttrsIntrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty],
+ [IntrNoMem]>;
}
//===----------------------------------------------------------------------===//
// RAO-INT intrinsics
diff --git a/llvm/lib/Target/X86/X86InstrAVX10.td b/llvm/lib/Target/X86/X86InstrAVX10.td
index 625f2e01d47218..640011f5ed28d7 100644
--- a/llvm/lib/Target/X86/X86InstrAVX10.td
+++ b/llvm/lib/Target/X86/X86InstrAVX10.td
@@ -1647,3 +1647,23 @@ let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
def : InstAlias<"vmovw.s\t{$src, $dst|$dst, $src}",
(VMOVZPWILo2PWIZrr2 VR128X:$dst, VR128X:$src), 0>;
}
+
+// SM4(EVEX)
+multiclass avx10_sm4_base<string OpStr> {
+ // SM4_Base is in X86InstrSSE.td.
+ let Predicates = [HasSM4, HasAVX10_2] in {
+ defm Z128 : SM4_Base<OpStr, avx512vl_i32_info.info128.RC,
+ "128", avx512vl_i32_info.info128.LdFrag,
+ avx512vl_i32_info.info128.MemOp>, EVEX_V128;
+ defm Z256 : SM4_Base<OpStr, avx512vl_i32_info.info256.RC,
+ "256", avx512vl_i32_info.info256.LdFrag,
+ avx512vl_i32_info.info256.MemOp>, EVEX_V256;
+ }
+ let Predicates = [HasSM4, HasAVX10_2_512] in
+ defm Z : SM4_Base<OpStr, avx512vl_i32_info.info512.RC,
+ "512", avx512vl_i32_info.info512.LdFrag,
+ avx512vl_i32_info.info512.MemOp>, EVEX_V512;
+}
+
+defm VSM4KEY4 : avx10_sm4_base<"vsm4key4">, T8, XS, EVEX, VVVV;
+defm VSM4RNDS4 : avx10_sm4_base<"vsm4rnds4">, T8, XD, EVEX, VVVV;
diff --git a/llvm/test/CodeGen/X86/sm4-evex-intrinsics.ll b/llvm/test/CodeGen/X86/sm4-evex-intrinsics.ll
new file mode 100644
index 00000000000000..fc46d3cf23fd41
--- /dev/null
+++ b/llvm/test/CodeGen/X86/sm4-evex-intrinsics.ll
@@ -0,0 +1,64 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-- --show-mc-encoding -mattr=+sm4,+avx10.2-512 | FileCheck %s
+; RUN: llc < %s -verify-machineinstrs -mtriple=i686-- --show-mc-encoding -mattr=+sm4,+avx10.2-512 | FileCheck %s
+
+define <4 x i32> @test_int_x86_vsm4key4128(<4 x i32> %A, <4 x i32> %B) {
+; CHECK-LABEL: test_int_x86_vsm4key4128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsm4key4 %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x7a,0xda,0xc1]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %ret = call <4 x i32> @llvm.x86.vsm4key4128(<4 x i32> %A, <4 x i32> %B)
+ ret <4 x i32> %ret
+}
+declare <4 x i32> @llvm.x86.vsm4key4128(<4 x i32> %A, <4 x i32> %B)
+
+define <8 x i32> @test_int_x86_vsm4key4256(<8 x i32> %A, <8 x i32> %B) {
+; CHECK-LABEL: test_int_x86_vsm4key4256:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsm4key4 %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7e,0xda,0xc1]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %ret = call <8 x i32> @llvm.x86.vsm4key4256(<8 x i32> %A, <8 x i32> %B)
+ ret <8 x i32> %ret
+}
+declare <8 x i32> @llvm.x86.vsm4key4256(<8 x i32> %A, <8 x i32> %B)
+
+define <16 x i32> @test_int_x86_vsm4key4512(<16 x i32> %A, <16 x i32> %B) {
+; CHECK-LABEL: test_int_x86_vsm4key4512:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsm4key4 %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7e,0x48,0xda,0xc1]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %ret = call <16 x i32> @llvm.x86.vsm4key4512(<16 x i32> %A, <16 x i32> %B)
+ ret <16 x i32> %ret
+}
+declare <16 x i32> @llvm.x86.vsm4key4512(<16 x i32> %A, <16 x i32> %B)
+
+define <4 x i32> @test_int_x86_vsm4rnds4128(<4 x i32> %A, <4 x i32> %B) {
+; CHECK-LABEL: test_int_x86_vsm4rnds4128:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsm4rnds4 %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x7b,0xda,0xc1]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %ret = call <4 x i32> @llvm.x86.vsm4rnds4128(<4 x i32> %A, <4 x i32> %B)
+ ret <4 x i32> %ret
+}
+declare <4 x i32> @llvm.x86.vsm4rnds4128(<4 x i32> %A, <4 x i32> %B)
+
+define <8 x i32> @test_int_x86_vsm4rnds4256(<8 x i32> %A, <8 x i32> %B) {
+; CHECK-LABEL: test_int_x86_vsm4rnds4256:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsm4rnds4 %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7f,0xda,0xc1]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %ret = call <8 x i32> @llvm.x86.vsm4rnds4256(<8 x i32> %A, <8 x i32> %B)
+ ret <8 x i32> %ret
+}
+declare <8 x i32> @llvm.x86.vsm4rnds4256(<8 x i32> %A, <8 x i32> %B)
+
+define <16 x i32> @test_int_x86_vsm4rnds4512(<16 x i32> %A, <16 x i32> %B) {
+; CHECK-LABEL: test_int_x86_vsm4rnds4512:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsm4rnds4 %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7f,0x48,0xda,0xc1]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %ret = call <16 x i32> @llvm.x86.vsm4rnds4512(<16 x i32> %A, <16 x i32> %B)
+ ret <16 x i32> %ret
+}
+declare <16 x i32> @llvm.x86.vsm4rnds4512(<16 x i32> %A, <16 x i32> %B)
+
diff --git a/llvm/test/MC/Disassembler/X86/sm4-evex-32.txt b/llvm/test/MC/Disassembler/X86/sm4-evex-32.txt
new file mode 100644
index 00000000000000..f89f4b5a8c0fb8
--- /dev/null
+++ b/llvm/test/MC/Disassembler/X86/sm4-evex-32.txt
@@ -0,0 +1,170 @@
+# RUN: llvm-mc --disassemble %s -triple=x86_64 | FileCheck %s --check-prefixes=ATT
+# RUN: llvm-mc --disassemble %s -triple=x86_64 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL
+
+# ATT: vsm4key4 %zmm24, %zmm23, %zmm22
+# INTEL: vsm4key4 zmm22, zmm23, zmm24
+0x62,0x82,0x46,0x40,0xda,0xf0
+
+# ATT: vsm4key4 268435456(%rbp,%r14,8), %zmm23, %zmm22
+# INTEL: vsm4key4 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa2,0x46,0x40,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vsm4key4 291(%r8,%rax,4), %zmm23, %zmm22
+# INTEL: vsm4key4 zmm22, zmm23, zmmword ptr [r8 + 4*rax + 291]
+0x62,0xc2,0x46,0x40,0xda,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vsm4key4 (%rip), %zmm23, %zmm22
+# INTEL: vsm4key4 zmm22, zmm23, zmmword ptr [rip]
+0x62,0xe2,0x46,0x40,0xda,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vsm4key4 -2048(,%rbp,2), %zmm23, %zmm22
+# INTEL: vsm4key4 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+0x62,0xe2,0x46,0x40,0xda,0x34,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vsm4key4 8128(%rcx), %zmm23, %zmm22
+# INTEL: vsm4key4 zmm22, zmm23, zmmword ptr [rcx + 8128]
+0x62,0xe2,0x46,0x40,0xda,0x71,0x7f
+
+# ATT: vsm4key4 -8192(%rdx), %zmm23, %zmm22
+# INTEL: vsm4key4 zmm22, zmm23, zmmword ptr [rdx - 8192]
+0x62,0xe2,0x46,0x40,0xda,0x72,0x80
+
+# ATT: vsm4rnds4 %zmm24, %zmm23, %zmm22
+# INTEL: vsm4rnds4 zmm22, zmm23, zmm24
+0x62,0x82,0x47,0x40,0xda,0xf0
+
+# ATT: vsm4rnds4 268435456(%rbp,%r14,8), %zmm23, %zmm22
+# INTEL: vsm4rnds4 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa2,0x47,0x40,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vsm4rnds4 291(%r8,%rax,4), %zmm23, %zmm22
+# INTEL: vsm4rnds4 zmm22, zmm23, zmmword ptr [r8 + 4*rax + 291]
+0x62,0xc2,0x47,0x40,0xda,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vsm4rnds4 (%rip), %zmm23, %zmm22
+# INTEL: vsm4rnds4 zmm22, zmm23, zmmword ptr [rip]
+0x62,0xe2,0x47,0x40,0xda,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vsm4rnds4 -2048(,%rbp,2), %zmm23, %zmm22
+# INTEL: vsm4rnds4 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+0x62,0xe2,0x47,0x40,0xda,0x34,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vsm4rnds4 8128(%rcx), %zmm23, %zmm22
+# INTEL: vsm4rnds4 zmm22, zmm23, zmmword ptr [rcx + 8128]
+0x62,0xe2,0x47,0x40,0xda,0x71,0x7f
+
+# ATT: vsm4rnds4 -8192(%rdx), %zmm23, %zmm22
+# INTEL: vsm4rnds4 zmm22, zmm23, zmmword ptr [rdx - 8192]
+0x62,0xe2,0x47,0x40,0xda,0x72,0x80
+
+# ATT: vsm4key4 %ymm24, %ymm23, %ymm22
+# INTEL: vsm4key4 ymm22, ymm23, ymm24
+0x62,0x82,0x46,0x20,0xda,0xf0
+
+# ATT: vsm4key4 %xmm24, %xmm23, %xmm22
+# INTEL: vsm4key4 xmm22, xmm23, xmm24
+0x62,0x82,0x46,0x00,0xda,0xf0
+
+# ATT: vsm4key4 268435456(%rbp,%r14,8), %ymm23, %ymm22
+# INTEL: vsm4key4 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa2,0x46,0x20,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vsm4key4 291(%r8,%rax,4), %ymm23, %ymm22
+# INTEL: vsm4key4 ymm22, ymm23, ymmword ptr [r8 + 4*rax + 291]
+0x62,0xc2,0x46,0x20,0xda,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vsm4key4 (%rip), %ymm23, %ymm22
+# INTEL: vsm4key4 ymm22, ymm23, ymmword ptr [rip]
+0x62,0xe2,0x46,0x20,0xda,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vsm4key4 -1024(,%rbp,2), %ymm23, %ymm22
+# INTEL: vsm4key4 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+0x62,0xe2,0x46,0x20,0xda,0x34,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT: vsm4key4 4064(%rcx), %ymm23, %ymm22
+# INTEL: vsm4key4 ymm22, ymm23, ymmword ptr [rcx + 4064]
+0x62,0xe2,0x46,0x20,0xda,0x71,0x7f
+
+# ATT: vsm4key4 -4096(%rdx), %ymm23, %ymm22
+# INTEL: vsm4key4 ymm22, ymm23, ymmword ptr [rdx - 4096]
+0x62,0xe2,0x46,0x20,0xda,0x72,0x80
+
+# ATT: vsm4key4 268435456(%rbp,%r14,8), %xmm23, %xmm22
+# INTEL: vsm4key4 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa2,0x46,0x00,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vsm4key4 291(%r8,%rax,4), %xmm23, %xmm22
+# INTEL: vsm4key4 xmm22, xmm23, xmmword ptr [r8 + 4*rax + 291]
+0x62,0xc2,0x46,0x00,0xda,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vsm4key4 (%rip), %xmm23, %xmm22
+# INTEL: vsm4key4 xmm22, xmm23, xmmword ptr [rip]
+0x62,0xe2,0x46,0x00,0xda,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vsm4key4 -512(,%rbp,2), %xmm23, %xmm22
+# INTEL: vsm4key4 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+0x62,0xe2,0x46,0x00,0xda,0x34,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: vsm4key4 2032(%rcx), %xmm23, %xmm22
+# INTEL: vsm4key4 xmm22, xmm23, xmmword ptr [rcx + 2032]
+0x62,0xe2,0x46,0x00,0xda,0x71,0x7f
+
+# ATT: vsm4key4 -2048(%rdx), %xmm23, %xmm22
+# INTEL: vsm4key4 xmm22, xmm23, xmmword ptr [rdx - 2048]
+0x62,0xe2,0x46,0x00,0xda,0x72,0x80
+
+# ATT: vsm4rnds4 %ymm24, %ymm23, %ymm22
+# INTEL: vsm4rnds4 ymm22, ymm23, ymm24
+0x62,0x82,0x47,0x20,0xda,0xf0
+
+# ATT: vsm4rnds4 %xmm24, %xmm23, %xmm22
+# INTEL: vsm4rnds4 xmm22, xmm23, xmm24
+0x62,0x82,0x47,0x00,0xda,0xf0
+
+# ATT: vsm4rnds4 268435456(%rbp,%r14,8), %ymm23, %ymm22
+# INTEL: vsm4rnds4 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa2,0x47,0x20,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vsm4rnds4 291(%r8,%rax,4), %ymm23, %ymm22
+# INTEL: vsm4rnds4 ymm22, ymm23, ymmword ptr [r8 + 4*rax + 291]
+0x62,0xc2,0x47,0x20,0xda,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vsm4rnds4 (%rip), %ymm23, %ymm22
+# INTEL: vsm4rnds4 ymm22, ymm23, ymmword ptr [rip]
+0x62,0xe2,0x47,0x20,0xda,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vsm4rnds4 -1024(,%rbp,2), %ymm23, %ymm22
+# INTEL: vsm4rnds4 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+0x62,0xe2,0x47,0x20,0xda,0x34,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT: vsm4rnds4 4064(%rcx), %ymm23, %ymm22
+# INTEL: vsm4rnds4 ymm22, ymm23, ymmword ptr [rcx + 4064]
+0x62,0xe2,0x47,0x20,0xda,0x71,0x7f
+
+# ATT: vsm4rnds4 -4096(%rdx), %ymm23, %ymm22
+# INTEL: vsm4rnds4 ymm22, ymm23, ymmword ptr [rdx - 4096]
+0x62,0xe2,0x47,0x20,0xda,0x72,0x80
+
+# ATT: vsm4rnds4 268435456(%rbp,%r14,8), %xmm23, %xmm22
+# INTEL: vsm4rnds4 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa2,0x47,0x00,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT: vsm4rnds4 291(%r8,%rax,4), %xmm23, %xmm22
+# INTEL: vsm4rnds4 xmm22, xmm23, xmmword ptr [r8 + 4*rax + 291]
+0x62,0xc2,0x47,0x00,0xda,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT: vsm4rnds4 (%rip), %xmm23, %xmm22
+# INTEL: vsm4rnds4 xmm22, xmm23, xmmword ptr [rip]
+0x62,0xe2,0x47,0x00,0xda,0x35,0x00,0x00,0x00,0x00
+
+# ATT: vsm4rnds4 -512(,%rbp,2), %xmm23, %xmm22
+# INTEL: vsm4rnds4 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+0x62,0xe2,0x47,0x00,0xda,0x34,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: vsm4rnds4 2032(%rcx), %xmm23, %xmm22
+# INTEL: vsm4rnds4 xmm22, xmm23, xmmword ptr [rcx + 2032]
+0x62,0xe2,0x47,0x00,0xda,0x71,0x7f
+
+# ATT: vsm4rnds4 -2048(%rdx), %xmm23, %xmm22
+# INTEL: vsm4rnds4 xmm22, xmm23, xmmword ptr [rdx - 2048]
+0x62,0xe2,0x47,0x00,0xda,0x72,0x80
diff --git a/llvm/test/MC/Disassembler/X86/sm4-evex-64.txt b/llvm/test/MC/Disassembler/X86/sm4-evex-64.txt
new file mode 100644
index 00000000000000..c1cb271a967b13
--- /dev/null
+++ b/llvm/test/MC/Disassembler/X86/sm4-evex-64.txt
@@ -0,0 +1,170 @@
+# RUN: llvm-mc --disassemble %s -triple=i386-unknown-unknown | FileCheck %s --check-prefixes=ATT
+# RUN: llvm-mc --disassemble %s -triple=i386-unknown-unknown -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL
+
+# ATT: vsm4key4 %zmm4, %zmm3, %zmm2
+# INTEL: vsm4key4 zmm2, zmm3, zmm4
+0x62,0xf2,0x66,0x48,0xda,0xd4
+
+# ATT: vsm4key4 268435456(%esp,%esi,8), %zmm3, %zmm2
+# INTEL: vsm4key4 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf2,0x66,0x48,0xda,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vsm4key4 291(%edi,%eax,4), %zmm3, %zmm2
+# INTEL: vsm4key4 zmm2, zmm3, zmmword ptr [edi + 4*eax + 291]
+0x62,0xf2,0x66,0x48,0xda,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vsm4key4 (%eax), %zmm3, %zmm2
+# INTEL: vsm4key4 zmm2, zmm3, zmmword ptr [eax]
+0x62,0xf2,0x66,0x48,0xda,0x10
+
+# ATT: vsm4key4 -2048(,%ebp,2), %zmm3, %zmm2
+# INTEL: vsm4key4 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+0x62,0xf2,0x66,0x48,0xda,0x14,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vsm4key4 8128(%ecx), %zmm3, %zmm2
+# INTEL: vsm4key4 zmm2, zmm3, zmmword ptr [ecx + 8128]
+0x62,0xf2,0x66,0x48,0xda,0x51,0x7f
+
+# ATT: vsm4key4 -8192(%edx), %zmm3, %zmm2
+# INTEL: vsm4key4 zmm2, zmm3, zmmword ptr [edx - 8192]
+0x62,0xf2,0x66,0x48,0xda,0x52,0x80
+
+# ATT: vsm4rnds4 %zmm4, %zmm3, %zmm2
+# INTEL: vsm4rnds4 zmm2, zmm3, zmm4
+0x62,0xf2,0x67,0x48,0xda,0xd4
+
+# ATT: vsm4rnds4 268435456(%esp,%esi,8), %zmm3, %zmm2
+# INTEL: vsm4rnds4 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf2,0x67,0x48,0xda,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vsm4rnds4 291(%edi,%eax,4), %zmm3, %zmm2
+# INTEL: vsm4rnds4 zmm2, zmm3, zmmword ptr [edi + 4*eax + 291]
+0x62,0xf2,0x67,0x48,0xda,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vsm4rnds4 (%eax), %zmm3, %zmm2
+# INTEL: vsm4rnds4 zmm2, zmm3, zmmword ptr [eax]
+0x62,0xf2,0x67,0x48,0xda,0x10
+
+# ATT: vsm4rnds4 -2048(,%ebp,2), %zmm3, %zmm2
+# INTEL: vsm4rnds4 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+0x62,0xf2,0x67,0x48,0xda,0x14,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT: vsm4rnds4 8128(%ecx), %zmm3, %zmm2
+# INTEL: vsm4rnds4 zmm2, zmm3, zmmword ptr [ecx + 8128]
+0x62,0xf2,0x67,0x48,0xda,0x51,0x7f
+
+# ATT: vsm4rnds4 -8192(%edx), %zmm3, %zmm2
+# INTEL: vsm4rnds4 zmm2, zmm3, zmmword ptr [edx - 8192]
+0x62,0xf2,0x67,0x48,0xda,0x52,0x80
+
+# ATT: vsm4key4 %ymm4, %ymm3, %ymm2
+# INTEL: vsm4key4 ymm2, ymm3, ymm4
+0x62,0xf2,0x66,0x28,0xda,0xd4
+
+# ATT: vsm4key4 %xmm4, %xmm3, %xmm2
+# INTEL: vsm4key4 xmm2, xmm3, xmm4
+0x62,0xf2,0x66,0x08,0xda,0xd4
+
+# ATT: vsm4key4 268435456(%esp,%esi,8), %ymm3, %ymm2
+# INTEL: vsm4key4 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+0x62,0xf2,0x66,0x28,0xda,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vsm4key4 291(%edi,%eax,4), %ymm3, %ymm2
+# INTEL: vsm4key4 ymm2, ymm3, ymmword ptr [edi + 4*eax + 291]
+0x62,0xf2,0x66,0x28,0xda,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vsm4key4 (%eax), %ymm3, %ymm2
+# INTEL: vsm4key4 ymm2, ymm3, ymmword ptr [eax]
+0x62,0xf2,0x66,0x28,0xda,0x10
+
+# ATT: vsm4key4 -1024(,%ebp,2), %ymm3, %ymm2
+# INTEL: vsm4key4 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+0x62,0xf2,0x66,0x28,0xda,0x14,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT: vsm4key4 4064(%ecx), %ymm3, %ymm2
+# INTEL: vsm4key4 ymm2, ymm3, ymmword ptr [ecx + 4064]
+0x62,0xf2,0x66,0x28,0xda,0x51,0x7f
+
+# ATT: vsm4key4 -4096(%edx), %ymm3, %ymm2
+# INTEL: vsm4key4 ymm2, ymm3, ymmword ptr [edx - 4096]
+0x62,0xf2,0x66,0x28,0xda,0x52,0x80
+
+# ATT: vsm4key4 268435456(%esp,%esi,8), %xmm3, %xmm2
+# INTEL: vsm4key4 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf2,0x66,0x08,0xda,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vsm4key4 291(%edi,%eax,4), %xmm3, %xmm2
+# INTEL: vsm4key4 xmm2, xmm3, xmmword ptr [edi + 4*eax + 291]
+0x62,0xf2,0x66,0x08,0xda,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vsm4key4 (%eax), %xmm3, %xmm2
+# INTEL: vsm4key4 xmm2, xmm3, xmmword ptr [eax]
+0x62,0xf2,0x66,0x08,0xda,0x10
+
+# ATT: vsm4key4 -512(,%ebp,2), %xmm3, %xmm2
+# INTEL: vsm4key4 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+0x62,0xf2,0x66,0x08,0xda,0x14,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: vsm4key4 2032(%ecx), %xmm3, %xmm2
+# INTEL: vsm4key4 xmm2, xmm3, xmmword ptr [ecx + 2032]
+0x62,0xf2,0x66,0x08,0xda,0x51,0x7f
+
+# ATT: vsm4key4 -2048(%edx), %xmm3, %xmm2
+# INTEL: vsm4key4 xmm2, xmm3, xmmword ptr [edx - 2048]
+0x62,0xf2,0x66,0x08,0xda,0x52,0x80
+
+# ATT: vsm4rnds4 %ymm4, %ymm3, %ymm2
+# INTEL: vsm4rnds4 ymm2, ymm3, ymm4
+0x62,0xf2,0x67,0x28,0xda,0xd4
+
+# ATT: vsm4rnds4 %xmm4, %xmm3, %xmm2
+# INTEL: vsm4rnds4 xmm2, xmm3, xmm4
+0x62,0xf2,0x67,0x08,0xda,0xd4
+
+# ATT: vsm4rnds4 268435456(%esp,%esi,8), %ymm3, %ymm2
+# INTEL: vsm4rnds4 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+0x62,0xf2,0x67,0x28,0xda,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vsm4rnds4 291(%edi,%eax,4), %ymm3, %ymm2
+# INTEL: vsm4rnds4 ymm2, ymm3, ymmword ptr [edi + 4*eax + 291]
+0x62,0xf2,0x67,0x28,0xda,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vsm4rnds4 (%eax), %ymm3, %ymm2
+# INTEL: vsm4rnds4 ymm2, ymm3, ymmword ptr [eax]
+0x62,0xf2,0x67,0x28,0xda,0x10
+
+# ATT: vsm4rnds4 -1024(,%ebp,2), %ymm3, %ymm2
+# INTEL: vsm4rnds4 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+0x62,0xf2,0x67,0x28,0xda,0x14,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT: vsm4rnds4 4064(%ecx), %ymm3, %ymm2
+# INTEL: vsm4rnds4 ymm2, ymm3, ymmword ptr [ecx + 4064]
+0x62,0xf2,0x67,0x28,0xda,0x51,0x7f
+
+# ATT: vsm4rnds4 -4096(%edx), %ymm3, %ymm2
+# INTEL: vsm4rnds4 ymm2, ymm3, ymmword ptr [edx - 4096]
+0x62,0xf2,0x67,0x28,0xda,0x52,0x80
+
+# ATT: vsm4rnds4 268435456(%esp,%esi,8), %xmm3, %xmm2
+# INTEL: vsm4rnds4 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf2,0x67,0x08,0xda,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT: vsm4rnds4 291(%edi,%eax,4), %xmm3, %xmm2
+# INTEL: vsm4rnds4 xmm2, xmm3, xmmword ptr [edi + 4*eax + 291]
+0x62,0xf2,0x67,0x08,0xda,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT: vsm4rnds4 (%eax), %xmm3, %xmm2
+# INTEL: vsm4rnds4 xmm2, xmm3, xmmword ptr [eax]
+0x62,0xf2,0x67,0x08,0xda,0x10
+
+# ATT: vsm4rnds4 -512(,%ebp,2), %xmm3, %xmm2
+# INTEL: vsm4rnds4 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+0x62,0xf2,0x67,0x08,0xda,0x14,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT: vsm4rnds4 2032(%ecx), %xmm3, %xmm2
+# INTEL: vsm4rnds4 xmm2, xmm3, xmmword ptr [ecx + 2032]
+0x62,0xf2,0x67,0x08,0xda,0x51,0x7f
+
+# ATT: vsm4rnds4 -2048(%edx), %xmm3, %xmm2
+# INTEL: vsm4rnds4 xmm2, xmm3, xmmword ptr [edx - 2048]
+0x62,0xf2,0x67,0x08,0xda,0x52,0x80
diff --git a/llvm/test/MC/X86/sm4-evex-32-att.s b/llvm/test/MC/X86/sm4-evex-32-att.s
new file mode 100644
index 00000000000000..389a29b1189795
--- /dev/null
+++ b/llvm/test/MC/X86/sm4-evex-32-att.s
@@ -0,0 +1,224 @@
+// RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding %s | FileCheck %s
+
+// CHECK: vsm4key4 %zmm24, %zmm23, %zmm22
+// CHECK: encoding: [0x62,0x82,0x46,0x40,0xda,0xf0]
+ vsm4key4 %zmm24, %zmm23, %zmm22
+
+// CHECK: vsm4key4 268435456(%rbp,%r14,8), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xa2,0x46,0x40,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vsm4key4 268435456(%rbp,%r14,8), %zmm23, %zmm22
+
+// CHECK: vsm4key4 291(%r8,%rax,4), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xc2,0x46,0x40,0xda,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vsm4key4 291(%r8,%rax,4), %zmm23, %zmm22
+
+// CHECK: vsm4key4 (%rip), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe2,0x46,0x40,0xda,0x35,0x00,0x00,0x00,0x00]
+ vsm4key4 (%rip), %zmm23, %zmm22
+
+// CHECK: vsm4key4 -2048(,%rbp,2), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe2,0x46,0x40,0xda,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vsm4key4 -2048(,%rbp,2), %zmm23, %zmm22
+
+// CHECK: vsm4key4 8128(%rcx), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe2,0x46,0x40,0xda,0x71,0x7f]
+ vsm4key4 8128(%rcx), %zmm23, %zmm22
+
+// CHECK: vsm4key4 -8192(%rdx), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe2,0x46,0x40,0xda,0x72,0x80]
+ vsm4key4 -8192(%rdx), %zmm23, %zmm22
+
+// CHECK: vsm4rnds4 %zmm24, %zmm23, %zmm22
+// CHECK: encoding: [0x62,0x82,0x47,0x40,0xda,0xf0]
+ vsm4rnds4 %zmm24, %zmm23, %zmm22
+
+// CHECK: vsm4rnds4 268435456(%rbp,%r14,8), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xa2,0x47,0x40,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vsm4rnds4 268435456(%rbp,%r14,8), %zmm23, %zmm22
+
+// CHECK: vsm4rnds4 291(%r8,%rax,4), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xc2,0x47,0x40,0xda,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vsm4rnds4 291(%r8,%rax,4), %zmm23, %zmm22
+
+// CHECK: vsm4rnds4 (%rip), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe2,0x47,0x40,0xda,0x35,0x00,0x00,0x00,0x00]
+ vsm4rnds4 (%rip), %zmm23, %zmm22
+
+// CHECK: vsm4rnds4 -2048(,%rbp,2), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe2,0x47,0x40,0xda,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vsm4rnds4 -2048(,%rbp,2), %zmm23, %zmm22
+
+// CHECK: vsm4rnds4 8128(%rcx), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe2,0x47,0x40,0xda,0x71,0x7f]
+ vsm4rnds4 8128(%rcx), %zmm23, %zmm22
+
+// CHECK: vsm4rnds4 -8192(%rdx), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe2,0x47,0x40,0xda,0x72,0x80]
+ vsm4rnds4 -8192(%rdx), %zmm23, %zmm22
+
+// CHECK: vsm4key4 %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x82,0x46,0x20,0xda,0xf0]
+ vsm4key4 %ymm24, %ymm23, %ymm22
+
+// CHECK: vsm4key4 %xmm24, %xmm23, %xmm22
+// CHECK: encoding: [0x62,0x82,0x46,0x00,0xda,0xf0]
+ vsm4key4 %xmm24, %xmm23, %xmm22
+
+// CHECK: vsm4key4 268435456(%rbp,%r14,8), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xa2,0x46,0x20,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vsm4key4 268435456(%rbp,%r14,8), %ymm23, %ymm22
+
+// CHECK: vsm4key4 291(%r8,%rax,4), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xc2,0x46,0x20,0xda,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vsm4key4 291(%r8,%rax,4), %ymm23, %ymm22
+
+// CHECK: vsm4key4 (%rip), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe2,0x46,0x20,0xda,0x35,0x00,0x00,0x00,0x00]
+ vsm4key4 (%rip), %ymm23, %ymm22
+
+// CHECK: vsm4key4 -1024(,%rbp,2), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe2,0x46,0x20,0xda,0x34,0x6d,0x00,0xfc,0xff,0xff]
+ vsm4key4 -1024(,%rbp,2), %ymm23, %ymm22
+
+// CHECK: vsm4key4 4064(%rcx), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe2,0x46,0x20,0xda,0x71,0x7f]
+ vsm4key4 4064(%rcx), %ymm23, %ymm22
+
+// CHECK: vsm4key4 -4096(%rdx), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe2,0x46,0x20,0xda,0x72,0x80]
+ vsm4key4 -4096(%rdx), %ymm23, %ymm22
+
+// CHECK: vsm4key4 268435456(%rbp,%r14,8), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xa2,0x46,0x00,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vsm4key4 268435456(%rbp,%r14,8), %xmm23, %xmm22
+
+// CHECK: vsm4key4 291(%r8,%rax,4), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xc2,0x46,0x00,0xda,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vsm4key4 291(%r8,%rax,4), %xmm23, %xmm22
+
+// CHECK: vsm4key4 (%rip), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe2,0x46,0x00,0xda,0x35,0x00,0x00,0x00,0x00]
+ vsm4key4 (%rip), %xmm23, %xmm22
+
+// CHECK: vsm4key4 -512(,%rbp,2), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe2,0x46,0x00,0xda,0x34,0x6d,0x00,0xfe,0xff,0xff]
+ vsm4key4 -512(,%rbp,2), %xmm23, %xmm22
+
+// CHECK: vsm4key4 2032(%rcx), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe2,0x46,0x00,0xda,0x71,0x7f]
+ vsm4key4 2032(%rcx), %xmm23, %xmm22
+
+// CHECK: vsm4key4 -2048(%rdx), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe2,0x46,0x00,0xda,0x72,0x80]
+ vsm4key4 -2048(%rdx), %xmm23, %xmm22
+// CHECK: vsm4key4 %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x82,0x46,0x20,0xda,0xf0]
+ vsm4key4 %ymm24, %ymm23, %ymm22
+
+// CHECK: vsm4key4 %xmm24, %xmm23, %xmm22
+// CHECK: encoding: [0x62,0x82,0x46,0x00,0xda,0xf0]
+ vsm4key4 %xmm24, %xmm23, %xmm22
+
+// CHECK: vsm4key4 268435456(%rbp,%r14,8), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xa2,0x46,0x20,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vsm4key4 268435456(%rbp,%r14,8), %ymm23, %ymm22
+
+// CHECK: vsm4key4 291(%r8,%rax,4), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xc2,0x46,0x20,0xda,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vsm4key4 291(%r8,%rax,4), %ymm23, %ymm22
+
+// CHECK: vsm4key4 (%rip), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe2,0x46,0x20,0xda,0x35,0x00,0x00,0x00,0x00]
+ vsm4key4 (%rip), %ymm23, %ymm22
+
+// CHECK: vsm4key4 -1024(,%rbp,2), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe2,0x46,0x20,0xda,0x34,0x6d,0x00,0xfc,0xff,0xff]
+ vsm4key4 -1024(,%rbp,2), %ymm23, %ymm22
+
+// CHECK: vsm4key4 4064(%rcx), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe2,0x46,0x20,0xda,0x71,0x7f]
+ vsm4key4 4064(%rcx), %ymm23, %ymm22
+
+// CHECK: vsm4key4 -4096(%rdx), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe2,0x46,0x20,0xda,0x72,0x80]
+ vsm4key4 -4096(%rdx), %ymm23, %ymm22
+
+// CHECK: vsm4key4 268435456(%rbp,%r14,8), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xa2,0x46,0x00,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vsm4key4 268435456(%rbp,%r14,8), %xmm23, %xmm22
+
+// CHECK: vsm4key4 291(%r8,%rax,4), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xc2,0x46,0x00,0xda,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vsm4key4 291(%r8,%rax,4), %xmm23, %xmm22
+
+// CHECK: vsm4key4 (%rip), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe2,0x46,0x00,0xda,0x35,0x00,0x00,0x00,0x00]
+ vsm4key4 (%rip), %xmm23, %xmm22
+
+// CHECK: vsm4key4 -512(,%rbp,2), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe2,0x46,0x00,0xda,0x34,0x6d,0x00,0xfe,0xff,0xff]
+ vsm4key4 -512(,%rbp,2), %xmm23, %xmm22
+
+// CHECK: vsm4key4 2032(%rcx), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe2,0x46,0x00,0xda,0x71,0x7f]
+ vsm4key4 2032(%rcx), %xmm23, %xmm22
+
+// CHECK: vsm4key4 -2048(%rdx), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe2,0x46,0x00,0xda,0x72,0x80]
+ vsm4key4 -2048(%rdx), %xmm23, %xmm22
+
+// CHECK: vsm4rnds4 %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x82,0x47,0x20,0xda,0xf0]
+ vsm4rnds4 %ymm24, %ymm23, %ymm22
+
+// CHECK: vsm4rnds4 %xmm24, %xmm23, %xmm22
+// CHECK: encoding: [0x62,0x82,0x47,0x00,0xda,0xf0]
+ vsm4rnds4 %xmm24, %xmm23, %xmm22
+
+// CHECK: vsm4rnds4 268435456(%rbp,%r14,8), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xa2,0x47,0x20,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vsm4rnds4 268435456(%rbp,%r14,8), %ymm23, %ymm22
+
+// CHECK: vsm4rnds4 291(%r8,%rax,4), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xc2,0x47,0x20,0xda,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vsm4rnds4 291(%r8,%rax,4), %ymm23, %ymm22
+
+// CHECK: vsm4rnds4 (%rip), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe2,0x47,0x20,0xda,0x35,0x00,0x00,0x00,0x00]
+ vsm4rnds4 (%rip), %ymm23, %ymm22
+
+// CHECK: vsm4rnds4 -1024(,%rbp,2), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe2,0x47,0x20,0xda,0x34,0x6d,0x00,0xfc,0xff,0xff]
+ vsm4rnds4 -1024(,%rbp,2), %ymm23, %ymm22
+
+// CHECK: vsm4rnds4 4064(%rcx), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe2,0x47,0x20,0xda,0x71,0x7f]
+ vsm4rnds4 4064(%rcx), %ymm23, %ymm22
+
+// CHECK: vsm4rnds4 -4096(%rdx), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe2,0x47,0x20,0xda,0x72,0x80]
+ vsm4rnds4 -4096(%rdx), %ymm23, %ymm22
+
+// CHECK: vsm4rnds4 268435456(%rbp,%r14,8), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xa2,0x47,0x00,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vsm4rnds4 268435456(%rbp,%r14,8), %xmm23, %xmm22
+
+// CHECK: vsm4rnds4 291(%r8,%rax,4), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xc2,0x47,0x00,0xda,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vsm4rnds4 291(%r8,%rax,4), %xmm23, %xmm22
+
+// CHECK: vsm4rnds4 (%rip), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe2,0x47,0x00,0xda,0x35,0x00,0x00,0x00,0x00]
+ vsm4rnds4 (%rip), %xmm23, %xmm22
+
+// CHECK: vsm4rnds4 -512(,%rbp,2), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe2,0x47,0x00,0xda,0x34,0x6d,0x00,0xfe,0xff,0xff]
+ vsm4rnds4 -512(,%rbp,2), %xmm23, %xmm22
+
+// CHECK: vsm4rnds4 2032(%rcx), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe2,0x47,0x00,0xda,0x71,0x7f]
+ vsm4rnds4 2032(%rcx), %xmm23, %xmm22
+
+// CHECK: vsm4rnds4 -2048(%rdx), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe2,0x47,0x00,0xda,0x72,0x80]
+ vsm4rnds4 -2048(%rdx), %xmm23, %xmm22
diff --git a/llvm/test/MC/X86/sm4-evex-32-intel.s b/llvm/test/MC/X86/sm4-evex-32-intel.s
new file mode 100644
index 00000000000000..3cc18cf4178ed8
--- /dev/null
+++ b/llvm/test/MC/X86/sm4-evex-32-intel.s
@@ -0,0 +1,169 @@
+// RUN: llvm-mc -triple x86_64-unknown-unknown -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s
+
+// CHECK: vsm4key4 zmm22, zmm23, zmm24
+// CHECK: encoding: [0x62,0x82,0x46,0x40,0xda,0xf0]
+ vsm4key4 zmm22, zmm23, zmm24
+
+// CHECK: vsm4key4 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa2,0x46,0x40,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vsm4key4 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vsm4key4 zmm22, zmm23, zmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc2,0x46,0x40,0xda,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vsm4key4 zmm22, zmm23, zmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vsm4key4 zmm22, zmm23, zmmword ptr [rip]
+// CHECK: encoding: [0x62,0xe2,0x46,0x40,0xda,0x35,0x00,0x00,0x00,0x00]
+ vsm4key4 zmm22, zmm23, zmmword ptr [rip]
+
+// CHECK: vsm4key4 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+// CHECK: encoding: [0x62,0xe2,0x46,0x40,0xda,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vsm4key4 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+
+// CHECK: vsm4key4 zmm22, zmm23, zmmword ptr [rcx + 8128]
+// CHECK: encoding: [0x62,0xe2,0x46,0x40,0xda,0x71,0x7f]
+ vsm4key4 zmm22, zmm23, zmmword ptr [rcx + 8128]
+
+// CHECK: vsm4key4 zmm22, zmm23, zmmword ptr [rdx - 8192]
+// CHECK: encoding: [0x62,0xe2,0x46,0x40,0xda,0x72,0x80]
+ vsm4key4 zmm22, zmm23, zmmword ptr [rdx - 8192]
+
+// CHECK: vsm4rnds4 zmm22, zmm23, zmm24
+// CHECK: encoding: [0x62,0x82,0x47,0x40,0xda,0xf0]
+ vsm4rnds4 zmm22, zmm23, zmm24
+
+// CHECK: vsm4rnds4 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa2,0x47,0x40,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vsm4rnds4 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vsm4rnds4 zmm22, zmm23, zmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc2,0x47,0x40,0xda,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vsm4rnds4 zmm22, zmm23, zmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vsm4rnds4 zmm22, zmm23, zmmword ptr [rip]
+// CHECK: encoding: [0x62,0xe2,0x47,0x40,0xda,0x35,0x00,0x00,0x00,0x00]
+ vsm4rnds4 zmm22, zmm23, zmmword ptr [rip]
+
+// CHECK: vsm4rnds4 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+// CHECK: encoding: [0x62,0xe2,0x47,0x40,0xda,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vsm4rnds4 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+
+// CHECK: vsm4rnds4 zmm22, zmm23, zmmword ptr [rcx + 8128]
+// CHECK: encoding: [0x62,0xe2,0x47,0x40,0xda,0x71,0x7f]
+ vsm4rnds4 zmm22, zmm23, zmmword ptr [rcx + 8128]
+
+// CHECK: vsm4rnds4 zmm22, zmm23, zmmword ptr [rdx - 8192]
+// CHECK: encoding: [0x62,0xe2,0x47,0x40,0xda,0x72,0x80]
+ vsm4rnds4 zmm22, zmm23, zmmword ptr [rdx - 8192]
+
+// CHECK: vsm4key4 ymm22, ymm23, ymm24
+// CHECK: encoding: [0x62,0x82,0x46,0x20,0xda,0xf0]
+ vsm4key4 ymm22, ymm23, ymm24
+
+// CHECK: vsm4key4 xmm22, xmm23, xmm24
+// CHECK: encoding: [0x62,0x82,0x46,0x00,0xda,0xf0]
+ vsm4key4 xmm22, xmm23, xmm24
+
+// CHECK: vsm4key4 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa2,0x46,0x20,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vsm4key4 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vsm4key4 ymm22, ymm23, ymmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc2,0x46,0x20,0xda,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vsm4key4 ymm22, ymm23, ymmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vsm4key4 ymm22, ymm23, ymmword ptr [rip]
+// CHECK: encoding: [0x62,0xe2,0x46,0x20,0xda,0x35,0x00,0x00,0x00,0x00]
+ vsm4key4 ymm22, ymm23, ymmword ptr [rip]
+
+// CHECK: vsm4key4 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+// CHECK: encoding: [0x62,0xe2,0x46,0x20,0xda,0x34,0x6d,0x00,0xfc,0xff,0xff]
+ vsm4key4 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+
+// CHECK: vsm4key4 ymm22, ymm23, ymmword ptr [rcx + 4064]
+// CHECK: encoding: [0x62,0xe2,0x46,0x20,0xda,0x71,0x7f]
+ vsm4key4 ymm22, ymm23, ymmword ptr [rcx + 4064]
+
+// CHECK: vsm4key4 ymm22, ymm23, ymmword ptr [rdx - 4096]
+// CHECK: encoding: [0x62,0xe2,0x46,0x20,0xda,0x72,0x80]
+ vsm4key4 ymm22, ymm23, ymmword ptr [rdx - 4096]
+
+// CHECK: vsm4key4 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa2,0x46,0x00,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vsm4key4 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vsm4key4 xmm22, xmm23, xmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc2,0x46,0x00,0xda,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vsm4key4 xmm22, xmm23, xmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vsm4key4 xmm22, xmm23, xmmword ptr [rip]
+// CHECK: encoding: [0x62,0xe2,0x46,0x00,0xda,0x35,0x00,0x00,0x00,0x00]
+ vsm4key4 xmm22, xmm23, xmmword ptr [rip]
+
+// CHECK: vsm4key4 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+// CHECK: encoding: [0x62,0xe2,0x46,0x00,0xda,0x34,0x6d,0x00,0xfe,0xff,0xff]
+ vsm4key4 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+
+// CHECK: vsm4key4 xmm22, xmm23, xmmword ptr [rcx + 2032]
+// CHECK: encoding: [0x62,0xe2,0x46,0x00,0xda,0x71,0x7f]
+ vsm4key4 xmm22, xmm23, xmmword ptr [rcx + 2032]
+
+// CHECK: vsm4key4 xmm22, xmm23, xmmword ptr [rdx - 2048]
+// CHECK: encoding: [0x62,0xe2,0x46,0x00,0xda,0x72,0x80]
+ vsm4key4 xmm22, xmm23, xmmword ptr [rdx - 2048]
+
+// CHECK: vsm4rnds4 ymm22, ymm23, ymm24
+// CHECK: encoding: [0x62,0x82,0x47,0x20,0xda,0xf0]
+ vsm4rnds4 ymm22, ymm23, ymm24
+
+// CHECK: vsm4rnds4 xmm22, xmm23, xmm24
+// CHECK: encoding: [0x62,0x82,0x47,0x00,0xda,0xf0]
+ vsm4rnds4 xmm22, xmm23, xmm24
+
+// CHECK: vsm4rnds4 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa2,0x47,0x20,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vsm4rnds4 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vsm4rnds4 ymm22, ymm23, ymmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc2,0x47,0x20,0xda,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vsm4rnds4 ymm22, ymm23, ymmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vsm4rnds4 ymm22, ymm23, ymmword ptr [rip]
+// CHECK: encoding: [0x62,0xe2,0x47,0x20,0xda,0x35,0x00,0x00,0x00,0x00]
+ vsm4rnds4 ymm22, ymm23, ymmword ptr [rip]
+
+// CHECK: vsm4rnds4 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+// CHECK: encoding: [0x62,0xe2,0x47,0x20,0xda,0x34,0x6d,0x00,0xfc,0xff,0xff]
+ vsm4rnds4 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+
+// CHECK: vsm4rnds4 ymm22, ymm23, ymmword ptr [rcx + 4064]
+// CHECK: encoding: [0x62,0xe2,0x47,0x20,0xda,0x71,0x7f]
+ vsm4rnds4 ymm22, ymm23, ymmword ptr [rcx + 4064]
+
+// CHECK: vsm4rnds4 ymm22, ymm23, ymmword ptr [rdx - 4096]
+// CHECK: encoding: [0x62,0xe2,0x47,0x20,0xda,0x72,0x80]
+ vsm4rnds4 ymm22, ymm23, ymmword ptr [rdx - 4096]
+
+// CHECK: vsm4rnds4 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa2,0x47,0x00,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vsm4rnds4 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK: vsm4rnds4 xmm22, xmm23, xmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc2,0x47,0x00,0xda,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vsm4rnds4 xmm22, xmm23, xmmword ptr [r8 + 4*rax + 291]
+
+// CHECK: vsm4rnds4 xmm22, xmm23, xmmword ptr [rip]
+// CHECK: encoding: [0x62,0xe2,0x47,0x00,0xda,0x35,0x00,0x00,0x00,0x00]
+ vsm4rnds4 xmm22, xmm23, xmmword ptr [rip]
+
+// CHECK: vsm4rnds4 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+// CHECK: encoding: [0x62,0xe2,0x47,0x00,0xda,0x34,0x6d,0x00,0xfe,0xff,0xff]
+ vsm4rnds4 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+
+// CHECK: vsm4rnds4 xmm22, xmm23, xmmword ptr [rcx + 2032]
+// CHECK: encoding: [0x62,0xe2,0x47,0x00,0xda,0x71,0x7f]
+ vsm4rnds4 xmm22, xmm23, xmmword ptr [rcx + 2032]
+
+// CHECK: vsm4rnds4 xmm22, xmm23, xmmword ptr [rdx - 2048]
+// CHECK: encoding: [0x62,0xe2,0x47,0x00,0xda,0x72,0x80]
+ vsm4rnds4 xmm22, xmm23, xmmword ptr [rdx - 2048]
diff --git a/llvm/test/MC/X86/sm4-evex-64-att.s b/llvm/test/MC/X86/sm4-evex-64-att.s
new file mode 100644
index 00000000000000..de10d95ac74d7b
--- /dev/null
+++ b/llvm/test/MC/X86/sm4-evex-64-att.s
@@ -0,0 +1,169 @@
+// RUN: llvm-mc -triple i686-unknown-unknown --show-encoding %s | FileCheck %s
+
+// CHECK: vsm4key4 %zmm4, %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0xd4]
+ vsm4key4 %zmm4, %zmm3, %zmm2
+
+// CHECK: vsm4key4 268435456(%esp,%esi,8), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vsm4key4 268435456(%esp,%esi,8), %zmm3, %zmm2
+
+// CHECK: vsm4key4 291(%edi,%eax,4), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0x94,0x87,0x23,0x01,0x00,0x00]
+ vsm4key4 291(%edi,%eax,4), %zmm3, %zmm2
+
+// CHECK: vsm4key4 (%eax), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0x10]
+ vsm4key4 (%eax), %zmm3, %zmm2
+
+// CHECK: vsm4key4 -2048(,%ebp,2), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0x14,0x6d,0x00,0xf8,0xff,0xff]
+ vsm4key4 -2048(,%ebp,2), %zmm3, %zmm2
+
+// CHECK: vsm4key4 8128(%ecx), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0x51,0x7f]
+ vsm4key4 8128(%ecx), %zmm3, %zmm2
+
+// CHECK: vsm4key4 -8192(%edx), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0x52,0x80]
+ vsm4key4 -8192(%edx), %zmm3, %zmm2
+
+// CHECK: vsm4rnds4 %zmm4, %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0xd4]
+ vsm4rnds4 %zmm4, %zmm3, %zmm2
+
+// CHECK: vsm4rnds4 268435456(%esp,%esi,8), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vsm4rnds4 268435456(%esp,%esi,8), %zmm3, %zmm2
+
+// CHECK: vsm4rnds4 291(%edi,%eax,4), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0x94,0x87,0x23,0x01,0x00,0x00]
+ vsm4rnds4 291(%edi,%eax,4), %zmm3, %zmm2
+
+// CHECK: vsm4rnds4 (%eax), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0x10]
+ vsm4rnds4 (%eax), %zmm3, %zmm2
+
+// CHECK: vsm4rnds4 -2048(,%ebp,2), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0x14,0x6d,0x00,0xf8,0xff,0xff]
+ vsm4rnds4 -2048(,%ebp,2), %zmm3, %zmm2
+
+// CHECK: vsm4rnds4 8128(%ecx), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0x51,0x7f]
+ vsm4rnds4 8128(%ecx), %zmm3, %zmm2
+
+// CHECK: vsm4rnds4 -8192(%edx), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0x52,0x80]
+ vsm4rnds4 -8192(%edx), %zmm3, %zmm2
+
+// CHECK: {evex} vsm4key4 %ymm4, %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0xd4]
+ {evex} vsm4key4 %ymm4, %ymm3, %ymm2
+
+// CHECK: {evex} vsm4key4 %xmm4, %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0xd4]
+ {evex} vsm4key4 %xmm4, %xmm3, %xmm2
+
+// CHECK: {evex} vsm4key4 268435456(%esp,%esi,8), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0x94,0xf4,0x00,0x00,0x00,0x10]
+ {evex} vsm4key4 268435456(%esp,%esi,8), %ymm3, %ymm2
+
+// CHECK: {evex} vsm4key4 291(%edi,%eax,4), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0x94,0x87,0x23,0x01,0x00,0x00]
+ {evex} vsm4key4 291(%edi,%eax,4), %ymm3, %ymm2
+
+// CHECK: {evex} vsm4key4 (%eax), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0x10]
+ {evex} vsm4key4 (%eax), %ymm3, %ymm2
+
+// CHECK: {evex} vsm4key4 -1024(,%ebp,2), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0x14,0x6d,0x00,0xfc,0xff,0xff]
+ {evex} vsm4key4 -1024(,%ebp,2), %ymm3, %ymm2
+
+// CHECK: {evex} vsm4key4 4064(%ecx), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0x51,0x7f]
+ {evex} vsm4key4 4064(%ecx), %ymm3, %ymm2
+
+// CHECK: {evex} vsm4key4 -4096(%edx), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0x52,0x80]
+ {evex} vsm4key4 -4096(%edx), %ymm3, %ymm2
+
+// CHECK: {evex} vsm4key4 268435456(%esp,%esi,8), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0x94,0xf4,0x00,0x00,0x00,0x10]
+ {evex} vsm4key4 268435456(%esp,%esi,8), %xmm3, %xmm2
+
+// CHECK: {evex} vsm4key4 291(%edi,%eax,4), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0x94,0x87,0x23,0x01,0x00,0x00]
+ {evex} vsm4key4 291(%edi,%eax,4), %xmm3, %xmm2
+
+// CHECK: {evex} vsm4key4 (%eax), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0x10]
+ {evex} vsm4key4 (%eax), %xmm3, %xmm2
+
+// CHECK: {evex} vsm4key4 -512(,%ebp,2), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0x14,0x6d,0x00,0xfe,0xff,0xff]
+ {evex} vsm4key4 -512(,%ebp,2), %xmm3, %xmm2
+
+// CHECK: {evex} vsm4key4 2032(%ecx), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0x51,0x7f]
+ {evex} vsm4key4 2032(%ecx), %xmm3, %xmm2
+
+// CHECK: {evex} vsm4key4 -2048(%edx), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0x52,0x80]
+ {evex} vsm4key4 -2048(%edx), %xmm3, %xmm2
+
+// CHECK: {evex} vsm4rnds4 %ymm4, %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0xd4]
+ {evex} vsm4rnds4 %ymm4, %ymm3, %ymm2
+
+// CHECK: {evex} vsm4rnds4 %xmm4, %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0xd4]
+ {evex} vsm4rnds4 %xmm4, %xmm3, %xmm2
+
+// CHECK: {evex} vsm4rnds4 268435456(%esp,%esi,8), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0x94,0xf4,0x00,0x00,0x00,0x10]
+ {evex} vsm4rnds4 268435456(%esp,%esi,8), %ymm3, %ymm2
+
+// CHECK: {evex} vsm4rnds4 291(%edi,%eax,4), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0x94,0x87,0x23,0x01,0x00,0x00]
+ {evex} vsm4rnds4 291(%edi,%eax,4), %ymm3, %ymm2
+
+// CHECK: {evex} vsm4rnds4 (%eax), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0x10]
+ {evex} vsm4rnds4 (%eax), %ymm3, %ymm2
+
+// CHECK: {evex} vsm4rnds4 -1024(,%ebp,2), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0x14,0x6d,0x00,0xfc,0xff,0xff]
+ {evex} vsm4rnds4 -1024(,%ebp,2), %ymm3, %ymm2
+
+// CHECK: {evex} vsm4rnds4 4064(%ecx), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0x51,0x7f]
+ {evex} vsm4rnds4 4064(%ecx), %ymm3, %ymm2
+
+// CHECK: {evex} vsm4rnds4 -4096(%edx), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0x52,0x80]
+ {evex} vsm4rnds4 -4096(%edx), %ymm3, %ymm2
+
+// CHECK: {evex} vsm4rnds4 268435456(%esp,%esi,8), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0x94,0xf4,0x00,0x00,0x00,0x10]
+ {evex} vsm4rnds4 268435456(%esp,%esi,8), %xmm3, %xmm2
+
+// CHECK: {evex} vsm4rnds4 291(%edi,%eax,4), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0x94,0x87,0x23,0x01,0x00,0x00]
+ {evex} vsm4rnds4 291(%edi,%eax,4), %xmm3, %xmm2
+
+// CHECK: {evex} vsm4rnds4 (%eax), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0x10]
+ {evex} vsm4rnds4 (%eax), %xmm3, %xmm2
+
+// CHECK: {evex} vsm4rnds4 -512(,%ebp,2), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0x14,0x6d,0x00,0xfe,0xff,0xff]
+ {evex} vsm4rnds4 -512(,%ebp,2), %xmm3, %xmm2
+
+// CHECK: {evex} vsm4rnds4 2032(%ecx), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0x51,0x7f]
+ {evex} vsm4rnds4 2032(%ecx), %xmm3, %xmm2
+
+// CHECK: {evex} vsm4rnds4 -2048(%edx), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0x52,0x80]
+ {evex} vsm4rnds4 -2048(%edx), %xmm3, %xmm2
\ No newline at end of file
diff --git a/llvm/test/MC/X86/sm4-evex-64-intel.s b/llvm/test/MC/X86/sm4-evex-64-intel.s
new file mode 100644
index 00000000000000..812fdb13f80913
--- /dev/null
+++ b/llvm/test/MC/X86/sm4-evex-64-intel.s
@@ -0,0 +1,169 @@
+// RUN: llvm-mc -triple i686-unknown-unknown -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s
+
+// CHECK: vsm4key4 zmm2, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0xd4]
+ vsm4key4 zmm2, zmm3, zmm4
+
+// CHECK: vsm4key4 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vsm4key4 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vsm4key4 zmm2, zmm3, zmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0x94,0x87,0x23,0x01,0x00,0x00]
+ vsm4key4 zmm2, zmm3, zmmword ptr [edi + 4*eax + 291]
+
+// CHECK: vsm4key4 zmm2, zmm3, zmmword ptr [eax]
+// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0x10]
+ vsm4key4 zmm2, zmm3, zmmword ptr [eax]
+
+// CHECK: vsm4key4 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0x14,0x6d,0x00,0xf8,0xff,0xff]
+ vsm4key4 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+
+// CHECK: vsm4key4 zmm2, zmm3, zmmword ptr [ecx + 8128]
+// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0x51,0x7f]
+ vsm4key4 zmm2, zmm3, zmmword ptr [ecx + 8128]
+
+// CHECK: vsm4key4 zmm2, zmm3, zmmword ptr [edx - 8192]
+// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0x52,0x80]
+ vsm4key4 zmm2, zmm3, zmmword ptr [edx - 8192]
+
+// CHECK: vsm4rnds4 zmm2, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0xd4]
+ vsm4rnds4 zmm2, zmm3, zmm4
+
+// CHECK: vsm4rnds4 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vsm4rnds4 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: vsm4rnds4 zmm2, zmm3, zmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0x94,0x87,0x23,0x01,0x00,0x00]
+ vsm4rnds4 zmm2, zmm3, zmmword ptr [edi + 4*eax + 291]
+
+// CHECK: vsm4rnds4 zmm2, zmm3, zmmword ptr [eax]
+// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0x10]
+ vsm4rnds4 zmm2, zmm3, zmmword ptr [eax]
+
+// CHECK: vsm4rnds4 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0x14,0x6d,0x00,0xf8,0xff,0xff]
+ vsm4rnds4 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+
+// CHECK: vsm4rnds4 zmm2, zmm3, zmmword ptr [ecx + 8128]
+// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0x51,0x7f]
+ vsm4rnds4 zmm2, zmm3, zmmword ptr [ecx + 8128]
+
+// CHECK: vsm4rnds4 zmm2, zmm3, zmmword ptr [edx - 8192]
+// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0x52,0x80]
+ vsm4rnds4 zmm2, zmm3, zmmword ptr [edx - 8192]
+
+// CHECK: {evex} vsm4key4 ymm2, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0xd4]
+ {evex} vsm4key4 ymm2, ymm3, ymm4
+
+// CHECK: {evex} vsm4key4 xmm2, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0xd4]
+ {evex} vsm4key4 xmm2, xmm3, xmm4
+
+// CHECK: {evex} vsm4key4 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0x94,0xf4,0x00,0x00,0x00,0x10]
+ {evex} vsm4key4 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: {evex} vsm4key4 ymm2, ymm3, ymmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0x94,0x87,0x23,0x01,0x00,0x00]
+ {evex} vsm4key4 ymm2, ymm3, ymmword ptr [edi + 4*eax + 291]
+
+// CHECK: {evex} vsm4key4 ymm2, ymm3, ymmword ptr [eax]
+// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0x10]
+ {evex} vsm4key4 ymm2, ymm3, ymmword ptr [eax]
+
+// CHECK: {evex} vsm4key4 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0x14,0x6d,0x00,0xfc,0xff,0xff]
+ {evex} vsm4key4 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+
+// CHECK: {evex} vsm4key4 ymm2, ymm3, ymmword ptr [ecx + 4064]
+// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0x51,0x7f]
+ {evex} vsm4key4 ymm2, ymm3, ymmword ptr [ecx + 4064]
+
+// CHECK: {evex} vsm4key4 ymm2, ymm3, ymmword ptr [edx - 4096]
+// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0x52,0x80]
+ {evex} vsm4key4 ymm2, ymm3, ymmword ptr [edx - 4096]
+
+// CHECK: {evex} vsm4key4 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0x94,0xf4,0x00,0x00,0x00,0x10]
+ {evex} vsm4key4 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: {evex} vsm4key4 xmm2, xmm3, xmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0x94,0x87,0x23,0x01,0x00,0x00]
+ {evex} vsm4key4 xmm2, xmm3, xmmword ptr [edi + 4*eax + 291]
+
+// CHECK: {evex} vsm4key4 xmm2, xmm3, xmmword ptr [eax]
+// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0x10]
+ {evex} vsm4key4 xmm2, xmm3, xmmword ptr [eax]
+
+// CHECK: {evex} vsm4key4 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0x14,0x6d,0x00,0xfe,0xff,0xff]
+ {evex} vsm4key4 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+
+// CHECK: {evex} vsm4key4 xmm2, xmm3, xmmword ptr [ecx + 2032]
+// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0x51,0x7f]
+ {evex} vsm4key4 xmm2, xmm3, xmmword ptr [ecx + 2032]
+
+// CHECK: {evex} vsm4key4 xmm2, xmm3, xmmword ptr [edx - 2048]
+// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0x52,0x80]
+ {evex} vsm4key4 xmm2, xmm3, xmmword ptr [edx - 2048]
+
+// CHECK: {evex} vsm4rnds4 ymm2, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0xd4]
+ {evex} vsm4rnds4 ymm2, ymm3, ymm4
+
+// CHECK: {evex} vsm4rnds4 xmm2, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0xd4]
+ {evex} vsm4rnds4 xmm2, xmm3, xmm4
+
+// CHECK: {evex} vsm4rnds4 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0x94,0xf4,0x00,0x00,0x00,0x10]
+ {evex} vsm4rnds4 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: {evex} vsm4rnds4 ymm2, ymm3, ymmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0x94,0x87,0x23,0x01,0x00,0x00]
+ {evex} vsm4rnds4 ymm2, ymm3, ymmword ptr [edi + 4*eax + 291]
+
+// CHECK: {evex} vsm4rnds4 ymm2, ymm3, ymmword ptr [eax]
+// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0x10]
+ {evex} vsm4rnds4 ymm2, ymm3, ymmword ptr [eax]
+
+// CHECK: {evex} vsm4rnds4 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0x14,0x6d,0x00,0xfc,0xff,0xff]
+ {evex} vsm4rnds4 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+
+// CHECK: {evex} vsm4rnds4 ymm2, ymm3, ymmword ptr [ecx + 4064]
+// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0x51,0x7f]
+ {evex} vsm4rnds4 ymm2, ymm3, ymmword ptr [ecx + 4064]
+
+// CHECK: {evex} vsm4rnds4 ymm2, ymm3, ymmword ptr [edx - 4096]
+// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0x52,0x80]
+ {evex} vsm4rnds4 ymm2, ymm3, ymmword ptr [edx - 4096]
+
+// CHECK: {evex} vsm4rnds4 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0x94,0xf4,0x00,0x00,0x00,0x10]
+ {evex} vsm4rnds4 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK: {evex} vsm4rnds4 xmm2, xmm3, xmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0x94,0x87,0x23,0x01,0x00,0x00]
+ {evex} vsm4rnds4 xmm2, xmm3, xmmword ptr [edi + 4*eax + 291]
+
+// CHECK: {evex} vsm4rnds4 xmm2, xmm3, xmmword ptr [eax]
+// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0x10]
+ {evex} vsm4rnds4 xmm2, xmm3, xmmword ptr [eax]
+
+// CHECK: {evex} vsm4rnds4 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0x14,0x6d,0x00,0xfe,0xff,0xff]
+ {evex} vsm4rnds4 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+
+// CHECK: {evex} vsm4rnds4 xmm2, xmm3, xmmword ptr [ecx + 2032]
+// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0x51,0x7f]
+ {evex} vsm4rnds4 xmm2, xmm3, xmmword ptr [ecx + 2032]
+
+// CHECK: {evex} vsm4rnds4 xmm2, xmm3, xmmword ptr [edx - 2048]
+// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0x52,0x80]
+ {evex} vsm4rnds4 xmm2, xmm3, xmmword ptr [edx - 2048]
diff --git a/llvm/test/TableGen/x86-fold-tables.inc b/llvm/test/TableGen/x86-fold-tables.inc
index 85d9b02ac0cbf1..43c206fa0af698 100644
--- a/llvm/test/TableGen/x86-fold-tables.inc
+++ b/llvm/test/TableGen/x86-fold-tables.inc
@@ -4113,8 +4113,14 @@ static const X86FoldTableEntry Table2[] = {
{X86::VSHUFPSZrri, X86::VSHUFPSZrmi, 0},
{X86::VSHUFPSrri, X86::VSHUFPSrmi, 0},
{X86::VSM4KEY4Yrr, X86::VSM4KEY4Yrm, 0},
+ {X86::VSM4KEY4Z128rr, X86::VSM4KEY4Z128rm, 0},
+ {X86::VSM4KEY4Z256rr, X86::VSM4KEY4Z256rm, 0},
+ {X86::VSM4KEY4Zrr, X86::VSM4KEY4Zrm, 0},
{X86::VSM4KEY4rr, X86::VSM4KEY4rm, 0},
{X86::VSM4RNDS4Yrr, X86::VSM4RNDS4Yrm, 0},
+ {X86::VSM4RNDS4Z128rr, X86::VSM4RNDS4Z128rm, 0},
+ {X86::VSM4RNDS4Z256rr, X86::VSM4RNDS4Z256rm, 0},
+ {X86::VSM4RNDS4Zrr, X86::VSM4RNDS4Zrm, 0},
{X86::VSM4RNDS4rr, X86::VSM4RNDS4rm, 0},
{X86::VSQRTNEPBF16Z128rkz, X86::VSQRTNEPBF16Z128mkz, 0},
{X86::VSQRTNEPBF16Z256rkz, X86::VSQRTNEPBF16Z256mkz, 0},
>From 23d4bfcabdca8968cba600e2fc0b440121990ef5 Mon Sep 17 00:00:00 2001
From: Freddy Ye <freddy.ye at intel.com>
Date: Thu, 24 Oct 2024 13:43:59 +0800
Subject: [PATCH 2/5] address comments
---
clang/docs/ReleaseNotes.rst | 4 +
llvm/include/llvm/IR/IntrinsicsX86.td | 14 +-
llvm/lib/Target/X86/X86InstrAVX10.td | 12 +-
llvm/test/MC/Disassembler/X86/sm4-evex-32.txt | 256 +++++++-------
llvm/test/MC/Disassembler/X86/sm4-evex-64.txt | 256 +++++++-------
llvm/test/MC/X86/sm4-evex-32-att.s | 329 ++++++++----------
llvm/test/MC/X86/sm4-evex-32-intel.s | 254 +++++++-------
llvm/test/MC/X86/sm4-evex-64-att.s | 329 ++++++++++--------
llvm/test/MC/X86/sm4-evex-64-intel.s | 254 +++++++-------
9 files changed, 855 insertions(+), 853 deletions(-)
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index b7a6ace8bb895d..adc62122774b90 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -606,6 +606,10 @@ X86 Support
* Supported MINMAX intrinsics of ``*_(mask(z)))_minmax(ne)_p[s|d|h|bh]`` and
``*_(mask(z)))_minmax_s[s|d|h]``.
+- Supported intrinsics for ``SM4 and AVX10.2``.
+ * Supported SM4 intrinsics of ``_mm512_sm4key4_epi32`` and
+ ``_mm512_sm4rnds4_epi32``.
+
- All intrinsics in adcintrin.h can now be used in constant expressions.
- All intrinsics in adxintrin.h can now be used in constant expressions.
diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td
index 7725bda1f4f598..34524dbff6c391 100644
--- a/llvm/include/llvm/IR/IntrinsicsX86.td
+++ b/llvm/include/llvm/IR/IntrinsicsX86.td
@@ -6099,6 +6099,11 @@ let TargetPrefix = "x86" in {
DefaultAttrsIntrinsic<[llvm_v8i32_ty],
[llvm_v8i32_ty, llvm_v8i32_ty],
[IntrNoMem]>;
+ def int_x86_vsm4key4512
+ : ClangBuiltin<"__builtin_ia32_vsm4key4512">,
+ DefaultAttrsIntrinsic<[llvm_v16i32_ty],
+ [llvm_v16i32_ty, llvm_v16i32_ty],
+ [IntrNoMem]>;
def int_x86_vsm4rnds4128
: ClangBuiltin<"__builtin_ia32_vsm4rnds4128">,
DefaultAttrsIntrinsic<[llvm_v4i32_ty],
@@ -6109,11 +6114,10 @@ let TargetPrefix = "x86" in {
DefaultAttrsIntrinsic<[llvm_v8i32_ty],
[llvm_v8i32_ty, llvm_v8i32_ty],
[IntrNoMem]>;
-def int_x86_vsm4key4512 : ClangBuiltin<"__builtin_ia32_vsm4key4512">,
- Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty],
- [IntrNoMem]>;
-def int_x86_vsm4rnds4512 : ClangBuiltin<"__builtin_ia32_vsm4rnds4512">,
- Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty],
+ def int_x86_vsm4rnds4512
+ : ClangBuiltin<"__builtin_ia32_vsm4rnds4512">,
+ DefaultAttrsIntrinsic<[llvm_v16i32_ty],
+ [llvm_v16i32_ty, llvm_v16i32_ty],
[IntrNoMem]>;
}
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/X86/X86InstrAVX10.td b/llvm/lib/Target/X86/X86InstrAVX10.td
index 640011f5ed28d7..d6fab07ca2aaf1 100644
--- a/llvm/lib/Target/X86/X86InstrAVX10.td
+++ b/llvm/lib/Target/X86/X86InstrAVX10.td
@@ -1652,17 +1652,11 @@ let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
multiclass avx10_sm4_base<string OpStr> {
// SM4_Base is in X86InstrSSE.td.
let Predicates = [HasSM4, HasAVX10_2] in {
- defm Z128 : SM4_Base<OpStr, avx512vl_i32_info.info128.RC,
- "128", avx512vl_i32_info.info128.LdFrag,
- avx512vl_i32_info.info128.MemOp>, EVEX_V128;
- defm Z256 : SM4_Base<OpStr, avx512vl_i32_info.info256.RC,
- "256", avx512vl_i32_info.info256.LdFrag,
- avx512vl_i32_info.info256.MemOp>, EVEX_V256;
+ defm Z128 : SM4_Base<OpStr, VR128X, "128", loadv4i32, i128mem>, EVEX_V128;
+ defm Z256 : SM4_Base<OpStr, VR256X, "256", loadv8i32, i256mem>, EVEX_V256;
}
let Predicates = [HasSM4, HasAVX10_2_512] in
- defm Z : SM4_Base<OpStr, avx512vl_i32_info.info512.RC,
- "512", avx512vl_i32_info.info512.LdFrag,
- avx512vl_i32_info.info512.MemOp>, EVEX_V512;
+ defm Z : SM4_Base<OpStr, VR512, "512", loadv16i32, i512mem>, EVEX_V512;
}
defm VSM4KEY4 : avx10_sm4_base<"vsm4key4">, T8, XS, EVEX, VVVV;
diff --git a/llvm/test/MC/Disassembler/X86/sm4-evex-32.txt b/llvm/test/MC/Disassembler/X86/sm4-evex-32.txt
index f89f4b5a8c0fb8..c1cb271a967b13 100644
--- a/llvm/test/MC/Disassembler/X86/sm4-evex-32.txt
+++ b/llvm/test/MC/Disassembler/X86/sm4-evex-32.txt
@@ -1,170 +1,170 @@
-# RUN: llvm-mc --disassemble %s -triple=x86_64 | FileCheck %s --check-prefixes=ATT
-# RUN: llvm-mc --disassemble %s -triple=x86_64 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL
+# RUN: llvm-mc --disassemble %s -triple=i386-unknown-unknown | FileCheck %s --check-prefixes=ATT
+# RUN: llvm-mc --disassemble %s -triple=i386-unknown-unknown -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL
-# ATT: vsm4key4 %zmm24, %zmm23, %zmm22
-# INTEL: vsm4key4 zmm22, zmm23, zmm24
-0x62,0x82,0x46,0x40,0xda,0xf0
+# ATT: vsm4key4 %zmm4, %zmm3, %zmm2
+# INTEL: vsm4key4 zmm2, zmm3, zmm4
+0x62,0xf2,0x66,0x48,0xda,0xd4
-# ATT: vsm4key4 268435456(%rbp,%r14,8), %zmm23, %zmm22
-# INTEL: vsm4key4 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
-0x62,0xa2,0x46,0x40,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10
+# ATT: vsm4key4 268435456(%esp,%esi,8), %zmm3, %zmm2
+# INTEL: vsm4key4 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf2,0x66,0x48,0xda,0x94,0xf4,0x00,0x00,0x00,0x10
-# ATT: vsm4key4 291(%r8,%rax,4), %zmm23, %zmm22
-# INTEL: vsm4key4 zmm22, zmm23, zmmword ptr [r8 + 4*rax + 291]
-0x62,0xc2,0x46,0x40,0xda,0xb4,0x80,0x23,0x01,0x00,0x00
+# ATT: vsm4key4 291(%edi,%eax,4), %zmm3, %zmm2
+# INTEL: vsm4key4 zmm2, zmm3, zmmword ptr [edi + 4*eax + 291]
+0x62,0xf2,0x66,0x48,0xda,0x94,0x87,0x23,0x01,0x00,0x00
-# ATT: vsm4key4 (%rip), %zmm23, %zmm22
-# INTEL: vsm4key4 zmm22, zmm23, zmmword ptr [rip]
-0x62,0xe2,0x46,0x40,0xda,0x35,0x00,0x00,0x00,0x00
+# ATT: vsm4key4 (%eax), %zmm3, %zmm2
+# INTEL: vsm4key4 zmm2, zmm3, zmmword ptr [eax]
+0x62,0xf2,0x66,0x48,0xda,0x10
-# ATT: vsm4key4 -2048(,%rbp,2), %zmm23, %zmm22
-# INTEL: vsm4key4 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
-0x62,0xe2,0x46,0x40,0xda,0x34,0x6d,0x00,0xf8,0xff,0xff
+# ATT: vsm4key4 -2048(,%ebp,2), %zmm3, %zmm2
+# INTEL: vsm4key4 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+0x62,0xf2,0x66,0x48,0xda,0x14,0x6d,0x00,0xf8,0xff,0xff
-# ATT: vsm4key4 8128(%rcx), %zmm23, %zmm22
-# INTEL: vsm4key4 zmm22, zmm23, zmmword ptr [rcx + 8128]
-0x62,0xe2,0x46,0x40,0xda,0x71,0x7f
+# ATT: vsm4key4 8128(%ecx), %zmm3, %zmm2
+# INTEL: vsm4key4 zmm2, zmm3, zmmword ptr [ecx + 8128]
+0x62,0xf2,0x66,0x48,0xda,0x51,0x7f
-# ATT: vsm4key4 -8192(%rdx), %zmm23, %zmm22
-# INTEL: vsm4key4 zmm22, zmm23, zmmword ptr [rdx - 8192]
-0x62,0xe2,0x46,0x40,0xda,0x72,0x80
+# ATT: vsm4key4 -8192(%edx), %zmm3, %zmm2
+# INTEL: vsm4key4 zmm2, zmm3, zmmword ptr [edx - 8192]
+0x62,0xf2,0x66,0x48,0xda,0x52,0x80
-# ATT: vsm4rnds4 %zmm24, %zmm23, %zmm22
-# INTEL: vsm4rnds4 zmm22, zmm23, zmm24
-0x62,0x82,0x47,0x40,0xda,0xf0
+# ATT: vsm4rnds4 %zmm4, %zmm3, %zmm2
+# INTEL: vsm4rnds4 zmm2, zmm3, zmm4
+0x62,0xf2,0x67,0x48,0xda,0xd4
-# ATT: vsm4rnds4 268435456(%rbp,%r14,8), %zmm23, %zmm22
-# INTEL: vsm4rnds4 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
-0x62,0xa2,0x47,0x40,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10
+# ATT: vsm4rnds4 268435456(%esp,%esi,8), %zmm3, %zmm2
+# INTEL: vsm4rnds4 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf2,0x67,0x48,0xda,0x94,0xf4,0x00,0x00,0x00,0x10
-# ATT: vsm4rnds4 291(%r8,%rax,4), %zmm23, %zmm22
-# INTEL: vsm4rnds4 zmm22, zmm23, zmmword ptr [r8 + 4*rax + 291]
-0x62,0xc2,0x47,0x40,0xda,0xb4,0x80,0x23,0x01,0x00,0x00
+# ATT: vsm4rnds4 291(%edi,%eax,4), %zmm3, %zmm2
+# INTEL: vsm4rnds4 zmm2, zmm3, zmmword ptr [edi + 4*eax + 291]
+0x62,0xf2,0x67,0x48,0xda,0x94,0x87,0x23,0x01,0x00,0x00
-# ATT: vsm4rnds4 (%rip), %zmm23, %zmm22
-# INTEL: vsm4rnds4 zmm22, zmm23, zmmword ptr [rip]
-0x62,0xe2,0x47,0x40,0xda,0x35,0x00,0x00,0x00,0x00
+# ATT: vsm4rnds4 (%eax), %zmm3, %zmm2
+# INTEL: vsm4rnds4 zmm2, zmm3, zmmword ptr [eax]
+0x62,0xf2,0x67,0x48,0xda,0x10
-# ATT: vsm4rnds4 -2048(,%rbp,2), %zmm23, %zmm22
-# INTEL: vsm4rnds4 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
-0x62,0xe2,0x47,0x40,0xda,0x34,0x6d,0x00,0xf8,0xff,0xff
+# ATT: vsm4rnds4 -2048(,%ebp,2), %zmm3, %zmm2
+# INTEL: vsm4rnds4 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+0x62,0xf2,0x67,0x48,0xda,0x14,0x6d,0x00,0xf8,0xff,0xff
-# ATT: vsm4rnds4 8128(%rcx), %zmm23, %zmm22
-# INTEL: vsm4rnds4 zmm22, zmm23, zmmword ptr [rcx + 8128]
-0x62,0xe2,0x47,0x40,0xda,0x71,0x7f
+# ATT: vsm4rnds4 8128(%ecx), %zmm3, %zmm2
+# INTEL: vsm4rnds4 zmm2, zmm3, zmmword ptr [ecx + 8128]
+0x62,0xf2,0x67,0x48,0xda,0x51,0x7f
-# ATT: vsm4rnds4 -8192(%rdx), %zmm23, %zmm22
-# INTEL: vsm4rnds4 zmm22, zmm23, zmmword ptr [rdx - 8192]
-0x62,0xe2,0x47,0x40,0xda,0x72,0x80
+# ATT: vsm4rnds4 -8192(%edx), %zmm3, %zmm2
+# INTEL: vsm4rnds4 zmm2, zmm3, zmmword ptr [edx - 8192]
+0x62,0xf2,0x67,0x48,0xda,0x52,0x80
-# ATT: vsm4key4 %ymm24, %ymm23, %ymm22
-# INTEL: vsm4key4 ymm22, ymm23, ymm24
-0x62,0x82,0x46,0x20,0xda,0xf0
+# ATT: vsm4key4 %ymm4, %ymm3, %ymm2
+# INTEL: vsm4key4 ymm2, ymm3, ymm4
+0x62,0xf2,0x66,0x28,0xda,0xd4
-# ATT: vsm4key4 %xmm24, %xmm23, %xmm22
-# INTEL: vsm4key4 xmm22, xmm23, xmm24
-0x62,0x82,0x46,0x00,0xda,0xf0
+# ATT: vsm4key4 %xmm4, %xmm3, %xmm2
+# INTEL: vsm4key4 xmm2, xmm3, xmm4
+0x62,0xf2,0x66,0x08,0xda,0xd4
-# ATT: vsm4key4 268435456(%rbp,%r14,8), %ymm23, %ymm22
-# INTEL: vsm4key4 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
-0x62,0xa2,0x46,0x20,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10
+# ATT: vsm4key4 268435456(%esp,%esi,8), %ymm3, %ymm2
+# INTEL: vsm4key4 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+0x62,0xf2,0x66,0x28,0xda,0x94,0xf4,0x00,0x00,0x00,0x10
-# ATT: vsm4key4 291(%r8,%rax,4), %ymm23, %ymm22
-# INTEL: vsm4key4 ymm22, ymm23, ymmword ptr [r8 + 4*rax + 291]
-0x62,0xc2,0x46,0x20,0xda,0xb4,0x80,0x23,0x01,0x00,0x00
+# ATT: vsm4key4 291(%edi,%eax,4), %ymm3, %ymm2
+# INTEL: vsm4key4 ymm2, ymm3, ymmword ptr [edi + 4*eax + 291]
+0x62,0xf2,0x66,0x28,0xda,0x94,0x87,0x23,0x01,0x00,0x00
-# ATT: vsm4key4 (%rip), %ymm23, %ymm22
-# INTEL: vsm4key4 ymm22, ymm23, ymmword ptr [rip]
-0x62,0xe2,0x46,0x20,0xda,0x35,0x00,0x00,0x00,0x00
+# ATT: vsm4key4 (%eax), %ymm3, %ymm2
+# INTEL: vsm4key4 ymm2, ymm3, ymmword ptr [eax]
+0x62,0xf2,0x66,0x28,0xda,0x10
-# ATT: vsm4key4 -1024(,%rbp,2), %ymm23, %ymm22
-# INTEL: vsm4key4 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
-0x62,0xe2,0x46,0x20,0xda,0x34,0x6d,0x00,0xfc,0xff,0xff
+# ATT: vsm4key4 -1024(,%ebp,2), %ymm3, %ymm2
+# INTEL: vsm4key4 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+0x62,0xf2,0x66,0x28,0xda,0x14,0x6d,0x00,0xfc,0xff,0xff
-# ATT: vsm4key4 4064(%rcx), %ymm23, %ymm22
-# INTEL: vsm4key4 ymm22, ymm23, ymmword ptr [rcx + 4064]
-0x62,0xe2,0x46,0x20,0xda,0x71,0x7f
+# ATT: vsm4key4 4064(%ecx), %ymm3, %ymm2
+# INTEL: vsm4key4 ymm2, ymm3, ymmword ptr [ecx + 4064]
+0x62,0xf2,0x66,0x28,0xda,0x51,0x7f
-# ATT: vsm4key4 -4096(%rdx), %ymm23, %ymm22
-# INTEL: vsm4key4 ymm22, ymm23, ymmword ptr [rdx - 4096]
-0x62,0xe2,0x46,0x20,0xda,0x72,0x80
+# ATT: vsm4key4 -4096(%edx), %ymm3, %ymm2
+# INTEL: vsm4key4 ymm2, ymm3, ymmword ptr [edx - 4096]
+0x62,0xf2,0x66,0x28,0xda,0x52,0x80
-# ATT: vsm4key4 268435456(%rbp,%r14,8), %xmm23, %xmm22
-# INTEL: vsm4key4 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
-0x62,0xa2,0x46,0x00,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10
+# ATT: vsm4key4 268435456(%esp,%esi,8), %xmm3, %xmm2
+# INTEL: vsm4key4 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf2,0x66,0x08,0xda,0x94,0xf4,0x00,0x00,0x00,0x10
-# ATT: vsm4key4 291(%r8,%rax,4), %xmm23, %xmm22
-# INTEL: vsm4key4 xmm22, xmm23, xmmword ptr [r8 + 4*rax + 291]
-0x62,0xc2,0x46,0x00,0xda,0xb4,0x80,0x23,0x01,0x00,0x00
+# ATT: vsm4key4 291(%edi,%eax,4), %xmm3, %xmm2
+# INTEL: vsm4key4 xmm2, xmm3, xmmword ptr [edi + 4*eax + 291]
+0x62,0xf2,0x66,0x08,0xda,0x94,0x87,0x23,0x01,0x00,0x00
-# ATT: vsm4key4 (%rip), %xmm23, %xmm22
-# INTEL: vsm4key4 xmm22, xmm23, xmmword ptr [rip]
-0x62,0xe2,0x46,0x00,0xda,0x35,0x00,0x00,0x00,0x00
+# ATT: vsm4key4 (%eax), %xmm3, %xmm2
+# INTEL: vsm4key4 xmm2, xmm3, xmmword ptr [eax]
+0x62,0xf2,0x66,0x08,0xda,0x10
-# ATT: vsm4key4 -512(,%rbp,2), %xmm23, %xmm22
-# INTEL: vsm4key4 xmm22, xmm23, xmmword ptr [2*rbp - 512]
-0x62,0xe2,0x46,0x00,0xda,0x34,0x6d,0x00,0xfe,0xff,0xff
+# ATT: vsm4key4 -512(,%ebp,2), %xmm3, %xmm2
+# INTEL: vsm4key4 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+0x62,0xf2,0x66,0x08,0xda,0x14,0x6d,0x00,0xfe,0xff,0xff
-# ATT: vsm4key4 2032(%rcx), %xmm23, %xmm22
-# INTEL: vsm4key4 xmm22, xmm23, xmmword ptr [rcx + 2032]
-0x62,0xe2,0x46,0x00,0xda,0x71,0x7f
+# ATT: vsm4key4 2032(%ecx), %xmm3, %xmm2
+# INTEL: vsm4key4 xmm2, xmm3, xmmword ptr [ecx + 2032]
+0x62,0xf2,0x66,0x08,0xda,0x51,0x7f
-# ATT: vsm4key4 -2048(%rdx), %xmm23, %xmm22
-# INTEL: vsm4key4 xmm22, xmm23, xmmword ptr [rdx - 2048]
-0x62,0xe2,0x46,0x00,0xda,0x72,0x80
+# ATT: vsm4key4 -2048(%edx), %xmm3, %xmm2
+# INTEL: vsm4key4 xmm2, xmm3, xmmword ptr [edx - 2048]
+0x62,0xf2,0x66,0x08,0xda,0x52,0x80
-# ATT: vsm4rnds4 %ymm24, %ymm23, %ymm22
-# INTEL: vsm4rnds4 ymm22, ymm23, ymm24
-0x62,0x82,0x47,0x20,0xda,0xf0
+# ATT: vsm4rnds4 %ymm4, %ymm3, %ymm2
+# INTEL: vsm4rnds4 ymm2, ymm3, ymm4
+0x62,0xf2,0x67,0x28,0xda,0xd4
-# ATT: vsm4rnds4 %xmm24, %xmm23, %xmm22
-# INTEL: vsm4rnds4 xmm22, xmm23, xmm24
-0x62,0x82,0x47,0x00,0xda,0xf0
+# ATT: vsm4rnds4 %xmm4, %xmm3, %xmm2
+# INTEL: vsm4rnds4 xmm2, xmm3, xmm4
+0x62,0xf2,0x67,0x08,0xda,0xd4
-# ATT: vsm4rnds4 268435456(%rbp,%r14,8), %ymm23, %ymm22
-# INTEL: vsm4rnds4 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
-0x62,0xa2,0x47,0x20,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10
+# ATT: vsm4rnds4 268435456(%esp,%esi,8), %ymm3, %ymm2
+# INTEL: vsm4rnds4 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+0x62,0xf2,0x67,0x28,0xda,0x94,0xf4,0x00,0x00,0x00,0x10
-# ATT: vsm4rnds4 291(%r8,%rax,4), %ymm23, %ymm22
-# INTEL: vsm4rnds4 ymm22, ymm23, ymmword ptr [r8 + 4*rax + 291]
-0x62,0xc2,0x47,0x20,0xda,0xb4,0x80,0x23,0x01,0x00,0x00
+# ATT: vsm4rnds4 291(%edi,%eax,4), %ymm3, %ymm2
+# INTEL: vsm4rnds4 ymm2, ymm3, ymmword ptr [edi + 4*eax + 291]
+0x62,0xf2,0x67,0x28,0xda,0x94,0x87,0x23,0x01,0x00,0x00
-# ATT: vsm4rnds4 (%rip), %ymm23, %ymm22
-# INTEL: vsm4rnds4 ymm22, ymm23, ymmword ptr [rip]
-0x62,0xe2,0x47,0x20,0xda,0x35,0x00,0x00,0x00,0x00
+# ATT: vsm4rnds4 (%eax), %ymm3, %ymm2
+# INTEL: vsm4rnds4 ymm2, ymm3, ymmword ptr [eax]
+0x62,0xf2,0x67,0x28,0xda,0x10
-# ATT: vsm4rnds4 -1024(,%rbp,2), %ymm23, %ymm22
-# INTEL: vsm4rnds4 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
-0x62,0xe2,0x47,0x20,0xda,0x34,0x6d,0x00,0xfc,0xff,0xff
+# ATT: vsm4rnds4 -1024(,%ebp,2), %ymm3, %ymm2
+# INTEL: vsm4rnds4 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+0x62,0xf2,0x67,0x28,0xda,0x14,0x6d,0x00,0xfc,0xff,0xff
-# ATT: vsm4rnds4 4064(%rcx), %ymm23, %ymm22
-# INTEL: vsm4rnds4 ymm22, ymm23, ymmword ptr [rcx + 4064]
-0x62,0xe2,0x47,0x20,0xda,0x71,0x7f
+# ATT: vsm4rnds4 4064(%ecx), %ymm3, %ymm2
+# INTEL: vsm4rnds4 ymm2, ymm3, ymmword ptr [ecx + 4064]
+0x62,0xf2,0x67,0x28,0xda,0x51,0x7f
-# ATT: vsm4rnds4 -4096(%rdx), %ymm23, %ymm22
-# INTEL: vsm4rnds4 ymm22, ymm23, ymmword ptr [rdx - 4096]
-0x62,0xe2,0x47,0x20,0xda,0x72,0x80
+# ATT: vsm4rnds4 -4096(%edx), %ymm3, %ymm2
+# INTEL: vsm4rnds4 ymm2, ymm3, ymmword ptr [edx - 4096]
+0x62,0xf2,0x67,0x28,0xda,0x52,0x80
-# ATT: vsm4rnds4 268435456(%rbp,%r14,8), %xmm23, %xmm22
-# INTEL: vsm4rnds4 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
-0x62,0xa2,0x47,0x00,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10
+# ATT: vsm4rnds4 268435456(%esp,%esi,8), %xmm3, %xmm2
+# INTEL: vsm4rnds4 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf2,0x67,0x08,0xda,0x94,0xf4,0x00,0x00,0x00,0x10
-# ATT: vsm4rnds4 291(%r8,%rax,4), %xmm23, %xmm22
-# INTEL: vsm4rnds4 xmm22, xmm23, xmmword ptr [r8 + 4*rax + 291]
-0x62,0xc2,0x47,0x00,0xda,0xb4,0x80,0x23,0x01,0x00,0x00
+# ATT: vsm4rnds4 291(%edi,%eax,4), %xmm3, %xmm2
+# INTEL: vsm4rnds4 xmm2, xmm3, xmmword ptr [edi + 4*eax + 291]
+0x62,0xf2,0x67,0x08,0xda,0x94,0x87,0x23,0x01,0x00,0x00
-# ATT: vsm4rnds4 (%rip), %xmm23, %xmm22
-# INTEL: vsm4rnds4 xmm22, xmm23, xmmword ptr [rip]
-0x62,0xe2,0x47,0x00,0xda,0x35,0x00,0x00,0x00,0x00
+# ATT: vsm4rnds4 (%eax), %xmm3, %xmm2
+# INTEL: vsm4rnds4 xmm2, xmm3, xmmword ptr [eax]
+0x62,0xf2,0x67,0x08,0xda,0x10
-# ATT: vsm4rnds4 -512(,%rbp,2), %xmm23, %xmm22
-# INTEL: vsm4rnds4 xmm22, xmm23, xmmword ptr [2*rbp - 512]
-0x62,0xe2,0x47,0x00,0xda,0x34,0x6d,0x00,0xfe,0xff,0xff
+# ATT: vsm4rnds4 -512(,%ebp,2), %xmm3, %xmm2
+# INTEL: vsm4rnds4 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+0x62,0xf2,0x67,0x08,0xda,0x14,0x6d,0x00,0xfe,0xff,0xff
-# ATT: vsm4rnds4 2032(%rcx), %xmm23, %xmm22
-# INTEL: vsm4rnds4 xmm22, xmm23, xmmword ptr [rcx + 2032]
-0x62,0xe2,0x47,0x00,0xda,0x71,0x7f
+# ATT: vsm4rnds4 2032(%ecx), %xmm3, %xmm2
+# INTEL: vsm4rnds4 xmm2, xmm3, xmmword ptr [ecx + 2032]
+0x62,0xf2,0x67,0x08,0xda,0x51,0x7f
-# ATT: vsm4rnds4 -2048(%rdx), %xmm23, %xmm22
-# INTEL: vsm4rnds4 xmm22, xmm23, xmmword ptr [rdx - 2048]
-0x62,0xe2,0x47,0x00,0xda,0x72,0x80
+# ATT: vsm4rnds4 -2048(%edx), %xmm3, %xmm2
+# INTEL: vsm4rnds4 xmm2, xmm3, xmmword ptr [edx - 2048]
+0x62,0xf2,0x67,0x08,0xda,0x52,0x80
diff --git a/llvm/test/MC/Disassembler/X86/sm4-evex-64.txt b/llvm/test/MC/Disassembler/X86/sm4-evex-64.txt
index c1cb271a967b13..f89f4b5a8c0fb8 100644
--- a/llvm/test/MC/Disassembler/X86/sm4-evex-64.txt
+++ b/llvm/test/MC/Disassembler/X86/sm4-evex-64.txt
@@ -1,170 +1,170 @@
-# RUN: llvm-mc --disassemble %s -triple=i386-unknown-unknown | FileCheck %s --check-prefixes=ATT
-# RUN: llvm-mc --disassemble %s -triple=i386-unknown-unknown -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL
+# RUN: llvm-mc --disassemble %s -triple=x86_64 | FileCheck %s --check-prefixes=ATT
+# RUN: llvm-mc --disassemble %s -triple=x86_64 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL
-# ATT: vsm4key4 %zmm4, %zmm3, %zmm2
-# INTEL: vsm4key4 zmm2, zmm3, zmm4
-0x62,0xf2,0x66,0x48,0xda,0xd4
+# ATT: vsm4key4 %zmm24, %zmm23, %zmm22
+# INTEL: vsm4key4 zmm22, zmm23, zmm24
+0x62,0x82,0x46,0x40,0xda,0xf0
-# ATT: vsm4key4 268435456(%esp,%esi,8), %zmm3, %zmm2
-# INTEL: vsm4key4 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
-0x62,0xf2,0x66,0x48,0xda,0x94,0xf4,0x00,0x00,0x00,0x10
+# ATT: vsm4key4 268435456(%rbp,%r14,8), %zmm23, %zmm22
+# INTEL: vsm4key4 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa2,0x46,0x40,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10
-# ATT: vsm4key4 291(%edi,%eax,4), %zmm3, %zmm2
-# INTEL: vsm4key4 zmm2, zmm3, zmmword ptr [edi + 4*eax + 291]
-0x62,0xf2,0x66,0x48,0xda,0x94,0x87,0x23,0x01,0x00,0x00
+# ATT: vsm4key4 291(%r8,%rax,4), %zmm23, %zmm22
+# INTEL: vsm4key4 zmm22, zmm23, zmmword ptr [r8 + 4*rax + 291]
+0x62,0xc2,0x46,0x40,0xda,0xb4,0x80,0x23,0x01,0x00,0x00
-# ATT: vsm4key4 (%eax), %zmm3, %zmm2
-# INTEL: vsm4key4 zmm2, zmm3, zmmword ptr [eax]
-0x62,0xf2,0x66,0x48,0xda,0x10
+# ATT: vsm4key4 (%rip), %zmm23, %zmm22
+# INTEL: vsm4key4 zmm22, zmm23, zmmword ptr [rip]
+0x62,0xe2,0x46,0x40,0xda,0x35,0x00,0x00,0x00,0x00
-# ATT: vsm4key4 -2048(,%ebp,2), %zmm3, %zmm2
-# INTEL: vsm4key4 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
-0x62,0xf2,0x66,0x48,0xda,0x14,0x6d,0x00,0xf8,0xff,0xff
+# ATT: vsm4key4 -2048(,%rbp,2), %zmm23, %zmm22
+# INTEL: vsm4key4 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+0x62,0xe2,0x46,0x40,0xda,0x34,0x6d,0x00,0xf8,0xff,0xff
-# ATT: vsm4key4 8128(%ecx), %zmm3, %zmm2
-# INTEL: vsm4key4 zmm2, zmm3, zmmword ptr [ecx + 8128]
-0x62,0xf2,0x66,0x48,0xda,0x51,0x7f
+# ATT: vsm4key4 8128(%rcx), %zmm23, %zmm22
+# INTEL: vsm4key4 zmm22, zmm23, zmmword ptr [rcx + 8128]
+0x62,0xe2,0x46,0x40,0xda,0x71,0x7f
-# ATT: vsm4key4 -8192(%edx), %zmm3, %zmm2
-# INTEL: vsm4key4 zmm2, zmm3, zmmword ptr [edx - 8192]
-0x62,0xf2,0x66,0x48,0xda,0x52,0x80
+# ATT: vsm4key4 -8192(%rdx), %zmm23, %zmm22
+# INTEL: vsm4key4 zmm22, zmm23, zmmword ptr [rdx - 8192]
+0x62,0xe2,0x46,0x40,0xda,0x72,0x80
-# ATT: vsm4rnds4 %zmm4, %zmm3, %zmm2
-# INTEL: vsm4rnds4 zmm2, zmm3, zmm4
-0x62,0xf2,0x67,0x48,0xda,0xd4
+# ATT: vsm4rnds4 %zmm24, %zmm23, %zmm22
+# INTEL: vsm4rnds4 zmm22, zmm23, zmm24
+0x62,0x82,0x47,0x40,0xda,0xf0
-# ATT: vsm4rnds4 268435456(%esp,%esi,8), %zmm3, %zmm2
-# INTEL: vsm4rnds4 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
-0x62,0xf2,0x67,0x48,0xda,0x94,0xf4,0x00,0x00,0x00,0x10
+# ATT: vsm4rnds4 268435456(%rbp,%r14,8), %zmm23, %zmm22
+# INTEL: vsm4rnds4 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa2,0x47,0x40,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10
-# ATT: vsm4rnds4 291(%edi,%eax,4), %zmm3, %zmm2
-# INTEL: vsm4rnds4 zmm2, zmm3, zmmword ptr [edi + 4*eax + 291]
-0x62,0xf2,0x67,0x48,0xda,0x94,0x87,0x23,0x01,0x00,0x00
+# ATT: vsm4rnds4 291(%r8,%rax,4), %zmm23, %zmm22
+# INTEL: vsm4rnds4 zmm22, zmm23, zmmword ptr [r8 + 4*rax + 291]
+0x62,0xc2,0x47,0x40,0xda,0xb4,0x80,0x23,0x01,0x00,0x00
-# ATT: vsm4rnds4 (%eax), %zmm3, %zmm2
-# INTEL: vsm4rnds4 zmm2, zmm3, zmmword ptr [eax]
-0x62,0xf2,0x67,0x48,0xda,0x10
+# ATT: vsm4rnds4 (%rip), %zmm23, %zmm22
+# INTEL: vsm4rnds4 zmm22, zmm23, zmmword ptr [rip]
+0x62,0xe2,0x47,0x40,0xda,0x35,0x00,0x00,0x00,0x00
-# ATT: vsm4rnds4 -2048(,%ebp,2), %zmm3, %zmm2
-# INTEL: vsm4rnds4 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
-0x62,0xf2,0x67,0x48,0xda,0x14,0x6d,0x00,0xf8,0xff,0xff
+# ATT: vsm4rnds4 -2048(,%rbp,2), %zmm23, %zmm22
+# INTEL: vsm4rnds4 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+0x62,0xe2,0x47,0x40,0xda,0x34,0x6d,0x00,0xf8,0xff,0xff
-# ATT: vsm4rnds4 8128(%ecx), %zmm3, %zmm2
-# INTEL: vsm4rnds4 zmm2, zmm3, zmmword ptr [ecx + 8128]
-0x62,0xf2,0x67,0x48,0xda,0x51,0x7f
+# ATT: vsm4rnds4 8128(%rcx), %zmm23, %zmm22
+# INTEL: vsm4rnds4 zmm22, zmm23, zmmword ptr [rcx + 8128]
+0x62,0xe2,0x47,0x40,0xda,0x71,0x7f
-# ATT: vsm4rnds4 -8192(%edx), %zmm3, %zmm2
-# INTEL: vsm4rnds4 zmm2, zmm3, zmmword ptr [edx - 8192]
-0x62,0xf2,0x67,0x48,0xda,0x52,0x80
+# ATT: vsm4rnds4 -8192(%rdx), %zmm23, %zmm22
+# INTEL: vsm4rnds4 zmm22, zmm23, zmmword ptr [rdx - 8192]
+0x62,0xe2,0x47,0x40,0xda,0x72,0x80
-# ATT: vsm4key4 %ymm4, %ymm3, %ymm2
-# INTEL: vsm4key4 ymm2, ymm3, ymm4
-0x62,0xf2,0x66,0x28,0xda,0xd4
+# ATT: vsm4key4 %ymm24, %ymm23, %ymm22
+# INTEL: vsm4key4 ymm22, ymm23, ymm24
+0x62,0x82,0x46,0x20,0xda,0xf0
-# ATT: vsm4key4 %xmm4, %xmm3, %xmm2
-# INTEL: vsm4key4 xmm2, xmm3, xmm4
-0x62,0xf2,0x66,0x08,0xda,0xd4
+# ATT: vsm4key4 %xmm24, %xmm23, %xmm22
+# INTEL: vsm4key4 xmm22, xmm23, xmm24
+0x62,0x82,0x46,0x00,0xda,0xf0
-# ATT: vsm4key4 268435456(%esp,%esi,8), %ymm3, %ymm2
-# INTEL: vsm4key4 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
-0x62,0xf2,0x66,0x28,0xda,0x94,0xf4,0x00,0x00,0x00,0x10
+# ATT: vsm4key4 268435456(%rbp,%r14,8), %ymm23, %ymm22
+# INTEL: vsm4key4 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa2,0x46,0x20,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10
-# ATT: vsm4key4 291(%edi,%eax,4), %ymm3, %ymm2
-# INTEL: vsm4key4 ymm2, ymm3, ymmword ptr [edi + 4*eax + 291]
-0x62,0xf2,0x66,0x28,0xda,0x94,0x87,0x23,0x01,0x00,0x00
+# ATT: vsm4key4 291(%r8,%rax,4), %ymm23, %ymm22
+# INTEL: vsm4key4 ymm22, ymm23, ymmword ptr [r8 + 4*rax + 291]
+0x62,0xc2,0x46,0x20,0xda,0xb4,0x80,0x23,0x01,0x00,0x00
-# ATT: vsm4key4 (%eax), %ymm3, %ymm2
-# INTEL: vsm4key4 ymm2, ymm3, ymmword ptr [eax]
-0x62,0xf2,0x66,0x28,0xda,0x10
+# ATT: vsm4key4 (%rip), %ymm23, %ymm22
+# INTEL: vsm4key4 ymm22, ymm23, ymmword ptr [rip]
+0x62,0xe2,0x46,0x20,0xda,0x35,0x00,0x00,0x00,0x00
-# ATT: vsm4key4 -1024(,%ebp,2), %ymm3, %ymm2
-# INTEL: vsm4key4 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
-0x62,0xf2,0x66,0x28,0xda,0x14,0x6d,0x00,0xfc,0xff,0xff
+# ATT: vsm4key4 -1024(,%rbp,2), %ymm23, %ymm22
+# INTEL: vsm4key4 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+0x62,0xe2,0x46,0x20,0xda,0x34,0x6d,0x00,0xfc,0xff,0xff
-# ATT: vsm4key4 4064(%ecx), %ymm3, %ymm2
-# INTEL: vsm4key4 ymm2, ymm3, ymmword ptr [ecx + 4064]
-0x62,0xf2,0x66,0x28,0xda,0x51,0x7f
+# ATT: vsm4key4 4064(%rcx), %ymm23, %ymm22
+# INTEL: vsm4key4 ymm22, ymm23, ymmword ptr [rcx + 4064]
+0x62,0xe2,0x46,0x20,0xda,0x71,0x7f
-# ATT: vsm4key4 -4096(%edx), %ymm3, %ymm2
-# INTEL: vsm4key4 ymm2, ymm3, ymmword ptr [edx - 4096]
-0x62,0xf2,0x66,0x28,0xda,0x52,0x80
+# ATT: vsm4key4 -4096(%rdx), %ymm23, %ymm22
+# INTEL: vsm4key4 ymm22, ymm23, ymmword ptr [rdx - 4096]
+0x62,0xe2,0x46,0x20,0xda,0x72,0x80
-# ATT: vsm4key4 268435456(%esp,%esi,8), %xmm3, %xmm2
-# INTEL: vsm4key4 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
-0x62,0xf2,0x66,0x08,0xda,0x94,0xf4,0x00,0x00,0x00,0x10
+# ATT: vsm4key4 268435456(%rbp,%r14,8), %xmm23, %xmm22
+# INTEL: vsm4key4 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa2,0x46,0x00,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10
-# ATT: vsm4key4 291(%edi,%eax,4), %xmm3, %xmm2
-# INTEL: vsm4key4 xmm2, xmm3, xmmword ptr [edi + 4*eax + 291]
-0x62,0xf2,0x66,0x08,0xda,0x94,0x87,0x23,0x01,0x00,0x00
+# ATT: vsm4key4 291(%r8,%rax,4), %xmm23, %xmm22
+# INTEL: vsm4key4 xmm22, xmm23, xmmword ptr [r8 + 4*rax + 291]
+0x62,0xc2,0x46,0x00,0xda,0xb4,0x80,0x23,0x01,0x00,0x00
-# ATT: vsm4key4 (%eax), %xmm3, %xmm2
-# INTEL: vsm4key4 xmm2, xmm3, xmmword ptr [eax]
-0x62,0xf2,0x66,0x08,0xda,0x10
+# ATT: vsm4key4 (%rip), %xmm23, %xmm22
+# INTEL: vsm4key4 xmm22, xmm23, xmmword ptr [rip]
+0x62,0xe2,0x46,0x00,0xda,0x35,0x00,0x00,0x00,0x00
-# ATT: vsm4key4 -512(,%ebp,2), %xmm3, %xmm2
-# INTEL: vsm4key4 xmm2, xmm3, xmmword ptr [2*ebp - 512]
-0x62,0xf2,0x66,0x08,0xda,0x14,0x6d,0x00,0xfe,0xff,0xff
+# ATT: vsm4key4 -512(,%rbp,2), %xmm23, %xmm22
+# INTEL: vsm4key4 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+0x62,0xe2,0x46,0x00,0xda,0x34,0x6d,0x00,0xfe,0xff,0xff
-# ATT: vsm4key4 2032(%ecx), %xmm3, %xmm2
-# INTEL: vsm4key4 xmm2, xmm3, xmmword ptr [ecx + 2032]
-0x62,0xf2,0x66,0x08,0xda,0x51,0x7f
+# ATT: vsm4key4 2032(%rcx), %xmm23, %xmm22
+# INTEL: vsm4key4 xmm22, xmm23, xmmword ptr [rcx + 2032]
+0x62,0xe2,0x46,0x00,0xda,0x71,0x7f
-# ATT: vsm4key4 -2048(%edx), %xmm3, %xmm2
-# INTEL: vsm4key4 xmm2, xmm3, xmmword ptr [edx - 2048]
-0x62,0xf2,0x66,0x08,0xda,0x52,0x80
+# ATT: vsm4key4 -2048(%rdx), %xmm23, %xmm22
+# INTEL: vsm4key4 xmm22, xmm23, xmmword ptr [rdx - 2048]
+0x62,0xe2,0x46,0x00,0xda,0x72,0x80
-# ATT: vsm4rnds4 %ymm4, %ymm3, %ymm2
-# INTEL: vsm4rnds4 ymm2, ymm3, ymm4
-0x62,0xf2,0x67,0x28,0xda,0xd4
+# ATT: vsm4rnds4 %ymm24, %ymm23, %ymm22
+# INTEL: vsm4rnds4 ymm22, ymm23, ymm24
+0x62,0x82,0x47,0x20,0xda,0xf0
-# ATT: vsm4rnds4 %xmm4, %xmm3, %xmm2
-# INTEL: vsm4rnds4 xmm2, xmm3, xmm4
-0x62,0xf2,0x67,0x08,0xda,0xd4
+# ATT: vsm4rnds4 %xmm24, %xmm23, %xmm22
+# INTEL: vsm4rnds4 xmm22, xmm23, xmm24
+0x62,0x82,0x47,0x00,0xda,0xf0
-# ATT: vsm4rnds4 268435456(%esp,%esi,8), %ymm3, %ymm2
-# INTEL: vsm4rnds4 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
-0x62,0xf2,0x67,0x28,0xda,0x94,0xf4,0x00,0x00,0x00,0x10
+# ATT: vsm4rnds4 268435456(%rbp,%r14,8), %ymm23, %ymm22
+# INTEL: vsm4rnds4 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa2,0x47,0x20,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10
-# ATT: vsm4rnds4 291(%edi,%eax,4), %ymm3, %ymm2
-# INTEL: vsm4rnds4 ymm2, ymm3, ymmword ptr [edi + 4*eax + 291]
-0x62,0xf2,0x67,0x28,0xda,0x94,0x87,0x23,0x01,0x00,0x00
+# ATT: vsm4rnds4 291(%r8,%rax,4), %ymm23, %ymm22
+# INTEL: vsm4rnds4 ymm22, ymm23, ymmword ptr [r8 + 4*rax + 291]
+0x62,0xc2,0x47,0x20,0xda,0xb4,0x80,0x23,0x01,0x00,0x00
-# ATT: vsm4rnds4 (%eax), %ymm3, %ymm2
-# INTEL: vsm4rnds4 ymm2, ymm3, ymmword ptr [eax]
-0x62,0xf2,0x67,0x28,0xda,0x10
+# ATT: vsm4rnds4 (%rip), %ymm23, %ymm22
+# INTEL: vsm4rnds4 ymm22, ymm23, ymmword ptr [rip]
+0x62,0xe2,0x47,0x20,0xda,0x35,0x00,0x00,0x00,0x00
-# ATT: vsm4rnds4 -1024(,%ebp,2), %ymm3, %ymm2
-# INTEL: vsm4rnds4 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
-0x62,0xf2,0x67,0x28,0xda,0x14,0x6d,0x00,0xfc,0xff,0xff
+# ATT: vsm4rnds4 -1024(,%rbp,2), %ymm23, %ymm22
+# INTEL: vsm4rnds4 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+0x62,0xe2,0x47,0x20,0xda,0x34,0x6d,0x00,0xfc,0xff,0xff
-# ATT: vsm4rnds4 4064(%ecx), %ymm3, %ymm2
-# INTEL: vsm4rnds4 ymm2, ymm3, ymmword ptr [ecx + 4064]
-0x62,0xf2,0x67,0x28,0xda,0x51,0x7f
+# ATT: vsm4rnds4 4064(%rcx), %ymm23, %ymm22
+# INTEL: vsm4rnds4 ymm22, ymm23, ymmword ptr [rcx + 4064]
+0x62,0xe2,0x47,0x20,0xda,0x71,0x7f
-# ATT: vsm4rnds4 -4096(%edx), %ymm3, %ymm2
-# INTEL: vsm4rnds4 ymm2, ymm3, ymmword ptr [edx - 4096]
-0x62,0xf2,0x67,0x28,0xda,0x52,0x80
+# ATT: vsm4rnds4 -4096(%rdx), %ymm23, %ymm22
+# INTEL: vsm4rnds4 ymm22, ymm23, ymmword ptr [rdx - 4096]
+0x62,0xe2,0x47,0x20,0xda,0x72,0x80
-# ATT: vsm4rnds4 268435456(%esp,%esi,8), %xmm3, %xmm2
-# INTEL: vsm4rnds4 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
-0x62,0xf2,0x67,0x08,0xda,0x94,0xf4,0x00,0x00,0x00,0x10
+# ATT: vsm4rnds4 268435456(%rbp,%r14,8), %xmm23, %xmm22
+# INTEL: vsm4rnds4 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa2,0x47,0x00,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10
-# ATT: vsm4rnds4 291(%edi,%eax,4), %xmm3, %xmm2
-# INTEL: vsm4rnds4 xmm2, xmm3, xmmword ptr [edi + 4*eax + 291]
-0x62,0xf2,0x67,0x08,0xda,0x94,0x87,0x23,0x01,0x00,0x00
+# ATT: vsm4rnds4 291(%r8,%rax,4), %xmm23, %xmm22
+# INTEL: vsm4rnds4 xmm22, xmm23, xmmword ptr [r8 + 4*rax + 291]
+0x62,0xc2,0x47,0x00,0xda,0xb4,0x80,0x23,0x01,0x00,0x00
-# ATT: vsm4rnds4 (%eax), %xmm3, %xmm2
-# INTEL: vsm4rnds4 xmm2, xmm3, xmmword ptr [eax]
-0x62,0xf2,0x67,0x08,0xda,0x10
+# ATT: vsm4rnds4 (%rip), %xmm23, %xmm22
+# INTEL: vsm4rnds4 xmm22, xmm23, xmmword ptr [rip]
+0x62,0xe2,0x47,0x00,0xda,0x35,0x00,0x00,0x00,0x00
-# ATT: vsm4rnds4 -512(,%ebp,2), %xmm3, %xmm2
-# INTEL: vsm4rnds4 xmm2, xmm3, xmmword ptr [2*ebp - 512]
-0x62,0xf2,0x67,0x08,0xda,0x14,0x6d,0x00,0xfe,0xff,0xff
+# ATT: vsm4rnds4 -512(,%rbp,2), %xmm23, %xmm22
+# INTEL: vsm4rnds4 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+0x62,0xe2,0x47,0x00,0xda,0x34,0x6d,0x00,0xfe,0xff,0xff
-# ATT: vsm4rnds4 2032(%ecx), %xmm3, %xmm2
-# INTEL: vsm4rnds4 xmm2, xmm3, xmmword ptr [ecx + 2032]
-0x62,0xf2,0x67,0x08,0xda,0x51,0x7f
+# ATT: vsm4rnds4 2032(%rcx), %xmm23, %xmm22
+# INTEL: vsm4rnds4 xmm22, xmm23, xmmword ptr [rcx + 2032]
+0x62,0xe2,0x47,0x00,0xda,0x71,0x7f
-# ATT: vsm4rnds4 -2048(%edx), %xmm3, %xmm2
-# INTEL: vsm4rnds4 xmm2, xmm3, xmmword ptr [edx - 2048]
-0x62,0xf2,0x67,0x08,0xda,0x52,0x80
+# ATT: vsm4rnds4 -2048(%rdx), %xmm23, %xmm22
+# INTEL: vsm4rnds4 xmm22, xmm23, xmmword ptr [rdx - 2048]
+0x62,0xe2,0x47,0x00,0xda,0x72,0x80
diff --git a/llvm/test/MC/X86/sm4-evex-32-att.s b/llvm/test/MC/X86/sm4-evex-32-att.s
index 389a29b1189795..de10d95ac74d7b 100644
--- a/llvm/test/MC/X86/sm4-evex-32-att.s
+++ b/llvm/test/MC/X86/sm4-evex-32-att.s
@@ -1,224 +1,169 @@
-// RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding %s | FileCheck %s
+// RUN: llvm-mc -triple i686-unknown-unknown --show-encoding %s | FileCheck %s
-// CHECK: vsm4key4 %zmm24, %zmm23, %zmm22
-// CHECK: encoding: [0x62,0x82,0x46,0x40,0xda,0xf0]
- vsm4key4 %zmm24, %zmm23, %zmm22
+// CHECK: vsm4key4 %zmm4, %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0xd4]
+ vsm4key4 %zmm4, %zmm3, %zmm2
-// CHECK: vsm4key4 268435456(%rbp,%r14,8), %zmm23, %zmm22
-// CHECK: encoding: [0x62,0xa2,0x46,0x40,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10]
- vsm4key4 268435456(%rbp,%r14,8), %zmm23, %zmm22
+// CHECK: vsm4key4 268435456(%esp,%esi,8), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vsm4key4 268435456(%esp,%esi,8), %zmm3, %zmm2
-// CHECK: vsm4key4 291(%r8,%rax,4), %zmm23, %zmm22
-// CHECK: encoding: [0x62,0xc2,0x46,0x40,0xda,0xb4,0x80,0x23,0x01,0x00,0x00]
- vsm4key4 291(%r8,%rax,4), %zmm23, %zmm22
+// CHECK: vsm4key4 291(%edi,%eax,4), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0x94,0x87,0x23,0x01,0x00,0x00]
+ vsm4key4 291(%edi,%eax,4), %zmm3, %zmm2
-// CHECK: vsm4key4 (%rip), %zmm23, %zmm22
-// CHECK: encoding: [0x62,0xe2,0x46,0x40,0xda,0x35,0x00,0x00,0x00,0x00]
- vsm4key4 (%rip), %zmm23, %zmm22
+// CHECK: vsm4key4 (%eax), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0x10]
+ vsm4key4 (%eax), %zmm3, %zmm2
-// CHECK: vsm4key4 -2048(,%rbp,2), %zmm23, %zmm22
-// CHECK: encoding: [0x62,0xe2,0x46,0x40,0xda,0x34,0x6d,0x00,0xf8,0xff,0xff]
- vsm4key4 -2048(,%rbp,2), %zmm23, %zmm22
+// CHECK: vsm4key4 -2048(,%ebp,2), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0x14,0x6d,0x00,0xf8,0xff,0xff]
+ vsm4key4 -2048(,%ebp,2), %zmm3, %zmm2
-// CHECK: vsm4key4 8128(%rcx), %zmm23, %zmm22
-// CHECK: encoding: [0x62,0xe2,0x46,0x40,0xda,0x71,0x7f]
- vsm4key4 8128(%rcx), %zmm23, %zmm22
+// CHECK: vsm4key4 8128(%ecx), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0x51,0x7f]
+ vsm4key4 8128(%ecx), %zmm3, %zmm2
-// CHECK: vsm4key4 -8192(%rdx), %zmm23, %zmm22
-// CHECK: encoding: [0x62,0xe2,0x46,0x40,0xda,0x72,0x80]
- vsm4key4 -8192(%rdx), %zmm23, %zmm22
+// CHECK: vsm4key4 -8192(%edx), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0x52,0x80]
+ vsm4key4 -8192(%edx), %zmm3, %zmm2
-// CHECK: vsm4rnds4 %zmm24, %zmm23, %zmm22
-// CHECK: encoding: [0x62,0x82,0x47,0x40,0xda,0xf0]
- vsm4rnds4 %zmm24, %zmm23, %zmm22
+// CHECK: vsm4rnds4 %zmm4, %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0xd4]
+ vsm4rnds4 %zmm4, %zmm3, %zmm2
-// CHECK: vsm4rnds4 268435456(%rbp,%r14,8), %zmm23, %zmm22
-// CHECK: encoding: [0x62,0xa2,0x47,0x40,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10]
- vsm4rnds4 268435456(%rbp,%r14,8), %zmm23, %zmm22
+// CHECK: vsm4rnds4 268435456(%esp,%esi,8), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vsm4rnds4 268435456(%esp,%esi,8), %zmm3, %zmm2
-// CHECK: vsm4rnds4 291(%r8,%rax,4), %zmm23, %zmm22
-// CHECK: encoding: [0x62,0xc2,0x47,0x40,0xda,0xb4,0x80,0x23,0x01,0x00,0x00]
- vsm4rnds4 291(%r8,%rax,4), %zmm23, %zmm22
+// CHECK: vsm4rnds4 291(%edi,%eax,4), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0x94,0x87,0x23,0x01,0x00,0x00]
+ vsm4rnds4 291(%edi,%eax,4), %zmm3, %zmm2
-// CHECK: vsm4rnds4 (%rip), %zmm23, %zmm22
-// CHECK: encoding: [0x62,0xe2,0x47,0x40,0xda,0x35,0x00,0x00,0x00,0x00]
- vsm4rnds4 (%rip), %zmm23, %zmm22
+// CHECK: vsm4rnds4 (%eax), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0x10]
+ vsm4rnds4 (%eax), %zmm3, %zmm2
-// CHECK: vsm4rnds4 -2048(,%rbp,2), %zmm23, %zmm22
-// CHECK: encoding: [0x62,0xe2,0x47,0x40,0xda,0x34,0x6d,0x00,0xf8,0xff,0xff]
- vsm4rnds4 -2048(,%rbp,2), %zmm23, %zmm22
+// CHECK: vsm4rnds4 -2048(,%ebp,2), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0x14,0x6d,0x00,0xf8,0xff,0xff]
+ vsm4rnds4 -2048(,%ebp,2), %zmm3, %zmm2
-// CHECK: vsm4rnds4 8128(%rcx), %zmm23, %zmm22
-// CHECK: encoding: [0x62,0xe2,0x47,0x40,0xda,0x71,0x7f]
- vsm4rnds4 8128(%rcx), %zmm23, %zmm22
+// CHECK: vsm4rnds4 8128(%ecx), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0x51,0x7f]
+ vsm4rnds4 8128(%ecx), %zmm3, %zmm2
-// CHECK: vsm4rnds4 -8192(%rdx), %zmm23, %zmm22
-// CHECK: encoding: [0x62,0xe2,0x47,0x40,0xda,0x72,0x80]
- vsm4rnds4 -8192(%rdx), %zmm23, %zmm22
+// CHECK: vsm4rnds4 -8192(%edx), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0x52,0x80]
+ vsm4rnds4 -8192(%edx), %zmm3, %zmm2
-// CHECK: vsm4key4 %ymm24, %ymm23, %ymm22
-// CHECK: encoding: [0x62,0x82,0x46,0x20,0xda,0xf0]
- vsm4key4 %ymm24, %ymm23, %ymm22
+// CHECK: {evex} vsm4key4 %ymm4, %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0xd4]
+ {evex} vsm4key4 %ymm4, %ymm3, %ymm2
-// CHECK: vsm4key4 %xmm24, %xmm23, %xmm22
-// CHECK: encoding: [0x62,0x82,0x46,0x00,0xda,0xf0]
- vsm4key4 %xmm24, %xmm23, %xmm22
+// CHECK: {evex} vsm4key4 %xmm4, %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0xd4]
+ {evex} vsm4key4 %xmm4, %xmm3, %xmm2
-// CHECK: vsm4key4 268435456(%rbp,%r14,8), %ymm23, %ymm22
-// CHECK: encoding: [0x62,0xa2,0x46,0x20,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10]
- vsm4key4 268435456(%rbp,%r14,8), %ymm23, %ymm22
+// CHECK: {evex} vsm4key4 268435456(%esp,%esi,8), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0x94,0xf4,0x00,0x00,0x00,0x10]
+ {evex} vsm4key4 268435456(%esp,%esi,8), %ymm3, %ymm2
-// CHECK: vsm4key4 291(%r8,%rax,4), %ymm23, %ymm22
-// CHECK: encoding: [0x62,0xc2,0x46,0x20,0xda,0xb4,0x80,0x23,0x01,0x00,0x00]
- vsm4key4 291(%r8,%rax,4), %ymm23, %ymm22
+// CHECK: {evex} vsm4key4 291(%edi,%eax,4), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0x94,0x87,0x23,0x01,0x00,0x00]
+ {evex} vsm4key4 291(%edi,%eax,4), %ymm3, %ymm2
-// CHECK: vsm4key4 (%rip), %ymm23, %ymm22
-// CHECK: encoding: [0x62,0xe2,0x46,0x20,0xda,0x35,0x00,0x00,0x00,0x00]
- vsm4key4 (%rip), %ymm23, %ymm22
+// CHECK: {evex} vsm4key4 (%eax), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0x10]
+ {evex} vsm4key4 (%eax), %ymm3, %ymm2
-// CHECK: vsm4key4 -1024(,%rbp,2), %ymm23, %ymm22
-// CHECK: encoding: [0x62,0xe2,0x46,0x20,0xda,0x34,0x6d,0x00,0xfc,0xff,0xff]
- vsm4key4 -1024(,%rbp,2), %ymm23, %ymm22
+// CHECK: {evex} vsm4key4 -1024(,%ebp,2), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0x14,0x6d,0x00,0xfc,0xff,0xff]
+ {evex} vsm4key4 -1024(,%ebp,2), %ymm3, %ymm2
-// CHECK: vsm4key4 4064(%rcx), %ymm23, %ymm22
-// CHECK: encoding: [0x62,0xe2,0x46,0x20,0xda,0x71,0x7f]
- vsm4key4 4064(%rcx), %ymm23, %ymm22
+// CHECK: {evex} vsm4key4 4064(%ecx), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0x51,0x7f]
+ {evex} vsm4key4 4064(%ecx), %ymm3, %ymm2
-// CHECK: vsm4key4 -4096(%rdx), %ymm23, %ymm22
-// CHECK: encoding: [0x62,0xe2,0x46,0x20,0xda,0x72,0x80]
- vsm4key4 -4096(%rdx), %ymm23, %ymm22
+// CHECK: {evex} vsm4key4 -4096(%edx), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0x52,0x80]
+ {evex} vsm4key4 -4096(%edx), %ymm3, %ymm2
-// CHECK: vsm4key4 268435456(%rbp,%r14,8), %xmm23, %xmm22
-// CHECK: encoding: [0x62,0xa2,0x46,0x00,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10]
- vsm4key4 268435456(%rbp,%r14,8), %xmm23, %xmm22
+// CHECK: {evex} vsm4key4 268435456(%esp,%esi,8), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0x94,0xf4,0x00,0x00,0x00,0x10]
+ {evex} vsm4key4 268435456(%esp,%esi,8), %xmm3, %xmm2
-// CHECK: vsm4key4 291(%r8,%rax,4), %xmm23, %xmm22
-// CHECK: encoding: [0x62,0xc2,0x46,0x00,0xda,0xb4,0x80,0x23,0x01,0x00,0x00]
- vsm4key4 291(%r8,%rax,4), %xmm23, %xmm22
+// CHECK: {evex} vsm4key4 291(%edi,%eax,4), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0x94,0x87,0x23,0x01,0x00,0x00]
+ {evex} vsm4key4 291(%edi,%eax,4), %xmm3, %xmm2
-// CHECK: vsm4key4 (%rip), %xmm23, %xmm22
-// CHECK: encoding: [0x62,0xe2,0x46,0x00,0xda,0x35,0x00,0x00,0x00,0x00]
- vsm4key4 (%rip), %xmm23, %xmm22
+// CHECK: {evex} vsm4key4 (%eax), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0x10]
+ {evex} vsm4key4 (%eax), %xmm3, %xmm2
-// CHECK: vsm4key4 -512(,%rbp,2), %xmm23, %xmm22
-// CHECK: encoding: [0x62,0xe2,0x46,0x00,0xda,0x34,0x6d,0x00,0xfe,0xff,0xff]
- vsm4key4 -512(,%rbp,2), %xmm23, %xmm22
+// CHECK: {evex} vsm4key4 -512(,%ebp,2), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0x14,0x6d,0x00,0xfe,0xff,0xff]
+ {evex} vsm4key4 -512(,%ebp,2), %xmm3, %xmm2
-// CHECK: vsm4key4 2032(%rcx), %xmm23, %xmm22
-// CHECK: encoding: [0x62,0xe2,0x46,0x00,0xda,0x71,0x7f]
- vsm4key4 2032(%rcx), %xmm23, %xmm22
+// CHECK: {evex} vsm4key4 2032(%ecx), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0x51,0x7f]
+ {evex} vsm4key4 2032(%ecx), %xmm3, %xmm2
-// CHECK: vsm4key4 -2048(%rdx), %xmm23, %xmm22
-// CHECK: encoding: [0x62,0xe2,0x46,0x00,0xda,0x72,0x80]
- vsm4key4 -2048(%rdx), %xmm23, %xmm22
-// CHECK: vsm4key4 %ymm24, %ymm23, %ymm22
-// CHECK: encoding: [0x62,0x82,0x46,0x20,0xda,0xf0]
- vsm4key4 %ymm24, %ymm23, %ymm22
+// CHECK: {evex} vsm4key4 -2048(%edx), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0x52,0x80]
+ {evex} vsm4key4 -2048(%edx), %xmm3, %xmm2
-// CHECK: vsm4key4 %xmm24, %xmm23, %xmm22
-// CHECK: encoding: [0x62,0x82,0x46,0x00,0xda,0xf0]
- vsm4key4 %xmm24, %xmm23, %xmm22
+// CHECK: {evex} vsm4rnds4 %ymm4, %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0xd4]
+ {evex} vsm4rnds4 %ymm4, %ymm3, %ymm2
-// CHECK: vsm4key4 268435456(%rbp,%r14,8), %ymm23, %ymm22
-// CHECK: encoding: [0x62,0xa2,0x46,0x20,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10]
- vsm4key4 268435456(%rbp,%r14,8), %ymm23, %ymm22
-
-// CHECK: vsm4key4 291(%r8,%rax,4), %ymm23, %ymm22
-// CHECK: encoding: [0x62,0xc2,0x46,0x20,0xda,0xb4,0x80,0x23,0x01,0x00,0x00]
- vsm4key4 291(%r8,%rax,4), %ymm23, %ymm22
-
-// CHECK: vsm4key4 (%rip), %ymm23, %ymm22
-// CHECK: encoding: [0x62,0xe2,0x46,0x20,0xda,0x35,0x00,0x00,0x00,0x00]
- vsm4key4 (%rip), %ymm23, %ymm22
-
-// CHECK: vsm4key4 -1024(,%rbp,2), %ymm23, %ymm22
-// CHECK: encoding: [0x62,0xe2,0x46,0x20,0xda,0x34,0x6d,0x00,0xfc,0xff,0xff]
- vsm4key4 -1024(,%rbp,2), %ymm23, %ymm22
-
-// CHECK: vsm4key4 4064(%rcx), %ymm23, %ymm22
-// CHECK: encoding: [0x62,0xe2,0x46,0x20,0xda,0x71,0x7f]
- vsm4key4 4064(%rcx), %ymm23, %ymm22
-
-// CHECK: vsm4key4 -4096(%rdx), %ymm23, %ymm22
-// CHECK: encoding: [0x62,0xe2,0x46,0x20,0xda,0x72,0x80]
- vsm4key4 -4096(%rdx), %ymm23, %ymm22
-
-// CHECK: vsm4key4 268435456(%rbp,%r14,8), %xmm23, %xmm22
-// CHECK: encoding: [0x62,0xa2,0x46,0x00,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10]
- vsm4key4 268435456(%rbp,%r14,8), %xmm23, %xmm22
-
-// CHECK: vsm4key4 291(%r8,%rax,4), %xmm23, %xmm22
-// CHECK: encoding: [0x62,0xc2,0x46,0x00,0xda,0xb4,0x80,0x23,0x01,0x00,0x00]
- vsm4key4 291(%r8,%rax,4), %xmm23, %xmm22
-
-// CHECK: vsm4key4 (%rip), %xmm23, %xmm22
-// CHECK: encoding: [0x62,0xe2,0x46,0x00,0xda,0x35,0x00,0x00,0x00,0x00]
- vsm4key4 (%rip), %xmm23, %xmm22
-
-// CHECK: vsm4key4 -512(,%rbp,2), %xmm23, %xmm22
-// CHECK: encoding: [0x62,0xe2,0x46,0x00,0xda,0x34,0x6d,0x00,0xfe,0xff,0xff]
- vsm4key4 -512(,%rbp,2), %xmm23, %xmm22
-
-// CHECK: vsm4key4 2032(%rcx), %xmm23, %xmm22
-// CHECK: encoding: [0x62,0xe2,0x46,0x00,0xda,0x71,0x7f]
- vsm4key4 2032(%rcx), %xmm23, %xmm22
-
-// CHECK: vsm4key4 -2048(%rdx), %xmm23, %xmm22
-// CHECK: encoding: [0x62,0xe2,0x46,0x00,0xda,0x72,0x80]
- vsm4key4 -2048(%rdx), %xmm23, %xmm22
-
-// CHECK: vsm4rnds4 %ymm24, %ymm23, %ymm22
-// CHECK: encoding: [0x62,0x82,0x47,0x20,0xda,0xf0]
- vsm4rnds4 %ymm24, %ymm23, %ymm22
-
-// CHECK: vsm4rnds4 %xmm24, %xmm23, %xmm22
-// CHECK: encoding: [0x62,0x82,0x47,0x00,0xda,0xf0]
- vsm4rnds4 %xmm24, %xmm23, %xmm22
-
-// CHECK: vsm4rnds4 268435456(%rbp,%r14,8), %ymm23, %ymm22
-// CHECK: encoding: [0x62,0xa2,0x47,0x20,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10]
- vsm4rnds4 268435456(%rbp,%r14,8), %ymm23, %ymm22
-
-// CHECK: vsm4rnds4 291(%r8,%rax,4), %ymm23, %ymm22
-// CHECK: encoding: [0x62,0xc2,0x47,0x20,0xda,0xb4,0x80,0x23,0x01,0x00,0x00]
- vsm4rnds4 291(%r8,%rax,4), %ymm23, %ymm22
-
-// CHECK: vsm4rnds4 (%rip), %ymm23, %ymm22
-// CHECK: encoding: [0x62,0xe2,0x47,0x20,0xda,0x35,0x00,0x00,0x00,0x00]
- vsm4rnds4 (%rip), %ymm23, %ymm22
-
-// CHECK: vsm4rnds4 -1024(,%rbp,2), %ymm23, %ymm22
-// CHECK: encoding: [0x62,0xe2,0x47,0x20,0xda,0x34,0x6d,0x00,0xfc,0xff,0xff]
- vsm4rnds4 -1024(,%rbp,2), %ymm23, %ymm22
-
-// CHECK: vsm4rnds4 4064(%rcx), %ymm23, %ymm22
-// CHECK: encoding: [0x62,0xe2,0x47,0x20,0xda,0x71,0x7f]
- vsm4rnds4 4064(%rcx), %ymm23, %ymm22
-
-// CHECK: vsm4rnds4 -4096(%rdx), %ymm23, %ymm22
-// CHECK: encoding: [0x62,0xe2,0x47,0x20,0xda,0x72,0x80]
- vsm4rnds4 -4096(%rdx), %ymm23, %ymm22
-
-// CHECK: vsm4rnds4 268435456(%rbp,%r14,8), %xmm23, %xmm22
-// CHECK: encoding: [0x62,0xa2,0x47,0x00,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10]
- vsm4rnds4 268435456(%rbp,%r14,8), %xmm23, %xmm22
-
-// CHECK: vsm4rnds4 291(%r8,%rax,4), %xmm23, %xmm22
-// CHECK: encoding: [0x62,0xc2,0x47,0x00,0xda,0xb4,0x80,0x23,0x01,0x00,0x00]
- vsm4rnds4 291(%r8,%rax,4), %xmm23, %xmm22
-
-// CHECK: vsm4rnds4 (%rip), %xmm23, %xmm22
-// CHECK: encoding: [0x62,0xe2,0x47,0x00,0xda,0x35,0x00,0x00,0x00,0x00]
- vsm4rnds4 (%rip), %xmm23, %xmm22
-
-// CHECK: vsm4rnds4 -512(,%rbp,2), %xmm23, %xmm22
-// CHECK: encoding: [0x62,0xe2,0x47,0x00,0xda,0x34,0x6d,0x00,0xfe,0xff,0xff]
- vsm4rnds4 -512(,%rbp,2), %xmm23, %xmm22
+// CHECK: {evex} vsm4rnds4 %xmm4, %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0xd4]
+ {evex} vsm4rnds4 %xmm4, %xmm3, %xmm2
-// CHECK: vsm4rnds4 2032(%rcx), %xmm23, %xmm22
-// CHECK: encoding: [0x62,0xe2,0x47,0x00,0xda,0x71,0x7f]
- vsm4rnds4 2032(%rcx), %xmm23, %xmm22
+// CHECK: {evex} vsm4rnds4 268435456(%esp,%esi,8), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0x94,0xf4,0x00,0x00,0x00,0x10]
+ {evex} vsm4rnds4 268435456(%esp,%esi,8), %ymm3, %ymm2
-// CHECK: vsm4rnds4 -2048(%rdx), %xmm23, %xmm22
-// CHECK: encoding: [0x62,0xe2,0x47,0x00,0xda,0x72,0x80]
- vsm4rnds4 -2048(%rdx), %xmm23, %xmm22
+// CHECK: {evex} vsm4rnds4 291(%edi,%eax,4), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0x94,0x87,0x23,0x01,0x00,0x00]
+ {evex} vsm4rnds4 291(%edi,%eax,4), %ymm3, %ymm2
+
+// CHECK: {evex} vsm4rnds4 (%eax), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0x10]
+ {evex} vsm4rnds4 (%eax), %ymm3, %ymm2
+
+// CHECK: {evex} vsm4rnds4 -1024(,%ebp,2), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0x14,0x6d,0x00,0xfc,0xff,0xff]
+ {evex} vsm4rnds4 -1024(,%ebp,2), %ymm3, %ymm2
+
+// CHECK: {evex} vsm4rnds4 4064(%ecx), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0x51,0x7f]
+ {evex} vsm4rnds4 4064(%ecx), %ymm3, %ymm2
+
+// CHECK: {evex} vsm4rnds4 -4096(%edx), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0x52,0x80]
+ {evex} vsm4rnds4 -4096(%edx), %ymm3, %ymm2
+
+// CHECK: {evex} vsm4rnds4 268435456(%esp,%esi,8), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0x94,0xf4,0x00,0x00,0x00,0x10]
+ {evex} vsm4rnds4 268435456(%esp,%esi,8), %xmm3, %xmm2
+
+// CHECK: {evex} vsm4rnds4 291(%edi,%eax,4), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0x94,0x87,0x23,0x01,0x00,0x00]
+ {evex} vsm4rnds4 291(%edi,%eax,4), %xmm3, %xmm2
+
+// CHECK: {evex} vsm4rnds4 (%eax), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0x10]
+ {evex} vsm4rnds4 (%eax), %xmm3, %xmm2
+
+// CHECK: {evex} vsm4rnds4 -512(,%ebp,2), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0x14,0x6d,0x00,0xfe,0xff,0xff]
+ {evex} vsm4rnds4 -512(,%ebp,2), %xmm3, %xmm2
+
+// CHECK: {evex} vsm4rnds4 2032(%ecx), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0x51,0x7f]
+ {evex} vsm4rnds4 2032(%ecx), %xmm3, %xmm2
+
+// CHECK: {evex} vsm4rnds4 -2048(%edx), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0x52,0x80]
+ {evex} vsm4rnds4 -2048(%edx), %xmm3, %xmm2
\ No newline at end of file
diff --git a/llvm/test/MC/X86/sm4-evex-32-intel.s b/llvm/test/MC/X86/sm4-evex-32-intel.s
index 3cc18cf4178ed8..812fdb13f80913 100644
--- a/llvm/test/MC/X86/sm4-evex-32-intel.s
+++ b/llvm/test/MC/X86/sm4-evex-32-intel.s
@@ -1,169 +1,169 @@
-// RUN: llvm-mc -triple x86_64-unknown-unknown -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s
+// RUN: llvm-mc -triple i686-unknown-unknown -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s
-// CHECK: vsm4key4 zmm22, zmm23, zmm24
-// CHECK: encoding: [0x62,0x82,0x46,0x40,0xda,0xf0]
- vsm4key4 zmm22, zmm23, zmm24
+// CHECK: vsm4key4 zmm2, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0xd4]
+ vsm4key4 zmm2, zmm3, zmm4
-// CHECK: vsm4key4 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
-// CHECK: encoding: [0x62,0xa2,0x46,0x40,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10]
- vsm4key4 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: vsm4key4 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vsm4key4 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
-// CHECK: vsm4key4 zmm22, zmm23, zmmword ptr [r8 + 4*rax + 291]
-// CHECK: encoding: [0x62,0xc2,0x46,0x40,0xda,0xb4,0x80,0x23,0x01,0x00,0x00]
- vsm4key4 zmm22, zmm23, zmmword ptr [r8 + 4*rax + 291]
+// CHECK: vsm4key4 zmm2, zmm3, zmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0x94,0x87,0x23,0x01,0x00,0x00]
+ vsm4key4 zmm2, zmm3, zmmword ptr [edi + 4*eax + 291]
-// CHECK: vsm4key4 zmm22, zmm23, zmmword ptr [rip]
-// CHECK: encoding: [0x62,0xe2,0x46,0x40,0xda,0x35,0x00,0x00,0x00,0x00]
- vsm4key4 zmm22, zmm23, zmmword ptr [rip]
+// CHECK: vsm4key4 zmm2, zmm3, zmmword ptr [eax]
+// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0x10]
+ vsm4key4 zmm2, zmm3, zmmword ptr [eax]
-// CHECK: vsm4key4 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
-// CHECK: encoding: [0x62,0xe2,0x46,0x40,0xda,0x34,0x6d,0x00,0xf8,0xff,0xff]
- vsm4key4 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+// CHECK: vsm4key4 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0x14,0x6d,0x00,0xf8,0xff,0xff]
+ vsm4key4 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
-// CHECK: vsm4key4 zmm22, zmm23, zmmword ptr [rcx + 8128]
-// CHECK: encoding: [0x62,0xe2,0x46,0x40,0xda,0x71,0x7f]
- vsm4key4 zmm22, zmm23, zmmword ptr [rcx + 8128]
+// CHECK: vsm4key4 zmm2, zmm3, zmmword ptr [ecx + 8128]
+// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0x51,0x7f]
+ vsm4key4 zmm2, zmm3, zmmword ptr [ecx + 8128]
-// CHECK: vsm4key4 zmm22, zmm23, zmmword ptr [rdx - 8192]
-// CHECK: encoding: [0x62,0xe2,0x46,0x40,0xda,0x72,0x80]
- vsm4key4 zmm22, zmm23, zmmword ptr [rdx - 8192]
+// CHECK: vsm4key4 zmm2, zmm3, zmmword ptr [edx - 8192]
+// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0x52,0x80]
+ vsm4key4 zmm2, zmm3, zmmword ptr [edx - 8192]
-// CHECK: vsm4rnds4 zmm22, zmm23, zmm24
-// CHECK: encoding: [0x62,0x82,0x47,0x40,0xda,0xf0]
- vsm4rnds4 zmm22, zmm23, zmm24
+// CHECK: vsm4rnds4 zmm2, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0xd4]
+ vsm4rnds4 zmm2, zmm3, zmm4
-// CHECK: vsm4rnds4 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
-// CHECK: encoding: [0x62,0xa2,0x47,0x40,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10]
- vsm4rnds4 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: vsm4rnds4 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0x94,0xf4,0x00,0x00,0x00,0x10]
+ vsm4rnds4 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
-// CHECK: vsm4rnds4 zmm22, zmm23, zmmword ptr [r8 + 4*rax + 291]
-// CHECK: encoding: [0x62,0xc2,0x47,0x40,0xda,0xb4,0x80,0x23,0x01,0x00,0x00]
- vsm4rnds4 zmm22, zmm23, zmmword ptr [r8 + 4*rax + 291]
+// CHECK: vsm4rnds4 zmm2, zmm3, zmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0x94,0x87,0x23,0x01,0x00,0x00]
+ vsm4rnds4 zmm2, zmm3, zmmword ptr [edi + 4*eax + 291]
-// CHECK: vsm4rnds4 zmm22, zmm23, zmmword ptr [rip]
-// CHECK: encoding: [0x62,0xe2,0x47,0x40,0xda,0x35,0x00,0x00,0x00,0x00]
- vsm4rnds4 zmm22, zmm23, zmmword ptr [rip]
+// CHECK: vsm4rnds4 zmm2, zmm3, zmmword ptr [eax]
+// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0x10]
+ vsm4rnds4 zmm2, zmm3, zmmword ptr [eax]
-// CHECK: vsm4rnds4 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
-// CHECK: encoding: [0x62,0xe2,0x47,0x40,0xda,0x34,0x6d,0x00,0xf8,0xff,0xff]
- vsm4rnds4 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+// CHECK: vsm4rnds4 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0x14,0x6d,0x00,0xf8,0xff,0xff]
+ vsm4rnds4 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
-// CHECK: vsm4rnds4 zmm22, zmm23, zmmword ptr [rcx + 8128]
-// CHECK: encoding: [0x62,0xe2,0x47,0x40,0xda,0x71,0x7f]
- vsm4rnds4 zmm22, zmm23, zmmword ptr [rcx + 8128]
+// CHECK: vsm4rnds4 zmm2, zmm3, zmmword ptr [ecx + 8128]
+// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0x51,0x7f]
+ vsm4rnds4 zmm2, zmm3, zmmword ptr [ecx + 8128]
-// CHECK: vsm4rnds4 zmm22, zmm23, zmmword ptr [rdx - 8192]
-// CHECK: encoding: [0x62,0xe2,0x47,0x40,0xda,0x72,0x80]
- vsm4rnds4 zmm22, zmm23, zmmword ptr [rdx - 8192]
+// CHECK: vsm4rnds4 zmm2, zmm3, zmmword ptr [edx - 8192]
+// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0x52,0x80]
+ vsm4rnds4 zmm2, zmm3, zmmword ptr [edx - 8192]
-// CHECK: vsm4key4 ymm22, ymm23, ymm24
-// CHECK: encoding: [0x62,0x82,0x46,0x20,0xda,0xf0]
- vsm4key4 ymm22, ymm23, ymm24
+// CHECK: {evex} vsm4key4 ymm2, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0xd4]
+ {evex} vsm4key4 ymm2, ymm3, ymm4
-// CHECK: vsm4key4 xmm22, xmm23, xmm24
-// CHECK: encoding: [0x62,0x82,0x46,0x00,0xda,0xf0]
- vsm4key4 xmm22, xmm23, xmm24
+// CHECK: {evex} vsm4key4 xmm2, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0xd4]
+ {evex} vsm4key4 xmm2, xmm3, xmm4
-// CHECK: vsm4key4 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
-// CHECK: encoding: [0x62,0xa2,0x46,0x20,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10]
- vsm4key4 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: {evex} vsm4key4 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0x94,0xf4,0x00,0x00,0x00,0x10]
+ {evex} vsm4key4 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
-// CHECK: vsm4key4 ymm22, ymm23, ymmword ptr [r8 + 4*rax + 291]
-// CHECK: encoding: [0x62,0xc2,0x46,0x20,0xda,0xb4,0x80,0x23,0x01,0x00,0x00]
- vsm4key4 ymm22, ymm23, ymmword ptr [r8 + 4*rax + 291]
+// CHECK: {evex} vsm4key4 ymm2, ymm3, ymmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0x94,0x87,0x23,0x01,0x00,0x00]
+ {evex} vsm4key4 ymm2, ymm3, ymmword ptr [edi + 4*eax + 291]
-// CHECK: vsm4key4 ymm22, ymm23, ymmword ptr [rip]
-// CHECK: encoding: [0x62,0xe2,0x46,0x20,0xda,0x35,0x00,0x00,0x00,0x00]
- vsm4key4 ymm22, ymm23, ymmword ptr [rip]
+// CHECK: {evex} vsm4key4 ymm2, ymm3, ymmword ptr [eax]
+// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0x10]
+ {evex} vsm4key4 ymm2, ymm3, ymmword ptr [eax]
-// CHECK: vsm4key4 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
-// CHECK: encoding: [0x62,0xe2,0x46,0x20,0xda,0x34,0x6d,0x00,0xfc,0xff,0xff]
- vsm4key4 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+// CHECK: {evex} vsm4key4 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0x14,0x6d,0x00,0xfc,0xff,0xff]
+ {evex} vsm4key4 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
-// CHECK: vsm4key4 ymm22, ymm23, ymmword ptr [rcx + 4064]
-// CHECK: encoding: [0x62,0xe2,0x46,0x20,0xda,0x71,0x7f]
- vsm4key4 ymm22, ymm23, ymmword ptr [rcx + 4064]
+// CHECK: {evex} vsm4key4 ymm2, ymm3, ymmword ptr [ecx + 4064]
+// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0x51,0x7f]
+ {evex} vsm4key4 ymm2, ymm3, ymmword ptr [ecx + 4064]
-// CHECK: vsm4key4 ymm22, ymm23, ymmword ptr [rdx - 4096]
-// CHECK: encoding: [0x62,0xe2,0x46,0x20,0xda,0x72,0x80]
- vsm4key4 ymm22, ymm23, ymmword ptr [rdx - 4096]
+// CHECK: {evex} vsm4key4 ymm2, ymm3, ymmword ptr [edx - 4096]
+// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0x52,0x80]
+ {evex} vsm4key4 ymm2, ymm3, ymmword ptr [edx - 4096]
-// CHECK: vsm4key4 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
-// CHECK: encoding: [0x62,0xa2,0x46,0x00,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10]
- vsm4key4 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: {evex} vsm4key4 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0x94,0xf4,0x00,0x00,0x00,0x10]
+ {evex} vsm4key4 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
-// CHECK: vsm4key4 xmm22, xmm23, xmmword ptr [r8 + 4*rax + 291]
-// CHECK: encoding: [0x62,0xc2,0x46,0x00,0xda,0xb4,0x80,0x23,0x01,0x00,0x00]
- vsm4key4 xmm22, xmm23, xmmword ptr [r8 + 4*rax + 291]
+// CHECK: {evex} vsm4key4 xmm2, xmm3, xmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0x94,0x87,0x23,0x01,0x00,0x00]
+ {evex} vsm4key4 xmm2, xmm3, xmmword ptr [edi + 4*eax + 291]
-// CHECK: vsm4key4 xmm22, xmm23, xmmword ptr [rip]
-// CHECK: encoding: [0x62,0xe2,0x46,0x00,0xda,0x35,0x00,0x00,0x00,0x00]
- vsm4key4 xmm22, xmm23, xmmword ptr [rip]
+// CHECK: {evex} vsm4key4 xmm2, xmm3, xmmword ptr [eax]
+// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0x10]
+ {evex} vsm4key4 xmm2, xmm3, xmmword ptr [eax]
-// CHECK: vsm4key4 xmm22, xmm23, xmmword ptr [2*rbp - 512]
-// CHECK: encoding: [0x62,0xe2,0x46,0x00,0xda,0x34,0x6d,0x00,0xfe,0xff,0xff]
- vsm4key4 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+// CHECK: {evex} vsm4key4 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0x14,0x6d,0x00,0xfe,0xff,0xff]
+ {evex} vsm4key4 xmm2, xmm3, xmmword ptr [2*ebp - 512]
-// CHECK: vsm4key4 xmm22, xmm23, xmmword ptr [rcx + 2032]
-// CHECK: encoding: [0x62,0xe2,0x46,0x00,0xda,0x71,0x7f]
- vsm4key4 xmm22, xmm23, xmmword ptr [rcx + 2032]
+// CHECK: {evex} vsm4key4 xmm2, xmm3, xmmword ptr [ecx + 2032]
+// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0x51,0x7f]
+ {evex} vsm4key4 xmm2, xmm3, xmmword ptr [ecx + 2032]
-// CHECK: vsm4key4 xmm22, xmm23, xmmword ptr [rdx - 2048]
-// CHECK: encoding: [0x62,0xe2,0x46,0x00,0xda,0x72,0x80]
- vsm4key4 xmm22, xmm23, xmmword ptr [rdx - 2048]
+// CHECK: {evex} vsm4key4 xmm2, xmm3, xmmword ptr [edx - 2048]
+// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0x52,0x80]
+ {evex} vsm4key4 xmm2, xmm3, xmmword ptr [edx - 2048]
-// CHECK: vsm4rnds4 ymm22, ymm23, ymm24
-// CHECK: encoding: [0x62,0x82,0x47,0x20,0xda,0xf0]
- vsm4rnds4 ymm22, ymm23, ymm24
+// CHECK: {evex} vsm4rnds4 ymm2, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0xd4]
+ {evex} vsm4rnds4 ymm2, ymm3, ymm4
-// CHECK: vsm4rnds4 xmm22, xmm23, xmm24
-// CHECK: encoding: [0x62,0x82,0x47,0x00,0xda,0xf0]
- vsm4rnds4 xmm22, xmm23, xmm24
+// CHECK: {evex} vsm4rnds4 xmm2, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0xd4]
+ {evex} vsm4rnds4 xmm2, xmm3, xmm4
-// CHECK: vsm4rnds4 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
-// CHECK: encoding: [0x62,0xa2,0x47,0x20,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10]
- vsm4rnds4 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: {evex} vsm4rnds4 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0x94,0xf4,0x00,0x00,0x00,0x10]
+ {evex} vsm4rnds4 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
-// CHECK: vsm4rnds4 ymm22, ymm23, ymmword ptr [r8 + 4*rax + 291]
-// CHECK: encoding: [0x62,0xc2,0x47,0x20,0xda,0xb4,0x80,0x23,0x01,0x00,0x00]
- vsm4rnds4 ymm22, ymm23, ymmword ptr [r8 + 4*rax + 291]
+// CHECK: {evex} vsm4rnds4 ymm2, ymm3, ymmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0x94,0x87,0x23,0x01,0x00,0x00]
+ {evex} vsm4rnds4 ymm2, ymm3, ymmword ptr [edi + 4*eax + 291]
-// CHECK: vsm4rnds4 ymm22, ymm23, ymmword ptr [rip]
-// CHECK: encoding: [0x62,0xe2,0x47,0x20,0xda,0x35,0x00,0x00,0x00,0x00]
- vsm4rnds4 ymm22, ymm23, ymmword ptr [rip]
+// CHECK: {evex} vsm4rnds4 ymm2, ymm3, ymmword ptr [eax]
+// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0x10]
+ {evex} vsm4rnds4 ymm2, ymm3, ymmword ptr [eax]
-// CHECK: vsm4rnds4 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
-// CHECK: encoding: [0x62,0xe2,0x47,0x20,0xda,0x34,0x6d,0x00,0xfc,0xff,0xff]
- vsm4rnds4 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+// CHECK: {evex} vsm4rnds4 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0x14,0x6d,0x00,0xfc,0xff,0xff]
+ {evex} vsm4rnds4 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
-// CHECK: vsm4rnds4 ymm22, ymm23, ymmword ptr [rcx + 4064]
-// CHECK: encoding: [0x62,0xe2,0x47,0x20,0xda,0x71,0x7f]
- vsm4rnds4 ymm22, ymm23, ymmword ptr [rcx + 4064]
+// CHECK: {evex} vsm4rnds4 ymm2, ymm3, ymmword ptr [ecx + 4064]
+// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0x51,0x7f]
+ {evex} vsm4rnds4 ymm2, ymm3, ymmword ptr [ecx + 4064]
-// CHECK: vsm4rnds4 ymm22, ymm23, ymmword ptr [rdx - 4096]
-// CHECK: encoding: [0x62,0xe2,0x47,0x20,0xda,0x72,0x80]
- vsm4rnds4 ymm22, ymm23, ymmword ptr [rdx - 4096]
+// CHECK: {evex} vsm4rnds4 ymm2, ymm3, ymmword ptr [edx - 4096]
+// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0x52,0x80]
+ {evex} vsm4rnds4 ymm2, ymm3, ymmword ptr [edx - 4096]
-// CHECK: vsm4rnds4 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
-// CHECK: encoding: [0x62,0xa2,0x47,0x00,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10]
- vsm4rnds4 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: {evex} vsm4rnds4 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0x94,0xf4,0x00,0x00,0x00,0x10]
+ {evex} vsm4rnds4 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
-// CHECK: vsm4rnds4 xmm22, xmm23, xmmword ptr [r8 + 4*rax + 291]
-// CHECK: encoding: [0x62,0xc2,0x47,0x00,0xda,0xb4,0x80,0x23,0x01,0x00,0x00]
- vsm4rnds4 xmm22, xmm23, xmmword ptr [r8 + 4*rax + 291]
+// CHECK: {evex} vsm4rnds4 xmm2, xmm3, xmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0x94,0x87,0x23,0x01,0x00,0x00]
+ {evex} vsm4rnds4 xmm2, xmm3, xmmword ptr [edi + 4*eax + 291]
-// CHECK: vsm4rnds4 xmm22, xmm23, xmmword ptr [rip]
-// CHECK: encoding: [0x62,0xe2,0x47,0x00,0xda,0x35,0x00,0x00,0x00,0x00]
- vsm4rnds4 xmm22, xmm23, xmmword ptr [rip]
+// CHECK: {evex} vsm4rnds4 xmm2, xmm3, xmmword ptr [eax]
+// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0x10]
+ {evex} vsm4rnds4 xmm2, xmm3, xmmword ptr [eax]
-// CHECK: vsm4rnds4 xmm22, xmm23, xmmword ptr [2*rbp - 512]
-// CHECK: encoding: [0x62,0xe2,0x47,0x00,0xda,0x34,0x6d,0x00,0xfe,0xff,0xff]
- vsm4rnds4 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+// CHECK: {evex} vsm4rnds4 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0x14,0x6d,0x00,0xfe,0xff,0xff]
+ {evex} vsm4rnds4 xmm2, xmm3, xmmword ptr [2*ebp - 512]
-// CHECK: vsm4rnds4 xmm22, xmm23, xmmword ptr [rcx + 2032]
-// CHECK: encoding: [0x62,0xe2,0x47,0x00,0xda,0x71,0x7f]
- vsm4rnds4 xmm22, xmm23, xmmword ptr [rcx + 2032]
+// CHECK: {evex} vsm4rnds4 xmm2, xmm3, xmmword ptr [ecx + 2032]
+// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0x51,0x7f]
+ {evex} vsm4rnds4 xmm2, xmm3, xmmword ptr [ecx + 2032]
-// CHECK: vsm4rnds4 xmm22, xmm23, xmmword ptr [rdx - 2048]
-// CHECK: encoding: [0x62,0xe2,0x47,0x00,0xda,0x72,0x80]
- vsm4rnds4 xmm22, xmm23, xmmword ptr [rdx - 2048]
+// CHECK: {evex} vsm4rnds4 xmm2, xmm3, xmmword ptr [edx - 2048]
+// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0x52,0x80]
+ {evex} vsm4rnds4 xmm2, xmm3, xmmword ptr [edx - 2048]
diff --git a/llvm/test/MC/X86/sm4-evex-64-att.s b/llvm/test/MC/X86/sm4-evex-64-att.s
index de10d95ac74d7b..389a29b1189795 100644
--- a/llvm/test/MC/X86/sm4-evex-64-att.s
+++ b/llvm/test/MC/X86/sm4-evex-64-att.s
@@ -1,169 +1,224 @@
-// RUN: llvm-mc -triple i686-unknown-unknown --show-encoding %s | FileCheck %s
+// RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding %s | FileCheck %s
-// CHECK: vsm4key4 %zmm4, %zmm3, %zmm2
-// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0xd4]
- vsm4key4 %zmm4, %zmm3, %zmm2
+// CHECK: vsm4key4 %zmm24, %zmm23, %zmm22
+// CHECK: encoding: [0x62,0x82,0x46,0x40,0xda,0xf0]
+ vsm4key4 %zmm24, %zmm23, %zmm22
-// CHECK: vsm4key4 268435456(%esp,%esi,8), %zmm3, %zmm2
-// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0x94,0xf4,0x00,0x00,0x00,0x10]
- vsm4key4 268435456(%esp,%esi,8), %zmm3, %zmm2
+// CHECK: vsm4key4 268435456(%rbp,%r14,8), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xa2,0x46,0x40,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vsm4key4 268435456(%rbp,%r14,8), %zmm23, %zmm22
-// CHECK: vsm4key4 291(%edi,%eax,4), %zmm3, %zmm2
-// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0x94,0x87,0x23,0x01,0x00,0x00]
- vsm4key4 291(%edi,%eax,4), %zmm3, %zmm2
+// CHECK: vsm4key4 291(%r8,%rax,4), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xc2,0x46,0x40,0xda,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vsm4key4 291(%r8,%rax,4), %zmm23, %zmm22
-// CHECK: vsm4key4 (%eax), %zmm3, %zmm2
-// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0x10]
- vsm4key4 (%eax), %zmm3, %zmm2
+// CHECK: vsm4key4 (%rip), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe2,0x46,0x40,0xda,0x35,0x00,0x00,0x00,0x00]
+ vsm4key4 (%rip), %zmm23, %zmm22
-// CHECK: vsm4key4 -2048(,%ebp,2), %zmm3, %zmm2
-// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0x14,0x6d,0x00,0xf8,0xff,0xff]
- vsm4key4 -2048(,%ebp,2), %zmm3, %zmm2
+// CHECK: vsm4key4 -2048(,%rbp,2), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe2,0x46,0x40,0xda,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vsm4key4 -2048(,%rbp,2), %zmm23, %zmm22
-// CHECK: vsm4key4 8128(%ecx), %zmm3, %zmm2
-// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0x51,0x7f]
- vsm4key4 8128(%ecx), %zmm3, %zmm2
+// CHECK: vsm4key4 8128(%rcx), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe2,0x46,0x40,0xda,0x71,0x7f]
+ vsm4key4 8128(%rcx), %zmm23, %zmm22
-// CHECK: vsm4key4 -8192(%edx), %zmm3, %zmm2
-// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0x52,0x80]
- vsm4key4 -8192(%edx), %zmm3, %zmm2
+// CHECK: vsm4key4 -8192(%rdx), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe2,0x46,0x40,0xda,0x72,0x80]
+ vsm4key4 -8192(%rdx), %zmm23, %zmm22
-// CHECK: vsm4rnds4 %zmm4, %zmm3, %zmm2
-// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0xd4]
- vsm4rnds4 %zmm4, %zmm3, %zmm2
+// CHECK: vsm4rnds4 %zmm24, %zmm23, %zmm22
+// CHECK: encoding: [0x62,0x82,0x47,0x40,0xda,0xf0]
+ vsm4rnds4 %zmm24, %zmm23, %zmm22
-// CHECK: vsm4rnds4 268435456(%esp,%esi,8), %zmm3, %zmm2
-// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0x94,0xf4,0x00,0x00,0x00,0x10]
- vsm4rnds4 268435456(%esp,%esi,8), %zmm3, %zmm2
+// CHECK: vsm4rnds4 268435456(%rbp,%r14,8), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xa2,0x47,0x40,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vsm4rnds4 268435456(%rbp,%r14,8), %zmm23, %zmm22
-// CHECK: vsm4rnds4 291(%edi,%eax,4), %zmm3, %zmm2
-// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0x94,0x87,0x23,0x01,0x00,0x00]
- vsm4rnds4 291(%edi,%eax,4), %zmm3, %zmm2
+// CHECK: vsm4rnds4 291(%r8,%rax,4), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xc2,0x47,0x40,0xda,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vsm4rnds4 291(%r8,%rax,4), %zmm23, %zmm22
-// CHECK: vsm4rnds4 (%eax), %zmm3, %zmm2
-// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0x10]
- vsm4rnds4 (%eax), %zmm3, %zmm2
+// CHECK: vsm4rnds4 (%rip), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe2,0x47,0x40,0xda,0x35,0x00,0x00,0x00,0x00]
+ vsm4rnds4 (%rip), %zmm23, %zmm22
-// CHECK: vsm4rnds4 -2048(,%ebp,2), %zmm3, %zmm2
-// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0x14,0x6d,0x00,0xf8,0xff,0xff]
- vsm4rnds4 -2048(,%ebp,2), %zmm3, %zmm2
+// CHECK: vsm4rnds4 -2048(,%rbp,2), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe2,0x47,0x40,0xda,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vsm4rnds4 -2048(,%rbp,2), %zmm23, %zmm22
-// CHECK: vsm4rnds4 8128(%ecx), %zmm3, %zmm2
-// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0x51,0x7f]
- vsm4rnds4 8128(%ecx), %zmm3, %zmm2
+// CHECK: vsm4rnds4 8128(%rcx), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe2,0x47,0x40,0xda,0x71,0x7f]
+ vsm4rnds4 8128(%rcx), %zmm23, %zmm22
-// CHECK: vsm4rnds4 -8192(%edx), %zmm3, %zmm2
-// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0x52,0x80]
- vsm4rnds4 -8192(%edx), %zmm3, %zmm2
+// CHECK: vsm4rnds4 -8192(%rdx), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe2,0x47,0x40,0xda,0x72,0x80]
+ vsm4rnds4 -8192(%rdx), %zmm23, %zmm22
-// CHECK: {evex} vsm4key4 %ymm4, %ymm3, %ymm2
-// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0xd4]
- {evex} vsm4key4 %ymm4, %ymm3, %ymm2
+// CHECK: vsm4key4 %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x82,0x46,0x20,0xda,0xf0]
+ vsm4key4 %ymm24, %ymm23, %ymm22
-// CHECK: {evex} vsm4key4 %xmm4, %xmm3, %xmm2
-// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0xd4]
- {evex} vsm4key4 %xmm4, %xmm3, %xmm2
+// CHECK: vsm4key4 %xmm24, %xmm23, %xmm22
+// CHECK: encoding: [0x62,0x82,0x46,0x00,0xda,0xf0]
+ vsm4key4 %xmm24, %xmm23, %xmm22
-// CHECK: {evex} vsm4key4 268435456(%esp,%esi,8), %ymm3, %ymm2
-// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0x94,0xf4,0x00,0x00,0x00,0x10]
- {evex} vsm4key4 268435456(%esp,%esi,8), %ymm3, %ymm2
+// CHECK: vsm4key4 268435456(%rbp,%r14,8), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xa2,0x46,0x20,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vsm4key4 268435456(%rbp,%r14,8), %ymm23, %ymm22
-// CHECK: {evex} vsm4key4 291(%edi,%eax,4), %ymm3, %ymm2
-// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0x94,0x87,0x23,0x01,0x00,0x00]
- {evex} vsm4key4 291(%edi,%eax,4), %ymm3, %ymm2
+// CHECK: vsm4key4 291(%r8,%rax,4), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xc2,0x46,0x20,0xda,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vsm4key4 291(%r8,%rax,4), %ymm23, %ymm22
-// CHECK: {evex} vsm4key4 (%eax), %ymm3, %ymm2
-// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0x10]
- {evex} vsm4key4 (%eax), %ymm3, %ymm2
+// CHECK: vsm4key4 (%rip), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe2,0x46,0x20,0xda,0x35,0x00,0x00,0x00,0x00]
+ vsm4key4 (%rip), %ymm23, %ymm22
-// CHECK: {evex} vsm4key4 -1024(,%ebp,2), %ymm3, %ymm2
-// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0x14,0x6d,0x00,0xfc,0xff,0xff]
- {evex} vsm4key4 -1024(,%ebp,2), %ymm3, %ymm2
+// CHECK: vsm4key4 -1024(,%rbp,2), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe2,0x46,0x20,0xda,0x34,0x6d,0x00,0xfc,0xff,0xff]
+ vsm4key4 -1024(,%rbp,2), %ymm23, %ymm22
-// CHECK: {evex} vsm4key4 4064(%ecx), %ymm3, %ymm2
-// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0x51,0x7f]
- {evex} vsm4key4 4064(%ecx), %ymm3, %ymm2
+// CHECK: vsm4key4 4064(%rcx), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe2,0x46,0x20,0xda,0x71,0x7f]
+ vsm4key4 4064(%rcx), %ymm23, %ymm22
-// CHECK: {evex} vsm4key4 -4096(%edx), %ymm3, %ymm2
-// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0x52,0x80]
- {evex} vsm4key4 -4096(%edx), %ymm3, %ymm2
+// CHECK: vsm4key4 -4096(%rdx), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe2,0x46,0x20,0xda,0x72,0x80]
+ vsm4key4 -4096(%rdx), %ymm23, %ymm22
-// CHECK: {evex} vsm4key4 268435456(%esp,%esi,8), %xmm3, %xmm2
-// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0x94,0xf4,0x00,0x00,0x00,0x10]
- {evex} vsm4key4 268435456(%esp,%esi,8), %xmm3, %xmm2
+// CHECK: vsm4key4 268435456(%rbp,%r14,8), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xa2,0x46,0x00,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vsm4key4 268435456(%rbp,%r14,8), %xmm23, %xmm22
-// CHECK: {evex} vsm4key4 291(%edi,%eax,4), %xmm3, %xmm2
-// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0x94,0x87,0x23,0x01,0x00,0x00]
- {evex} vsm4key4 291(%edi,%eax,4), %xmm3, %xmm2
+// CHECK: vsm4key4 291(%r8,%rax,4), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xc2,0x46,0x00,0xda,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vsm4key4 291(%r8,%rax,4), %xmm23, %xmm22
-// CHECK: {evex} vsm4key4 (%eax), %xmm3, %xmm2
-// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0x10]
- {evex} vsm4key4 (%eax), %xmm3, %xmm2
+// CHECK: vsm4key4 (%rip), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe2,0x46,0x00,0xda,0x35,0x00,0x00,0x00,0x00]
+ vsm4key4 (%rip), %xmm23, %xmm22
-// CHECK: {evex} vsm4key4 -512(,%ebp,2), %xmm3, %xmm2
-// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0x14,0x6d,0x00,0xfe,0xff,0xff]
- {evex} vsm4key4 -512(,%ebp,2), %xmm3, %xmm2
+// CHECK: vsm4key4 -512(,%rbp,2), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe2,0x46,0x00,0xda,0x34,0x6d,0x00,0xfe,0xff,0xff]
+ vsm4key4 -512(,%rbp,2), %xmm23, %xmm22
-// CHECK: {evex} vsm4key4 2032(%ecx), %xmm3, %xmm2
-// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0x51,0x7f]
- {evex} vsm4key4 2032(%ecx), %xmm3, %xmm2
+// CHECK: vsm4key4 2032(%rcx), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe2,0x46,0x00,0xda,0x71,0x7f]
+ vsm4key4 2032(%rcx), %xmm23, %xmm22
-// CHECK: {evex} vsm4key4 -2048(%edx), %xmm3, %xmm2
-// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0x52,0x80]
- {evex} vsm4key4 -2048(%edx), %xmm3, %xmm2
+// CHECK: vsm4key4 -2048(%rdx), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe2,0x46,0x00,0xda,0x72,0x80]
+ vsm4key4 -2048(%rdx), %xmm23, %xmm22
+// CHECK: vsm4key4 %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x82,0x46,0x20,0xda,0xf0]
+ vsm4key4 %ymm24, %ymm23, %ymm22
-// CHECK: {evex} vsm4rnds4 %ymm4, %ymm3, %ymm2
-// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0xd4]
- {evex} vsm4rnds4 %ymm4, %ymm3, %ymm2
+// CHECK: vsm4key4 %xmm24, %xmm23, %xmm22
+// CHECK: encoding: [0x62,0x82,0x46,0x00,0xda,0xf0]
+ vsm4key4 %xmm24, %xmm23, %xmm22
-// CHECK: {evex} vsm4rnds4 %xmm4, %xmm3, %xmm2
-// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0xd4]
- {evex} vsm4rnds4 %xmm4, %xmm3, %xmm2
+// CHECK: vsm4key4 268435456(%rbp,%r14,8), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xa2,0x46,0x20,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vsm4key4 268435456(%rbp,%r14,8), %ymm23, %ymm22
+
+// CHECK: vsm4key4 291(%r8,%rax,4), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xc2,0x46,0x20,0xda,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vsm4key4 291(%r8,%rax,4), %ymm23, %ymm22
+
+// CHECK: vsm4key4 (%rip), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe2,0x46,0x20,0xda,0x35,0x00,0x00,0x00,0x00]
+ vsm4key4 (%rip), %ymm23, %ymm22
+
+// CHECK: vsm4key4 -1024(,%rbp,2), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe2,0x46,0x20,0xda,0x34,0x6d,0x00,0xfc,0xff,0xff]
+ vsm4key4 -1024(,%rbp,2), %ymm23, %ymm22
+
+// CHECK: vsm4key4 4064(%rcx), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe2,0x46,0x20,0xda,0x71,0x7f]
+ vsm4key4 4064(%rcx), %ymm23, %ymm22
+
+// CHECK: vsm4key4 -4096(%rdx), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe2,0x46,0x20,0xda,0x72,0x80]
+ vsm4key4 -4096(%rdx), %ymm23, %ymm22
+
+// CHECK: vsm4key4 268435456(%rbp,%r14,8), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xa2,0x46,0x00,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vsm4key4 268435456(%rbp,%r14,8), %xmm23, %xmm22
+
+// CHECK: vsm4key4 291(%r8,%rax,4), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xc2,0x46,0x00,0xda,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vsm4key4 291(%r8,%rax,4), %xmm23, %xmm22
+
+// CHECK: vsm4key4 (%rip), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe2,0x46,0x00,0xda,0x35,0x00,0x00,0x00,0x00]
+ vsm4key4 (%rip), %xmm23, %xmm22
+
+// CHECK: vsm4key4 -512(,%rbp,2), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe2,0x46,0x00,0xda,0x34,0x6d,0x00,0xfe,0xff,0xff]
+ vsm4key4 -512(,%rbp,2), %xmm23, %xmm22
+
+// CHECK: vsm4key4 2032(%rcx), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe2,0x46,0x00,0xda,0x71,0x7f]
+ vsm4key4 2032(%rcx), %xmm23, %xmm22
+
+// CHECK: vsm4key4 -2048(%rdx), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe2,0x46,0x00,0xda,0x72,0x80]
+ vsm4key4 -2048(%rdx), %xmm23, %xmm22
+
+// CHECK: vsm4rnds4 %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x82,0x47,0x20,0xda,0xf0]
+ vsm4rnds4 %ymm24, %ymm23, %ymm22
+
+// CHECK: vsm4rnds4 %xmm24, %xmm23, %xmm22
+// CHECK: encoding: [0x62,0x82,0x47,0x00,0xda,0xf0]
+ vsm4rnds4 %xmm24, %xmm23, %xmm22
+
+// CHECK: vsm4rnds4 268435456(%rbp,%r14,8), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xa2,0x47,0x20,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vsm4rnds4 268435456(%rbp,%r14,8), %ymm23, %ymm22
+
+// CHECK: vsm4rnds4 291(%r8,%rax,4), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xc2,0x47,0x20,0xda,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vsm4rnds4 291(%r8,%rax,4), %ymm23, %ymm22
+
+// CHECK: vsm4rnds4 (%rip), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe2,0x47,0x20,0xda,0x35,0x00,0x00,0x00,0x00]
+ vsm4rnds4 (%rip), %ymm23, %ymm22
+
+// CHECK: vsm4rnds4 -1024(,%rbp,2), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe2,0x47,0x20,0xda,0x34,0x6d,0x00,0xfc,0xff,0xff]
+ vsm4rnds4 -1024(,%rbp,2), %ymm23, %ymm22
+
+// CHECK: vsm4rnds4 4064(%rcx), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe2,0x47,0x20,0xda,0x71,0x7f]
+ vsm4rnds4 4064(%rcx), %ymm23, %ymm22
+
+// CHECK: vsm4rnds4 -4096(%rdx), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe2,0x47,0x20,0xda,0x72,0x80]
+ vsm4rnds4 -4096(%rdx), %ymm23, %ymm22
+
+// CHECK: vsm4rnds4 268435456(%rbp,%r14,8), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xa2,0x47,0x00,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vsm4rnds4 268435456(%rbp,%r14,8), %xmm23, %xmm22
+
+// CHECK: vsm4rnds4 291(%r8,%rax,4), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xc2,0x47,0x00,0xda,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vsm4rnds4 291(%r8,%rax,4), %xmm23, %xmm22
+
+// CHECK: vsm4rnds4 (%rip), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe2,0x47,0x00,0xda,0x35,0x00,0x00,0x00,0x00]
+ vsm4rnds4 (%rip), %xmm23, %xmm22
+
+// CHECK: vsm4rnds4 -512(,%rbp,2), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe2,0x47,0x00,0xda,0x34,0x6d,0x00,0xfe,0xff,0xff]
+ vsm4rnds4 -512(,%rbp,2), %xmm23, %xmm22
-// CHECK: {evex} vsm4rnds4 268435456(%esp,%esi,8), %ymm3, %ymm2
-// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0x94,0xf4,0x00,0x00,0x00,0x10]
- {evex} vsm4rnds4 268435456(%esp,%esi,8), %ymm3, %ymm2
+// CHECK: vsm4rnds4 2032(%rcx), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe2,0x47,0x00,0xda,0x71,0x7f]
+ vsm4rnds4 2032(%rcx), %xmm23, %xmm22
-// CHECK: {evex} vsm4rnds4 291(%edi,%eax,4), %ymm3, %ymm2
-// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0x94,0x87,0x23,0x01,0x00,0x00]
- {evex} vsm4rnds4 291(%edi,%eax,4), %ymm3, %ymm2
-
-// CHECK: {evex} vsm4rnds4 (%eax), %ymm3, %ymm2
-// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0x10]
- {evex} vsm4rnds4 (%eax), %ymm3, %ymm2
-
-// CHECK: {evex} vsm4rnds4 -1024(,%ebp,2), %ymm3, %ymm2
-// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0x14,0x6d,0x00,0xfc,0xff,0xff]
- {evex} vsm4rnds4 -1024(,%ebp,2), %ymm3, %ymm2
-
-// CHECK: {evex} vsm4rnds4 4064(%ecx), %ymm3, %ymm2
-// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0x51,0x7f]
- {evex} vsm4rnds4 4064(%ecx), %ymm3, %ymm2
-
-// CHECK: {evex} vsm4rnds4 -4096(%edx), %ymm3, %ymm2
-// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0x52,0x80]
- {evex} vsm4rnds4 -4096(%edx), %ymm3, %ymm2
-
-// CHECK: {evex} vsm4rnds4 268435456(%esp,%esi,8), %xmm3, %xmm2
-// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0x94,0xf4,0x00,0x00,0x00,0x10]
- {evex} vsm4rnds4 268435456(%esp,%esi,8), %xmm3, %xmm2
-
-// CHECK: {evex} vsm4rnds4 291(%edi,%eax,4), %xmm3, %xmm2
-// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0x94,0x87,0x23,0x01,0x00,0x00]
- {evex} vsm4rnds4 291(%edi,%eax,4), %xmm3, %xmm2
-
-// CHECK: {evex} vsm4rnds4 (%eax), %xmm3, %xmm2
-// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0x10]
- {evex} vsm4rnds4 (%eax), %xmm3, %xmm2
-
-// CHECK: {evex} vsm4rnds4 -512(,%ebp,2), %xmm3, %xmm2
-// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0x14,0x6d,0x00,0xfe,0xff,0xff]
- {evex} vsm4rnds4 -512(,%ebp,2), %xmm3, %xmm2
-
-// CHECK: {evex} vsm4rnds4 2032(%ecx), %xmm3, %xmm2
-// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0x51,0x7f]
- {evex} vsm4rnds4 2032(%ecx), %xmm3, %xmm2
-
-// CHECK: {evex} vsm4rnds4 -2048(%edx), %xmm3, %xmm2
-// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0x52,0x80]
- {evex} vsm4rnds4 -2048(%edx), %xmm3, %xmm2
\ No newline at end of file
+// CHECK: vsm4rnds4 -2048(%rdx), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe2,0x47,0x00,0xda,0x72,0x80]
+ vsm4rnds4 -2048(%rdx), %xmm23, %xmm22
diff --git a/llvm/test/MC/X86/sm4-evex-64-intel.s b/llvm/test/MC/X86/sm4-evex-64-intel.s
index 812fdb13f80913..3cc18cf4178ed8 100644
--- a/llvm/test/MC/X86/sm4-evex-64-intel.s
+++ b/llvm/test/MC/X86/sm4-evex-64-intel.s
@@ -1,169 +1,169 @@
-// RUN: llvm-mc -triple i686-unknown-unknown -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s
+// RUN: llvm-mc -triple x86_64-unknown-unknown -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s
-// CHECK: vsm4key4 zmm2, zmm3, zmm4
-// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0xd4]
- vsm4key4 zmm2, zmm3, zmm4
+// CHECK: vsm4key4 zmm22, zmm23, zmm24
+// CHECK: encoding: [0x62,0x82,0x46,0x40,0xda,0xf0]
+ vsm4key4 zmm22, zmm23, zmm24
-// CHECK: vsm4key4 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
-// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0x94,0xf4,0x00,0x00,0x00,0x10]
- vsm4key4 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+// CHECK: vsm4key4 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa2,0x46,0x40,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vsm4key4 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
-// CHECK: vsm4key4 zmm2, zmm3, zmmword ptr [edi + 4*eax + 291]
-// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0x94,0x87,0x23,0x01,0x00,0x00]
- vsm4key4 zmm2, zmm3, zmmword ptr [edi + 4*eax + 291]
+// CHECK: vsm4key4 zmm22, zmm23, zmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc2,0x46,0x40,0xda,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vsm4key4 zmm22, zmm23, zmmword ptr [r8 + 4*rax + 291]
-// CHECK: vsm4key4 zmm2, zmm3, zmmword ptr [eax]
-// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0x10]
- vsm4key4 zmm2, zmm3, zmmword ptr [eax]
+// CHECK: vsm4key4 zmm22, zmm23, zmmword ptr [rip]
+// CHECK: encoding: [0x62,0xe2,0x46,0x40,0xda,0x35,0x00,0x00,0x00,0x00]
+ vsm4key4 zmm22, zmm23, zmmword ptr [rip]
-// CHECK: vsm4key4 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
-// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0x14,0x6d,0x00,0xf8,0xff,0xff]
- vsm4key4 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+// CHECK: vsm4key4 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+// CHECK: encoding: [0x62,0xe2,0x46,0x40,0xda,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vsm4key4 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
-// CHECK: vsm4key4 zmm2, zmm3, zmmword ptr [ecx + 8128]
-// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0x51,0x7f]
- vsm4key4 zmm2, zmm3, zmmword ptr [ecx + 8128]
+// CHECK: vsm4key4 zmm22, zmm23, zmmword ptr [rcx + 8128]
+// CHECK: encoding: [0x62,0xe2,0x46,0x40,0xda,0x71,0x7f]
+ vsm4key4 zmm22, zmm23, zmmword ptr [rcx + 8128]
-// CHECK: vsm4key4 zmm2, zmm3, zmmword ptr [edx - 8192]
-// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0x52,0x80]
- vsm4key4 zmm2, zmm3, zmmword ptr [edx - 8192]
+// CHECK: vsm4key4 zmm22, zmm23, zmmword ptr [rdx - 8192]
+// CHECK: encoding: [0x62,0xe2,0x46,0x40,0xda,0x72,0x80]
+ vsm4key4 zmm22, zmm23, zmmword ptr [rdx - 8192]
-// CHECK: vsm4rnds4 zmm2, zmm3, zmm4
-// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0xd4]
- vsm4rnds4 zmm2, zmm3, zmm4
+// CHECK: vsm4rnds4 zmm22, zmm23, zmm24
+// CHECK: encoding: [0x62,0x82,0x47,0x40,0xda,0xf0]
+ vsm4rnds4 zmm22, zmm23, zmm24
-// CHECK: vsm4rnds4 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
-// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0x94,0xf4,0x00,0x00,0x00,0x10]
- vsm4rnds4 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+// CHECK: vsm4rnds4 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa2,0x47,0x40,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vsm4rnds4 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
-// CHECK: vsm4rnds4 zmm2, zmm3, zmmword ptr [edi + 4*eax + 291]
-// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0x94,0x87,0x23,0x01,0x00,0x00]
- vsm4rnds4 zmm2, zmm3, zmmword ptr [edi + 4*eax + 291]
+// CHECK: vsm4rnds4 zmm22, zmm23, zmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc2,0x47,0x40,0xda,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vsm4rnds4 zmm22, zmm23, zmmword ptr [r8 + 4*rax + 291]
-// CHECK: vsm4rnds4 zmm2, zmm3, zmmword ptr [eax]
-// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0x10]
- vsm4rnds4 zmm2, zmm3, zmmword ptr [eax]
+// CHECK: vsm4rnds4 zmm22, zmm23, zmmword ptr [rip]
+// CHECK: encoding: [0x62,0xe2,0x47,0x40,0xda,0x35,0x00,0x00,0x00,0x00]
+ vsm4rnds4 zmm22, zmm23, zmmword ptr [rip]
-// CHECK: vsm4rnds4 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
-// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0x14,0x6d,0x00,0xf8,0xff,0xff]
- vsm4rnds4 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+// CHECK: vsm4rnds4 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+// CHECK: encoding: [0x62,0xe2,0x47,0x40,0xda,0x34,0x6d,0x00,0xf8,0xff,0xff]
+ vsm4rnds4 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
-// CHECK: vsm4rnds4 zmm2, zmm3, zmmword ptr [ecx + 8128]
-// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0x51,0x7f]
- vsm4rnds4 zmm2, zmm3, zmmword ptr [ecx + 8128]
+// CHECK: vsm4rnds4 zmm22, zmm23, zmmword ptr [rcx + 8128]
+// CHECK: encoding: [0x62,0xe2,0x47,0x40,0xda,0x71,0x7f]
+ vsm4rnds4 zmm22, zmm23, zmmword ptr [rcx + 8128]
-// CHECK: vsm4rnds4 zmm2, zmm3, zmmword ptr [edx - 8192]
-// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0x52,0x80]
- vsm4rnds4 zmm2, zmm3, zmmword ptr [edx - 8192]
+// CHECK: vsm4rnds4 zmm22, zmm23, zmmword ptr [rdx - 8192]
+// CHECK: encoding: [0x62,0xe2,0x47,0x40,0xda,0x72,0x80]
+ vsm4rnds4 zmm22, zmm23, zmmword ptr [rdx - 8192]
-// CHECK: {evex} vsm4key4 ymm2, ymm3, ymm4
-// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0xd4]
- {evex} vsm4key4 ymm2, ymm3, ymm4
+// CHECK: vsm4key4 ymm22, ymm23, ymm24
+// CHECK: encoding: [0x62,0x82,0x46,0x20,0xda,0xf0]
+ vsm4key4 ymm22, ymm23, ymm24
-// CHECK: {evex} vsm4key4 xmm2, xmm3, xmm4
-// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0xd4]
- {evex} vsm4key4 xmm2, xmm3, xmm4
+// CHECK: vsm4key4 xmm22, xmm23, xmm24
+// CHECK: encoding: [0x62,0x82,0x46,0x00,0xda,0xf0]
+ vsm4key4 xmm22, xmm23, xmm24
-// CHECK: {evex} vsm4key4 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
-// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0x94,0xf4,0x00,0x00,0x00,0x10]
- {evex} vsm4key4 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+// CHECK: vsm4key4 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa2,0x46,0x20,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vsm4key4 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
-// CHECK: {evex} vsm4key4 ymm2, ymm3, ymmword ptr [edi + 4*eax + 291]
-// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0x94,0x87,0x23,0x01,0x00,0x00]
- {evex} vsm4key4 ymm2, ymm3, ymmword ptr [edi + 4*eax + 291]
+// CHECK: vsm4key4 ymm22, ymm23, ymmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc2,0x46,0x20,0xda,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vsm4key4 ymm22, ymm23, ymmword ptr [r8 + 4*rax + 291]
-// CHECK: {evex} vsm4key4 ymm2, ymm3, ymmword ptr [eax]
-// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0x10]
- {evex} vsm4key4 ymm2, ymm3, ymmword ptr [eax]
+// CHECK: vsm4key4 ymm22, ymm23, ymmword ptr [rip]
+// CHECK: encoding: [0x62,0xe2,0x46,0x20,0xda,0x35,0x00,0x00,0x00,0x00]
+ vsm4key4 ymm22, ymm23, ymmword ptr [rip]
-// CHECK: {evex} vsm4key4 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
-// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0x14,0x6d,0x00,0xfc,0xff,0xff]
- {evex} vsm4key4 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+// CHECK: vsm4key4 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+// CHECK: encoding: [0x62,0xe2,0x46,0x20,0xda,0x34,0x6d,0x00,0xfc,0xff,0xff]
+ vsm4key4 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
-// CHECK: {evex} vsm4key4 ymm2, ymm3, ymmword ptr [ecx + 4064]
-// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0x51,0x7f]
- {evex} vsm4key4 ymm2, ymm3, ymmword ptr [ecx + 4064]
+// CHECK: vsm4key4 ymm22, ymm23, ymmword ptr [rcx + 4064]
+// CHECK: encoding: [0x62,0xe2,0x46,0x20,0xda,0x71,0x7f]
+ vsm4key4 ymm22, ymm23, ymmword ptr [rcx + 4064]
-// CHECK: {evex} vsm4key4 ymm2, ymm3, ymmword ptr [edx - 4096]
-// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0x52,0x80]
- {evex} vsm4key4 ymm2, ymm3, ymmword ptr [edx - 4096]
+// CHECK: vsm4key4 ymm22, ymm23, ymmword ptr [rdx - 4096]
+// CHECK: encoding: [0x62,0xe2,0x46,0x20,0xda,0x72,0x80]
+ vsm4key4 ymm22, ymm23, ymmword ptr [rdx - 4096]
-// CHECK: {evex} vsm4key4 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
-// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0x94,0xf4,0x00,0x00,0x00,0x10]
- {evex} vsm4key4 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+// CHECK: vsm4key4 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa2,0x46,0x00,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vsm4key4 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
-// CHECK: {evex} vsm4key4 xmm2, xmm3, xmmword ptr [edi + 4*eax + 291]
-// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0x94,0x87,0x23,0x01,0x00,0x00]
- {evex} vsm4key4 xmm2, xmm3, xmmword ptr [edi + 4*eax + 291]
+// CHECK: vsm4key4 xmm22, xmm23, xmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc2,0x46,0x00,0xda,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vsm4key4 xmm22, xmm23, xmmword ptr [r8 + 4*rax + 291]
-// CHECK: {evex} vsm4key4 xmm2, xmm3, xmmword ptr [eax]
-// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0x10]
- {evex} vsm4key4 xmm2, xmm3, xmmword ptr [eax]
+// CHECK: vsm4key4 xmm22, xmm23, xmmword ptr [rip]
+// CHECK: encoding: [0x62,0xe2,0x46,0x00,0xda,0x35,0x00,0x00,0x00,0x00]
+ vsm4key4 xmm22, xmm23, xmmword ptr [rip]
-// CHECK: {evex} vsm4key4 xmm2, xmm3, xmmword ptr [2*ebp - 512]
-// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0x14,0x6d,0x00,0xfe,0xff,0xff]
- {evex} vsm4key4 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+// CHECK: vsm4key4 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+// CHECK: encoding: [0x62,0xe2,0x46,0x00,0xda,0x34,0x6d,0x00,0xfe,0xff,0xff]
+ vsm4key4 xmm22, xmm23, xmmword ptr [2*rbp - 512]
-// CHECK: {evex} vsm4key4 xmm2, xmm3, xmmword ptr [ecx + 2032]
-// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0x51,0x7f]
- {evex} vsm4key4 xmm2, xmm3, xmmword ptr [ecx + 2032]
+// CHECK: vsm4key4 xmm22, xmm23, xmmword ptr [rcx + 2032]
+// CHECK: encoding: [0x62,0xe2,0x46,0x00,0xda,0x71,0x7f]
+ vsm4key4 xmm22, xmm23, xmmword ptr [rcx + 2032]
-// CHECK: {evex} vsm4key4 xmm2, xmm3, xmmword ptr [edx - 2048]
-// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0x52,0x80]
- {evex} vsm4key4 xmm2, xmm3, xmmword ptr [edx - 2048]
+// CHECK: vsm4key4 xmm22, xmm23, xmmword ptr [rdx - 2048]
+// CHECK: encoding: [0x62,0xe2,0x46,0x00,0xda,0x72,0x80]
+ vsm4key4 xmm22, xmm23, xmmword ptr [rdx - 2048]
-// CHECK: {evex} vsm4rnds4 ymm2, ymm3, ymm4
-// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0xd4]
- {evex} vsm4rnds4 ymm2, ymm3, ymm4
+// CHECK: vsm4rnds4 ymm22, ymm23, ymm24
+// CHECK: encoding: [0x62,0x82,0x47,0x20,0xda,0xf0]
+ vsm4rnds4 ymm22, ymm23, ymm24
-// CHECK: {evex} vsm4rnds4 xmm2, xmm3, xmm4
-// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0xd4]
- {evex} vsm4rnds4 xmm2, xmm3, xmm4
+// CHECK: vsm4rnds4 xmm22, xmm23, xmm24
+// CHECK: encoding: [0x62,0x82,0x47,0x00,0xda,0xf0]
+ vsm4rnds4 xmm22, xmm23, xmm24
-// CHECK: {evex} vsm4rnds4 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
-// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0x94,0xf4,0x00,0x00,0x00,0x10]
- {evex} vsm4rnds4 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+// CHECK: vsm4rnds4 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa2,0x47,0x20,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vsm4rnds4 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
-// CHECK: {evex} vsm4rnds4 ymm2, ymm3, ymmword ptr [edi + 4*eax + 291]
-// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0x94,0x87,0x23,0x01,0x00,0x00]
- {evex} vsm4rnds4 ymm2, ymm3, ymmword ptr [edi + 4*eax + 291]
+// CHECK: vsm4rnds4 ymm22, ymm23, ymmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc2,0x47,0x20,0xda,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vsm4rnds4 ymm22, ymm23, ymmword ptr [r8 + 4*rax + 291]
-// CHECK: {evex} vsm4rnds4 ymm2, ymm3, ymmword ptr [eax]
-// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0x10]
- {evex} vsm4rnds4 ymm2, ymm3, ymmword ptr [eax]
+// CHECK: vsm4rnds4 ymm22, ymm23, ymmword ptr [rip]
+// CHECK: encoding: [0x62,0xe2,0x47,0x20,0xda,0x35,0x00,0x00,0x00,0x00]
+ vsm4rnds4 ymm22, ymm23, ymmword ptr [rip]
-// CHECK: {evex} vsm4rnds4 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
-// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0x14,0x6d,0x00,0xfc,0xff,0xff]
- {evex} vsm4rnds4 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+// CHECK: vsm4rnds4 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+// CHECK: encoding: [0x62,0xe2,0x47,0x20,0xda,0x34,0x6d,0x00,0xfc,0xff,0xff]
+ vsm4rnds4 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
-// CHECK: {evex} vsm4rnds4 ymm2, ymm3, ymmword ptr [ecx + 4064]
-// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0x51,0x7f]
- {evex} vsm4rnds4 ymm2, ymm3, ymmword ptr [ecx + 4064]
+// CHECK: vsm4rnds4 ymm22, ymm23, ymmword ptr [rcx + 4064]
+// CHECK: encoding: [0x62,0xe2,0x47,0x20,0xda,0x71,0x7f]
+ vsm4rnds4 ymm22, ymm23, ymmword ptr [rcx + 4064]
-// CHECK: {evex} vsm4rnds4 ymm2, ymm3, ymmword ptr [edx - 4096]
-// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0x52,0x80]
- {evex} vsm4rnds4 ymm2, ymm3, ymmword ptr [edx - 4096]
+// CHECK: vsm4rnds4 ymm22, ymm23, ymmword ptr [rdx - 4096]
+// CHECK: encoding: [0x62,0xe2,0x47,0x20,0xda,0x72,0x80]
+ vsm4rnds4 ymm22, ymm23, ymmword ptr [rdx - 4096]
-// CHECK: {evex} vsm4rnds4 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
-// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0x94,0xf4,0x00,0x00,0x00,0x10]
- {evex} vsm4rnds4 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+// CHECK: vsm4rnds4 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa2,0x47,0x00,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10]
+ vsm4rnds4 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
-// CHECK: {evex} vsm4rnds4 xmm2, xmm3, xmmword ptr [edi + 4*eax + 291]
-// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0x94,0x87,0x23,0x01,0x00,0x00]
- {evex} vsm4rnds4 xmm2, xmm3, xmmword ptr [edi + 4*eax + 291]
+// CHECK: vsm4rnds4 xmm22, xmm23, xmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc2,0x47,0x00,0xda,0xb4,0x80,0x23,0x01,0x00,0x00]
+ vsm4rnds4 xmm22, xmm23, xmmword ptr [r8 + 4*rax + 291]
-// CHECK: {evex} vsm4rnds4 xmm2, xmm3, xmmword ptr [eax]
-// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0x10]
- {evex} vsm4rnds4 xmm2, xmm3, xmmword ptr [eax]
+// CHECK: vsm4rnds4 xmm22, xmm23, xmmword ptr [rip]
+// CHECK: encoding: [0x62,0xe2,0x47,0x00,0xda,0x35,0x00,0x00,0x00,0x00]
+ vsm4rnds4 xmm22, xmm23, xmmword ptr [rip]
-// CHECK: {evex} vsm4rnds4 xmm2, xmm3, xmmword ptr [2*ebp - 512]
-// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0x14,0x6d,0x00,0xfe,0xff,0xff]
- {evex} vsm4rnds4 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+// CHECK: vsm4rnds4 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+// CHECK: encoding: [0x62,0xe2,0x47,0x00,0xda,0x34,0x6d,0x00,0xfe,0xff,0xff]
+ vsm4rnds4 xmm22, xmm23, xmmword ptr [2*rbp - 512]
-// CHECK: {evex} vsm4rnds4 xmm2, xmm3, xmmword ptr [ecx + 2032]
-// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0x51,0x7f]
- {evex} vsm4rnds4 xmm2, xmm3, xmmword ptr [ecx + 2032]
+// CHECK: vsm4rnds4 xmm22, xmm23, xmmword ptr [rcx + 2032]
+// CHECK: encoding: [0x62,0xe2,0x47,0x00,0xda,0x71,0x7f]
+ vsm4rnds4 xmm22, xmm23, xmmword ptr [rcx + 2032]
-// CHECK: {evex} vsm4rnds4 xmm2, xmm3, xmmword ptr [edx - 2048]
-// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0x52,0x80]
- {evex} vsm4rnds4 xmm2, xmm3, xmmword ptr [edx - 2048]
+// CHECK: vsm4rnds4 xmm22, xmm23, xmmword ptr [rdx - 2048]
+// CHECK: encoding: [0x62,0xe2,0x47,0x00,0xda,0x72,0x80]
+ vsm4rnds4 xmm22, xmm23, xmmword ptr [rdx - 2048]
>From 39513cdd193f3dc48e7df635b50c70ea51f2adef Mon Sep 17 00:00:00 2001
From: Freddy Ye <freddy.ye at intel.com>
Date: Thu, 24 Oct 2024 15:37:09 +0800
Subject: [PATCH 3/5] address comments
---
llvm/lib/Target/X86/X86InstrAVX10.td | 2 +-
llvm/test/CodeGen/X86/sm4-evex-intrinsics.ll | 8 ++++----
2 files changed, 5 insertions(+), 5 deletions(-)
diff --git a/llvm/lib/Target/X86/X86InstrAVX10.td b/llvm/lib/Target/X86/X86InstrAVX10.td
index d6fab07ca2aaf1..d2873e5b1f7056 100644
--- a/llvm/lib/Target/X86/X86InstrAVX10.td
+++ b/llvm/lib/Target/X86/X86InstrAVX10.td
@@ -1651,7 +1651,7 @@ let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
// SM4(EVEX)
multiclass avx10_sm4_base<string OpStr> {
// SM4_Base is in X86InstrSSE.td.
- let Predicates = [HasSM4, HasAVX10_2] in {
+ let Predicates = [HasSM4, HasAVX10_2], AddedComplexity = 1 in {
defm Z128 : SM4_Base<OpStr, VR128X, "128", loadv4i32, i128mem>, EVEX_V128;
defm Z256 : SM4_Base<OpStr, VR256X, "256", loadv8i32, i256mem>, EVEX_V256;
}
diff --git a/llvm/test/CodeGen/X86/sm4-evex-intrinsics.ll b/llvm/test/CodeGen/X86/sm4-evex-intrinsics.ll
index fc46d3cf23fd41..825a11d66cd452 100644
--- a/llvm/test/CodeGen/X86/sm4-evex-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/sm4-evex-intrinsics.ll
@@ -5,7 +5,7 @@
define <4 x i32> @test_int_x86_vsm4key4128(<4 x i32> %A, <4 x i32> %B) {
; CHECK-LABEL: test_int_x86_vsm4key4128:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsm4key4 %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x7a,0xda,0xc1]
+; CHECK-NEXT: vsm4key4 %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7a,0xda,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%ret = call <4 x i32> @llvm.x86.vsm4key4128(<4 x i32> %A, <4 x i32> %B)
ret <4 x i32> %ret
@@ -15,7 +15,7 @@ declare <4 x i32> @llvm.x86.vsm4key4128(<4 x i32> %A, <4 x i32> %B)
define <8 x i32> @test_int_x86_vsm4key4256(<8 x i32> %A, <8 x i32> %B) {
; CHECK-LABEL: test_int_x86_vsm4key4256:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsm4key4 %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7e,0xda,0xc1]
+; CHECK-NEXT: vsm4key4 %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7e,0xda,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%ret = call <8 x i32> @llvm.x86.vsm4key4256(<8 x i32> %A, <8 x i32> %B)
ret <8 x i32> %ret
@@ -35,7 +35,7 @@ declare <16 x i32> @llvm.x86.vsm4key4512(<16 x i32> %A, <16 x i32> %B)
define <4 x i32> @test_int_x86_vsm4rnds4128(<4 x i32> %A, <4 x i32> %B) {
; CHECK-LABEL: test_int_x86_vsm4rnds4128:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsm4rnds4 %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x7b,0xda,0xc1]
+; CHECK-NEXT: vsm4rnds4 %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7b,0xda,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%ret = call <4 x i32> @llvm.x86.vsm4rnds4128(<4 x i32> %A, <4 x i32> %B)
ret <4 x i32> %ret
@@ -45,7 +45,7 @@ declare <4 x i32> @llvm.x86.vsm4rnds4128(<4 x i32> %A, <4 x i32> %B)
define <8 x i32> @test_int_x86_vsm4rnds4256(<8 x i32> %A, <8 x i32> %B) {
; CHECK-LABEL: test_int_x86_vsm4rnds4256:
; CHECK: # %bb.0:
-; CHECK-NEXT: vsm4rnds4 %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7f,0xda,0xc1]
+; CHECK-NEXT: vsm4rnds4 %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7f,0xda,0xc1]
; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%ret = call <8 x i32> @llvm.x86.vsm4rnds4256(<8 x i32> %A, <8 x i32> %B)
ret <8 x i32> %ret
>From c0a33abe804bafe11e9dcecb092b924bcd08cdb7 Mon Sep 17 00:00:00 2001
From: Freddy Ye <freddy.ye at intel.com>
Date: Thu, 24 Oct 2024 16:55:11 +0800
Subject: [PATCH 4/5] address comments
---
clang/lib/Headers/immintrin.h | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/clang/lib/Headers/immintrin.h b/clang/lib/Headers/immintrin.h
index 1b83dd2162707c..7d922267174279 100644
--- a/clang/lib/Headers/immintrin.h
+++ b/clang/lib/Headers/immintrin.h
@@ -665,9 +665,11 @@ _storebe_i64(void * __P, long long __D) {
#include <avx10_2_512niintrin.h>
#include <avx10_2_512satcvtdsintrin.h>
#include <avx10_2_512satcvtintrin.h>
-#if (defined(__SM4__))
-#include <sm4evexintrin.h>
#endif
+
+#if !defined(__SCE__) || __has_feature(modules) || \
+ (defined(__AVX10_2_512__) && defined(__SM4__))
+#include <movrs_avx10_2_512intrin.h>
#endif
#if !defined(__SCE__) || __has_feature(modules) || defined(__ENQCMD__)
>From 4d376482bcddddc6b5579ebc69c8c8b1dae2057e Mon Sep 17 00:00:00 2001
From: Freddy Ye <freddy.ye at intel.com>
Date: Mon, 28 Oct 2024 09:59:34 +0800
Subject: [PATCH 5/5] fix typo
---
clang/lib/Headers/immintrin.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/clang/lib/Headers/immintrin.h b/clang/lib/Headers/immintrin.h
index c27f5c0b03eed9..65ad72bc479f49 100644
--- a/clang/lib/Headers/immintrin.h
+++ b/clang/lib/Headers/immintrin.h
@@ -679,7 +679,7 @@ _storebe_i64(void * __P, long long __D) {
#if !defined(__SCE__) || __has_feature(modules) || \
(defined(__AVX10_2_512__) && defined(__SM4__))
-#include <movrs_avx10_2_512intrin.h>
+#include <sm4evexintrin.h>
#endif
#if !defined(__SCE__) || __has_feature(modules) || defined(__ENQCMD__)
More information about the llvm-commits mailing list