[clang] [llvm] [X86] Support SM4 EVEX version intrinsics/instructions. (PR #113402)

Freddy Ye via cfe-commits cfe-commits at lists.llvm.org
Thu Oct 24 01:56:16 PDT 2024


https://github.com/FreddyLeaf updated https://github.com/llvm/llvm-project/pull/113402

>From 83a2ef421831dec1790c3c6adf3141ed1ac9a0d5 Mon Sep 17 00:00:00 2001
From: Freddy Ye <freddy.ye at intel.com>
Date: Mon, 21 Oct 2024 09:30:26 +0800
Subject: [PATCH 1/4] [X86] Support SM4 EVEX version intrinsics/instructions.

Ref.: https://cdrdv2.intel.com/v1/dl/getContent/671368
---
 clang/include/clang/Basic/BuiltinsX86.def     |   4 +
 clang/lib/Headers/CMakeLists.txt              |   1 +
 clang/lib/Headers/immintrin.h                 |   3 +
 clang/lib/Headers/sm4evexintrin.h             |  32 +++
 clang/test/CodeGen/X86/sm4-evex-builtins.c    |  19 ++
 llvm/docs/ReleaseNotes.md                     |   2 +
 llvm/include/llvm/IR/IntrinsicsX86.td         |   6 +
 llvm/lib/Target/X86/X86InstrAVX10.td          |  20 ++
 llvm/test/CodeGen/X86/sm4-evex-intrinsics.ll  |  64 +++++
 llvm/test/MC/Disassembler/X86/sm4-evex-32.txt | 170 +++++++++++++
 llvm/test/MC/Disassembler/X86/sm4-evex-64.txt | 170 +++++++++++++
 llvm/test/MC/X86/sm4-evex-32-att.s            | 224 ++++++++++++++++++
 llvm/test/MC/X86/sm4-evex-32-intel.s          | 169 +++++++++++++
 llvm/test/MC/X86/sm4-evex-64-att.s            | 169 +++++++++++++
 llvm/test/MC/X86/sm4-evex-64-intel.s          | 169 +++++++++++++
 llvm/test/TableGen/x86-fold-tables.inc        |   6 +
 16 files changed, 1228 insertions(+)
 create mode 100644 clang/lib/Headers/sm4evexintrin.h
 create mode 100644 clang/test/CodeGen/X86/sm4-evex-builtins.c
 create mode 100644 llvm/test/CodeGen/X86/sm4-evex-intrinsics.ll
 create mode 100644 llvm/test/MC/Disassembler/X86/sm4-evex-32.txt
 create mode 100644 llvm/test/MC/Disassembler/X86/sm4-evex-64.txt
 create mode 100644 llvm/test/MC/X86/sm4-evex-32-att.s
 create mode 100644 llvm/test/MC/X86/sm4-evex-32-intel.s
 create mode 100644 llvm/test/MC/X86/sm4-evex-64-att.s
 create mode 100644 llvm/test/MC/X86/sm4-evex-64-intel.s

diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def
index 4c6b22cca421ca..4486eb73a11fa6 100644
--- a/clang/include/clang/Basic/BuiltinsX86.def
+++ b/clang/include/clang/Basic/BuiltinsX86.def
@@ -2179,6 +2179,10 @@ TARGET_BUILTIN(__builtin_ia32_vsm4key4256, "V8UiV8UiV8Ui", "nV:256:", "sm4")
 TARGET_BUILTIN(__builtin_ia32_vsm4rnds4128, "V4UiV4UiV4Ui", "nV:128:", "sm4")
 TARGET_BUILTIN(__builtin_ia32_vsm4rnds4256, "V8UiV8UiV8Ui", "nV:256:", "sm4")
 
+// SM4_EVEX
+TARGET_BUILTIN(__builtin_ia32_vsm4key4512, "V16UiV16UiV16Ui", "nV:512:", "avx10.2-512,sm4")
+TARGET_BUILTIN(__builtin_ia32_vsm4rnds4512, "V16UiV16UiV16Ui", "nV:512:", "avx10.2-512,sm4")
+
 // AVX10 MINMAX
 TARGET_BUILTIN(__builtin_ia32_vminmaxnepbf16128, "V8yV8yV8yIi", "nV:128:", "avx10.2-256")
 TARGET_BUILTIN(__builtin_ia32_vminmaxnepbf16256, "V16yV16yV16yIi", "nV:256:", "avx10.2-256")
diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt
index ff392e7122a448..6a594dad0b67d2 100644
--- a/clang/lib/Headers/CMakeLists.txt
+++ b/clang/lib/Headers/CMakeLists.txt
@@ -241,6 +241,7 @@ set(x86_files
   shaintrin.h
   sm3intrin.h
   sm4intrin.h
+  sm4evexintrin.h
   smmintrin.h
   tbmintrin.h
   tmmintrin.h
diff --git a/clang/lib/Headers/immintrin.h b/clang/lib/Headers/immintrin.h
index 3fbabffa98df20..1b83dd2162707c 100644
--- a/clang/lib/Headers/immintrin.h
+++ b/clang/lib/Headers/immintrin.h
@@ -665,6 +665,9 @@ _storebe_i64(void * __P, long long __D) {
 #include <avx10_2_512niintrin.h>
 #include <avx10_2_512satcvtdsintrin.h>
 #include <avx10_2_512satcvtintrin.h>
+#if (defined(__SM4__))
+#include <sm4evexintrin.h>
+#endif
 #endif
 
 #if !defined(__SCE__) || __has_feature(modules) || defined(__ENQCMD__)
diff --git a/clang/lib/Headers/sm4evexintrin.h b/clang/lib/Headers/sm4evexintrin.h
new file mode 100644
index 00000000000000..f6ae0037baea03
--- /dev/null
+++ b/clang/lib/Headers/sm4evexintrin.h
@@ -0,0 +1,32 @@
+/*===--------------- sm4evexintrin.h - SM4 EVEX intrinsics -----------------===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===----------------------------------------------------------------------===
+ */
+#ifndef __IMMINTRIN_H
+#error "Never use <sm4evexintrin.h> directly; include <immintrin.h> instead."
+#endif // __IMMINTRIN_H
+
+#ifndef __SM4EVEXINTRIN_H
+#define __SM4EVEXINTRIN_H
+
+#define __DEFAULT_FN_ATTRS512                                                  \
+  __attribute__((__always_inline__, __nodebug__,                               \
+                 __target__("sm4,avx10.2-512"), __min_vector_width__(512)))
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_sm4key4_epi32(__m512i __A, __m512i __B) {
+  return (__m512i)__builtin_ia32_vsm4key4512((__v16su)__A, (__v16su)__B);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_sm4rnds4_epi32(__m512i __A, __m512i __B) {
+  return (__m512i)__builtin_ia32_vsm4rnds4512((__v16su)__A, (__v16su)__B);
+}
+
+#undef __DEFAULT_FN_ATTRS512
+
+#endif // __SM4EVEXINTRIN_H
diff --git a/clang/test/CodeGen/X86/sm4-evex-builtins.c b/clang/test/CodeGen/X86/sm4-evex-builtins.c
new file mode 100644
index 00000000000000..0e54bd008d4fb0
--- /dev/null
+++ b/clang/test/CodeGen/X86/sm4-evex-builtins.c
@@ -0,0 +1,19 @@
+// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-- -target-feature +sm4 \
+// RUN: -target-feature +avx10.2-512 -emit-llvm -o - -Wall -Werror | FileCheck %s
+// RUN: %clang_cc1 %s -ffreestanding -triple=i386-- -target-feature +sm4 \
+// RUN: -target-feature +avx10.2-512 -emit-llvm -o - -Wall -Werror | FileCheck %s
+
+#include <immintrin.h>
+#include <stddef.h>
+
+__m512i test_mm512_sm4key4_epi32(__m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_sm4key4_epi32(
+  // CHECK: call <16 x i32> @llvm.x86.vsm4key4512(<16 x i32> %{{.*}}, <16 x i32> %{{.*}})
+  return _mm512_sm4key4_epi32(__A, __B);
+}
+
+__m512i test_mm512_sm4rnds4_epi32(__m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_sm4rnds4_epi32(
+  // CHECK: call <16 x i32> @llvm.x86.vsm4rnds4512(<16 x i32> %{{.*}}, <16 x i32> %{{.*}})
+  return _mm512_sm4rnds4_epi32(__A, __B);
+}
diff --git a/llvm/docs/ReleaseNotes.md b/llvm/docs/ReleaseNotes.md
index e5853789c78b63..16764210537689 100644
--- a/llvm/docs/ReleaseNotes.md
+++ b/llvm/docs/ReleaseNotes.md
@@ -196,6 +196,8 @@ Changes to the X86 Backend
 
 * Support ISA of `AVX10.2-256` and `AVX10.2-512`.
 
+* Support ISA of `SM4(EVEX)`.
+
 Changes to the OCaml bindings
 -----------------------------
 
diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td
index 5262e3154ff721..7725bda1f4f598 100644
--- a/llvm/include/llvm/IR/IntrinsicsX86.td
+++ b/llvm/include/llvm/IR/IntrinsicsX86.td
@@ -6109,6 +6109,12 @@ let TargetPrefix = "x86" in {
         DefaultAttrsIntrinsic<[llvm_v8i32_ty],
         [llvm_v8i32_ty, llvm_v8i32_ty],
         [IntrNoMem]>;
+def int_x86_vsm4key4512 : ClangBuiltin<"__builtin_ia32_vsm4key4512">,
+        DefaultAttrsIntrinsic<[llvm_v16i32_ty],
+        [llvm_v16i32_ty, llvm_v16i32_ty], [IntrNoMem]>;
+def int_x86_vsm4rnds4512 : ClangBuiltin<"__builtin_ia32_vsm4rnds4512">,
+        DefaultAttrsIntrinsic<[llvm_v16i32_ty],
+        [llvm_v16i32_ty, llvm_v16i32_ty], [IntrNoMem]>;
 }
 //===----------------------------------------------------------------------===//
 // RAO-INT intrinsics
diff --git a/llvm/lib/Target/X86/X86InstrAVX10.td b/llvm/lib/Target/X86/X86InstrAVX10.td
index 625f2e01d47218..640011f5ed28d7 100644
--- a/llvm/lib/Target/X86/X86InstrAVX10.td
+++ b/llvm/lib/Target/X86/X86InstrAVX10.td
@@ -1647,3 +1647,23 @@ let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
   def : InstAlias<"vmovw.s\t{$src, $dst|$dst, $src}",
                   (VMOVZPWILo2PWIZrr2 VR128X:$dst, VR128X:$src), 0>;
 }
+
+// SM4(EVEX)
+multiclass avx10_sm4_base<string OpStr> {
+  // SM4_Base is in X86InstrSSE.td.
+  let Predicates = [HasSM4, HasAVX10_2] in {
+    defm Z128 : SM4_Base<OpStr, avx512vl_i32_info.info128.RC,
+                "128", avx512vl_i32_info.info128.LdFrag,
+                avx512vl_i32_info.info128.MemOp>, EVEX_V128;
+    defm Z256 : SM4_Base<OpStr, avx512vl_i32_info.info256.RC,
+                "256", avx512vl_i32_info.info256.LdFrag,
+                avx512vl_i32_info.info256.MemOp>, EVEX_V256;
+  }
+  let Predicates = [HasSM4, HasAVX10_2_512] in
+    defm Z : SM4_Base<OpStr, avx512vl_i32_info.info512.RC,
+              "512", avx512vl_i32_info.info512.LdFrag,
+              avx512vl_i32_info.info512.MemOp>, EVEX_V512;
+}
+
+defm VSM4KEY4 : avx10_sm4_base<"vsm4key4">, T8, XS, EVEX, VVVV;
+defm VSM4RNDS4 : avx10_sm4_base<"vsm4rnds4">, T8, XD, EVEX, VVVV;
diff --git a/llvm/test/CodeGen/X86/sm4-evex-intrinsics.ll b/llvm/test/CodeGen/X86/sm4-evex-intrinsics.ll
new file mode 100644
index 00000000000000..fc46d3cf23fd41
--- /dev/null
+++ b/llvm/test/CodeGen/X86/sm4-evex-intrinsics.ll
@@ -0,0 +1,64 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-- --show-mc-encoding -mattr=+sm4,+avx10.2-512 | FileCheck %s
+; RUN: llc < %s -verify-machineinstrs -mtriple=i686-- --show-mc-encoding -mattr=+sm4,+avx10.2-512 | FileCheck %s
+
+define <4 x i32> @test_int_x86_vsm4key4128(<4 x i32> %A, <4 x i32> %B) {
+; CHECK-LABEL: test_int_x86_vsm4key4128:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsm4key4 %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x7a,0xda,0xc1]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <4 x i32> @llvm.x86.vsm4key4128(<4 x i32> %A, <4 x i32> %B)
+  ret <4 x i32> %ret
+}
+declare <4 x i32> @llvm.x86.vsm4key4128(<4 x i32> %A, <4 x i32> %B)
+
+define <8 x i32> @test_int_x86_vsm4key4256(<8 x i32> %A, <8 x i32> %B) {
+; CHECK-LABEL: test_int_x86_vsm4key4256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsm4key4 %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7e,0xda,0xc1]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <8 x i32> @llvm.x86.vsm4key4256(<8 x i32> %A, <8 x i32> %B)
+  ret <8 x i32> %ret
+}
+declare <8 x i32> @llvm.x86.vsm4key4256(<8 x i32> %A, <8 x i32> %B)
+
+define <16 x i32> @test_int_x86_vsm4key4512(<16 x i32> %A, <16 x i32> %B) {
+; CHECK-LABEL: test_int_x86_vsm4key4512:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsm4key4 %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7e,0x48,0xda,0xc1]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <16 x i32> @llvm.x86.vsm4key4512(<16 x i32> %A, <16 x i32> %B)
+  ret <16 x i32> %ret
+}
+declare <16 x i32> @llvm.x86.vsm4key4512(<16 x i32> %A, <16 x i32> %B)
+
+define <4 x i32> @test_int_x86_vsm4rnds4128(<4 x i32> %A, <4 x i32> %B) {
+; CHECK-LABEL: test_int_x86_vsm4rnds4128:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsm4rnds4 %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x7b,0xda,0xc1]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <4 x i32> @llvm.x86.vsm4rnds4128(<4 x i32> %A, <4 x i32> %B)
+  ret <4 x i32> %ret
+}
+declare <4 x i32> @llvm.x86.vsm4rnds4128(<4 x i32> %A, <4 x i32> %B)
+
+define <8 x i32> @test_int_x86_vsm4rnds4256(<8 x i32> %A, <8 x i32> %B) {
+; CHECK-LABEL: test_int_x86_vsm4rnds4256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsm4rnds4 %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7f,0xda,0xc1]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <8 x i32> @llvm.x86.vsm4rnds4256(<8 x i32> %A, <8 x i32> %B)
+  ret <8 x i32> %ret
+}
+declare <8 x i32> @llvm.x86.vsm4rnds4256(<8 x i32> %A, <8 x i32> %B)
+
+define <16 x i32> @test_int_x86_vsm4rnds4512(<16 x i32> %A, <16 x i32> %B) {
+; CHECK-LABEL: test_int_x86_vsm4rnds4512:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsm4rnds4 %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7f,0x48,0xda,0xc1]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <16 x i32> @llvm.x86.vsm4rnds4512(<16 x i32> %A, <16 x i32> %B)
+  ret <16 x i32> %ret
+}
+declare <16 x i32> @llvm.x86.vsm4rnds4512(<16 x i32> %A, <16 x i32> %B)
+
diff --git a/llvm/test/MC/Disassembler/X86/sm4-evex-32.txt b/llvm/test/MC/Disassembler/X86/sm4-evex-32.txt
new file mode 100644
index 00000000000000..f89f4b5a8c0fb8
--- /dev/null
+++ b/llvm/test/MC/Disassembler/X86/sm4-evex-32.txt
@@ -0,0 +1,170 @@
+# RUN: llvm-mc --disassemble %s -triple=x86_64 | FileCheck %s --check-prefixes=ATT
+# RUN: llvm-mc --disassemble %s -triple=x86_64 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL
+
+# ATT:        vsm4key4 %zmm24, %zmm23, %zmm22
+# INTEL:      vsm4key4 zmm22, zmm23, zmm24
+0x62,0x82,0x46,0x40,0xda,0xf0
+
+# ATT:        vsm4key4  268435456(%rbp,%r14,8), %zmm23, %zmm22
+# INTEL:      vsm4key4 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa2,0x46,0x40,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT:        vsm4key4  291(%r8,%rax,4), %zmm23, %zmm22
+# INTEL:      vsm4key4 zmm22, zmm23, zmmword ptr [r8 + 4*rax + 291]
+0x62,0xc2,0x46,0x40,0xda,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT:        vsm4key4  (%rip), %zmm23, %zmm22
+# INTEL:      vsm4key4 zmm22, zmm23, zmmword ptr [rip]
+0x62,0xe2,0x46,0x40,0xda,0x35,0x00,0x00,0x00,0x00
+
+# ATT:        vsm4key4  -2048(,%rbp,2), %zmm23, %zmm22
+# INTEL:      vsm4key4 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+0x62,0xe2,0x46,0x40,0xda,0x34,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT:        vsm4key4  8128(%rcx), %zmm23, %zmm22
+# INTEL:      vsm4key4 zmm22, zmm23, zmmword ptr [rcx + 8128]
+0x62,0xe2,0x46,0x40,0xda,0x71,0x7f
+
+# ATT:        vsm4key4  -8192(%rdx), %zmm23, %zmm22
+# INTEL:      vsm4key4 zmm22, zmm23, zmmword ptr [rdx - 8192]
+0x62,0xe2,0x46,0x40,0xda,0x72,0x80
+
+# ATT:        vsm4rnds4 %zmm24, %zmm23, %zmm22
+# INTEL:      vsm4rnds4 zmm22, zmm23, zmm24
+0x62,0x82,0x47,0x40,0xda,0xf0
+
+# ATT:        vsm4rnds4  268435456(%rbp,%r14,8), %zmm23, %zmm22
+# INTEL:      vsm4rnds4 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa2,0x47,0x40,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT:        vsm4rnds4  291(%r8,%rax,4), %zmm23, %zmm22
+# INTEL:      vsm4rnds4 zmm22, zmm23, zmmword ptr [r8 + 4*rax + 291]
+0x62,0xc2,0x47,0x40,0xda,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT:        vsm4rnds4  (%rip), %zmm23, %zmm22
+# INTEL:      vsm4rnds4 zmm22, zmm23, zmmword ptr [rip]
+0x62,0xe2,0x47,0x40,0xda,0x35,0x00,0x00,0x00,0x00
+
+# ATT:        vsm4rnds4  -2048(,%rbp,2), %zmm23, %zmm22
+# INTEL:      vsm4rnds4 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+0x62,0xe2,0x47,0x40,0xda,0x34,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT:        vsm4rnds4  8128(%rcx), %zmm23, %zmm22
+# INTEL:      vsm4rnds4 zmm22, zmm23, zmmword ptr [rcx + 8128]
+0x62,0xe2,0x47,0x40,0xda,0x71,0x7f
+
+# ATT:        vsm4rnds4  -8192(%rdx), %zmm23, %zmm22
+# INTEL:      vsm4rnds4 zmm22, zmm23, zmmword ptr [rdx - 8192]
+0x62,0xe2,0x47,0x40,0xda,0x72,0x80
+
+# ATT:        vsm4key4 %ymm24, %ymm23, %ymm22
+# INTEL:      vsm4key4 ymm22, ymm23, ymm24
+0x62,0x82,0x46,0x20,0xda,0xf0
+
+# ATT:        vsm4key4 %xmm24, %xmm23, %xmm22
+# INTEL:      vsm4key4 xmm22, xmm23, xmm24
+0x62,0x82,0x46,0x00,0xda,0xf0
+
+# ATT:        vsm4key4  268435456(%rbp,%r14,8), %ymm23, %ymm22
+# INTEL:      vsm4key4 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa2,0x46,0x20,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT:        vsm4key4  291(%r8,%rax,4), %ymm23, %ymm22
+# INTEL:      vsm4key4 ymm22, ymm23, ymmword ptr [r8 + 4*rax + 291]
+0x62,0xc2,0x46,0x20,0xda,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT:        vsm4key4  (%rip), %ymm23, %ymm22
+# INTEL:      vsm4key4 ymm22, ymm23, ymmword ptr [rip]
+0x62,0xe2,0x46,0x20,0xda,0x35,0x00,0x00,0x00,0x00
+
+# ATT:        vsm4key4  -1024(,%rbp,2), %ymm23, %ymm22
+# INTEL:      vsm4key4 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+0x62,0xe2,0x46,0x20,0xda,0x34,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT:        vsm4key4  4064(%rcx), %ymm23, %ymm22
+# INTEL:      vsm4key4 ymm22, ymm23, ymmword ptr [rcx + 4064]
+0x62,0xe2,0x46,0x20,0xda,0x71,0x7f
+
+# ATT:        vsm4key4  -4096(%rdx), %ymm23, %ymm22
+# INTEL:      vsm4key4 ymm22, ymm23, ymmword ptr [rdx - 4096]
+0x62,0xe2,0x46,0x20,0xda,0x72,0x80
+
+# ATT:        vsm4key4  268435456(%rbp,%r14,8), %xmm23, %xmm22
+# INTEL:      vsm4key4 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa2,0x46,0x00,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT:        vsm4key4  291(%r8,%rax,4), %xmm23, %xmm22
+# INTEL:      vsm4key4 xmm22, xmm23, xmmword ptr [r8 + 4*rax + 291]
+0x62,0xc2,0x46,0x00,0xda,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT:        vsm4key4  (%rip), %xmm23, %xmm22
+# INTEL:      vsm4key4 xmm22, xmm23, xmmword ptr [rip]
+0x62,0xe2,0x46,0x00,0xda,0x35,0x00,0x00,0x00,0x00
+
+# ATT:        vsm4key4  -512(,%rbp,2), %xmm23, %xmm22
+# INTEL:      vsm4key4 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+0x62,0xe2,0x46,0x00,0xda,0x34,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT:        vsm4key4  2032(%rcx), %xmm23, %xmm22
+# INTEL:      vsm4key4 xmm22, xmm23, xmmword ptr [rcx + 2032]
+0x62,0xe2,0x46,0x00,0xda,0x71,0x7f
+
+# ATT:        vsm4key4  -2048(%rdx), %xmm23, %xmm22
+# INTEL:      vsm4key4 xmm22, xmm23, xmmword ptr [rdx - 2048]
+0x62,0xe2,0x46,0x00,0xda,0x72,0x80
+
+# ATT:        vsm4rnds4 %ymm24, %ymm23, %ymm22
+# INTEL:      vsm4rnds4 ymm22, ymm23, ymm24
+0x62,0x82,0x47,0x20,0xda,0xf0
+
+# ATT:        vsm4rnds4 %xmm24, %xmm23, %xmm22
+# INTEL:      vsm4rnds4 xmm22, xmm23, xmm24
+0x62,0x82,0x47,0x00,0xda,0xf0
+
+# ATT:        vsm4rnds4  268435456(%rbp,%r14,8), %ymm23, %ymm22
+# INTEL:      vsm4rnds4 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa2,0x47,0x20,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT:        vsm4rnds4  291(%r8,%rax,4), %ymm23, %ymm22
+# INTEL:      vsm4rnds4 ymm22, ymm23, ymmword ptr [r8 + 4*rax + 291]
+0x62,0xc2,0x47,0x20,0xda,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT:        vsm4rnds4  (%rip), %ymm23, %ymm22
+# INTEL:      vsm4rnds4 ymm22, ymm23, ymmword ptr [rip]
+0x62,0xe2,0x47,0x20,0xda,0x35,0x00,0x00,0x00,0x00
+
+# ATT:        vsm4rnds4  -1024(,%rbp,2), %ymm23, %ymm22
+# INTEL:      vsm4rnds4 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+0x62,0xe2,0x47,0x20,0xda,0x34,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT:        vsm4rnds4  4064(%rcx), %ymm23, %ymm22
+# INTEL:      vsm4rnds4 ymm22, ymm23, ymmword ptr [rcx + 4064]
+0x62,0xe2,0x47,0x20,0xda,0x71,0x7f
+
+# ATT:        vsm4rnds4  -4096(%rdx), %ymm23, %ymm22
+# INTEL:      vsm4rnds4 ymm22, ymm23, ymmword ptr [rdx - 4096]
+0x62,0xe2,0x47,0x20,0xda,0x72,0x80
+
+# ATT:        vsm4rnds4  268435456(%rbp,%r14,8), %xmm23, %xmm22
+# INTEL:      vsm4rnds4 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa2,0x47,0x00,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT:        vsm4rnds4  291(%r8,%rax,4), %xmm23, %xmm22
+# INTEL:      vsm4rnds4 xmm22, xmm23, xmmword ptr [r8 + 4*rax + 291]
+0x62,0xc2,0x47,0x00,0xda,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT:        vsm4rnds4  (%rip), %xmm23, %xmm22
+# INTEL:      vsm4rnds4 xmm22, xmm23, xmmword ptr [rip]
+0x62,0xe2,0x47,0x00,0xda,0x35,0x00,0x00,0x00,0x00
+
+# ATT:        vsm4rnds4  -512(,%rbp,2), %xmm23, %xmm22
+# INTEL:      vsm4rnds4 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+0x62,0xe2,0x47,0x00,0xda,0x34,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT:        vsm4rnds4  2032(%rcx), %xmm23, %xmm22
+# INTEL:      vsm4rnds4 xmm22, xmm23, xmmword ptr [rcx + 2032]
+0x62,0xe2,0x47,0x00,0xda,0x71,0x7f
+
+# ATT:        vsm4rnds4  -2048(%rdx), %xmm23, %xmm22
+# INTEL:      vsm4rnds4 xmm22, xmm23, xmmword ptr [rdx - 2048]
+0x62,0xe2,0x47,0x00,0xda,0x72,0x80
diff --git a/llvm/test/MC/Disassembler/X86/sm4-evex-64.txt b/llvm/test/MC/Disassembler/X86/sm4-evex-64.txt
new file mode 100644
index 00000000000000..c1cb271a967b13
--- /dev/null
+++ b/llvm/test/MC/Disassembler/X86/sm4-evex-64.txt
@@ -0,0 +1,170 @@
+# RUN: llvm-mc --disassemble %s -triple=i386-unknown-unknown | FileCheck %s --check-prefixes=ATT
+# RUN: llvm-mc --disassemble %s -triple=i386-unknown-unknown -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL
+
+# ATT:        vsm4key4 %zmm4, %zmm3, %zmm2
+# INTEL:      vsm4key4 zmm2, zmm3, zmm4
+0x62,0xf2,0x66,0x48,0xda,0xd4
+
+# ATT:        vsm4key4  268435456(%esp,%esi,8), %zmm3, %zmm2
+# INTEL:      vsm4key4 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf2,0x66,0x48,0xda,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT:        vsm4key4  291(%edi,%eax,4), %zmm3, %zmm2
+# INTEL:      vsm4key4 zmm2, zmm3, zmmword ptr [edi + 4*eax + 291]
+0x62,0xf2,0x66,0x48,0xda,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT:        vsm4key4  (%eax), %zmm3, %zmm2
+# INTEL:      vsm4key4 zmm2, zmm3, zmmword ptr [eax]
+0x62,0xf2,0x66,0x48,0xda,0x10
+
+# ATT:        vsm4key4  -2048(,%ebp,2), %zmm3, %zmm2
+# INTEL:      vsm4key4 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+0x62,0xf2,0x66,0x48,0xda,0x14,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT:        vsm4key4  8128(%ecx), %zmm3, %zmm2
+# INTEL:      vsm4key4 zmm2, zmm3, zmmword ptr [ecx + 8128]
+0x62,0xf2,0x66,0x48,0xda,0x51,0x7f
+
+# ATT:        vsm4key4  -8192(%edx), %zmm3, %zmm2
+# INTEL:      vsm4key4 zmm2, zmm3, zmmword ptr [edx - 8192]
+0x62,0xf2,0x66,0x48,0xda,0x52,0x80
+
+# ATT:        vsm4rnds4 %zmm4, %zmm3, %zmm2
+# INTEL:      vsm4rnds4 zmm2, zmm3, zmm4
+0x62,0xf2,0x67,0x48,0xda,0xd4
+
+# ATT:        vsm4rnds4  268435456(%esp,%esi,8), %zmm3, %zmm2
+# INTEL:      vsm4rnds4 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf2,0x67,0x48,0xda,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT:        vsm4rnds4  291(%edi,%eax,4), %zmm3, %zmm2
+# INTEL:      vsm4rnds4 zmm2, zmm3, zmmword ptr [edi + 4*eax + 291]
+0x62,0xf2,0x67,0x48,0xda,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT:        vsm4rnds4  (%eax), %zmm3, %zmm2
+# INTEL:      vsm4rnds4 zmm2, zmm3, zmmword ptr [eax]
+0x62,0xf2,0x67,0x48,0xda,0x10
+
+# ATT:        vsm4rnds4  -2048(,%ebp,2), %zmm3, %zmm2
+# INTEL:      vsm4rnds4 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+0x62,0xf2,0x67,0x48,0xda,0x14,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT:        vsm4rnds4  8128(%ecx), %zmm3, %zmm2
+# INTEL:      vsm4rnds4 zmm2, zmm3, zmmword ptr [ecx + 8128]
+0x62,0xf2,0x67,0x48,0xda,0x51,0x7f
+
+# ATT:        vsm4rnds4  -8192(%edx), %zmm3, %zmm2
+# INTEL:      vsm4rnds4 zmm2, zmm3, zmmword ptr [edx - 8192]
+0x62,0xf2,0x67,0x48,0xda,0x52,0x80
+
+# ATT:        vsm4key4 %ymm4, %ymm3, %ymm2
+# INTEL:      vsm4key4 ymm2, ymm3, ymm4
+0x62,0xf2,0x66,0x28,0xda,0xd4
+
+# ATT:        vsm4key4 %xmm4, %xmm3, %xmm2
+# INTEL:      vsm4key4 xmm2, xmm3, xmm4
+0x62,0xf2,0x66,0x08,0xda,0xd4
+
+# ATT:        vsm4key4  268435456(%esp,%esi,8), %ymm3, %ymm2
+# INTEL:      vsm4key4 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+0x62,0xf2,0x66,0x28,0xda,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT:        vsm4key4  291(%edi,%eax,4), %ymm3, %ymm2
+# INTEL:      vsm4key4 ymm2, ymm3, ymmword ptr [edi + 4*eax + 291]
+0x62,0xf2,0x66,0x28,0xda,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT:        vsm4key4  (%eax), %ymm3, %ymm2
+# INTEL:      vsm4key4 ymm2, ymm3, ymmword ptr [eax]
+0x62,0xf2,0x66,0x28,0xda,0x10
+
+# ATT:        vsm4key4  -1024(,%ebp,2), %ymm3, %ymm2
+# INTEL:      vsm4key4 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+0x62,0xf2,0x66,0x28,0xda,0x14,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT:        vsm4key4  4064(%ecx), %ymm3, %ymm2
+# INTEL:      vsm4key4 ymm2, ymm3, ymmword ptr [ecx + 4064]
+0x62,0xf2,0x66,0x28,0xda,0x51,0x7f
+
+# ATT:        vsm4key4  -4096(%edx), %ymm3, %ymm2
+# INTEL:      vsm4key4 ymm2, ymm3, ymmword ptr [edx - 4096]
+0x62,0xf2,0x66,0x28,0xda,0x52,0x80
+
+# ATT:        vsm4key4  268435456(%esp,%esi,8), %xmm3, %xmm2
+# INTEL:      vsm4key4 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf2,0x66,0x08,0xda,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT:        vsm4key4  291(%edi,%eax,4), %xmm3, %xmm2
+# INTEL:      vsm4key4 xmm2, xmm3, xmmword ptr [edi + 4*eax + 291]
+0x62,0xf2,0x66,0x08,0xda,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT:        vsm4key4  (%eax), %xmm3, %xmm2
+# INTEL:      vsm4key4 xmm2, xmm3, xmmword ptr [eax]
+0x62,0xf2,0x66,0x08,0xda,0x10
+
+# ATT:        vsm4key4  -512(,%ebp,2), %xmm3, %xmm2
+# INTEL:      vsm4key4 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+0x62,0xf2,0x66,0x08,0xda,0x14,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT:        vsm4key4  2032(%ecx), %xmm3, %xmm2
+# INTEL:      vsm4key4 xmm2, xmm3, xmmword ptr [ecx + 2032]
+0x62,0xf2,0x66,0x08,0xda,0x51,0x7f
+
+# ATT:        vsm4key4  -2048(%edx), %xmm3, %xmm2
+# INTEL:      vsm4key4 xmm2, xmm3, xmmword ptr [edx - 2048]
+0x62,0xf2,0x66,0x08,0xda,0x52,0x80
+
+# ATT:        vsm4rnds4 %ymm4, %ymm3, %ymm2
+# INTEL:      vsm4rnds4 ymm2, ymm3, ymm4
+0x62,0xf2,0x67,0x28,0xda,0xd4
+
+# ATT:        vsm4rnds4 %xmm4, %xmm3, %xmm2
+# INTEL:      vsm4rnds4 xmm2, xmm3, xmm4
+0x62,0xf2,0x67,0x08,0xda,0xd4
+
+# ATT:        vsm4rnds4  268435456(%esp,%esi,8), %ymm3, %ymm2
+# INTEL:      vsm4rnds4 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+0x62,0xf2,0x67,0x28,0xda,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT:        vsm4rnds4  291(%edi,%eax,4), %ymm3, %ymm2
+# INTEL:      vsm4rnds4 ymm2, ymm3, ymmword ptr [edi + 4*eax + 291]
+0x62,0xf2,0x67,0x28,0xda,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT:        vsm4rnds4  (%eax), %ymm3, %ymm2
+# INTEL:      vsm4rnds4 ymm2, ymm3, ymmword ptr [eax]
+0x62,0xf2,0x67,0x28,0xda,0x10
+
+# ATT:        vsm4rnds4  -1024(,%ebp,2), %ymm3, %ymm2
+# INTEL:      vsm4rnds4 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+0x62,0xf2,0x67,0x28,0xda,0x14,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT:        vsm4rnds4  4064(%ecx), %ymm3, %ymm2
+# INTEL:      vsm4rnds4 ymm2, ymm3, ymmword ptr [ecx + 4064]
+0x62,0xf2,0x67,0x28,0xda,0x51,0x7f
+
+# ATT:        vsm4rnds4  -4096(%edx), %ymm3, %ymm2
+# INTEL:      vsm4rnds4 ymm2, ymm3, ymmword ptr [edx - 4096]
+0x62,0xf2,0x67,0x28,0xda,0x52,0x80
+
+# ATT:        vsm4rnds4  268435456(%esp,%esi,8), %xmm3, %xmm2
+# INTEL:      vsm4rnds4 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf2,0x67,0x08,0xda,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT:        vsm4rnds4  291(%edi,%eax,4), %xmm3, %xmm2
+# INTEL:      vsm4rnds4 xmm2, xmm3, xmmword ptr [edi + 4*eax + 291]
+0x62,0xf2,0x67,0x08,0xda,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT:        vsm4rnds4  (%eax), %xmm3, %xmm2
+# INTEL:      vsm4rnds4 xmm2, xmm3, xmmword ptr [eax]
+0x62,0xf2,0x67,0x08,0xda,0x10
+
+# ATT:        vsm4rnds4  -512(,%ebp,2), %xmm3, %xmm2
+# INTEL:      vsm4rnds4 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+0x62,0xf2,0x67,0x08,0xda,0x14,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT:        vsm4rnds4  2032(%ecx), %xmm3, %xmm2
+# INTEL:      vsm4rnds4 xmm2, xmm3, xmmword ptr [ecx + 2032]
+0x62,0xf2,0x67,0x08,0xda,0x51,0x7f
+
+# ATT:        vsm4rnds4  -2048(%edx), %xmm3, %xmm2
+# INTEL:      vsm4rnds4 xmm2, xmm3, xmmword ptr [edx - 2048]
+0x62,0xf2,0x67,0x08,0xda,0x52,0x80
diff --git a/llvm/test/MC/X86/sm4-evex-32-att.s b/llvm/test/MC/X86/sm4-evex-32-att.s
new file mode 100644
index 00000000000000..389a29b1189795
--- /dev/null
+++ b/llvm/test/MC/X86/sm4-evex-32-att.s
@@ -0,0 +1,224 @@
+// RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding %s | FileCheck %s
+
+// CHECK:      vsm4key4 %zmm24, %zmm23, %zmm22
+// CHECK: encoding: [0x62,0x82,0x46,0x40,0xda,0xf0]
+               vsm4key4 %zmm24, %zmm23, %zmm22
+
+// CHECK:      vsm4key4  268435456(%rbp,%r14,8), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xa2,0x46,0x40,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10]
+               vsm4key4  268435456(%rbp,%r14,8), %zmm23, %zmm22
+
+// CHECK:      vsm4key4  291(%r8,%rax,4), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xc2,0x46,0x40,0xda,0xb4,0x80,0x23,0x01,0x00,0x00]
+               vsm4key4  291(%r8,%rax,4), %zmm23, %zmm22
+
+// CHECK:      vsm4key4  (%rip), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe2,0x46,0x40,0xda,0x35,0x00,0x00,0x00,0x00]
+               vsm4key4  (%rip), %zmm23, %zmm22
+
+// CHECK:      vsm4key4  -2048(,%rbp,2), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe2,0x46,0x40,0xda,0x34,0x6d,0x00,0xf8,0xff,0xff]
+               vsm4key4  -2048(,%rbp,2), %zmm23, %zmm22
+
+// CHECK:      vsm4key4  8128(%rcx), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe2,0x46,0x40,0xda,0x71,0x7f]
+               vsm4key4  8128(%rcx), %zmm23, %zmm22
+
+// CHECK:      vsm4key4  -8192(%rdx), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe2,0x46,0x40,0xda,0x72,0x80]
+               vsm4key4  -8192(%rdx), %zmm23, %zmm22
+
+// CHECK:      vsm4rnds4 %zmm24, %zmm23, %zmm22
+// CHECK: encoding: [0x62,0x82,0x47,0x40,0xda,0xf0]
+               vsm4rnds4 %zmm24, %zmm23, %zmm22
+
+// CHECK:      vsm4rnds4  268435456(%rbp,%r14,8), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xa2,0x47,0x40,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10]
+               vsm4rnds4  268435456(%rbp,%r14,8), %zmm23, %zmm22
+
+// CHECK:      vsm4rnds4  291(%r8,%rax,4), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xc2,0x47,0x40,0xda,0xb4,0x80,0x23,0x01,0x00,0x00]
+               vsm4rnds4  291(%r8,%rax,4), %zmm23, %zmm22
+
+// CHECK:      vsm4rnds4  (%rip), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe2,0x47,0x40,0xda,0x35,0x00,0x00,0x00,0x00]
+               vsm4rnds4  (%rip), %zmm23, %zmm22
+
+// CHECK:      vsm4rnds4  -2048(,%rbp,2), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe2,0x47,0x40,0xda,0x34,0x6d,0x00,0xf8,0xff,0xff]
+               vsm4rnds4  -2048(,%rbp,2), %zmm23, %zmm22
+
+// CHECK:      vsm4rnds4  8128(%rcx), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe2,0x47,0x40,0xda,0x71,0x7f]
+               vsm4rnds4  8128(%rcx), %zmm23, %zmm22
+
+// CHECK:      vsm4rnds4  -8192(%rdx), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe2,0x47,0x40,0xda,0x72,0x80]
+               vsm4rnds4  -8192(%rdx), %zmm23, %zmm22
+
+// CHECK:      vsm4key4 %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x82,0x46,0x20,0xda,0xf0]
+               vsm4key4 %ymm24, %ymm23, %ymm22
+
+// CHECK:      vsm4key4 %xmm24, %xmm23, %xmm22
+// CHECK: encoding: [0x62,0x82,0x46,0x00,0xda,0xf0]
+               vsm4key4 %xmm24, %xmm23, %xmm22
+
+// CHECK:      vsm4key4  268435456(%rbp,%r14,8), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xa2,0x46,0x20,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10]
+               vsm4key4  268435456(%rbp,%r14,8), %ymm23, %ymm22
+
+// CHECK:      vsm4key4  291(%r8,%rax,4), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xc2,0x46,0x20,0xda,0xb4,0x80,0x23,0x01,0x00,0x00]
+               vsm4key4  291(%r8,%rax,4), %ymm23, %ymm22
+
+// CHECK:      vsm4key4  (%rip), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe2,0x46,0x20,0xda,0x35,0x00,0x00,0x00,0x00]
+               vsm4key4  (%rip), %ymm23, %ymm22
+
+// CHECK:      vsm4key4  -1024(,%rbp,2), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe2,0x46,0x20,0xda,0x34,0x6d,0x00,0xfc,0xff,0xff]
+               vsm4key4  -1024(,%rbp,2), %ymm23, %ymm22
+
+// CHECK:      vsm4key4  4064(%rcx), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe2,0x46,0x20,0xda,0x71,0x7f]
+               vsm4key4  4064(%rcx), %ymm23, %ymm22
+
+// CHECK:      vsm4key4  -4096(%rdx), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe2,0x46,0x20,0xda,0x72,0x80]
+               vsm4key4  -4096(%rdx), %ymm23, %ymm22
+
+// CHECK:      vsm4key4  268435456(%rbp,%r14,8), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xa2,0x46,0x00,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10]
+               vsm4key4  268435456(%rbp,%r14,8), %xmm23, %xmm22
+
+// CHECK:      vsm4key4  291(%r8,%rax,4), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xc2,0x46,0x00,0xda,0xb4,0x80,0x23,0x01,0x00,0x00]
+               vsm4key4  291(%r8,%rax,4), %xmm23, %xmm22
+
+// CHECK:      vsm4key4  (%rip), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe2,0x46,0x00,0xda,0x35,0x00,0x00,0x00,0x00]
+               vsm4key4  (%rip), %xmm23, %xmm22
+
+// CHECK:      vsm4key4  -512(,%rbp,2), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe2,0x46,0x00,0xda,0x34,0x6d,0x00,0xfe,0xff,0xff]
+               vsm4key4  -512(,%rbp,2), %xmm23, %xmm22
+
+// CHECK:      vsm4key4  2032(%rcx), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe2,0x46,0x00,0xda,0x71,0x7f]
+               vsm4key4  2032(%rcx), %xmm23, %xmm22
+
+// CHECK:      vsm4key4  -2048(%rdx), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe2,0x46,0x00,0xda,0x72,0x80]
+               vsm4key4  -2048(%rdx), %xmm23, %xmm22
+// CHECK:      vsm4key4 %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x82,0x46,0x20,0xda,0xf0]
+               vsm4key4 %ymm24, %ymm23, %ymm22
+
+// CHECK:      vsm4key4 %xmm24, %xmm23, %xmm22
+// CHECK: encoding: [0x62,0x82,0x46,0x00,0xda,0xf0]
+               vsm4key4 %xmm24, %xmm23, %xmm22
+
+// CHECK:      vsm4key4  268435456(%rbp,%r14,8), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xa2,0x46,0x20,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10]
+               vsm4key4  268435456(%rbp,%r14,8), %ymm23, %ymm22
+
+// CHECK:      vsm4key4  291(%r8,%rax,4), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xc2,0x46,0x20,0xda,0xb4,0x80,0x23,0x01,0x00,0x00]
+               vsm4key4  291(%r8,%rax,4), %ymm23, %ymm22
+
+// CHECK:      vsm4key4  (%rip), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe2,0x46,0x20,0xda,0x35,0x00,0x00,0x00,0x00]
+               vsm4key4  (%rip), %ymm23, %ymm22
+
+// CHECK:      vsm4key4  -1024(,%rbp,2), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe2,0x46,0x20,0xda,0x34,0x6d,0x00,0xfc,0xff,0xff]
+               vsm4key4  -1024(,%rbp,2), %ymm23, %ymm22
+
+// CHECK:      vsm4key4  4064(%rcx), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe2,0x46,0x20,0xda,0x71,0x7f]
+               vsm4key4  4064(%rcx), %ymm23, %ymm22
+
+// CHECK:      vsm4key4  -4096(%rdx), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe2,0x46,0x20,0xda,0x72,0x80]
+               vsm4key4  -4096(%rdx), %ymm23, %ymm22
+
+// CHECK:      vsm4key4  268435456(%rbp,%r14,8), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xa2,0x46,0x00,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10]
+               vsm4key4  268435456(%rbp,%r14,8), %xmm23, %xmm22
+
+// CHECK:      vsm4key4  291(%r8,%rax,4), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xc2,0x46,0x00,0xda,0xb4,0x80,0x23,0x01,0x00,0x00]
+               vsm4key4  291(%r8,%rax,4), %xmm23, %xmm22
+
+// CHECK:      vsm4key4  (%rip), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe2,0x46,0x00,0xda,0x35,0x00,0x00,0x00,0x00]
+               vsm4key4  (%rip), %xmm23, %xmm22
+
+// CHECK:      vsm4key4  -512(,%rbp,2), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe2,0x46,0x00,0xda,0x34,0x6d,0x00,0xfe,0xff,0xff]
+               vsm4key4  -512(,%rbp,2), %xmm23, %xmm22
+
+// CHECK:      vsm4key4  2032(%rcx), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe2,0x46,0x00,0xda,0x71,0x7f]
+               vsm4key4  2032(%rcx), %xmm23, %xmm22
+
+// CHECK:      vsm4key4  -2048(%rdx), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe2,0x46,0x00,0xda,0x72,0x80]
+               vsm4key4  -2048(%rdx), %xmm23, %xmm22
+
+// CHECK:      vsm4rnds4 %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x82,0x47,0x20,0xda,0xf0]
+               vsm4rnds4 %ymm24, %ymm23, %ymm22
+
+// CHECK:      vsm4rnds4 %xmm24, %xmm23, %xmm22
+// CHECK: encoding: [0x62,0x82,0x47,0x00,0xda,0xf0]
+               vsm4rnds4 %xmm24, %xmm23, %xmm22
+
+// CHECK:      vsm4rnds4  268435456(%rbp,%r14,8), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xa2,0x47,0x20,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10]
+               vsm4rnds4  268435456(%rbp,%r14,8), %ymm23, %ymm22
+
+// CHECK:      vsm4rnds4  291(%r8,%rax,4), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xc2,0x47,0x20,0xda,0xb4,0x80,0x23,0x01,0x00,0x00]
+               vsm4rnds4  291(%r8,%rax,4), %ymm23, %ymm22
+
+// CHECK:      vsm4rnds4  (%rip), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe2,0x47,0x20,0xda,0x35,0x00,0x00,0x00,0x00]
+               vsm4rnds4  (%rip), %ymm23, %ymm22
+
+// CHECK:      vsm4rnds4  -1024(,%rbp,2), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe2,0x47,0x20,0xda,0x34,0x6d,0x00,0xfc,0xff,0xff]
+               vsm4rnds4  -1024(,%rbp,2), %ymm23, %ymm22
+
+// CHECK:      vsm4rnds4  4064(%rcx), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe2,0x47,0x20,0xda,0x71,0x7f]
+               vsm4rnds4  4064(%rcx), %ymm23, %ymm22
+
+// CHECK:      vsm4rnds4  -4096(%rdx), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe2,0x47,0x20,0xda,0x72,0x80]
+               vsm4rnds4  -4096(%rdx), %ymm23, %ymm22
+
+// CHECK:      vsm4rnds4  268435456(%rbp,%r14,8), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xa2,0x47,0x00,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10]
+               vsm4rnds4  268435456(%rbp,%r14,8), %xmm23, %xmm22
+
+// CHECK:      vsm4rnds4  291(%r8,%rax,4), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xc2,0x47,0x00,0xda,0xb4,0x80,0x23,0x01,0x00,0x00]
+               vsm4rnds4  291(%r8,%rax,4), %xmm23, %xmm22
+
+// CHECK:      vsm4rnds4  (%rip), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe2,0x47,0x00,0xda,0x35,0x00,0x00,0x00,0x00]
+               vsm4rnds4  (%rip), %xmm23, %xmm22
+
+// CHECK:      vsm4rnds4  -512(,%rbp,2), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe2,0x47,0x00,0xda,0x34,0x6d,0x00,0xfe,0xff,0xff]
+               vsm4rnds4  -512(,%rbp,2), %xmm23, %xmm22
+
+// CHECK:      vsm4rnds4  2032(%rcx), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe2,0x47,0x00,0xda,0x71,0x7f]
+               vsm4rnds4  2032(%rcx), %xmm23, %xmm22
+
+// CHECK:      vsm4rnds4  -2048(%rdx), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe2,0x47,0x00,0xda,0x72,0x80]
+               vsm4rnds4  -2048(%rdx), %xmm23, %xmm22
diff --git a/llvm/test/MC/X86/sm4-evex-32-intel.s b/llvm/test/MC/X86/sm4-evex-32-intel.s
new file mode 100644
index 00000000000000..3cc18cf4178ed8
--- /dev/null
+++ b/llvm/test/MC/X86/sm4-evex-32-intel.s
@@ -0,0 +1,169 @@
+// RUN: llvm-mc -triple x86_64-unknown-unknown -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s
+
+// CHECK:      vsm4key4 zmm22, zmm23, zmm24
+// CHECK: encoding: [0x62,0x82,0x46,0x40,0xda,0xf0]
+               vsm4key4 zmm22, zmm23, zmm24
+
+// CHECK:      vsm4key4 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa2,0x46,0x40,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10]
+               vsm4key4 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK:      vsm4key4 zmm22, zmm23, zmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc2,0x46,0x40,0xda,0xb4,0x80,0x23,0x01,0x00,0x00]
+               vsm4key4 zmm22, zmm23, zmmword ptr [r8 + 4*rax + 291]
+
+// CHECK:      vsm4key4 zmm22, zmm23, zmmword ptr [rip]
+// CHECK: encoding: [0x62,0xe2,0x46,0x40,0xda,0x35,0x00,0x00,0x00,0x00]
+               vsm4key4 zmm22, zmm23, zmmword ptr [rip]
+
+// CHECK:      vsm4key4 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+// CHECK: encoding: [0x62,0xe2,0x46,0x40,0xda,0x34,0x6d,0x00,0xf8,0xff,0xff]
+               vsm4key4 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+
+// CHECK:      vsm4key4 zmm22, zmm23, zmmword ptr [rcx + 8128]
+// CHECK: encoding: [0x62,0xe2,0x46,0x40,0xda,0x71,0x7f]
+               vsm4key4 zmm22, zmm23, zmmword ptr [rcx + 8128]
+
+// CHECK:      vsm4key4 zmm22, zmm23, zmmword ptr [rdx - 8192]
+// CHECK: encoding: [0x62,0xe2,0x46,0x40,0xda,0x72,0x80]
+               vsm4key4 zmm22, zmm23, zmmword ptr [rdx - 8192]
+
+// CHECK:      vsm4rnds4 zmm22, zmm23, zmm24
+// CHECK: encoding: [0x62,0x82,0x47,0x40,0xda,0xf0]
+               vsm4rnds4 zmm22, zmm23, zmm24
+
+// CHECK:      vsm4rnds4 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa2,0x47,0x40,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10]
+               vsm4rnds4 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK:      vsm4rnds4 zmm22, zmm23, zmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc2,0x47,0x40,0xda,0xb4,0x80,0x23,0x01,0x00,0x00]
+               vsm4rnds4 zmm22, zmm23, zmmword ptr [r8 + 4*rax + 291]
+
+// CHECK:      vsm4rnds4 zmm22, zmm23, zmmword ptr [rip]
+// CHECK: encoding: [0x62,0xe2,0x47,0x40,0xda,0x35,0x00,0x00,0x00,0x00]
+               vsm4rnds4 zmm22, zmm23, zmmword ptr [rip]
+
+// CHECK:      vsm4rnds4 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+// CHECK: encoding: [0x62,0xe2,0x47,0x40,0xda,0x34,0x6d,0x00,0xf8,0xff,0xff]
+               vsm4rnds4 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+
+// CHECK:      vsm4rnds4 zmm22, zmm23, zmmword ptr [rcx + 8128]
+// CHECK: encoding: [0x62,0xe2,0x47,0x40,0xda,0x71,0x7f]
+               vsm4rnds4 zmm22, zmm23, zmmword ptr [rcx + 8128]
+
+// CHECK:      vsm4rnds4 zmm22, zmm23, zmmword ptr [rdx - 8192]
+// CHECK: encoding: [0x62,0xe2,0x47,0x40,0xda,0x72,0x80]
+               vsm4rnds4 zmm22, zmm23, zmmword ptr [rdx - 8192]
+
+// CHECK:      vsm4key4 ymm22, ymm23, ymm24
+// CHECK: encoding: [0x62,0x82,0x46,0x20,0xda,0xf0]
+               vsm4key4 ymm22, ymm23, ymm24
+
+// CHECK:      vsm4key4 xmm22, xmm23, xmm24
+// CHECK: encoding: [0x62,0x82,0x46,0x00,0xda,0xf0]
+               vsm4key4 xmm22, xmm23, xmm24
+
+// CHECK:      vsm4key4 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa2,0x46,0x20,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10]
+               vsm4key4 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK:      vsm4key4 ymm22, ymm23, ymmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc2,0x46,0x20,0xda,0xb4,0x80,0x23,0x01,0x00,0x00]
+               vsm4key4 ymm22, ymm23, ymmword ptr [r8 + 4*rax + 291]
+
+// CHECK:      vsm4key4 ymm22, ymm23, ymmword ptr [rip]
+// CHECK: encoding: [0x62,0xe2,0x46,0x20,0xda,0x35,0x00,0x00,0x00,0x00]
+               vsm4key4 ymm22, ymm23, ymmword ptr [rip]
+
+// CHECK:      vsm4key4 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+// CHECK: encoding: [0x62,0xe2,0x46,0x20,0xda,0x34,0x6d,0x00,0xfc,0xff,0xff]
+               vsm4key4 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+
+// CHECK:      vsm4key4 ymm22, ymm23, ymmword ptr [rcx + 4064]
+// CHECK: encoding: [0x62,0xe2,0x46,0x20,0xda,0x71,0x7f]
+               vsm4key4 ymm22, ymm23, ymmword ptr [rcx + 4064]
+
+// CHECK:      vsm4key4 ymm22, ymm23, ymmword ptr [rdx - 4096]
+// CHECK: encoding: [0x62,0xe2,0x46,0x20,0xda,0x72,0x80]
+               vsm4key4 ymm22, ymm23, ymmword ptr [rdx - 4096]
+
+// CHECK:      vsm4key4 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa2,0x46,0x00,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10]
+               vsm4key4 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK:      vsm4key4 xmm22, xmm23, xmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc2,0x46,0x00,0xda,0xb4,0x80,0x23,0x01,0x00,0x00]
+               vsm4key4 xmm22, xmm23, xmmword ptr [r8 + 4*rax + 291]
+
+// CHECK:      vsm4key4 xmm22, xmm23, xmmword ptr [rip]
+// CHECK: encoding: [0x62,0xe2,0x46,0x00,0xda,0x35,0x00,0x00,0x00,0x00]
+               vsm4key4 xmm22, xmm23, xmmword ptr [rip]
+
+// CHECK:      vsm4key4 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+// CHECK: encoding: [0x62,0xe2,0x46,0x00,0xda,0x34,0x6d,0x00,0xfe,0xff,0xff]
+               vsm4key4 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+
+// CHECK:      vsm4key4 xmm22, xmm23, xmmword ptr [rcx + 2032]
+// CHECK: encoding: [0x62,0xe2,0x46,0x00,0xda,0x71,0x7f]
+               vsm4key4 xmm22, xmm23, xmmword ptr [rcx + 2032]
+
+// CHECK:      vsm4key4 xmm22, xmm23, xmmword ptr [rdx - 2048]
+// CHECK: encoding: [0x62,0xe2,0x46,0x00,0xda,0x72,0x80]
+               vsm4key4 xmm22, xmm23, xmmword ptr [rdx - 2048]
+
+// CHECK:      vsm4rnds4 ymm22, ymm23, ymm24
+// CHECK: encoding: [0x62,0x82,0x47,0x20,0xda,0xf0]
+               vsm4rnds4 ymm22, ymm23, ymm24
+
+// CHECK:      vsm4rnds4 xmm22, xmm23, xmm24
+// CHECK: encoding: [0x62,0x82,0x47,0x00,0xda,0xf0]
+               vsm4rnds4 xmm22, xmm23, xmm24
+
+// CHECK:      vsm4rnds4 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa2,0x47,0x20,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10]
+               vsm4rnds4 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK:      vsm4rnds4 ymm22, ymm23, ymmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc2,0x47,0x20,0xda,0xb4,0x80,0x23,0x01,0x00,0x00]
+               vsm4rnds4 ymm22, ymm23, ymmword ptr [r8 + 4*rax + 291]
+
+// CHECK:      vsm4rnds4 ymm22, ymm23, ymmword ptr [rip]
+// CHECK: encoding: [0x62,0xe2,0x47,0x20,0xda,0x35,0x00,0x00,0x00,0x00]
+               vsm4rnds4 ymm22, ymm23, ymmword ptr [rip]
+
+// CHECK:      vsm4rnds4 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+// CHECK: encoding: [0x62,0xe2,0x47,0x20,0xda,0x34,0x6d,0x00,0xfc,0xff,0xff]
+               vsm4rnds4 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+
+// CHECK:      vsm4rnds4 ymm22, ymm23, ymmword ptr [rcx + 4064]
+// CHECK: encoding: [0x62,0xe2,0x47,0x20,0xda,0x71,0x7f]
+               vsm4rnds4 ymm22, ymm23, ymmword ptr [rcx + 4064]
+
+// CHECK:      vsm4rnds4 ymm22, ymm23, ymmword ptr [rdx - 4096]
+// CHECK: encoding: [0x62,0xe2,0x47,0x20,0xda,0x72,0x80]
+               vsm4rnds4 ymm22, ymm23, ymmword ptr [rdx - 4096]
+
+// CHECK:      vsm4rnds4 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa2,0x47,0x00,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10]
+               vsm4rnds4 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK:      vsm4rnds4 xmm22, xmm23, xmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc2,0x47,0x00,0xda,0xb4,0x80,0x23,0x01,0x00,0x00]
+               vsm4rnds4 xmm22, xmm23, xmmword ptr [r8 + 4*rax + 291]
+
+// CHECK:      vsm4rnds4 xmm22, xmm23, xmmword ptr [rip]
+// CHECK: encoding: [0x62,0xe2,0x47,0x00,0xda,0x35,0x00,0x00,0x00,0x00]
+               vsm4rnds4 xmm22, xmm23, xmmword ptr [rip]
+
+// CHECK:      vsm4rnds4 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+// CHECK: encoding: [0x62,0xe2,0x47,0x00,0xda,0x34,0x6d,0x00,0xfe,0xff,0xff]
+               vsm4rnds4 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+
+// CHECK:      vsm4rnds4 xmm22, xmm23, xmmword ptr [rcx + 2032]
+// CHECK: encoding: [0x62,0xe2,0x47,0x00,0xda,0x71,0x7f]
+               vsm4rnds4 xmm22, xmm23, xmmword ptr [rcx + 2032]
+
+// CHECK:      vsm4rnds4 xmm22, xmm23, xmmword ptr [rdx - 2048]
+// CHECK: encoding: [0x62,0xe2,0x47,0x00,0xda,0x72,0x80]
+               vsm4rnds4 xmm22, xmm23, xmmword ptr [rdx - 2048]
diff --git a/llvm/test/MC/X86/sm4-evex-64-att.s b/llvm/test/MC/X86/sm4-evex-64-att.s
new file mode 100644
index 00000000000000..de10d95ac74d7b
--- /dev/null
+++ b/llvm/test/MC/X86/sm4-evex-64-att.s
@@ -0,0 +1,169 @@
+// RUN: llvm-mc -triple i686-unknown-unknown --show-encoding %s | FileCheck %s
+
+// CHECK:      vsm4key4 %zmm4, %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0xd4]
+               vsm4key4 %zmm4, %zmm3, %zmm2
+
+// CHECK:      vsm4key4  268435456(%esp,%esi,8), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0x94,0xf4,0x00,0x00,0x00,0x10]
+               vsm4key4  268435456(%esp,%esi,8), %zmm3, %zmm2
+
+// CHECK:      vsm4key4  291(%edi,%eax,4), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0x94,0x87,0x23,0x01,0x00,0x00]
+               vsm4key4  291(%edi,%eax,4), %zmm3, %zmm2
+
+// CHECK:      vsm4key4  (%eax), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0x10]
+               vsm4key4  (%eax), %zmm3, %zmm2
+
+// CHECK:      vsm4key4  -2048(,%ebp,2), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0x14,0x6d,0x00,0xf8,0xff,0xff]
+               vsm4key4  -2048(,%ebp,2), %zmm3, %zmm2
+
+// CHECK:      vsm4key4  8128(%ecx), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0x51,0x7f]
+               vsm4key4  8128(%ecx), %zmm3, %zmm2
+
+// CHECK:      vsm4key4  -8192(%edx), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0x52,0x80]
+               vsm4key4  -8192(%edx), %zmm3, %zmm2
+
+// CHECK:      vsm4rnds4 %zmm4, %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0xd4]
+               vsm4rnds4 %zmm4, %zmm3, %zmm2
+
+// CHECK:      vsm4rnds4  268435456(%esp,%esi,8), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0x94,0xf4,0x00,0x00,0x00,0x10]
+               vsm4rnds4  268435456(%esp,%esi,8), %zmm3, %zmm2
+
+// CHECK:      vsm4rnds4  291(%edi,%eax,4), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0x94,0x87,0x23,0x01,0x00,0x00]
+               vsm4rnds4  291(%edi,%eax,4), %zmm3, %zmm2
+
+// CHECK:      vsm4rnds4  (%eax), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0x10]
+               vsm4rnds4  (%eax), %zmm3, %zmm2
+
+// CHECK:      vsm4rnds4  -2048(,%ebp,2), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0x14,0x6d,0x00,0xf8,0xff,0xff]
+               vsm4rnds4  -2048(,%ebp,2), %zmm3, %zmm2
+
+// CHECK:      vsm4rnds4  8128(%ecx), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0x51,0x7f]
+               vsm4rnds4  8128(%ecx), %zmm3, %zmm2
+
+// CHECK:      vsm4rnds4  -8192(%edx), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0x52,0x80]
+               vsm4rnds4  -8192(%edx), %zmm3, %zmm2
+
+// CHECK:      {evex} vsm4key4 %ymm4, %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0xd4]
+               {evex} vsm4key4 %ymm4, %ymm3, %ymm2
+
+// CHECK:      {evex} vsm4key4 %xmm4, %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0xd4]
+               {evex} vsm4key4 %xmm4, %xmm3, %xmm2
+
+// CHECK:      {evex} vsm4key4  268435456(%esp,%esi,8), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0x94,0xf4,0x00,0x00,0x00,0x10]
+               {evex} vsm4key4  268435456(%esp,%esi,8), %ymm3, %ymm2
+
+// CHECK:      {evex} vsm4key4  291(%edi,%eax,4), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0x94,0x87,0x23,0x01,0x00,0x00]
+               {evex} vsm4key4  291(%edi,%eax,4), %ymm3, %ymm2
+
+// CHECK:      {evex} vsm4key4  (%eax), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0x10]
+               {evex} vsm4key4  (%eax), %ymm3, %ymm2
+
+// CHECK:      {evex} vsm4key4  -1024(,%ebp,2), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0x14,0x6d,0x00,0xfc,0xff,0xff]
+               {evex} vsm4key4  -1024(,%ebp,2), %ymm3, %ymm2
+
+// CHECK:      {evex} vsm4key4  4064(%ecx), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0x51,0x7f]
+               {evex} vsm4key4  4064(%ecx), %ymm3, %ymm2
+
+// CHECK:      {evex} vsm4key4  -4096(%edx), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0x52,0x80]
+               {evex} vsm4key4  -4096(%edx), %ymm3, %ymm2
+
+// CHECK:      {evex} vsm4key4  268435456(%esp,%esi,8), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0x94,0xf4,0x00,0x00,0x00,0x10]
+               {evex} vsm4key4  268435456(%esp,%esi,8), %xmm3, %xmm2
+
+// CHECK:      {evex} vsm4key4  291(%edi,%eax,4), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0x94,0x87,0x23,0x01,0x00,0x00]
+               {evex} vsm4key4  291(%edi,%eax,4), %xmm3, %xmm2
+
+// CHECK:      {evex} vsm4key4  (%eax), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0x10]
+               {evex} vsm4key4  (%eax), %xmm3, %xmm2
+
+// CHECK:      {evex} vsm4key4  -512(,%ebp,2), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0x14,0x6d,0x00,0xfe,0xff,0xff]
+               {evex} vsm4key4  -512(,%ebp,2), %xmm3, %xmm2
+
+// CHECK:      {evex} vsm4key4  2032(%ecx), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0x51,0x7f]
+               {evex} vsm4key4  2032(%ecx), %xmm3, %xmm2
+
+// CHECK:      {evex} vsm4key4  -2048(%edx), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0x52,0x80]
+               {evex} vsm4key4  -2048(%edx), %xmm3, %xmm2
+
+// CHECK:      {evex} vsm4rnds4 %ymm4, %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0xd4]
+               {evex} vsm4rnds4 %ymm4, %ymm3, %ymm2
+
+// CHECK:      {evex} vsm4rnds4 %xmm4, %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0xd4]
+               {evex} vsm4rnds4 %xmm4, %xmm3, %xmm2
+
+// CHECK:      {evex} vsm4rnds4  268435456(%esp,%esi,8), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0x94,0xf4,0x00,0x00,0x00,0x10]
+               {evex} vsm4rnds4  268435456(%esp,%esi,8), %ymm3, %ymm2
+
+// CHECK:      {evex} vsm4rnds4  291(%edi,%eax,4), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0x94,0x87,0x23,0x01,0x00,0x00]
+               {evex} vsm4rnds4  291(%edi,%eax,4), %ymm3, %ymm2
+
+// CHECK:      {evex} vsm4rnds4  (%eax), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0x10]
+               {evex} vsm4rnds4  (%eax), %ymm3, %ymm2
+
+// CHECK:      {evex} vsm4rnds4  -1024(,%ebp,2), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0x14,0x6d,0x00,0xfc,0xff,0xff]
+               {evex} vsm4rnds4  -1024(,%ebp,2), %ymm3, %ymm2
+
+// CHECK:      {evex} vsm4rnds4  4064(%ecx), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0x51,0x7f]
+               {evex} vsm4rnds4  4064(%ecx), %ymm3, %ymm2
+
+// CHECK:      {evex} vsm4rnds4  -4096(%edx), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0x52,0x80]
+               {evex} vsm4rnds4  -4096(%edx), %ymm3, %ymm2
+
+// CHECK:      {evex} vsm4rnds4  268435456(%esp,%esi,8), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0x94,0xf4,0x00,0x00,0x00,0x10]
+               {evex} vsm4rnds4  268435456(%esp,%esi,8), %xmm3, %xmm2
+
+// CHECK:      {evex} vsm4rnds4  291(%edi,%eax,4), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0x94,0x87,0x23,0x01,0x00,0x00]
+               {evex} vsm4rnds4  291(%edi,%eax,4), %xmm3, %xmm2
+
+// CHECK:      {evex} vsm4rnds4  (%eax), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0x10]
+               {evex} vsm4rnds4  (%eax), %xmm3, %xmm2
+
+// CHECK:      {evex} vsm4rnds4  -512(,%ebp,2), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0x14,0x6d,0x00,0xfe,0xff,0xff]
+               {evex} vsm4rnds4  -512(,%ebp,2), %xmm3, %xmm2
+
+// CHECK:      {evex} vsm4rnds4  2032(%ecx), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0x51,0x7f]
+               {evex} vsm4rnds4  2032(%ecx), %xmm3, %xmm2
+
+// CHECK:      {evex} vsm4rnds4  -2048(%edx), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0x52,0x80]
+               {evex} vsm4rnds4  -2048(%edx), %xmm3, %xmm2
\ No newline at end of file
diff --git a/llvm/test/MC/X86/sm4-evex-64-intel.s b/llvm/test/MC/X86/sm4-evex-64-intel.s
new file mode 100644
index 00000000000000..812fdb13f80913
--- /dev/null
+++ b/llvm/test/MC/X86/sm4-evex-64-intel.s
@@ -0,0 +1,169 @@
+// RUN: llvm-mc -triple i686-unknown-unknown -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s
+
+// CHECK:      vsm4key4 zmm2, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0xd4]
+               vsm4key4 zmm2, zmm3, zmm4
+
+// CHECK:      vsm4key4 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0x94,0xf4,0x00,0x00,0x00,0x10]
+               vsm4key4 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK:      vsm4key4 zmm2, zmm3, zmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0x94,0x87,0x23,0x01,0x00,0x00]
+               vsm4key4 zmm2, zmm3, zmmword ptr [edi + 4*eax + 291]
+
+// CHECK:      vsm4key4 zmm2, zmm3, zmmword ptr [eax]
+// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0x10]
+               vsm4key4 zmm2, zmm3, zmmword ptr [eax]
+
+// CHECK:      vsm4key4 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0x14,0x6d,0x00,0xf8,0xff,0xff]
+               vsm4key4 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+
+// CHECK:      vsm4key4 zmm2, zmm3, zmmword ptr [ecx + 8128]
+// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0x51,0x7f]
+               vsm4key4 zmm2, zmm3, zmmword ptr [ecx + 8128]
+
+// CHECK:      vsm4key4 zmm2, zmm3, zmmword ptr [edx - 8192]
+// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0x52,0x80]
+               vsm4key4 zmm2, zmm3, zmmword ptr [edx - 8192]
+
+// CHECK:      vsm4rnds4 zmm2, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0xd4]
+               vsm4rnds4 zmm2, zmm3, zmm4
+
+// CHECK:      vsm4rnds4 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0x94,0xf4,0x00,0x00,0x00,0x10]
+               vsm4rnds4 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK:      vsm4rnds4 zmm2, zmm3, zmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0x94,0x87,0x23,0x01,0x00,0x00]
+               vsm4rnds4 zmm2, zmm3, zmmword ptr [edi + 4*eax + 291]
+
+// CHECK:      vsm4rnds4 zmm2, zmm3, zmmword ptr [eax]
+// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0x10]
+               vsm4rnds4 zmm2, zmm3, zmmword ptr [eax]
+
+// CHECK:      vsm4rnds4 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0x14,0x6d,0x00,0xf8,0xff,0xff]
+               vsm4rnds4 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+
+// CHECK:      vsm4rnds4 zmm2, zmm3, zmmword ptr [ecx + 8128]
+// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0x51,0x7f]
+               vsm4rnds4 zmm2, zmm3, zmmword ptr [ecx + 8128]
+
+// CHECK:      vsm4rnds4 zmm2, zmm3, zmmword ptr [edx - 8192]
+// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0x52,0x80]
+               vsm4rnds4 zmm2, zmm3, zmmword ptr [edx - 8192]
+
+// CHECK:      {evex} vsm4key4 ymm2, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0xd4]
+               {evex} vsm4key4 ymm2, ymm3, ymm4
+
+// CHECK:      {evex} vsm4key4 xmm2, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0xd4]
+               {evex} vsm4key4 xmm2, xmm3, xmm4
+
+// CHECK:      {evex} vsm4key4 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0x94,0xf4,0x00,0x00,0x00,0x10]
+               {evex} vsm4key4 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+
+// CHECK:      {evex} vsm4key4 ymm2, ymm3, ymmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0x94,0x87,0x23,0x01,0x00,0x00]
+               {evex} vsm4key4 ymm2, ymm3, ymmword ptr [edi + 4*eax + 291]
+
+// CHECK:      {evex} vsm4key4 ymm2, ymm3, ymmword ptr [eax]
+// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0x10]
+               {evex} vsm4key4 ymm2, ymm3, ymmword ptr [eax]
+
+// CHECK:      {evex} vsm4key4 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0x14,0x6d,0x00,0xfc,0xff,0xff]
+               {evex} vsm4key4 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+
+// CHECK:      {evex} vsm4key4 ymm2, ymm3, ymmword ptr [ecx + 4064]
+// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0x51,0x7f]
+               {evex} vsm4key4 ymm2, ymm3, ymmword ptr [ecx + 4064]
+
+// CHECK:      {evex} vsm4key4 ymm2, ymm3, ymmword ptr [edx - 4096]
+// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0x52,0x80]
+               {evex} vsm4key4 ymm2, ymm3, ymmword ptr [edx - 4096]
+
+// CHECK:      {evex} vsm4key4 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0x94,0xf4,0x00,0x00,0x00,0x10]
+               {evex} vsm4key4 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK:      {evex} vsm4key4 xmm2, xmm3, xmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0x94,0x87,0x23,0x01,0x00,0x00]
+               {evex} vsm4key4 xmm2, xmm3, xmmword ptr [edi + 4*eax + 291]
+
+// CHECK:      {evex} vsm4key4 xmm2, xmm3, xmmword ptr [eax]
+// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0x10]
+               {evex} vsm4key4 xmm2, xmm3, xmmword ptr [eax]
+
+// CHECK:      {evex} vsm4key4 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0x14,0x6d,0x00,0xfe,0xff,0xff]
+               {evex} vsm4key4 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+
+// CHECK:      {evex} vsm4key4 xmm2, xmm3, xmmword ptr [ecx + 2032]
+// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0x51,0x7f]
+               {evex} vsm4key4 xmm2, xmm3, xmmword ptr [ecx + 2032]
+
+// CHECK:      {evex} vsm4key4 xmm2, xmm3, xmmword ptr [edx - 2048]
+// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0x52,0x80]
+               {evex} vsm4key4 xmm2, xmm3, xmmword ptr [edx - 2048]
+
+// CHECK:      {evex} vsm4rnds4 ymm2, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0xd4]
+               {evex} vsm4rnds4 ymm2, ymm3, ymm4
+
+// CHECK:      {evex} vsm4rnds4 xmm2, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0xd4]
+               {evex} vsm4rnds4 xmm2, xmm3, xmm4
+
+// CHECK:      {evex} vsm4rnds4 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0x94,0xf4,0x00,0x00,0x00,0x10]
+               {evex} vsm4rnds4 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+
+// CHECK:      {evex} vsm4rnds4 ymm2, ymm3, ymmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0x94,0x87,0x23,0x01,0x00,0x00]
+               {evex} vsm4rnds4 ymm2, ymm3, ymmword ptr [edi + 4*eax + 291]
+
+// CHECK:      {evex} vsm4rnds4 ymm2, ymm3, ymmword ptr [eax]
+// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0x10]
+               {evex} vsm4rnds4 ymm2, ymm3, ymmword ptr [eax]
+
+// CHECK:      {evex} vsm4rnds4 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0x14,0x6d,0x00,0xfc,0xff,0xff]
+               {evex} vsm4rnds4 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+
+// CHECK:      {evex} vsm4rnds4 ymm2, ymm3, ymmword ptr [ecx + 4064]
+// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0x51,0x7f]
+               {evex} vsm4rnds4 ymm2, ymm3, ymmword ptr [ecx + 4064]
+
+// CHECK:      {evex} vsm4rnds4 ymm2, ymm3, ymmword ptr [edx - 4096]
+// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0x52,0x80]
+               {evex} vsm4rnds4 ymm2, ymm3, ymmword ptr [edx - 4096]
+
+// CHECK:      {evex} vsm4rnds4 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0x94,0xf4,0x00,0x00,0x00,0x10]
+               {evex} vsm4rnds4 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK:      {evex} vsm4rnds4 xmm2, xmm3, xmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0x94,0x87,0x23,0x01,0x00,0x00]
+               {evex} vsm4rnds4 xmm2, xmm3, xmmword ptr [edi + 4*eax + 291]
+
+// CHECK:      {evex} vsm4rnds4 xmm2, xmm3, xmmword ptr [eax]
+// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0x10]
+               {evex} vsm4rnds4 xmm2, xmm3, xmmword ptr [eax]
+
+// CHECK:      {evex} vsm4rnds4 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0x14,0x6d,0x00,0xfe,0xff,0xff]
+               {evex} vsm4rnds4 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+
+// CHECK:      {evex} vsm4rnds4 xmm2, xmm3, xmmword ptr [ecx + 2032]
+// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0x51,0x7f]
+               {evex} vsm4rnds4 xmm2, xmm3, xmmword ptr [ecx + 2032]
+
+// CHECK:      {evex} vsm4rnds4 xmm2, xmm3, xmmword ptr [edx - 2048]
+// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0x52,0x80]
+               {evex} vsm4rnds4 xmm2, xmm3, xmmword ptr [edx - 2048]
diff --git a/llvm/test/TableGen/x86-fold-tables.inc b/llvm/test/TableGen/x86-fold-tables.inc
index 85d9b02ac0cbf1..43c206fa0af698 100644
--- a/llvm/test/TableGen/x86-fold-tables.inc
+++ b/llvm/test/TableGen/x86-fold-tables.inc
@@ -4113,8 +4113,14 @@ static const X86FoldTableEntry Table2[] = {
   {X86::VSHUFPSZrri, X86::VSHUFPSZrmi, 0},
   {X86::VSHUFPSrri, X86::VSHUFPSrmi, 0},
   {X86::VSM4KEY4Yrr, X86::VSM4KEY4Yrm, 0},
+  {X86::VSM4KEY4Z128rr, X86::VSM4KEY4Z128rm, 0},
+  {X86::VSM4KEY4Z256rr, X86::VSM4KEY4Z256rm, 0},
+  {X86::VSM4KEY4Zrr, X86::VSM4KEY4Zrm, 0},
   {X86::VSM4KEY4rr, X86::VSM4KEY4rm, 0},
   {X86::VSM4RNDS4Yrr, X86::VSM4RNDS4Yrm, 0},
+  {X86::VSM4RNDS4Z128rr, X86::VSM4RNDS4Z128rm, 0},
+  {X86::VSM4RNDS4Z256rr, X86::VSM4RNDS4Z256rm, 0},
+  {X86::VSM4RNDS4Zrr, X86::VSM4RNDS4Zrm, 0},
   {X86::VSM4RNDS4rr, X86::VSM4RNDS4rm, 0},
   {X86::VSQRTNEPBF16Z128rkz, X86::VSQRTNEPBF16Z128mkz, 0},
   {X86::VSQRTNEPBF16Z256rkz, X86::VSQRTNEPBF16Z256mkz, 0},

>From 23d4bfcabdca8968cba600e2fc0b440121990ef5 Mon Sep 17 00:00:00 2001
From: Freddy Ye <freddy.ye at intel.com>
Date: Thu, 24 Oct 2024 13:43:59 +0800
Subject: [PATCH 2/4] address comments

---
 clang/docs/ReleaseNotes.rst                   |   4 +
 llvm/include/llvm/IR/IntrinsicsX86.td         |  14 +-
 llvm/lib/Target/X86/X86InstrAVX10.td          |  12 +-
 llvm/test/MC/Disassembler/X86/sm4-evex-32.txt | 256 +++++++-------
 llvm/test/MC/Disassembler/X86/sm4-evex-64.txt | 256 +++++++-------
 llvm/test/MC/X86/sm4-evex-32-att.s            | 329 ++++++++----------
 llvm/test/MC/X86/sm4-evex-32-intel.s          | 254 +++++++-------
 llvm/test/MC/X86/sm4-evex-64-att.s            | 329 ++++++++++--------
 llvm/test/MC/X86/sm4-evex-64-intel.s          | 254 +++++++-------
 9 files changed, 855 insertions(+), 853 deletions(-)

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index b7a6ace8bb895d..adc62122774b90 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -606,6 +606,10 @@ X86 Support
   * Supported MINMAX intrinsics of ``*_(mask(z)))_minmax(ne)_p[s|d|h|bh]`` and
   ``*_(mask(z)))_minmax_s[s|d|h]``.
 
+- Supported intrinsics for ``SM4 and AVX10.2``.
+  * Supported SM4 intrinsics of ``_mm512_sm4key4_epi32`` and
+  ``_mm512_sm4rnds4_epi32``.
+
 - All intrinsics in adcintrin.h can now be used in constant expressions.
 
 - All intrinsics in adxintrin.h can now be used in constant expressions.
diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td
index 7725bda1f4f598..34524dbff6c391 100644
--- a/llvm/include/llvm/IR/IntrinsicsX86.td
+++ b/llvm/include/llvm/IR/IntrinsicsX86.td
@@ -6099,6 +6099,11 @@ let TargetPrefix = "x86" in {
         DefaultAttrsIntrinsic<[llvm_v8i32_ty],
         [llvm_v8i32_ty, llvm_v8i32_ty],
         [IntrNoMem]>;
+  def int_x86_vsm4key4512
+      : ClangBuiltin<"__builtin_ia32_vsm4key4512">,
+        DefaultAttrsIntrinsic<[llvm_v16i32_ty],
+        [llvm_v16i32_ty, llvm_v16i32_ty],
+        [IntrNoMem]>;
   def int_x86_vsm4rnds4128
       : ClangBuiltin<"__builtin_ia32_vsm4rnds4128">,
         DefaultAttrsIntrinsic<[llvm_v4i32_ty],
@@ -6109,11 +6114,10 @@ let TargetPrefix = "x86" in {
         DefaultAttrsIntrinsic<[llvm_v8i32_ty],
         [llvm_v8i32_ty, llvm_v8i32_ty],
         [IntrNoMem]>;
-def int_x86_vsm4key4512 : ClangBuiltin<"__builtin_ia32_vsm4key4512">,
-        Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty],
-        [IntrNoMem]>;
-def int_x86_vsm4rnds4512 : ClangBuiltin<"__builtin_ia32_vsm4rnds4512">,
-        Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty],
+  def int_x86_vsm4rnds4512
+      : ClangBuiltin<"__builtin_ia32_vsm4rnds4512">,
+        DefaultAttrsIntrinsic<[llvm_v16i32_ty],
+        [llvm_v16i32_ty, llvm_v16i32_ty],
         [IntrNoMem]>;
 }
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/X86/X86InstrAVX10.td b/llvm/lib/Target/X86/X86InstrAVX10.td
index 640011f5ed28d7..d6fab07ca2aaf1 100644
--- a/llvm/lib/Target/X86/X86InstrAVX10.td
+++ b/llvm/lib/Target/X86/X86InstrAVX10.td
@@ -1652,17 +1652,11 @@ let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
 multiclass avx10_sm4_base<string OpStr> {
   // SM4_Base is in X86InstrSSE.td.
   let Predicates = [HasSM4, HasAVX10_2] in {
-    defm Z128 : SM4_Base<OpStr, avx512vl_i32_info.info128.RC,
-                "128", avx512vl_i32_info.info128.LdFrag,
-                avx512vl_i32_info.info128.MemOp>, EVEX_V128;
-    defm Z256 : SM4_Base<OpStr, avx512vl_i32_info.info256.RC,
-                "256", avx512vl_i32_info.info256.LdFrag,
-                avx512vl_i32_info.info256.MemOp>, EVEX_V256;
+    defm Z128 : SM4_Base<OpStr, VR128X, "128", loadv4i32, i128mem>, EVEX_V128;
+    defm Z256 : SM4_Base<OpStr, VR256X, "256", loadv8i32, i256mem>, EVEX_V256;
   }
   let Predicates = [HasSM4, HasAVX10_2_512] in
-    defm Z : SM4_Base<OpStr, avx512vl_i32_info.info512.RC,
-              "512", avx512vl_i32_info.info512.LdFrag,
-              avx512vl_i32_info.info512.MemOp>, EVEX_V512;
+    defm Z : SM4_Base<OpStr, VR512, "512", loadv16i32, i512mem>, EVEX_V512;
 }
 
 defm VSM4KEY4 : avx10_sm4_base<"vsm4key4">, T8, XS, EVEX, VVVV;
diff --git a/llvm/test/MC/Disassembler/X86/sm4-evex-32.txt b/llvm/test/MC/Disassembler/X86/sm4-evex-32.txt
index f89f4b5a8c0fb8..c1cb271a967b13 100644
--- a/llvm/test/MC/Disassembler/X86/sm4-evex-32.txt
+++ b/llvm/test/MC/Disassembler/X86/sm4-evex-32.txt
@@ -1,170 +1,170 @@
-# RUN: llvm-mc --disassemble %s -triple=x86_64 | FileCheck %s --check-prefixes=ATT
-# RUN: llvm-mc --disassemble %s -triple=x86_64 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL
+# RUN: llvm-mc --disassemble %s -triple=i386-unknown-unknown | FileCheck %s --check-prefixes=ATT
+# RUN: llvm-mc --disassemble %s -triple=i386-unknown-unknown -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL
 
-# ATT:        vsm4key4 %zmm24, %zmm23, %zmm22
-# INTEL:      vsm4key4 zmm22, zmm23, zmm24
-0x62,0x82,0x46,0x40,0xda,0xf0
+# ATT:        vsm4key4 %zmm4, %zmm3, %zmm2
+# INTEL:      vsm4key4 zmm2, zmm3, zmm4
+0x62,0xf2,0x66,0x48,0xda,0xd4
 
-# ATT:        vsm4key4  268435456(%rbp,%r14,8), %zmm23, %zmm22
-# INTEL:      vsm4key4 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
-0x62,0xa2,0x46,0x40,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10
+# ATT:        vsm4key4  268435456(%esp,%esi,8), %zmm3, %zmm2
+# INTEL:      vsm4key4 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf2,0x66,0x48,0xda,0x94,0xf4,0x00,0x00,0x00,0x10
 
-# ATT:        vsm4key4  291(%r8,%rax,4), %zmm23, %zmm22
-# INTEL:      vsm4key4 zmm22, zmm23, zmmword ptr [r8 + 4*rax + 291]
-0x62,0xc2,0x46,0x40,0xda,0xb4,0x80,0x23,0x01,0x00,0x00
+# ATT:        vsm4key4  291(%edi,%eax,4), %zmm3, %zmm2
+# INTEL:      vsm4key4 zmm2, zmm3, zmmword ptr [edi + 4*eax + 291]
+0x62,0xf2,0x66,0x48,0xda,0x94,0x87,0x23,0x01,0x00,0x00
 
-# ATT:        vsm4key4  (%rip), %zmm23, %zmm22
-# INTEL:      vsm4key4 zmm22, zmm23, zmmword ptr [rip]
-0x62,0xe2,0x46,0x40,0xda,0x35,0x00,0x00,0x00,0x00
+# ATT:        vsm4key4  (%eax), %zmm3, %zmm2
+# INTEL:      vsm4key4 zmm2, zmm3, zmmword ptr [eax]
+0x62,0xf2,0x66,0x48,0xda,0x10
 
-# ATT:        vsm4key4  -2048(,%rbp,2), %zmm23, %zmm22
-# INTEL:      vsm4key4 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
-0x62,0xe2,0x46,0x40,0xda,0x34,0x6d,0x00,0xf8,0xff,0xff
+# ATT:        vsm4key4  -2048(,%ebp,2), %zmm3, %zmm2
+# INTEL:      vsm4key4 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+0x62,0xf2,0x66,0x48,0xda,0x14,0x6d,0x00,0xf8,0xff,0xff
 
-# ATT:        vsm4key4  8128(%rcx), %zmm23, %zmm22
-# INTEL:      vsm4key4 zmm22, zmm23, zmmword ptr [rcx + 8128]
-0x62,0xe2,0x46,0x40,0xda,0x71,0x7f
+# ATT:        vsm4key4  8128(%ecx), %zmm3, %zmm2
+# INTEL:      vsm4key4 zmm2, zmm3, zmmword ptr [ecx + 8128]
+0x62,0xf2,0x66,0x48,0xda,0x51,0x7f
 
-# ATT:        vsm4key4  -8192(%rdx), %zmm23, %zmm22
-# INTEL:      vsm4key4 zmm22, zmm23, zmmword ptr [rdx - 8192]
-0x62,0xe2,0x46,0x40,0xda,0x72,0x80
+# ATT:        vsm4key4  -8192(%edx), %zmm3, %zmm2
+# INTEL:      vsm4key4 zmm2, zmm3, zmmword ptr [edx - 8192]
+0x62,0xf2,0x66,0x48,0xda,0x52,0x80
 
-# ATT:        vsm4rnds4 %zmm24, %zmm23, %zmm22
-# INTEL:      vsm4rnds4 zmm22, zmm23, zmm24
-0x62,0x82,0x47,0x40,0xda,0xf0
+# ATT:        vsm4rnds4 %zmm4, %zmm3, %zmm2
+# INTEL:      vsm4rnds4 zmm2, zmm3, zmm4
+0x62,0xf2,0x67,0x48,0xda,0xd4
 
-# ATT:        vsm4rnds4  268435456(%rbp,%r14,8), %zmm23, %zmm22
-# INTEL:      vsm4rnds4 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
-0x62,0xa2,0x47,0x40,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10
+# ATT:        vsm4rnds4  268435456(%esp,%esi,8), %zmm3, %zmm2
+# INTEL:      vsm4rnds4 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf2,0x67,0x48,0xda,0x94,0xf4,0x00,0x00,0x00,0x10
 
-# ATT:        vsm4rnds4  291(%r8,%rax,4), %zmm23, %zmm22
-# INTEL:      vsm4rnds4 zmm22, zmm23, zmmword ptr [r8 + 4*rax + 291]
-0x62,0xc2,0x47,0x40,0xda,0xb4,0x80,0x23,0x01,0x00,0x00
+# ATT:        vsm4rnds4  291(%edi,%eax,4), %zmm3, %zmm2
+# INTEL:      vsm4rnds4 zmm2, zmm3, zmmword ptr [edi + 4*eax + 291]
+0x62,0xf2,0x67,0x48,0xda,0x94,0x87,0x23,0x01,0x00,0x00
 
-# ATT:        vsm4rnds4  (%rip), %zmm23, %zmm22
-# INTEL:      vsm4rnds4 zmm22, zmm23, zmmword ptr [rip]
-0x62,0xe2,0x47,0x40,0xda,0x35,0x00,0x00,0x00,0x00
+# ATT:        vsm4rnds4  (%eax), %zmm3, %zmm2
+# INTEL:      vsm4rnds4 zmm2, zmm3, zmmword ptr [eax]
+0x62,0xf2,0x67,0x48,0xda,0x10
 
-# ATT:        vsm4rnds4  -2048(,%rbp,2), %zmm23, %zmm22
-# INTEL:      vsm4rnds4 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
-0x62,0xe2,0x47,0x40,0xda,0x34,0x6d,0x00,0xf8,0xff,0xff
+# ATT:        vsm4rnds4  -2048(,%ebp,2), %zmm3, %zmm2
+# INTEL:      vsm4rnds4 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+0x62,0xf2,0x67,0x48,0xda,0x14,0x6d,0x00,0xf8,0xff,0xff
 
-# ATT:        vsm4rnds4  8128(%rcx), %zmm23, %zmm22
-# INTEL:      vsm4rnds4 zmm22, zmm23, zmmword ptr [rcx + 8128]
-0x62,0xe2,0x47,0x40,0xda,0x71,0x7f
+# ATT:        vsm4rnds4  8128(%ecx), %zmm3, %zmm2
+# INTEL:      vsm4rnds4 zmm2, zmm3, zmmword ptr [ecx + 8128]
+0x62,0xf2,0x67,0x48,0xda,0x51,0x7f
 
-# ATT:        vsm4rnds4  -8192(%rdx), %zmm23, %zmm22
-# INTEL:      vsm4rnds4 zmm22, zmm23, zmmword ptr [rdx - 8192]
-0x62,0xe2,0x47,0x40,0xda,0x72,0x80
+# ATT:        vsm4rnds4  -8192(%edx), %zmm3, %zmm2
+# INTEL:      vsm4rnds4 zmm2, zmm3, zmmword ptr [edx - 8192]
+0x62,0xf2,0x67,0x48,0xda,0x52,0x80
 
-# ATT:        vsm4key4 %ymm24, %ymm23, %ymm22
-# INTEL:      vsm4key4 ymm22, ymm23, ymm24
-0x62,0x82,0x46,0x20,0xda,0xf0
+# ATT:        vsm4key4 %ymm4, %ymm3, %ymm2
+# INTEL:      vsm4key4 ymm2, ymm3, ymm4
+0x62,0xf2,0x66,0x28,0xda,0xd4
 
-# ATT:        vsm4key4 %xmm24, %xmm23, %xmm22
-# INTEL:      vsm4key4 xmm22, xmm23, xmm24
-0x62,0x82,0x46,0x00,0xda,0xf0
+# ATT:        vsm4key4 %xmm4, %xmm3, %xmm2
+# INTEL:      vsm4key4 xmm2, xmm3, xmm4
+0x62,0xf2,0x66,0x08,0xda,0xd4
 
-# ATT:        vsm4key4  268435456(%rbp,%r14,8), %ymm23, %ymm22
-# INTEL:      vsm4key4 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
-0x62,0xa2,0x46,0x20,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10
+# ATT:        vsm4key4  268435456(%esp,%esi,8), %ymm3, %ymm2
+# INTEL:      vsm4key4 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+0x62,0xf2,0x66,0x28,0xda,0x94,0xf4,0x00,0x00,0x00,0x10
 
-# ATT:        vsm4key4  291(%r8,%rax,4), %ymm23, %ymm22
-# INTEL:      vsm4key4 ymm22, ymm23, ymmword ptr [r8 + 4*rax + 291]
-0x62,0xc2,0x46,0x20,0xda,0xb4,0x80,0x23,0x01,0x00,0x00
+# ATT:        vsm4key4  291(%edi,%eax,4), %ymm3, %ymm2
+# INTEL:      vsm4key4 ymm2, ymm3, ymmword ptr [edi + 4*eax + 291]
+0x62,0xf2,0x66,0x28,0xda,0x94,0x87,0x23,0x01,0x00,0x00
 
-# ATT:        vsm4key4  (%rip), %ymm23, %ymm22
-# INTEL:      vsm4key4 ymm22, ymm23, ymmword ptr [rip]
-0x62,0xe2,0x46,0x20,0xda,0x35,0x00,0x00,0x00,0x00
+# ATT:        vsm4key4  (%eax), %ymm3, %ymm2
+# INTEL:      vsm4key4 ymm2, ymm3, ymmword ptr [eax]
+0x62,0xf2,0x66,0x28,0xda,0x10
 
-# ATT:        vsm4key4  -1024(,%rbp,2), %ymm23, %ymm22
-# INTEL:      vsm4key4 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
-0x62,0xe2,0x46,0x20,0xda,0x34,0x6d,0x00,0xfc,0xff,0xff
+# ATT:        vsm4key4  -1024(,%ebp,2), %ymm3, %ymm2
+# INTEL:      vsm4key4 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+0x62,0xf2,0x66,0x28,0xda,0x14,0x6d,0x00,0xfc,0xff,0xff
 
-# ATT:        vsm4key4  4064(%rcx), %ymm23, %ymm22
-# INTEL:      vsm4key4 ymm22, ymm23, ymmword ptr [rcx + 4064]
-0x62,0xe2,0x46,0x20,0xda,0x71,0x7f
+# ATT:        vsm4key4  4064(%ecx), %ymm3, %ymm2
+# INTEL:      vsm4key4 ymm2, ymm3, ymmword ptr [ecx + 4064]
+0x62,0xf2,0x66,0x28,0xda,0x51,0x7f
 
-# ATT:        vsm4key4  -4096(%rdx), %ymm23, %ymm22
-# INTEL:      vsm4key4 ymm22, ymm23, ymmword ptr [rdx - 4096]
-0x62,0xe2,0x46,0x20,0xda,0x72,0x80
+# ATT:        vsm4key4  -4096(%edx), %ymm3, %ymm2
+# INTEL:      vsm4key4 ymm2, ymm3, ymmword ptr [edx - 4096]
+0x62,0xf2,0x66,0x28,0xda,0x52,0x80
 
-# ATT:        vsm4key4  268435456(%rbp,%r14,8), %xmm23, %xmm22
-# INTEL:      vsm4key4 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
-0x62,0xa2,0x46,0x00,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10
+# ATT:        vsm4key4  268435456(%esp,%esi,8), %xmm3, %xmm2
+# INTEL:      vsm4key4 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf2,0x66,0x08,0xda,0x94,0xf4,0x00,0x00,0x00,0x10
 
-# ATT:        vsm4key4  291(%r8,%rax,4), %xmm23, %xmm22
-# INTEL:      vsm4key4 xmm22, xmm23, xmmword ptr [r8 + 4*rax + 291]
-0x62,0xc2,0x46,0x00,0xda,0xb4,0x80,0x23,0x01,0x00,0x00
+# ATT:        vsm4key4  291(%edi,%eax,4), %xmm3, %xmm2
+# INTEL:      vsm4key4 xmm2, xmm3, xmmword ptr [edi + 4*eax + 291]
+0x62,0xf2,0x66,0x08,0xda,0x94,0x87,0x23,0x01,0x00,0x00
 
-# ATT:        vsm4key4  (%rip), %xmm23, %xmm22
-# INTEL:      vsm4key4 xmm22, xmm23, xmmword ptr [rip]
-0x62,0xe2,0x46,0x00,0xda,0x35,0x00,0x00,0x00,0x00
+# ATT:        vsm4key4  (%eax), %xmm3, %xmm2
+# INTEL:      vsm4key4 xmm2, xmm3, xmmword ptr [eax]
+0x62,0xf2,0x66,0x08,0xda,0x10
 
-# ATT:        vsm4key4  -512(,%rbp,2), %xmm23, %xmm22
-# INTEL:      vsm4key4 xmm22, xmm23, xmmword ptr [2*rbp - 512]
-0x62,0xe2,0x46,0x00,0xda,0x34,0x6d,0x00,0xfe,0xff,0xff
+# ATT:        vsm4key4  -512(,%ebp,2), %xmm3, %xmm2
+# INTEL:      vsm4key4 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+0x62,0xf2,0x66,0x08,0xda,0x14,0x6d,0x00,0xfe,0xff,0xff
 
-# ATT:        vsm4key4  2032(%rcx), %xmm23, %xmm22
-# INTEL:      vsm4key4 xmm22, xmm23, xmmword ptr [rcx + 2032]
-0x62,0xe2,0x46,0x00,0xda,0x71,0x7f
+# ATT:        vsm4key4  2032(%ecx), %xmm3, %xmm2
+# INTEL:      vsm4key4 xmm2, xmm3, xmmword ptr [ecx + 2032]
+0x62,0xf2,0x66,0x08,0xda,0x51,0x7f
 
-# ATT:        vsm4key4  -2048(%rdx), %xmm23, %xmm22
-# INTEL:      vsm4key4 xmm22, xmm23, xmmword ptr [rdx - 2048]
-0x62,0xe2,0x46,0x00,0xda,0x72,0x80
+# ATT:        vsm4key4  -2048(%edx), %xmm3, %xmm2
+# INTEL:      vsm4key4 xmm2, xmm3, xmmword ptr [edx - 2048]
+0x62,0xf2,0x66,0x08,0xda,0x52,0x80
 
-# ATT:        vsm4rnds4 %ymm24, %ymm23, %ymm22
-# INTEL:      vsm4rnds4 ymm22, ymm23, ymm24
-0x62,0x82,0x47,0x20,0xda,0xf0
+# ATT:        vsm4rnds4 %ymm4, %ymm3, %ymm2
+# INTEL:      vsm4rnds4 ymm2, ymm3, ymm4
+0x62,0xf2,0x67,0x28,0xda,0xd4
 
-# ATT:        vsm4rnds4 %xmm24, %xmm23, %xmm22
-# INTEL:      vsm4rnds4 xmm22, xmm23, xmm24
-0x62,0x82,0x47,0x00,0xda,0xf0
+# ATT:        vsm4rnds4 %xmm4, %xmm3, %xmm2
+# INTEL:      vsm4rnds4 xmm2, xmm3, xmm4
+0x62,0xf2,0x67,0x08,0xda,0xd4
 
-# ATT:        vsm4rnds4  268435456(%rbp,%r14,8), %ymm23, %ymm22
-# INTEL:      vsm4rnds4 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
-0x62,0xa2,0x47,0x20,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10
+# ATT:        vsm4rnds4  268435456(%esp,%esi,8), %ymm3, %ymm2
+# INTEL:      vsm4rnds4 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+0x62,0xf2,0x67,0x28,0xda,0x94,0xf4,0x00,0x00,0x00,0x10
 
-# ATT:        vsm4rnds4  291(%r8,%rax,4), %ymm23, %ymm22
-# INTEL:      vsm4rnds4 ymm22, ymm23, ymmword ptr [r8 + 4*rax + 291]
-0x62,0xc2,0x47,0x20,0xda,0xb4,0x80,0x23,0x01,0x00,0x00
+# ATT:        vsm4rnds4  291(%edi,%eax,4), %ymm3, %ymm2
+# INTEL:      vsm4rnds4 ymm2, ymm3, ymmword ptr [edi + 4*eax + 291]
+0x62,0xf2,0x67,0x28,0xda,0x94,0x87,0x23,0x01,0x00,0x00
 
-# ATT:        vsm4rnds4  (%rip), %ymm23, %ymm22
-# INTEL:      vsm4rnds4 ymm22, ymm23, ymmword ptr [rip]
-0x62,0xe2,0x47,0x20,0xda,0x35,0x00,0x00,0x00,0x00
+# ATT:        vsm4rnds4  (%eax), %ymm3, %ymm2
+# INTEL:      vsm4rnds4 ymm2, ymm3, ymmword ptr [eax]
+0x62,0xf2,0x67,0x28,0xda,0x10
 
-# ATT:        vsm4rnds4  -1024(,%rbp,2), %ymm23, %ymm22
-# INTEL:      vsm4rnds4 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
-0x62,0xe2,0x47,0x20,0xda,0x34,0x6d,0x00,0xfc,0xff,0xff
+# ATT:        vsm4rnds4  -1024(,%ebp,2), %ymm3, %ymm2
+# INTEL:      vsm4rnds4 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+0x62,0xf2,0x67,0x28,0xda,0x14,0x6d,0x00,0xfc,0xff,0xff
 
-# ATT:        vsm4rnds4  4064(%rcx), %ymm23, %ymm22
-# INTEL:      vsm4rnds4 ymm22, ymm23, ymmword ptr [rcx + 4064]
-0x62,0xe2,0x47,0x20,0xda,0x71,0x7f
+# ATT:        vsm4rnds4  4064(%ecx), %ymm3, %ymm2
+# INTEL:      vsm4rnds4 ymm2, ymm3, ymmword ptr [ecx + 4064]
+0x62,0xf2,0x67,0x28,0xda,0x51,0x7f
 
-# ATT:        vsm4rnds4  -4096(%rdx), %ymm23, %ymm22
-# INTEL:      vsm4rnds4 ymm22, ymm23, ymmword ptr [rdx - 4096]
-0x62,0xe2,0x47,0x20,0xda,0x72,0x80
+# ATT:        vsm4rnds4  -4096(%edx), %ymm3, %ymm2
+# INTEL:      vsm4rnds4 ymm2, ymm3, ymmword ptr [edx - 4096]
+0x62,0xf2,0x67,0x28,0xda,0x52,0x80
 
-# ATT:        vsm4rnds4  268435456(%rbp,%r14,8), %xmm23, %xmm22
-# INTEL:      vsm4rnds4 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
-0x62,0xa2,0x47,0x00,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10
+# ATT:        vsm4rnds4  268435456(%esp,%esi,8), %xmm3, %xmm2
+# INTEL:      vsm4rnds4 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf2,0x67,0x08,0xda,0x94,0xf4,0x00,0x00,0x00,0x10
 
-# ATT:        vsm4rnds4  291(%r8,%rax,4), %xmm23, %xmm22
-# INTEL:      vsm4rnds4 xmm22, xmm23, xmmword ptr [r8 + 4*rax + 291]
-0x62,0xc2,0x47,0x00,0xda,0xb4,0x80,0x23,0x01,0x00,0x00
+# ATT:        vsm4rnds4  291(%edi,%eax,4), %xmm3, %xmm2
+# INTEL:      vsm4rnds4 xmm2, xmm3, xmmword ptr [edi + 4*eax + 291]
+0x62,0xf2,0x67,0x08,0xda,0x94,0x87,0x23,0x01,0x00,0x00
 
-# ATT:        vsm4rnds4  (%rip), %xmm23, %xmm22
-# INTEL:      vsm4rnds4 xmm22, xmm23, xmmword ptr [rip]
-0x62,0xe2,0x47,0x00,0xda,0x35,0x00,0x00,0x00,0x00
+# ATT:        vsm4rnds4  (%eax), %xmm3, %xmm2
+# INTEL:      vsm4rnds4 xmm2, xmm3, xmmword ptr [eax]
+0x62,0xf2,0x67,0x08,0xda,0x10
 
-# ATT:        vsm4rnds4  -512(,%rbp,2), %xmm23, %xmm22
-# INTEL:      vsm4rnds4 xmm22, xmm23, xmmword ptr [2*rbp - 512]
-0x62,0xe2,0x47,0x00,0xda,0x34,0x6d,0x00,0xfe,0xff,0xff
+# ATT:        vsm4rnds4  -512(,%ebp,2), %xmm3, %xmm2
+# INTEL:      vsm4rnds4 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+0x62,0xf2,0x67,0x08,0xda,0x14,0x6d,0x00,0xfe,0xff,0xff
 
-# ATT:        vsm4rnds4  2032(%rcx), %xmm23, %xmm22
-# INTEL:      vsm4rnds4 xmm22, xmm23, xmmword ptr [rcx + 2032]
-0x62,0xe2,0x47,0x00,0xda,0x71,0x7f
+# ATT:        vsm4rnds4  2032(%ecx), %xmm3, %xmm2
+# INTEL:      vsm4rnds4 xmm2, xmm3, xmmword ptr [ecx + 2032]
+0x62,0xf2,0x67,0x08,0xda,0x51,0x7f
 
-# ATT:        vsm4rnds4  -2048(%rdx), %xmm23, %xmm22
-# INTEL:      vsm4rnds4 xmm22, xmm23, xmmword ptr [rdx - 2048]
-0x62,0xe2,0x47,0x00,0xda,0x72,0x80
+# ATT:        vsm4rnds4  -2048(%edx), %xmm3, %xmm2
+# INTEL:      vsm4rnds4 xmm2, xmm3, xmmword ptr [edx - 2048]
+0x62,0xf2,0x67,0x08,0xda,0x52,0x80
diff --git a/llvm/test/MC/Disassembler/X86/sm4-evex-64.txt b/llvm/test/MC/Disassembler/X86/sm4-evex-64.txt
index c1cb271a967b13..f89f4b5a8c0fb8 100644
--- a/llvm/test/MC/Disassembler/X86/sm4-evex-64.txt
+++ b/llvm/test/MC/Disassembler/X86/sm4-evex-64.txt
@@ -1,170 +1,170 @@
-# RUN: llvm-mc --disassemble %s -triple=i386-unknown-unknown | FileCheck %s --check-prefixes=ATT
-# RUN: llvm-mc --disassemble %s -triple=i386-unknown-unknown -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL
+# RUN: llvm-mc --disassemble %s -triple=x86_64 | FileCheck %s --check-prefixes=ATT
+# RUN: llvm-mc --disassemble %s -triple=x86_64 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL
 
-# ATT:        vsm4key4 %zmm4, %zmm3, %zmm2
-# INTEL:      vsm4key4 zmm2, zmm3, zmm4
-0x62,0xf2,0x66,0x48,0xda,0xd4
+# ATT:        vsm4key4 %zmm24, %zmm23, %zmm22
+# INTEL:      vsm4key4 zmm22, zmm23, zmm24
+0x62,0x82,0x46,0x40,0xda,0xf0
 
-# ATT:        vsm4key4  268435456(%esp,%esi,8), %zmm3, %zmm2
-# INTEL:      vsm4key4 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
-0x62,0xf2,0x66,0x48,0xda,0x94,0xf4,0x00,0x00,0x00,0x10
+# ATT:        vsm4key4  268435456(%rbp,%r14,8), %zmm23, %zmm22
+# INTEL:      vsm4key4 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa2,0x46,0x40,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10
 
-# ATT:        vsm4key4  291(%edi,%eax,4), %zmm3, %zmm2
-# INTEL:      vsm4key4 zmm2, zmm3, zmmword ptr [edi + 4*eax + 291]
-0x62,0xf2,0x66,0x48,0xda,0x94,0x87,0x23,0x01,0x00,0x00
+# ATT:        vsm4key4  291(%r8,%rax,4), %zmm23, %zmm22
+# INTEL:      vsm4key4 zmm22, zmm23, zmmword ptr [r8 + 4*rax + 291]
+0x62,0xc2,0x46,0x40,0xda,0xb4,0x80,0x23,0x01,0x00,0x00
 
-# ATT:        vsm4key4  (%eax), %zmm3, %zmm2
-# INTEL:      vsm4key4 zmm2, zmm3, zmmword ptr [eax]
-0x62,0xf2,0x66,0x48,0xda,0x10
+# ATT:        vsm4key4  (%rip), %zmm23, %zmm22
+# INTEL:      vsm4key4 zmm22, zmm23, zmmword ptr [rip]
+0x62,0xe2,0x46,0x40,0xda,0x35,0x00,0x00,0x00,0x00
 
-# ATT:        vsm4key4  -2048(,%ebp,2), %zmm3, %zmm2
-# INTEL:      vsm4key4 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
-0x62,0xf2,0x66,0x48,0xda,0x14,0x6d,0x00,0xf8,0xff,0xff
+# ATT:        vsm4key4  -2048(,%rbp,2), %zmm23, %zmm22
+# INTEL:      vsm4key4 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+0x62,0xe2,0x46,0x40,0xda,0x34,0x6d,0x00,0xf8,0xff,0xff
 
-# ATT:        vsm4key4  8128(%ecx), %zmm3, %zmm2
-# INTEL:      vsm4key4 zmm2, zmm3, zmmword ptr [ecx + 8128]
-0x62,0xf2,0x66,0x48,0xda,0x51,0x7f
+# ATT:        vsm4key4  8128(%rcx), %zmm23, %zmm22
+# INTEL:      vsm4key4 zmm22, zmm23, zmmword ptr [rcx + 8128]
+0x62,0xe2,0x46,0x40,0xda,0x71,0x7f
 
-# ATT:        vsm4key4  -8192(%edx), %zmm3, %zmm2
-# INTEL:      vsm4key4 zmm2, zmm3, zmmword ptr [edx - 8192]
-0x62,0xf2,0x66,0x48,0xda,0x52,0x80
+# ATT:        vsm4key4  -8192(%rdx), %zmm23, %zmm22
+# INTEL:      vsm4key4 zmm22, zmm23, zmmword ptr [rdx - 8192]
+0x62,0xe2,0x46,0x40,0xda,0x72,0x80
 
-# ATT:        vsm4rnds4 %zmm4, %zmm3, %zmm2
-# INTEL:      vsm4rnds4 zmm2, zmm3, zmm4
-0x62,0xf2,0x67,0x48,0xda,0xd4
+# ATT:        vsm4rnds4 %zmm24, %zmm23, %zmm22
+# INTEL:      vsm4rnds4 zmm22, zmm23, zmm24
+0x62,0x82,0x47,0x40,0xda,0xf0
 
-# ATT:        vsm4rnds4  268435456(%esp,%esi,8), %zmm3, %zmm2
-# INTEL:      vsm4rnds4 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
-0x62,0xf2,0x67,0x48,0xda,0x94,0xf4,0x00,0x00,0x00,0x10
+# ATT:        vsm4rnds4  268435456(%rbp,%r14,8), %zmm23, %zmm22
+# INTEL:      vsm4rnds4 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa2,0x47,0x40,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10
 
-# ATT:        vsm4rnds4  291(%edi,%eax,4), %zmm3, %zmm2
-# INTEL:      vsm4rnds4 zmm2, zmm3, zmmword ptr [edi + 4*eax + 291]
-0x62,0xf2,0x67,0x48,0xda,0x94,0x87,0x23,0x01,0x00,0x00
+# ATT:        vsm4rnds4  291(%r8,%rax,4), %zmm23, %zmm22
+# INTEL:      vsm4rnds4 zmm22, zmm23, zmmword ptr [r8 + 4*rax + 291]
+0x62,0xc2,0x47,0x40,0xda,0xb4,0x80,0x23,0x01,0x00,0x00
 
-# ATT:        vsm4rnds4  (%eax), %zmm3, %zmm2
-# INTEL:      vsm4rnds4 zmm2, zmm3, zmmword ptr [eax]
-0x62,0xf2,0x67,0x48,0xda,0x10
+# ATT:        vsm4rnds4  (%rip), %zmm23, %zmm22
+# INTEL:      vsm4rnds4 zmm22, zmm23, zmmword ptr [rip]
+0x62,0xe2,0x47,0x40,0xda,0x35,0x00,0x00,0x00,0x00
 
-# ATT:        vsm4rnds4  -2048(,%ebp,2), %zmm3, %zmm2
-# INTEL:      vsm4rnds4 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
-0x62,0xf2,0x67,0x48,0xda,0x14,0x6d,0x00,0xf8,0xff,0xff
+# ATT:        vsm4rnds4  -2048(,%rbp,2), %zmm23, %zmm22
+# INTEL:      vsm4rnds4 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+0x62,0xe2,0x47,0x40,0xda,0x34,0x6d,0x00,0xf8,0xff,0xff
 
-# ATT:        vsm4rnds4  8128(%ecx), %zmm3, %zmm2
-# INTEL:      vsm4rnds4 zmm2, zmm3, zmmword ptr [ecx + 8128]
-0x62,0xf2,0x67,0x48,0xda,0x51,0x7f
+# ATT:        vsm4rnds4  8128(%rcx), %zmm23, %zmm22
+# INTEL:      vsm4rnds4 zmm22, zmm23, zmmword ptr [rcx + 8128]
+0x62,0xe2,0x47,0x40,0xda,0x71,0x7f
 
-# ATT:        vsm4rnds4  -8192(%edx), %zmm3, %zmm2
-# INTEL:      vsm4rnds4 zmm2, zmm3, zmmword ptr [edx - 8192]
-0x62,0xf2,0x67,0x48,0xda,0x52,0x80
+# ATT:        vsm4rnds4  -8192(%rdx), %zmm23, %zmm22
+# INTEL:      vsm4rnds4 zmm22, zmm23, zmmword ptr [rdx - 8192]
+0x62,0xe2,0x47,0x40,0xda,0x72,0x80
 
-# ATT:        vsm4key4 %ymm4, %ymm3, %ymm2
-# INTEL:      vsm4key4 ymm2, ymm3, ymm4
-0x62,0xf2,0x66,0x28,0xda,0xd4
+# ATT:        vsm4key4 %ymm24, %ymm23, %ymm22
+# INTEL:      vsm4key4 ymm22, ymm23, ymm24
+0x62,0x82,0x46,0x20,0xda,0xf0
 
-# ATT:        vsm4key4 %xmm4, %xmm3, %xmm2
-# INTEL:      vsm4key4 xmm2, xmm3, xmm4
-0x62,0xf2,0x66,0x08,0xda,0xd4
+# ATT:        vsm4key4 %xmm24, %xmm23, %xmm22
+# INTEL:      vsm4key4 xmm22, xmm23, xmm24
+0x62,0x82,0x46,0x00,0xda,0xf0
 
-# ATT:        vsm4key4  268435456(%esp,%esi,8), %ymm3, %ymm2
-# INTEL:      vsm4key4 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
-0x62,0xf2,0x66,0x28,0xda,0x94,0xf4,0x00,0x00,0x00,0x10
+# ATT:        vsm4key4  268435456(%rbp,%r14,8), %ymm23, %ymm22
+# INTEL:      vsm4key4 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa2,0x46,0x20,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10
 
-# ATT:        vsm4key4  291(%edi,%eax,4), %ymm3, %ymm2
-# INTEL:      vsm4key4 ymm2, ymm3, ymmword ptr [edi + 4*eax + 291]
-0x62,0xf2,0x66,0x28,0xda,0x94,0x87,0x23,0x01,0x00,0x00
+# ATT:        vsm4key4  291(%r8,%rax,4), %ymm23, %ymm22
+# INTEL:      vsm4key4 ymm22, ymm23, ymmword ptr [r8 + 4*rax + 291]
+0x62,0xc2,0x46,0x20,0xda,0xb4,0x80,0x23,0x01,0x00,0x00
 
-# ATT:        vsm4key4  (%eax), %ymm3, %ymm2
-# INTEL:      vsm4key4 ymm2, ymm3, ymmword ptr [eax]
-0x62,0xf2,0x66,0x28,0xda,0x10
+# ATT:        vsm4key4  (%rip), %ymm23, %ymm22
+# INTEL:      vsm4key4 ymm22, ymm23, ymmword ptr [rip]
+0x62,0xe2,0x46,0x20,0xda,0x35,0x00,0x00,0x00,0x00
 
-# ATT:        vsm4key4  -1024(,%ebp,2), %ymm3, %ymm2
-# INTEL:      vsm4key4 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
-0x62,0xf2,0x66,0x28,0xda,0x14,0x6d,0x00,0xfc,0xff,0xff
+# ATT:        vsm4key4  -1024(,%rbp,2), %ymm23, %ymm22
+# INTEL:      vsm4key4 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+0x62,0xe2,0x46,0x20,0xda,0x34,0x6d,0x00,0xfc,0xff,0xff
 
-# ATT:        vsm4key4  4064(%ecx), %ymm3, %ymm2
-# INTEL:      vsm4key4 ymm2, ymm3, ymmword ptr [ecx + 4064]
-0x62,0xf2,0x66,0x28,0xda,0x51,0x7f
+# ATT:        vsm4key4  4064(%rcx), %ymm23, %ymm22
+# INTEL:      vsm4key4 ymm22, ymm23, ymmword ptr [rcx + 4064]
+0x62,0xe2,0x46,0x20,0xda,0x71,0x7f
 
-# ATT:        vsm4key4  -4096(%edx), %ymm3, %ymm2
-# INTEL:      vsm4key4 ymm2, ymm3, ymmword ptr [edx - 4096]
-0x62,0xf2,0x66,0x28,0xda,0x52,0x80
+# ATT:        vsm4key4  -4096(%rdx), %ymm23, %ymm22
+# INTEL:      vsm4key4 ymm22, ymm23, ymmword ptr [rdx - 4096]
+0x62,0xe2,0x46,0x20,0xda,0x72,0x80
 
-# ATT:        vsm4key4  268435456(%esp,%esi,8), %xmm3, %xmm2
-# INTEL:      vsm4key4 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
-0x62,0xf2,0x66,0x08,0xda,0x94,0xf4,0x00,0x00,0x00,0x10
+# ATT:        vsm4key4  268435456(%rbp,%r14,8), %xmm23, %xmm22
+# INTEL:      vsm4key4 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa2,0x46,0x00,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10
 
-# ATT:        vsm4key4  291(%edi,%eax,4), %xmm3, %xmm2
-# INTEL:      vsm4key4 xmm2, xmm3, xmmword ptr [edi + 4*eax + 291]
-0x62,0xf2,0x66,0x08,0xda,0x94,0x87,0x23,0x01,0x00,0x00
+# ATT:        vsm4key4  291(%r8,%rax,4), %xmm23, %xmm22
+# INTEL:      vsm4key4 xmm22, xmm23, xmmword ptr [r8 + 4*rax + 291]
+0x62,0xc2,0x46,0x00,0xda,0xb4,0x80,0x23,0x01,0x00,0x00
 
-# ATT:        vsm4key4  (%eax), %xmm3, %xmm2
-# INTEL:      vsm4key4 xmm2, xmm3, xmmword ptr [eax]
-0x62,0xf2,0x66,0x08,0xda,0x10
+# ATT:        vsm4key4  (%rip), %xmm23, %xmm22
+# INTEL:      vsm4key4 xmm22, xmm23, xmmword ptr [rip]
+0x62,0xe2,0x46,0x00,0xda,0x35,0x00,0x00,0x00,0x00
 
-# ATT:        vsm4key4  -512(,%ebp,2), %xmm3, %xmm2
-# INTEL:      vsm4key4 xmm2, xmm3, xmmword ptr [2*ebp - 512]
-0x62,0xf2,0x66,0x08,0xda,0x14,0x6d,0x00,0xfe,0xff,0xff
+# ATT:        vsm4key4  -512(,%rbp,2), %xmm23, %xmm22
+# INTEL:      vsm4key4 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+0x62,0xe2,0x46,0x00,0xda,0x34,0x6d,0x00,0xfe,0xff,0xff
 
-# ATT:        vsm4key4  2032(%ecx), %xmm3, %xmm2
-# INTEL:      vsm4key4 xmm2, xmm3, xmmword ptr [ecx + 2032]
-0x62,0xf2,0x66,0x08,0xda,0x51,0x7f
+# ATT:        vsm4key4  2032(%rcx), %xmm23, %xmm22
+# INTEL:      vsm4key4 xmm22, xmm23, xmmword ptr [rcx + 2032]
+0x62,0xe2,0x46,0x00,0xda,0x71,0x7f
 
-# ATT:        vsm4key4  -2048(%edx), %xmm3, %xmm2
-# INTEL:      vsm4key4 xmm2, xmm3, xmmword ptr [edx - 2048]
-0x62,0xf2,0x66,0x08,0xda,0x52,0x80
+# ATT:        vsm4key4  -2048(%rdx), %xmm23, %xmm22
+# INTEL:      vsm4key4 xmm22, xmm23, xmmword ptr [rdx - 2048]
+0x62,0xe2,0x46,0x00,0xda,0x72,0x80
 
-# ATT:        vsm4rnds4 %ymm4, %ymm3, %ymm2
-# INTEL:      vsm4rnds4 ymm2, ymm3, ymm4
-0x62,0xf2,0x67,0x28,0xda,0xd4
+# ATT:        vsm4rnds4 %ymm24, %ymm23, %ymm22
+# INTEL:      vsm4rnds4 ymm22, ymm23, ymm24
+0x62,0x82,0x47,0x20,0xda,0xf0
 
-# ATT:        vsm4rnds4 %xmm4, %xmm3, %xmm2
-# INTEL:      vsm4rnds4 xmm2, xmm3, xmm4
-0x62,0xf2,0x67,0x08,0xda,0xd4
+# ATT:        vsm4rnds4 %xmm24, %xmm23, %xmm22
+# INTEL:      vsm4rnds4 xmm22, xmm23, xmm24
+0x62,0x82,0x47,0x00,0xda,0xf0
 
-# ATT:        vsm4rnds4  268435456(%esp,%esi,8), %ymm3, %ymm2
-# INTEL:      vsm4rnds4 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
-0x62,0xf2,0x67,0x28,0xda,0x94,0xf4,0x00,0x00,0x00,0x10
+# ATT:        vsm4rnds4  268435456(%rbp,%r14,8), %ymm23, %ymm22
+# INTEL:      vsm4rnds4 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa2,0x47,0x20,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10
 
-# ATT:        vsm4rnds4  291(%edi,%eax,4), %ymm3, %ymm2
-# INTEL:      vsm4rnds4 ymm2, ymm3, ymmword ptr [edi + 4*eax + 291]
-0x62,0xf2,0x67,0x28,0xda,0x94,0x87,0x23,0x01,0x00,0x00
+# ATT:        vsm4rnds4  291(%r8,%rax,4), %ymm23, %ymm22
+# INTEL:      vsm4rnds4 ymm22, ymm23, ymmword ptr [r8 + 4*rax + 291]
+0x62,0xc2,0x47,0x20,0xda,0xb4,0x80,0x23,0x01,0x00,0x00
 
-# ATT:        vsm4rnds4  (%eax), %ymm3, %ymm2
-# INTEL:      vsm4rnds4 ymm2, ymm3, ymmword ptr [eax]
-0x62,0xf2,0x67,0x28,0xda,0x10
+# ATT:        vsm4rnds4  (%rip), %ymm23, %ymm22
+# INTEL:      vsm4rnds4 ymm22, ymm23, ymmword ptr [rip]
+0x62,0xe2,0x47,0x20,0xda,0x35,0x00,0x00,0x00,0x00
 
-# ATT:        vsm4rnds4  -1024(,%ebp,2), %ymm3, %ymm2
-# INTEL:      vsm4rnds4 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
-0x62,0xf2,0x67,0x28,0xda,0x14,0x6d,0x00,0xfc,0xff,0xff
+# ATT:        vsm4rnds4  -1024(,%rbp,2), %ymm23, %ymm22
+# INTEL:      vsm4rnds4 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+0x62,0xe2,0x47,0x20,0xda,0x34,0x6d,0x00,0xfc,0xff,0xff
 
-# ATT:        vsm4rnds4  4064(%ecx), %ymm3, %ymm2
-# INTEL:      vsm4rnds4 ymm2, ymm3, ymmword ptr [ecx + 4064]
-0x62,0xf2,0x67,0x28,0xda,0x51,0x7f
+# ATT:        vsm4rnds4  4064(%rcx), %ymm23, %ymm22
+# INTEL:      vsm4rnds4 ymm22, ymm23, ymmword ptr [rcx + 4064]
+0x62,0xe2,0x47,0x20,0xda,0x71,0x7f
 
-# ATT:        vsm4rnds4  -4096(%edx), %ymm3, %ymm2
-# INTEL:      vsm4rnds4 ymm2, ymm3, ymmword ptr [edx - 4096]
-0x62,0xf2,0x67,0x28,0xda,0x52,0x80
+# ATT:        vsm4rnds4  -4096(%rdx), %ymm23, %ymm22
+# INTEL:      vsm4rnds4 ymm22, ymm23, ymmword ptr [rdx - 4096]
+0x62,0xe2,0x47,0x20,0xda,0x72,0x80
 
-# ATT:        vsm4rnds4  268435456(%esp,%esi,8), %xmm3, %xmm2
-# INTEL:      vsm4rnds4 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
-0x62,0xf2,0x67,0x08,0xda,0x94,0xf4,0x00,0x00,0x00,0x10
+# ATT:        vsm4rnds4  268435456(%rbp,%r14,8), %xmm23, %xmm22
+# INTEL:      vsm4rnds4 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa2,0x47,0x00,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10
 
-# ATT:        vsm4rnds4  291(%edi,%eax,4), %xmm3, %xmm2
-# INTEL:      vsm4rnds4 xmm2, xmm3, xmmword ptr [edi + 4*eax + 291]
-0x62,0xf2,0x67,0x08,0xda,0x94,0x87,0x23,0x01,0x00,0x00
+# ATT:        vsm4rnds4  291(%r8,%rax,4), %xmm23, %xmm22
+# INTEL:      vsm4rnds4 xmm22, xmm23, xmmword ptr [r8 + 4*rax + 291]
+0x62,0xc2,0x47,0x00,0xda,0xb4,0x80,0x23,0x01,0x00,0x00
 
-# ATT:        vsm4rnds4  (%eax), %xmm3, %xmm2
-# INTEL:      vsm4rnds4 xmm2, xmm3, xmmword ptr [eax]
-0x62,0xf2,0x67,0x08,0xda,0x10
+# ATT:        vsm4rnds4  (%rip), %xmm23, %xmm22
+# INTEL:      vsm4rnds4 xmm22, xmm23, xmmword ptr [rip]
+0x62,0xe2,0x47,0x00,0xda,0x35,0x00,0x00,0x00,0x00
 
-# ATT:        vsm4rnds4  -512(,%ebp,2), %xmm3, %xmm2
-# INTEL:      vsm4rnds4 xmm2, xmm3, xmmword ptr [2*ebp - 512]
-0x62,0xf2,0x67,0x08,0xda,0x14,0x6d,0x00,0xfe,0xff,0xff
+# ATT:        vsm4rnds4  -512(,%rbp,2), %xmm23, %xmm22
+# INTEL:      vsm4rnds4 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+0x62,0xe2,0x47,0x00,0xda,0x34,0x6d,0x00,0xfe,0xff,0xff
 
-# ATT:        vsm4rnds4  2032(%ecx), %xmm3, %xmm2
-# INTEL:      vsm4rnds4 xmm2, xmm3, xmmword ptr [ecx + 2032]
-0x62,0xf2,0x67,0x08,0xda,0x51,0x7f
+# ATT:        vsm4rnds4  2032(%rcx), %xmm23, %xmm22
+# INTEL:      vsm4rnds4 xmm22, xmm23, xmmword ptr [rcx + 2032]
+0x62,0xe2,0x47,0x00,0xda,0x71,0x7f
 
-# ATT:        vsm4rnds4  -2048(%edx), %xmm3, %xmm2
-# INTEL:      vsm4rnds4 xmm2, xmm3, xmmword ptr [edx - 2048]
-0x62,0xf2,0x67,0x08,0xda,0x52,0x80
+# ATT:        vsm4rnds4  -2048(%rdx), %xmm23, %xmm22
+# INTEL:      vsm4rnds4 xmm22, xmm23, xmmword ptr [rdx - 2048]
+0x62,0xe2,0x47,0x00,0xda,0x72,0x80
diff --git a/llvm/test/MC/X86/sm4-evex-32-att.s b/llvm/test/MC/X86/sm4-evex-32-att.s
index 389a29b1189795..de10d95ac74d7b 100644
--- a/llvm/test/MC/X86/sm4-evex-32-att.s
+++ b/llvm/test/MC/X86/sm4-evex-32-att.s
@@ -1,224 +1,169 @@
-// RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding %s | FileCheck %s
+// RUN: llvm-mc -triple i686-unknown-unknown --show-encoding %s | FileCheck %s
 
-// CHECK:      vsm4key4 %zmm24, %zmm23, %zmm22
-// CHECK: encoding: [0x62,0x82,0x46,0x40,0xda,0xf0]
-               vsm4key4 %zmm24, %zmm23, %zmm22
+// CHECK:      vsm4key4 %zmm4, %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0xd4]
+               vsm4key4 %zmm4, %zmm3, %zmm2
 
-// CHECK:      vsm4key4  268435456(%rbp,%r14,8), %zmm23, %zmm22
-// CHECK: encoding: [0x62,0xa2,0x46,0x40,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10]
-               vsm4key4  268435456(%rbp,%r14,8), %zmm23, %zmm22
+// CHECK:      vsm4key4  268435456(%esp,%esi,8), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0x94,0xf4,0x00,0x00,0x00,0x10]
+               vsm4key4  268435456(%esp,%esi,8), %zmm3, %zmm2
 
-// CHECK:      vsm4key4  291(%r8,%rax,4), %zmm23, %zmm22
-// CHECK: encoding: [0x62,0xc2,0x46,0x40,0xda,0xb4,0x80,0x23,0x01,0x00,0x00]
-               vsm4key4  291(%r8,%rax,4), %zmm23, %zmm22
+// CHECK:      vsm4key4  291(%edi,%eax,4), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0x94,0x87,0x23,0x01,0x00,0x00]
+               vsm4key4  291(%edi,%eax,4), %zmm3, %zmm2
 
-// CHECK:      vsm4key4  (%rip), %zmm23, %zmm22
-// CHECK: encoding: [0x62,0xe2,0x46,0x40,0xda,0x35,0x00,0x00,0x00,0x00]
-               vsm4key4  (%rip), %zmm23, %zmm22
+// CHECK:      vsm4key4  (%eax), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0x10]
+               vsm4key4  (%eax), %zmm3, %zmm2
 
-// CHECK:      vsm4key4  -2048(,%rbp,2), %zmm23, %zmm22
-// CHECK: encoding: [0x62,0xe2,0x46,0x40,0xda,0x34,0x6d,0x00,0xf8,0xff,0xff]
-               vsm4key4  -2048(,%rbp,2), %zmm23, %zmm22
+// CHECK:      vsm4key4  -2048(,%ebp,2), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0x14,0x6d,0x00,0xf8,0xff,0xff]
+               vsm4key4  -2048(,%ebp,2), %zmm3, %zmm2
 
-// CHECK:      vsm4key4  8128(%rcx), %zmm23, %zmm22
-// CHECK: encoding: [0x62,0xe2,0x46,0x40,0xda,0x71,0x7f]
-               vsm4key4  8128(%rcx), %zmm23, %zmm22
+// CHECK:      vsm4key4  8128(%ecx), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0x51,0x7f]
+               vsm4key4  8128(%ecx), %zmm3, %zmm2
 
-// CHECK:      vsm4key4  -8192(%rdx), %zmm23, %zmm22
-// CHECK: encoding: [0x62,0xe2,0x46,0x40,0xda,0x72,0x80]
-               vsm4key4  -8192(%rdx), %zmm23, %zmm22
+// CHECK:      vsm4key4  -8192(%edx), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0x52,0x80]
+               vsm4key4  -8192(%edx), %zmm3, %zmm2
 
-// CHECK:      vsm4rnds4 %zmm24, %zmm23, %zmm22
-// CHECK: encoding: [0x62,0x82,0x47,0x40,0xda,0xf0]
-               vsm4rnds4 %zmm24, %zmm23, %zmm22
+// CHECK:      vsm4rnds4 %zmm4, %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0xd4]
+               vsm4rnds4 %zmm4, %zmm3, %zmm2
 
-// CHECK:      vsm4rnds4  268435456(%rbp,%r14,8), %zmm23, %zmm22
-// CHECK: encoding: [0x62,0xa2,0x47,0x40,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10]
-               vsm4rnds4  268435456(%rbp,%r14,8), %zmm23, %zmm22
+// CHECK:      vsm4rnds4  268435456(%esp,%esi,8), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0x94,0xf4,0x00,0x00,0x00,0x10]
+               vsm4rnds4  268435456(%esp,%esi,8), %zmm3, %zmm2
 
-// CHECK:      vsm4rnds4  291(%r8,%rax,4), %zmm23, %zmm22
-// CHECK: encoding: [0x62,0xc2,0x47,0x40,0xda,0xb4,0x80,0x23,0x01,0x00,0x00]
-               vsm4rnds4  291(%r8,%rax,4), %zmm23, %zmm22
+// CHECK:      vsm4rnds4  291(%edi,%eax,4), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0x94,0x87,0x23,0x01,0x00,0x00]
+               vsm4rnds4  291(%edi,%eax,4), %zmm3, %zmm2
 
-// CHECK:      vsm4rnds4  (%rip), %zmm23, %zmm22
-// CHECK: encoding: [0x62,0xe2,0x47,0x40,0xda,0x35,0x00,0x00,0x00,0x00]
-               vsm4rnds4  (%rip), %zmm23, %zmm22
+// CHECK:      vsm4rnds4  (%eax), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0x10]
+               vsm4rnds4  (%eax), %zmm3, %zmm2
 
-// CHECK:      vsm4rnds4  -2048(,%rbp,2), %zmm23, %zmm22
-// CHECK: encoding: [0x62,0xe2,0x47,0x40,0xda,0x34,0x6d,0x00,0xf8,0xff,0xff]
-               vsm4rnds4  -2048(,%rbp,2), %zmm23, %zmm22
+// CHECK:      vsm4rnds4  -2048(,%ebp,2), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0x14,0x6d,0x00,0xf8,0xff,0xff]
+               vsm4rnds4  -2048(,%ebp,2), %zmm3, %zmm2
 
-// CHECK:      vsm4rnds4  8128(%rcx), %zmm23, %zmm22
-// CHECK: encoding: [0x62,0xe2,0x47,0x40,0xda,0x71,0x7f]
-               vsm4rnds4  8128(%rcx), %zmm23, %zmm22
+// CHECK:      vsm4rnds4  8128(%ecx), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0x51,0x7f]
+               vsm4rnds4  8128(%ecx), %zmm3, %zmm2
 
-// CHECK:      vsm4rnds4  -8192(%rdx), %zmm23, %zmm22
-// CHECK: encoding: [0x62,0xe2,0x47,0x40,0xda,0x72,0x80]
-               vsm4rnds4  -8192(%rdx), %zmm23, %zmm22
+// CHECK:      vsm4rnds4  -8192(%edx), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0x52,0x80]
+               vsm4rnds4  -8192(%edx), %zmm3, %zmm2
 
-// CHECK:      vsm4key4 %ymm24, %ymm23, %ymm22
-// CHECK: encoding: [0x62,0x82,0x46,0x20,0xda,0xf0]
-               vsm4key4 %ymm24, %ymm23, %ymm22
+// CHECK:      {evex} vsm4key4 %ymm4, %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0xd4]
+               {evex} vsm4key4 %ymm4, %ymm3, %ymm2
 
-// CHECK:      vsm4key4 %xmm24, %xmm23, %xmm22
-// CHECK: encoding: [0x62,0x82,0x46,0x00,0xda,0xf0]
-               vsm4key4 %xmm24, %xmm23, %xmm22
+// CHECK:      {evex} vsm4key4 %xmm4, %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0xd4]
+               {evex} vsm4key4 %xmm4, %xmm3, %xmm2
 
-// CHECK:      vsm4key4  268435456(%rbp,%r14,8), %ymm23, %ymm22
-// CHECK: encoding: [0x62,0xa2,0x46,0x20,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10]
-               vsm4key4  268435456(%rbp,%r14,8), %ymm23, %ymm22
+// CHECK:      {evex} vsm4key4  268435456(%esp,%esi,8), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0x94,0xf4,0x00,0x00,0x00,0x10]
+               {evex} vsm4key4  268435456(%esp,%esi,8), %ymm3, %ymm2
 
-// CHECK:      vsm4key4  291(%r8,%rax,4), %ymm23, %ymm22
-// CHECK: encoding: [0x62,0xc2,0x46,0x20,0xda,0xb4,0x80,0x23,0x01,0x00,0x00]
-               vsm4key4  291(%r8,%rax,4), %ymm23, %ymm22
+// CHECK:      {evex} vsm4key4  291(%edi,%eax,4), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0x94,0x87,0x23,0x01,0x00,0x00]
+               {evex} vsm4key4  291(%edi,%eax,4), %ymm3, %ymm2
 
-// CHECK:      vsm4key4  (%rip), %ymm23, %ymm22
-// CHECK: encoding: [0x62,0xe2,0x46,0x20,0xda,0x35,0x00,0x00,0x00,0x00]
-               vsm4key4  (%rip), %ymm23, %ymm22
+// CHECK:      {evex} vsm4key4  (%eax), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0x10]
+               {evex} vsm4key4  (%eax), %ymm3, %ymm2
 
-// CHECK:      vsm4key4  -1024(,%rbp,2), %ymm23, %ymm22
-// CHECK: encoding: [0x62,0xe2,0x46,0x20,0xda,0x34,0x6d,0x00,0xfc,0xff,0xff]
-               vsm4key4  -1024(,%rbp,2), %ymm23, %ymm22
+// CHECK:      {evex} vsm4key4  -1024(,%ebp,2), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0x14,0x6d,0x00,0xfc,0xff,0xff]
+               {evex} vsm4key4  -1024(,%ebp,2), %ymm3, %ymm2
 
-// CHECK:      vsm4key4  4064(%rcx), %ymm23, %ymm22
-// CHECK: encoding: [0x62,0xe2,0x46,0x20,0xda,0x71,0x7f]
-               vsm4key4  4064(%rcx), %ymm23, %ymm22
+// CHECK:      {evex} vsm4key4  4064(%ecx), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0x51,0x7f]
+               {evex} vsm4key4  4064(%ecx), %ymm3, %ymm2
 
-// CHECK:      vsm4key4  -4096(%rdx), %ymm23, %ymm22
-// CHECK: encoding: [0x62,0xe2,0x46,0x20,0xda,0x72,0x80]
-               vsm4key4  -4096(%rdx), %ymm23, %ymm22
+// CHECK:      {evex} vsm4key4  -4096(%edx), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0x52,0x80]
+               {evex} vsm4key4  -4096(%edx), %ymm3, %ymm2
 
-// CHECK:      vsm4key4  268435456(%rbp,%r14,8), %xmm23, %xmm22
-// CHECK: encoding: [0x62,0xa2,0x46,0x00,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10]
-               vsm4key4  268435456(%rbp,%r14,8), %xmm23, %xmm22
+// CHECK:      {evex} vsm4key4  268435456(%esp,%esi,8), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0x94,0xf4,0x00,0x00,0x00,0x10]
+               {evex} vsm4key4  268435456(%esp,%esi,8), %xmm3, %xmm2
 
-// CHECK:      vsm4key4  291(%r8,%rax,4), %xmm23, %xmm22
-// CHECK: encoding: [0x62,0xc2,0x46,0x00,0xda,0xb4,0x80,0x23,0x01,0x00,0x00]
-               vsm4key4  291(%r8,%rax,4), %xmm23, %xmm22
+// CHECK:      {evex} vsm4key4  291(%edi,%eax,4), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0x94,0x87,0x23,0x01,0x00,0x00]
+               {evex} vsm4key4  291(%edi,%eax,4), %xmm3, %xmm2
 
-// CHECK:      vsm4key4  (%rip), %xmm23, %xmm22
-// CHECK: encoding: [0x62,0xe2,0x46,0x00,0xda,0x35,0x00,0x00,0x00,0x00]
-               vsm4key4  (%rip), %xmm23, %xmm22
+// CHECK:      {evex} vsm4key4  (%eax), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0x10]
+               {evex} vsm4key4  (%eax), %xmm3, %xmm2
 
-// CHECK:      vsm4key4  -512(,%rbp,2), %xmm23, %xmm22
-// CHECK: encoding: [0x62,0xe2,0x46,0x00,0xda,0x34,0x6d,0x00,0xfe,0xff,0xff]
-               vsm4key4  -512(,%rbp,2), %xmm23, %xmm22
+// CHECK:      {evex} vsm4key4  -512(,%ebp,2), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0x14,0x6d,0x00,0xfe,0xff,0xff]
+               {evex} vsm4key4  -512(,%ebp,2), %xmm3, %xmm2
 
-// CHECK:      vsm4key4  2032(%rcx), %xmm23, %xmm22
-// CHECK: encoding: [0x62,0xe2,0x46,0x00,0xda,0x71,0x7f]
-               vsm4key4  2032(%rcx), %xmm23, %xmm22
+// CHECK:      {evex} vsm4key4  2032(%ecx), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0x51,0x7f]
+               {evex} vsm4key4  2032(%ecx), %xmm3, %xmm2
 
-// CHECK:      vsm4key4  -2048(%rdx), %xmm23, %xmm22
-// CHECK: encoding: [0x62,0xe2,0x46,0x00,0xda,0x72,0x80]
-               vsm4key4  -2048(%rdx), %xmm23, %xmm22
-// CHECK:      vsm4key4 %ymm24, %ymm23, %ymm22
-// CHECK: encoding: [0x62,0x82,0x46,0x20,0xda,0xf0]
-               vsm4key4 %ymm24, %ymm23, %ymm22
+// CHECK:      {evex} vsm4key4  -2048(%edx), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0x52,0x80]
+               {evex} vsm4key4  -2048(%edx), %xmm3, %xmm2
 
-// CHECK:      vsm4key4 %xmm24, %xmm23, %xmm22
-// CHECK: encoding: [0x62,0x82,0x46,0x00,0xda,0xf0]
-               vsm4key4 %xmm24, %xmm23, %xmm22
+// CHECK:      {evex} vsm4rnds4 %ymm4, %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0xd4]
+               {evex} vsm4rnds4 %ymm4, %ymm3, %ymm2
 
-// CHECK:      vsm4key4  268435456(%rbp,%r14,8), %ymm23, %ymm22
-// CHECK: encoding: [0x62,0xa2,0x46,0x20,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10]
-               vsm4key4  268435456(%rbp,%r14,8), %ymm23, %ymm22
-
-// CHECK:      vsm4key4  291(%r8,%rax,4), %ymm23, %ymm22
-// CHECK: encoding: [0x62,0xc2,0x46,0x20,0xda,0xb4,0x80,0x23,0x01,0x00,0x00]
-               vsm4key4  291(%r8,%rax,4), %ymm23, %ymm22
-
-// CHECK:      vsm4key4  (%rip), %ymm23, %ymm22
-// CHECK: encoding: [0x62,0xe2,0x46,0x20,0xda,0x35,0x00,0x00,0x00,0x00]
-               vsm4key4  (%rip), %ymm23, %ymm22
-
-// CHECK:      vsm4key4  -1024(,%rbp,2), %ymm23, %ymm22
-// CHECK: encoding: [0x62,0xe2,0x46,0x20,0xda,0x34,0x6d,0x00,0xfc,0xff,0xff]
-               vsm4key4  -1024(,%rbp,2), %ymm23, %ymm22
-
-// CHECK:      vsm4key4  4064(%rcx), %ymm23, %ymm22
-// CHECK: encoding: [0x62,0xe2,0x46,0x20,0xda,0x71,0x7f]
-               vsm4key4  4064(%rcx), %ymm23, %ymm22
-
-// CHECK:      vsm4key4  -4096(%rdx), %ymm23, %ymm22
-// CHECK: encoding: [0x62,0xe2,0x46,0x20,0xda,0x72,0x80]
-               vsm4key4  -4096(%rdx), %ymm23, %ymm22
-
-// CHECK:      vsm4key4  268435456(%rbp,%r14,8), %xmm23, %xmm22
-// CHECK: encoding: [0x62,0xa2,0x46,0x00,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10]
-               vsm4key4  268435456(%rbp,%r14,8), %xmm23, %xmm22
-
-// CHECK:      vsm4key4  291(%r8,%rax,4), %xmm23, %xmm22
-// CHECK: encoding: [0x62,0xc2,0x46,0x00,0xda,0xb4,0x80,0x23,0x01,0x00,0x00]
-               vsm4key4  291(%r8,%rax,4), %xmm23, %xmm22
-
-// CHECK:      vsm4key4  (%rip), %xmm23, %xmm22
-// CHECK: encoding: [0x62,0xe2,0x46,0x00,0xda,0x35,0x00,0x00,0x00,0x00]
-               vsm4key4  (%rip), %xmm23, %xmm22
-
-// CHECK:      vsm4key4  -512(,%rbp,2), %xmm23, %xmm22
-// CHECK: encoding: [0x62,0xe2,0x46,0x00,0xda,0x34,0x6d,0x00,0xfe,0xff,0xff]
-               vsm4key4  -512(,%rbp,2), %xmm23, %xmm22
-
-// CHECK:      vsm4key4  2032(%rcx), %xmm23, %xmm22
-// CHECK: encoding: [0x62,0xe2,0x46,0x00,0xda,0x71,0x7f]
-               vsm4key4  2032(%rcx), %xmm23, %xmm22
-
-// CHECK:      vsm4key4  -2048(%rdx), %xmm23, %xmm22
-// CHECK: encoding: [0x62,0xe2,0x46,0x00,0xda,0x72,0x80]
-               vsm4key4  -2048(%rdx), %xmm23, %xmm22
-
-// CHECK:      vsm4rnds4 %ymm24, %ymm23, %ymm22
-// CHECK: encoding: [0x62,0x82,0x47,0x20,0xda,0xf0]
-               vsm4rnds4 %ymm24, %ymm23, %ymm22
-
-// CHECK:      vsm4rnds4 %xmm24, %xmm23, %xmm22
-// CHECK: encoding: [0x62,0x82,0x47,0x00,0xda,0xf0]
-               vsm4rnds4 %xmm24, %xmm23, %xmm22
-
-// CHECK:      vsm4rnds4  268435456(%rbp,%r14,8), %ymm23, %ymm22
-// CHECK: encoding: [0x62,0xa2,0x47,0x20,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10]
-               vsm4rnds4  268435456(%rbp,%r14,8), %ymm23, %ymm22
-
-// CHECK:      vsm4rnds4  291(%r8,%rax,4), %ymm23, %ymm22
-// CHECK: encoding: [0x62,0xc2,0x47,0x20,0xda,0xb4,0x80,0x23,0x01,0x00,0x00]
-               vsm4rnds4  291(%r8,%rax,4), %ymm23, %ymm22
-
-// CHECK:      vsm4rnds4  (%rip), %ymm23, %ymm22
-// CHECK: encoding: [0x62,0xe2,0x47,0x20,0xda,0x35,0x00,0x00,0x00,0x00]
-               vsm4rnds4  (%rip), %ymm23, %ymm22
-
-// CHECK:      vsm4rnds4  -1024(,%rbp,2), %ymm23, %ymm22
-// CHECK: encoding: [0x62,0xe2,0x47,0x20,0xda,0x34,0x6d,0x00,0xfc,0xff,0xff]
-               vsm4rnds4  -1024(,%rbp,2), %ymm23, %ymm22
-
-// CHECK:      vsm4rnds4  4064(%rcx), %ymm23, %ymm22
-// CHECK: encoding: [0x62,0xe2,0x47,0x20,0xda,0x71,0x7f]
-               vsm4rnds4  4064(%rcx), %ymm23, %ymm22
-
-// CHECK:      vsm4rnds4  -4096(%rdx), %ymm23, %ymm22
-// CHECK: encoding: [0x62,0xe2,0x47,0x20,0xda,0x72,0x80]
-               vsm4rnds4  -4096(%rdx), %ymm23, %ymm22
-
-// CHECK:      vsm4rnds4  268435456(%rbp,%r14,8), %xmm23, %xmm22
-// CHECK: encoding: [0x62,0xa2,0x47,0x00,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10]
-               vsm4rnds4  268435456(%rbp,%r14,8), %xmm23, %xmm22
-
-// CHECK:      vsm4rnds4  291(%r8,%rax,4), %xmm23, %xmm22
-// CHECK: encoding: [0x62,0xc2,0x47,0x00,0xda,0xb4,0x80,0x23,0x01,0x00,0x00]
-               vsm4rnds4  291(%r8,%rax,4), %xmm23, %xmm22
-
-// CHECK:      vsm4rnds4  (%rip), %xmm23, %xmm22
-// CHECK: encoding: [0x62,0xe2,0x47,0x00,0xda,0x35,0x00,0x00,0x00,0x00]
-               vsm4rnds4  (%rip), %xmm23, %xmm22
-
-// CHECK:      vsm4rnds4  -512(,%rbp,2), %xmm23, %xmm22
-// CHECK: encoding: [0x62,0xe2,0x47,0x00,0xda,0x34,0x6d,0x00,0xfe,0xff,0xff]
-               vsm4rnds4  -512(,%rbp,2), %xmm23, %xmm22
+// CHECK:      {evex} vsm4rnds4 %xmm4, %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0xd4]
+               {evex} vsm4rnds4 %xmm4, %xmm3, %xmm2
 
-// CHECK:      vsm4rnds4  2032(%rcx), %xmm23, %xmm22
-// CHECK: encoding: [0x62,0xe2,0x47,0x00,0xda,0x71,0x7f]
-               vsm4rnds4  2032(%rcx), %xmm23, %xmm22
+// CHECK:      {evex} vsm4rnds4  268435456(%esp,%esi,8), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0x94,0xf4,0x00,0x00,0x00,0x10]
+               {evex} vsm4rnds4  268435456(%esp,%esi,8), %ymm3, %ymm2
 
-// CHECK:      vsm4rnds4  -2048(%rdx), %xmm23, %xmm22
-// CHECK: encoding: [0x62,0xe2,0x47,0x00,0xda,0x72,0x80]
-               vsm4rnds4  -2048(%rdx), %xmm23, %xmm22
+// CHECK:      {evex} vsm4rnds4  291(%edi,%eax,4), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0x94,0x87,0x23,0x01,0x00,0x00]
+               {evex} vsm4rnds4  291(%edi,%eax,4), %ymm3, %ymm2
+
+// CHECK:      {evex} vsm4rnds4  (%eax), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0x10]
+               {evex} vsm4rnds4  (%eax), %ymm3, %ymm2
+
+// CHECK:      {evex} vsm4rnds4  -1024(,%ebp,2), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0x14,0x6d,0x00,0xfc,0xff,0xff]
+               {evex} vsm4rnds4  -1024(,%ebp,2), %ymm3, %ymm2
+
+// CHECK:      {evex} vsm4rnds4  4064(%ecx), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0x51,0x7f]
+               {evex} vsm4rnds4  4064(%ecx), %ymm3, %ymm2
+
+// CHECK:      {evex} vsm4rnds4  -4096(%edx), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0x52,0x80]
+               {evex} vsm4rnds4  -4096(%edx), %ymm3, %ymm2
+
+// CHECK:      {evex} vsm4rnds4  268435456(%esp,%esi,8), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0x94,0xf4,0x00,0x00,0x00,0x10]
+               {evex} vsm4rnds4  268435456(%esp,%esi,8), %xmm3, %xmm2
+
+// CHECK:      {evex} vsm4rnds4  291(%edi,%eax,4), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0x94,0x87,0x23,0x01,0x00,0x00]
+               {evex} vsm4rnds4  291(%edi,%eax,4), %xmm3, %xmm2
+
+// CHECK:      {evex} vsm4rnds4  (%eax), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0x10]
+               {evex} vsm4rnds4  (%eax), %xmm3, %xmm2
+
+// CHECK:      {evex} vsm4rnds4  -512(,%ebp,2), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0x14,0x6d,0x00,0xfe,0xff,0xff]
+               {evex} vsm4rnds4  -512(,%ebp,2), %xmm3, %xmm2
+
+// CHECK:      {evex} vsm4rnds4  2032(%ecx), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0x51,0x7f]
+               {evex} vsm4rnds4  2032(%ecx), %xmm3, %xmm2
+
+// CHECK:      {evex} vsm4rnds4  -2048(%edx), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0x52,0x80]
+               {evex} vsm4rnds4  -2048(%edx), %xmm3, %xmm2
\ No newline at end of file
diff --git a/llvm/test/MC/X86/sm4-evex-32-intel.s b/llvm/test/MC/X86/sm4-evex-32-intel.s
index 3cc18cf4178ed8..812fdb13f80913 100644
--- a/llvm/test/MC/X86/sm4-evex-32-intel.s
+++ b/llvm/test/MC/X86/sm4-evex-32-intel.s
@@ -1,169 +1,169 @@
-// RUN: llvm-mc -triple x86_64-unknown-unknown -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s
+// RUN: llvm-mc -triple i686-unknown-unknown -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s
 
-// CHECK:      vsm4key4 zmm22, zmm23, zmm24
-// CHECK: encoding: [0x62,0x82,0x46,0x40,0xda,0xf0]
-               vsm4key4 zmm22, zmm23, zmm24
+// CHECK:      vsm4key4 zmm2, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0xd4]
+               vsm4key4 zmm2, zmm3, zmm4
 
-// CHECK:      vsm4key4 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
-// CHECK: encoding: [0x62,0xa2,0x46,0x40,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10]
-               vsm4key4 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK:      vsm4key4 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0x94,0xf4,0x00,0x00,0x00,0x10]
+               vsm4key4 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
 
-// CHECK:      vsm4key4 zmm22, zmm23, zmmword ptr [r8 + 4*rax + 291]
-// CHECK: encoding: [0x62,0xc2,0x46,0x40,0xda,0xb4,0x80,0x23,0x01,0x00,0x00]
-               vsm4key4 zmm22, zmm23, zmmword ptr [r8 + 4*rax + 291]
+// CHECK:      vsm4key4 zmm2, zmm3, zmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0x94,0x87,0x23,0x01,0x00,0x00]
+               vsm4key4 zmm2, zmm3, zmmword ptr [edi + 4*eax + 291]
 
-// CHECK:      vsm4key4 zmm22, zmm23, zmmword ptr [rip]
-// CHECK: encoding: [0x62,0xe2,0x46,0x40,0xda,0x35,0x00,0x00,0x00,0x00]
-               vsm4key4 zmm22, zmm23, zmmword ptr [rip]
+// CHECK:      vsm4key4 zmm2, zmm3, zmmword ptr [eax]
+// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0x10]
+               vsm4key4 zmm2, zmm3, zmmword ptr [eax]
 
-// CHECK:      vsm4key4 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
-// CHECK: encoding: [0x62,0xe2,0x46,0x40,0xda,0x34,0x6d,0x00,0xf8,0xff,0xff]
-               vsm4key4 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+// CHECK:      vsm4key4 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0x14,0x6d,0x00,0xf8,0xff,0xff]
+               vsm4key4 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
 
-// CHECK:      vsm4key4 zmm22, zmm23, zmmword ptr [rcx + 8128]
-// CHECK: encoding: [0x62,0xe2,0x46,0x40,0xda,0x71,0x7f]
-               vsm4key4 zmm22, zmm23, zmmword ptr [rcx + 8128]
+// CHECK:      vsm4key4 zmm2, zmm3, zmmword ptr [ecx + 8128]
+// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0x51,0x7f]
+               vsm4key4 zmm2, zmm3, zmmword ptr [ecx + 8128]
 
-// CHECK:      vsm4key4 zmm22, zmm23, zmmword ptr [rdx - 8192]
-// CHECK: encoding: [0x62,0xe2,0x46,0x40,0xda,0x72,0x80]
-               vsm4key4 zmm22, zmm23, zmmword ptr [rdx - 8192]
+// CHECK:      vsm4key4 zmm2, zmm3, zmmword ptr [edx - 8192]
+// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0x52,0x80]
+               vsm4key4 zmm2, zmm3, zmmword ptr [edx - 8192]
 
-// CHECK:      vsm4rnds4 zmm22, zmm23, zmm24
-// CHECK: encoding: [0x62,0x82,0x47,0x40,0xda,0xf0]
-               vsm4rnds4 zmm22, zmm23, zmm24
+// CHECK:      vsm4rnds4 zmm2, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0xd4]
+               vsm4rnds4 zmm2, zmm3, zmm4
 
-// CHECK:      vsm4rnds4 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
-// CHECK: encoding: [0x62,0xa2,0x47,0x40,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10]
-               vsm4rnds4 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK:      vsm4rnds4 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0x94,0xf4,0x00,0x00,0x00,0x10]
+               vsm4rnds4 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
 
-// CHECK:      vsm4rnds4 zmm22, zmm23, zmmword ptr [r8 + 4*rax + 291]
-// CHECK: encoding: [0x62,0xc2,0x47,0x40,0xda,0xb4,0x80,0x23,0x01,0x00,0x00]
-               vsm4rnds4 zmm22, zmm23, zmmword ptr [r8 + 4*rax + 291]
+// CHECK:      vsm4rnds4 zmm2, zmm3, zmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0x94,0x87,0x23,0x01,0x00,0x00]
+               vsm4rnds4 zmm2, zmm3, zmmword ptr [edi + 4*eax + 291]
 
-// CHECK:      vsm4rnds4 zmm22, zmm23, zmmword ptr [rip]
-// CHECK: encoding: [0x62,0xe2,0x47,0x40,0xda,0x35,0x00,0x00,0x00,0x00]
-               vsm4rnds4 zmm22, zmm23, zmmword ptr [rip]
+// CHECK:      vsm4rnds4 zmm2, zmm3, zmmword ptr [eax]
+// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0x10]
+               vsm4rnds4 zmm2, zmm3, zmmword ptr [eax]
 
-// CHECK:      vsm4rnds4 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
-// CHECK: encoding: [0x62,0xe2,0x47,0x40,0xda,0x34,0x6d,0x00,0xf8,0xff,0xff]
-               vsm4rnds4 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+// CHECK:      vsm4rnds4 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0x14,0x6d,0x00,0xf8,0xff,0xff]
+               vsm4rnds4 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
 
-// CHECK:      vsm4rnds4 zmm22, zmm23, zmmword ptr [rcx + 8128]
-// CHECK: encoding: [0x62,0xe2,0x47,0x40,0xda,0x71,0x7f]
-               vsm4rnds4 zmm22, zmm23, zmmword ptr [rcx + 8128]
+// CHECK:      vsm4rnds4 zmm2, zmm3, zmmword ptr [ecx + 8128]
+// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0x51,0x7f]
+               vsm4rnds4 zmm2, zmm3, zmmword ptr [ecx + 8128]
 
-// CHECK:      vsm4rnds4 zmm22, zmm23, zmmword ptr [rdx - 8192]
-// CHECK: encoding: [0x62,0xe2,0x47,0x40,0xda,0x72,0x80]
-               vsm4rnds4 zmm22, zmm23, zmmword ptr [rdx - 8192]
+// CHECK:      vsm4rnds4 zmm2, zmm3, zmmword ptr [edx - 8192]
+// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0x52,0x80]
+               vsm4rnds4 zmm2, zmm3, zmmword ptr [edx - 8192]
 
-// CHECK:      vsm4key4 ymm22, ymm23, ymm24
-// CHECK: encoding: [0x62,0x82,0x46,0x20,0xda,0xf0]
-               vsm4key4 ymm22, ymm23, ymm24
+// CHECK:      {evex} vsm4key4 ymm2, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0xd4]
+               {evex} vsm4key4 ymm2, ymm3, ymm4
 
-// CHECK:      vsm4key4 xmm22, xmm23, xmm24
-// CHECK: encoding: [0x62,0x82,0x46,0x00,0xda,0xf0]
-               vsm4key4 xmm22, xmm23, xmm24
+// CHECK:      {evex} vsm4key4 xmm2, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0xd4]
+               {evex} vsm4key4 xmm2, xmm3, xmm4
 
-// CHECK:      vsm4key4 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
-// CHECK: encoding: [0x62,0xa2,0x46,0x20,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10]
-               vsm4key4 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+// CHECK:      {evex} vsm4key4 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0x94,0xf4,0x00,0x00,0x00,0x10]
+               {evex} vsm4key4 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
 
-// CHECK:      vsm4key4 ymm22, ymm23, ymmword ptr [r8 + 4*rax + 291]
-// CHECK: encoding: [0x62,0xc2,0x46,0x20,0xda,0xb4,0x80,0x23,0x01,0x00,0x00]
-               vsm4key4 ymm22, ymm23, ymmword ptr [r8 + 4*rax + 291]
+// CHECK:      {evex} vsm4key4 ymm2, ymm3, ymmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0x94,0x87,0x23,0x01,0x00,0x00]
+               {evex} vsm4key4 ymm2, ymm3, ymmword ptr [edi + 4*eax + 291]
 
-// CHECK:      vsm4key4 ymm22, ymm23, ymmword ptr [rip]
-// CHECK: encoding: [0x62,0xe2,0x46,0x20,0xda,0x35,0x00,0x00,0x00,0x00]
-               vsm4key4 ymm22, ymm23, ymmword ptr [rip]
+// CHECK:      {evex} vsm4key4 ymm2, ymm3, ymmword ptr [eax]
+// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0x10]
+               {evex} vsm4key4 ymm2, ymm3, ymmword ptr [eax]
 
-// CHECK:      vsm4key4 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
-// CHECK: encoding: [0x62,0xe2,0x46,0x20,0xda,0x34,0x6d,0x00,0xfc,0xff,0xff]
-               vsm4key4 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+// CHECK:      {evex} vsm4key4 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0x14,0x6d,0x00,0xfc,0xff,0xff]
+               {evex} vsm4key4 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
 
-// CHECK:      vsm4key4 ymm22, ymm23, ymmword ptr [rcx + 4064]
-// CHECK: encoding: [0x62,0xe2,0x46,0x20,0xda,0x71,0x7f]
-               vsm4key4 ymm22, ymm23, ymmword ptr [rcx + 4064]
+// CHECK:      {evex} vsm4key4 ymm2, ymm3, ymmword ptr [ecx + 4064]
+// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0x51,0x7f]
+               {evex} vsm4key4 ymm2, ymm3, ymmword ptr [ecx + 4064]
 
-// CHECK:      vsm4key4 ymm22, ymm23, ymmword ptr [rdx - 4096]
-// CHECK: encoding: [0x62,0xe2,0x46,0x20,0xda,0x72,0x80]
-               vsm4key4 ymm22, ymm23, ymmword ptr [rdx - 4096]
+// CHECK:      {evex} vsm4key4 ymm2, ymm3, ymmword ptr [edx - 4096]
+// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0x52,0x80]
+               {evex} vsm4key4 ymm2, ymm3, ymmword ptr [edx - 4096]
 
-// CHECK:      vsm4key4 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
-// CHECK: encoding: [0x62,0xa2,0x46,0x00,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10]
-               vsm4key4 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK:      {evex} vsm4key4 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0x94,0xf4,0x00,0x00,0x00,0x10]
+               {evex} vsm4key4 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
 
-// CHECK:      vsm4key4 xmm22, xmm23, xmmword ptr [r8 + 4*rax + 291]
-// CHECK: encoding: [0x62,0xc2,0x46,0x00,0xda,0xb4,0x80,0x23,0x01,0x00,0x00]
-               vsm4key4 xmm22, xmm23, xmmword ptr [r8 + 4*rax + 291]
+// CHECK:      {evex} vsm4key4 xmm2, xmm3, xmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0x94,0x87,0x23,0x01,0x00,0x00]
+               {evex} vsm4key4 xmm2, xmm3, xmmword ptr [edi + 4*eax + 291]
 
-// CHECK:      vsm4key4 xmm22, xmm23, xmmword ptr [rip]
-// CHECK: encoding: [0x62,0xe2,0x46,0x00,0xda,0x35,0x00,0x00,0x00,0x00]
-               vsm4key4 xmm22, xmm23, xmmword ptr [rip]
+// CHECK:      {evex} vsm4key4 xmm2, xmm3, xmmword ptr [eax]
+// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0x10]
+               {evex} vsm4key4 xmm2, xmm3, xmmword ptr [eax]
 
-// CHECK:      vsm4key4 xmm22, xmm23, xmmword ptr [2*rbp - 512]
-// CHECK: encoding: [0x62,0xe2,0x46,0x00,0xda,0x34,0x6d,0x00,0xfe,0xff,0xff]
-               vsm4key4 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+// CHECK:      {evex} vsm4key4 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0x14,0x6d,0x00,0xfe,0xff,0xff]
+               {evex} vsm4key4 xmm2, xmm3, xmmword ptr [2*ebp - 512]
 
-// CHECK:      vsm4key4 xmm22, xmm23, xmmword ptr [rcx + 2032]
-// CHECK: encoding: [0x62,0xe2,0x46,0x00,0xda,0x71,0x7f]
-               vsm4key4 xmm22, xmm23, xmmword ptr [rcx + 2032]
+// CHECK:      {evex} vsm4key4 xmm2, xmm3, xmmword ptr [ecx + 2032]
+// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0x51,0x7f]
+               {evex} vsm4key4 xmm2, xmm3, xmmword ptr [ecx + 2032]
 
-// CHECK:      vsm4key4 xmm22, xmm23, xmmword ptr [rdx - 2048]
-// CHECK: encoding: [0x62,0xe2,0x46,0x00,0xda,0x72,0x80]
-               vsm4key4 xmm22, xmm23, xmmword ptr [rdx - 2048]
+// CHECK:      {evex} vsm4key4 xmm2, xmm3, xmmword ptr [edx - 2048]
+// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0x52,0x80]
+               {evex} vsm4key4 xmm2, xmm3, xmmword ptr [edx - 2048]
 
-// CHECK:      vsm4rnds4 ymm22, ymm23, ymm24
-// CHECK: encoding: [0x62,0x82,0x47,0x20,0xda,0xf0]
-               vsm4rnds4 ymm22, ymm23, ymm24
+// CHECK:      {evex} vsm4rnds4 ymm2, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0xd4]
+               {evex} vsm4rnds4 ymm2, ymm3, ymm4
 
-// CHECK:      vsm4rnds4 xmm22, xmm23, xmm24
-// CHECK: encoding: [0x62,0x82,0x47,0x00,0xda,0xf0]
-               vsm4rnds4 xmm22, xmm23, xmm24
+// CHECK:      {evex} vsm4rnds4 xmm2, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0xd4]
+               {evex} vsm4rnds4 xmm2, xmm3, xmm4
 
-// CHECK:      vsm4rnds4 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
-// CHECK: encoding: [0x62,0xa2,0x47,0x20,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10]
-               vsm4rnds4 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+// CHECK:      {evex} vsm4rnds4 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0x94,0xf4,0x00,0x00,0x00,0x10]
+               {evex} vsm4rnds4 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
 
-// CHECK:      vsm4rnds4 ymm22, ymm23, ymmword ptr [r8 + 4*rax + 291]
-// CHECK: encoding: [0x62,0xc2,0x47,0x20,0xda,0xb4,0x80,0x23,0x01,0x00,0x00]
-               vsm4rnds4 ymm22, ymm23, ymmword ptr [r8 + 4*rax + 291]
+// CHECK:      {evex} vsm4rnds4 ymm2, ymm3, ymmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0x94,0x87,0x23,0x01,0x00,0x00]
+               {evex} vsm4rnds4 ymm2, ymm3, ymmword ptr [edi + 4*eax + 291]
 
-// CHECK:      vsm4rnds4 ymm22, ymm23, ymmword ptr [rip]
-// CHECK: encoding: [0x62,0xe2,0x47,0x20,0xda,0x35,0x00,0x00,0x00,0x00]
-               vsm4rnds4 ymm22, ymm23, ymmword ptr [rip]
+// CHECK:      {evex} vsm4rnds4 ymm2, ymm3, ymmword ptr [eax]
+// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0x10]
+               {evex} vsm4rnds4 ymm2, ymm3, ymmword ptr [eax]
 
-// CHECK:      vsm4rnds4 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
-// CHECK: encoding: [0x62,0xe2,0x47,0x20,0xda,0x34,0x6d,0x00,0xfc,0xff,0xff]
-               vsm4rnds4 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+// CHECK:      {evex} vsm4rnds4 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0x14,0x6d,0x00,0xfc,0xff,0xff]
+               {evex} vsm4rnds4 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
 
-// CHECK:      vsm4rnds4 ymm22, ymm23, ymmword ptr [rcx + 4064]
-// CHECK: encoding: [0x62,0xe2,0x47,0x20,0xda,0x71,0x7f]
-               vsm4rnds4 ymm22, ymm23, ymmword ptr [rcx + 4064]
+// CHECK:      {evex} vsm4rnds4 ymm2, ymm3, ymmword ptr [ecx + 4064]
+// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0x51,0x7f]
+               {evex} vsm4rnds4 ymm2, ymm3, ymmword ptr [ecx + 4064]
 
-// CHECK:      vsm4rnds4 ymm22, ymm23, ymmword ptr [rdx - 4096]
-// CHECK: encoding: [0x62,0xe2,0x47,0x20,0xda,0x72,0x80]
-               vsm4rnds4 ymm22, ymm23, ymmword ptr [rdx - 4096]
+// CHECK:      {evex} vsm4rnds4 ymm2, ymm3, ymmword ptr [edx - 4096]
+// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0x52,0x80]
+               {evex} vsm4rnds4 ymm2, ymm3, ymmword ptr [edx - 4096]
 
-// CHECK:      vsm4rnds4 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
-// CHECK: encoding: [0x62,0xa2,0x47,0x00,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10]
-               vsm4rnds4 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK:      {evex} vsm4rnds4 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0x94,0xf4,0x00,0x00,0x00,0x10]
+               {evex} vsm4rnds4 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
 
-// CHECK:      vsm4rnds4 xmm22, xmm23, xmmword ptr [r8 + 4*rax + 291]
-// CHECK: encoding: [0x62,0xc2,0x47,0x00,0xda,0xb4,0x80,0x23,0x01,0x00,0x00]
-               vsm4rnds4 xmm22, xmm23, xmmword ptr [r8 + 4*rax + 291]
+// CHECK:      {evex} vsm4rnds4 xmm2, xmm3, xmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0x94,0x87,0x23,0x01,0x00,0x00]
+               {evex} vsm4rnds4 xmm2, xmm3, xmmword ptr [edi + 4*eax + 291]
 
-// CHECK:      vsm4rnds4 xmm22, xmm23, xmmword ptr [rip]
-// CHECK: encoding: [0x62,0xe2,0x47,0x00,0xda,0x35,0x00,0x00,0x00,0x00]
-               vsm4rnds4 xmm22, xmm23, xmmword ptr [rip]
+// CHECK:      {evex} vsm4rnds4 xmm2, xmm3, xmmword ptr [eax]
+// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0x10]
+               {evex} vsm4rnds4 xmm2, xmm3, xmmword ptr [eax]
 
-// CHECK:      vsm4rnds4 xmm22, xmm23, xmmword ptr [2*rbp - 512]
-// CHECK: encoding: [0x62,0xe2,0x47,0x00,0xda,0x34,0x6d,0x00,0xfe,0xff,0xff]
-               vsm4rnds4 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+// CHECK:      {evex} vsm4rnds4 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0x14,0x6d,0x00,0xfe,0xff,0xff]
+               {evex} vsm4rnds4 xmm2, xmm3, xmmword ptr [2*ebp - 512]
 
-// CHECK:      vsm4rnds4 xmm22, xmm23, xmmword ptr [rcx + 2032]
-// CHECK: encoding: [0x62,0xe2,0x47,0x00,0xda,0x71,0x7f]
-               vsm4rnds4 xmm22, xmm23, xmmword ptr [rcx + 2032]
+// CHECK:      {evex} vsm4rnds4 xmm2, xmm3, xmmword ptr [ecx + 2032]
+// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0x51,0x7f]
+               {evex} vsm4rnds4 xmm2, xmm3, xmmword ptr [ecx + 2032]
 
-// CHECK:      vsm4rnds4 xmm22, xmm23, xmmword ptr [rdx - 2048]
-// CHECK: encoding: [0x62,0xe2,0x47,0x00,0xda,0x72,0x80]
-               vsm4rnds4 xmm22, xmm23, xmmword ptr [rdx - 2048]
+// CHECK:      {evex} vsm4rnds4 xmm2, xmm3, xmmword ptr [edx - 2048]
+// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0x52,0x80]
+               {evex} vsm4rnds4 xmm2, xmm3, xmmword ptr [edx - 2048]
diff --git a/llvm/test/MC/X86/sm4-evex-64-att.s b/llvm/test/MC/X86/sm4-evex-64-att.s
index de10d95ac74d7b..389a29b1189795 100644
--- a/llvm/test/MC/X86/sm4-evex-64-att.s
+++ b/llvm/test/MC/X86/sm4-evex-64-att.s
@@ -1,169 +1,224 @@
-// RUN: llvm-mc -triple i686-unknown-unknown --show-encoding %s | FileCheck %s
+// RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding %s | FileCheck %s
 
-// CHECK:      vsm4key4 %zmm4, %zmm3, %zmm2
-// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0xd4]
-               vsm4key4 %zmm4, %zmm3, %zmm2
+// CHECK:      vsm4key4 %zmm24, %zmm23, %zmm22
+// CHECK: encoding: [0x62,0x82,0x46,0x40,0xda,0xf0]
+               vsm4key4 %zmm24, %zmm23, %zmm22
 
-// CHECK:      vsm4key4  268435456(%esp,%esi,8), %zmm3, %zmm2
-// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0x94,0xf4,0x00,0x00,0x00,0x10]
-               vsm4key4  268435456(%esp,%esi,8), %zmm3, %zmm2
+// CHECK:      vsm4key4  268435456(%rbp,%r14,8), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xa2,0x46,0x40,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10]
+               vsm4key4  268435456(%rbp,%r14,8), %zmm23, %zmm22
 
-// CHECK:      vsm4key4  291(%edi,%eax,4), %zmm3, %zmm2
-// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0x94,0x87,0x23,0x01,0x00,0x00]
-               vsm4key4  291(%edi,%eax,4), %zmm3, %zmm2
+// CHECK:      vsm4key4  291(%r8,%rax,4), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xc2,0x46,0x40,0xda,0xb4,0x80,0x23,0x01,0x00,0x00]
+               vsm4key4  291(%r8,%rax,4), %zmm23, %zmm22
 
-// CHECK:      vsm4key4  (%eax), %zmm3, %zmm2
-// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0x10]
-               vsm4key4  (%eax), %zmm3, %zmm2
+// CHECK:      vsm4key4  (%rip), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe2,0x46,0x40,0xda,0x35,0x00,0x00,0x00,0x00]
+               vsm4key4  (%rip), %zmm23, %zmm22
 
-// CHECK:      vsm4key4  -2048(,%ebp,2), %zmm3, %zmm2
-// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0x14,0x6d,0x00,0xf8,0xff,0xff]
-               vsm4key4  -2048(,%ebp,2), %zmm3, %zmm2
+// CHECK:      vsm4key4  -2048(,%rbp,2), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe2,0x46,0x40,0xda,0x34,0x6d,0x00,0xf8,0xff,0xff]
+               vsm4key4  -2048(,%rbp,2), %zmm23, %zmm22
 
-// CHECK:      vsm4key4  8128(%ecx), %zmm3, %zmm2
-// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0x51,0x7f]
-               vsm4key4  8128(%ecx), %zmm3, %zmm2
+// CHECK:      vsm4key4  8128(%rcx), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe2,0x46,0x40,0xda,0x71,0x7f]
+               vsm4key4  8128(%rcx), %zmm23, %zmm22
 
-// CHECK:      vsm4key4  -8192(%edx), %zmm3, %zmm2
-// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0x52,0x80]
-               vsm4key4  -8192(%edx), %zmm3, %zmm2
+// CHECK:      vsm4key4  -8192(%rdx), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe2,0x46,0x40,0xda,0x72,0x80]
+               vsm4key4  -8192(%rdx), %zmm23, %zmm22
 
-// CHECK:      vsm4rnds4 %zmm4, %zmm3, %zmm2
-// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0xd4]
-               vsm4rnds4 %zmm4, %zmm3, %zmm2
+// CHECK:      vsm4rnds4 %zmm24, %zmm23, %zmm22
+// CHECK: encoding: [0x62,0x82,0x47,0x40,0xda,0xf0]
+               vsm4rnds4 %zmm24, %zmm23, %zmm22
 
-// CHECK:      vsm4rnds4  268435456(%esp,%esi,8), %zmm3, %zmm2
-// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0x94,0xf4,0x00,0x00,0x00,0x10]
-               vsm4rnds4  268435456(%esp,%esi,8), %zmm3, %zmm2
+// CHECK:      vsm4rnds4  268435456(%rbp,%r14,8), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xa2,0x47,0x40,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10]
+               vsm4rnds4  268435456(%rbp,%r14,8), %zmm23, %zmm22
 
-// CHECK:      vsm4rnds4  291(%edi,%eax,4), %zmm3, %zmm2
-// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0x94,0x87,0x23,0x01,0x00,0x00]
-               vsm4rnds4  291(%edi,%eax,4), %zmm3, %zmm2
+// CHECK:      vsm4rnds4  291(%r8,%rax,4), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xc2,0x47,0x40,0xda,0xb4,0x80,0x23,0x01,0x00,0x00]
+               vsm4rnds4  291(%r8,%rax,4), %zmm23, %zmm22
 
-// CHECK:      vsm4rnds4  (%eax), %zmm3, %zmm2
-// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0x10]
-               vsm4rnds4  (%eax), %zmm3, %zmm2
+// CHECK:      vsm4rnds4  (%rip), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe2,0x47,0x40,0xda,0x35,0x00,0x00,0x00,0x00]
+               vsm4rnds4  (%rip), %zmm23, %zmm22
 
-// CHECK:      vsm4rnds4  -2048(,%ebp,2), %zmm3, %zmm2
-// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0x14,0x6d,0x00,0xf8,0xff,0xff]
-               vsm4rnds4  -2048(,%ebp,2), %zmm3, %zmm2
+// CHECK:      vsm4rnds4  -2048(,%rbp,2), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe2,0x47,0x40,0xda,0x34,0x6d,0x00,0xf8,0xff,0xff]
+               vsm4rnds4  -2048(,%rbp,2), %zmm23, %zmm22
 
-// CHECK:      vsm4rnds4  8128(%ecx), %zmm3, %zmm2
-// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0x51,0x7f]
-               vsm4rnds4  8128(%ecx), %zmm3, %zmm2
+// CHECK:      vsm4rnds4  8128(%rcx), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe2,0x47,0x40,0xda,0x71,0x7f]
+               vsm4rnds4  8128(%rcx), %zmm23, %zmm22
 
-// CHECK:      vsm4rnds4  -8192(%edx), %zmm3, %zmm2
-// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0x52,0x80]
-               vsm4rnds4  -8192(%edx), %zmm3, %zmm2
+// CHECK:      vsm4rnds4  -8192(%rdx), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe2,0x47,0x40,0xda,0x72,0x80]
+               vsm4rnds4  -8192(%rdx), %zmm23, %zmm22
 
-// CHECK:      {evex} vsm4key4 %ymm4, %ymm3, %ymm2
-// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0xd4]
-               {evex} vsm4key4 %ymm4, %ymm3, %ymm2
+// CHECK:      vsm4key4 %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x82,0x46,0x20,0xda,0xf0]
+               vsm4key4 %ymm24, %ymm23, %ymm22
 
-// CHECK:      {evex} vsm4key4 %xmm4, %xmm3, %xmm2
-// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0xd4]
-               {evex} vsm4key4 %xmm4, %xmm3, %xmm2
+// CHECK:      vsm4key4 %xmm24, %xmm23, %xmm22
+// CHECK: encoding: [0x62,0x82,0x46,0x00,0xda,0xf0]
+               vsm4key4 %xmm24, %xmm23, %xmm22
 
-// CHECK:      {evex} vsm4key4  268435456(%esp,%esi,8), %ymm3, %ymm2
-// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0x94,0xf4,0x00,0x00,0x00,0x10]
-               {evex} vsm4key4  268435456(%esp,%esi,8), %ymm3, %ymm2
+// CHECK:      vsm4key4  268435456(%rbp,%r14,8), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xa2,0x46,0x20,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10]
+               vsm4key4  268435456(%rbp,%r14,8), %ymm23, %ymm22
 
-// CHECK:      {evex} vsm4key4  291(%edi,%eax,4), %ymm3, %ymm2
-// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0x94,0x87,0x23,0x01,0x00,0x00]
-               {evex} vsm4key4  291(%edi,%eax,4), %ymm3, %ymm2
+// CHECK:      vsm4key4  291(%r8,%rax,4), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xc2,0x46,0x20,0xda,0xb4,0x80,0x23,0x01,0x00,0x00]
+               vsm4key4  291(%r8,%rax,4), %ymm23, %ymm22
 
-// CHECK:      {evex} vsm4key4  (%eax), %ymm3, %ymm2
-// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0x10]
-               {evex} vsm4key4  (%eax), %ymm3, %ymm2
+// CHECK:      vsm4key4  (%rip), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe2,0x46,0x20,0xda,0x35,0x00,0x00,0x00,0x00]
+               vsm4key4  (%rip), %ymm23, %ymm22
 
-// CHECK:      {evex} vsm4key4  -1024(,%ebp,2), %ymm3, %ymm2
-// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0x14,0x6d,0x00,0xfc,0xff,0xff]
-               {evex} vsm4key4  -1024(,%ebp,2), %ymm3, %ymm2
+// CHECK:      vsm4key4  -1024(,%rbp,2), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe2,0x46,0x20,0xda,0x34,0x6d,0x00,0xfc,0xff,0xff]
+               vsm4key4  -1024(,%rbp,2), %ymm23, %ymm22
 
-// CHECK:      {evex} vsm4key4  4064(%ecx), %ymm3, %ymm2
-// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0x51,0x7f]
-               {evex} vsm4key4  4064(%ecx), %ymm3, %ymm2
+// CHECK:      vsm4key4  4064(%rcx), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe2,0x46,0x20,0xda,0x71,0x7f]
+               vsm4key4  4064(%rcx), %ymm23, %ymm22
 
-// CHECK:      {evex} vsm4key4  -4096(%edx), %ymm3, %ymm2
-// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0x52,0x80]
-               {evex} vsm4key4  -4096(%edx), %ymm3, %ymm2
+// CHECK:      vsm4key4  -4096(%rdx), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe2,0x46,0x20,0xda,0x72,0x80]
+               vsm4key4  -4096(%rdx), %ymm23, %ymm22
 
-// CHECK:      {evex} vsm4key4  268435456(%esp,%esi,8), %xmm3, %xmm2
-// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0x94,0xf4,0x00,0x00,0x00,0x10]
-               {evex} vsm4key4  268435456(%esp,%esi,8), %xmm3, %xmm2
+// CHECK:      vsm4key4  268435456(%rbp,%r14,8), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xa2,0x46,0x00,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10]
+               vsm4key4  268435456(%rbp,%r14,8), %xmm23, %xmm22
 
-// CHECK:      {evex} vsm4key4  291(%edi,%eax,4), %xmm3, %xmm2
-// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0x94,0x87,0x23,0x01,0x00,0x00]
-               {evex} vsm4key4  291(%edi,%eax,4), %xmm3, %xmm2
+// CHECK:      vsm4key4  291(%r8,%rax,4), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xc2,0x46,0x00,0xda,0xb4,0x80,0x23,0x01,0x00,0x00]
+               vsm4key4  291(%r8,%rax,4), %xmm23, %xmm22
 
-// CHECK:      {evex} vsm4key4  (%eax), %xmm3, %xmm2
-// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0x10]
-               {evex} vsm4key4  (%eax), %xmm3, %xmm2
+// CHECK:      vsm4key4  (%rip), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe2,0x46,0x00,0xda,0x35,0x00,0x00,0x00,0x00]
+               vsm4key4  (%rip), %xmm23, %xmm22
 
-// CHECK:      {evex} vsm4key4  -512(,%ebp,2), %xmm3, %xmm2
-// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0x14,0x6d,0x00,0xfe,0xff,0xff]
-               {evex} vsm4key4  -512(,%ebp,2), %xmm3, %xmm2
+// CHECK:      vsm4key4  -512(,%rbp,2), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe2,0x46,0x00,0xda,0x34,0x6d,0x00,0xfe,0xff,0xff]
+               vsm4key4  -512(,%rbp,2), %xmm23, %xmm22
 
-// CHECK:      {evex} vsm4key4  2032(%ecx), %xmm3, %xmm2
-// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0x51,0x7f]
-               {evex} vsm4key4  2032(%ecx), %xmm3, %xmm2
+// CHECK:      vsm4key4  2032(%rcx), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe2,0x46,0x00,0xda,0x71,0x7f]
+               vsm4key4  2032(%rcx), %xmm23, %xmm22
 
-// CHECK:      {evex} vsm4key4  -2048(%edx), %xmm3, %xmm2
-// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0x52,0x80]
-               {evex} vsm4key4  -2048(%edx), %xmm3, %xmm2
+// CHECK:      vsm4key4  -2048(%rdx), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe2,0x46,0x00,0xda,0x72,0x80]
+               vsm4key4  -2048(%rdx), %xmm23, %xmm22
+// CHECK:      vsm4key4 %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x82,0x46,0x20,0xda,0xf0]
+               vsm4key4 %ymm24, %ymm23, %ymm22
 
-// CHECK:      {evex} vsm4rnds4 %ymm4, %ymm3, %ymm2
-// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0xd4]
-               {evex} vsm4rnds4 %ymm4, %ymm3, %ymm2
+// CHECK:      vsm4key4 %xmm24, %xmm23, %xmm22
+// CHECK: encoding: [0x62,0x82,0x46,0x00,0xda,0xf0]
+               vsm4key4 %xmm24, %xmm23, %xmm22
 
-// CHECK:      {evex} vsm4rnds4 %xmm4, %xmm3, %xmm2
-// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0xd4]
-               {evex} vsm4rnds4 %xmm4, %xmm3, %xmm2
+// CHECK:      vsm4key4  268435456(%rbp,%r14,8), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xa2,0x46,0x20,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10]
+               vsm4key4  268435456(%rbp,%r14,8), %ymm23, %ymm22
+
+// CHECK:      vsm4key4  291(%r8,%rax,4), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xc2,0x46,0x20,0xda,0xb4,0x80,0x23,0x01,0x00,0x00]
+               vsm4key4  291(%r8,%rax,4), %ymm23, %ymm22
+
+// CHECK:      vsm4key4  (%rip), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe2,0x46,0x20,0xda,0x35,0x00,0x00,0x00,0x00]
+               vsm4key4  (%rip), %ymm23, %ymm22
+
+// CHECK:      vsm4key4  -1024(,%rbp,2), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe2,0x46,0x20,0xda,0x34,0x6d,0x00,0xfc,0xff,0xff]
+               vsm4key4  -1024(,%rbp,2), %ymm23, %ymm22
+
+// CHECK:      vsm4key4  4064(%rcx), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe2,0x46,0x20,0xda,0x71,0x7f]
+               vsm4key4  4064(%rcx), %ymm23, %ymm22
+
+// CHECK:      vsm4key4  -4096(%rdx), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe2,0x46,0x20,0xda,0x72,0x80]
+               vsm4key4  -4096(%rdx), %ymm23, %ymm22
+
+// CHECK:      vsm4key4  268435456(%rbp,%r14,8), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xa2,0x46,0x00,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10]
+               vsm4key4  268435456(%rbp,%r14,8), %xmm23, %xmm22
+
+// CHECK:      vsm4key4  291(%r8,%rax,4), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xc2,0x46,0x00,0xda,0xb4,0x80,0x23,0x01,0x00,0x00]
+               vsm4key4  291(%r8,%rax,4), %xmm23, %xmm22
+
+// CHECK:      vsm4key4  (%rip), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe2,0x46,0x00,0xda,0x35,0x00,0x00,0x00,0x00]
+               vsm4key4  (%rip), %xmm23, %xmm22
+
+// CHECK:      vsm4key4  -512(,%rbp,2), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe2,0x46,0x00,0xda,0x34,0x6d,0x00,0xfe,0xff,0xff]
+               vsm4key4  -512(,%rbp,2), %xmm23, %xmm22
+
+// CHECK:      vsm4key4  2032(%rcx), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe2,0x46,0x00,0xda,0x71,0x7f]
+               vsm4key4  2032(%rcx), %xmm23, %xmm22
+
+// CHECK:      vsm4key4  -2048(%rdx), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe2,0x46,0x00,0xda,0x72,0x80]
+               vsm4key4  -2048(%rdx), %xmm23, %xmm22
+
+// CHECK:      vsm4rnds4 %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x82,0x47,0x20,0xda,0xf0]
+               vsm4rnds4 %ymm24, %ymm23, %ymm22
+
+// CHECK:      vsm4rnds4 %xmm24, %xmm23, %xmm22
+// CHECK: encoding: [0x62,0x82,0x47,0x00,0xda,0xf0]
+               vsm4rnds4 %xmm24, %xmm23, %xmm22
+
+// CHECK:      vsm4rnds4  268435456(%rbp,%r14,8), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xa2,0x47,0x20,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10]
+               vsm4rnds4  268435456(%rbp,%r14,8), %ymm23, %ymm22
+
+// CHECK:      vsm4rnds4  291(%r8,%rax,4), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xc2,0x47,0x20,0xda,0xb4,0x80,0x23,0x01,0x00,0x00]
+               vsm4rnds4  291(%r8,%rax,4), %ymm23, %ymm22
+
+// CHECK:      vsm4rnds4  (%rip), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe2,0x47,0x20,0xda,0x35,0x00,0x00,0x00,0x00]
+               vsm4rnds4  (%rip), %ymm23, %ymm22
+
+// CHECK:      vsm4rnds4  -1024(,%rbp,2), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe2,0x47,0x20,0xda,0x34,0x6d,0x00,0xfc,0xff,0xff]
+               vsm4rnds4  -1024(,%rbp,2), %ymm23, %ymm22
+
+// CHECK:      vsm4rnds4  4064(%rcx), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe2,0x47,0x20,0xda,0x71,0x7f]
+               vsm4rnds4  4064(%rcx), %ymm23, %ymm22
+
+// CHECK:      vsm4rnds4  -4096(%rdx), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe2,0x47,0x20,0xda,0x72,0x80]
+               vsm4rnds4  -4096(%rdx), %ymm23, %ymm22
+
+// CHECK:      vsm4rnds4  268435456(%rbp,%r14,8), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xa2,0x47,0x00,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10]
+               vsm4rnds4  268435456(%rbp,%r14,8), %xmm23, %xmm22
+
+// CHECK:      vsm4rnds4  291(%r8,%rax,4), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xc2,0x47,0x00,0xda,0xb4,0x80,0x23,0x01,0x00,0x00]
+               vsm4rnds4  291(%r8,%rax,4), %xmm23, %xmm22
+
+// CHECK:      vsm4rnds4  (%rip), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe2,0x47,0x00,0xda,0x35,0x00,0x00,0x00,0x00]
+               vsm4rnds4  (%rip), %xmm23, %xmm22
+
+// CHECK:      vsm4rnds4  -512(,%rbp,2), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe2,0x47,0x00,0xda,0x34,0x6d,0x00,0xfe,0xff,0xff]
+               vsm4rnds4  -512(,%rbp,2), %xmm23, %xmm22
 
-// CHECK:      {evex} vsm4rnds4  268435456(%esp,%esi,8), %ymm3, %ymm2
-// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0x94,0xf4,0x00,0x00,0x00,0x10]
-               {evex} vsm4rnds4  268435456(%esp,%esi,8), %ymm3, %ymm2
+// CHECK:      vsm4rnds4  2032(%rcx), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe2,0x47,0x00,0xda,0x71,0x7f]
+               vsm4rnds4  2032(%rcx), %xmm23, %xmm22
 
-// CHECK:      {evex} vsm4rnds4  291(%edi,%eax,4), %ymm3, %ymm2
-// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0x94,0x87,0x23,0x01,0x00,0x00]
-               {evex} vsm4rnds4  291(%edi,%eax,4), %ymm3, %ymm2
-
-// CHECK:      {evex} vsm4rnds4  (%eax), %ymm3, %ymm2
-// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0x10]
-               {evex} vsm4rnds4  (%eax), %ymm3, %ymm2
-
-// CHECK:      {evex} vsm4rnds4  -1024(,%ebp,2), %ymm3, %ymm2
-// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0x14,0x6d,0x00,0xfc,0xff,0xff]
-               {evex} vsm4rnds4  -1024(,%ebp,2), %ymm3, %ymm2
-
-// CHECK:      {evex} vsm4rnds4  4064(%ecx), %ymm3, %ymm2
-// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0x51,0x7f]
-               {evex} vsm4rnds4  4064(%ecx), %ymm3, %ymm2
-
-// CHECK:      {evex} vsm4rnds4  -4096(%edx), %ymm3, %ymm2
-// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0x52,0x80]
-               {evex} vsm4rnds4  -4096(%edx), %ymm3, %ymm2
-
-// CHECK:      {evex} vsm4rnds4  268435456(%esp,%esi,8), %xmm3, %xmm2
-// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0x94,0xf4,0x00,0x00,0x00,0x10]
-               {evex} vsm4rnds4  268435456(%esp,%esi,8), %xmm3, %xmm2
-
-// CHECK:      {evex} vsm4rnds4  291(%edi,%eax,4), %xmm3, %xmm2
-// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0x94,0x87,0x23,0x01,0x00,0x00]
-               {evex} vsm4rnds4  291(%edi,%eax,4), %xmm3, %xmm2
-
-// CHECK:      {evex} vsm4rnds4  (%eax), %xmm3, %xmm2
-// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0x10]
-               {evex} vsm4rnds4  (%eax), %xmm3, %xmm2
-
-// CHECK:      {evex} vsm4rnds4  -512(,%ebp,2), %xmm3, %xmm2
-// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0x14,0x6d,0x00,0xfe,0xff,0xff]
-               {evex} vsm4rnds4  -512(,%ebp,2), %xmm3, %xmm2
-
-// CHECK:      {evex} vsm4rnds4  2032(%ecx), %xmm3, %xmm2
-// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0x51,0x7f]
-               {evex} vsm4rnds4  2032(%ecx), %xmm3, %xmm2
-
-// CHECK:      {evex} vsm4rnds4  -2048(%edx), %xmm3, %xmm2
-// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0x52,0x80]
-               {evex} vsm4rnds4  -2048(%edx), %xmm3, %xmm2
\ No newline at end of file
+// CHECK:      vsm4rnds4  -2048(%rdx), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe2,0x47,0x00,0xda,0x72,0x80]
+               vsm4rnds4  -2048(%rdx), %xmm23, %xmm22
diff --git a/llvm/test/MC/X86/sm4-evex-64-intel.s b/llvm/test/MC/X86/sm4-evex-64-intel.s
index 812fdb13f80913..3cc18cf4178ed8 100644
--- a/llvm/test/MC/X86/sm4-evex-64-intel.s
+++ b/llvm/test/MC/X86/sm4-evex-64-intel.s
@@ -1,169 +1,169 @@
-// RUN: llvm-mc -triple i686-unknown-unknown -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s
+// RUN: llvm-mc -triple x86_64-unknown-unknown -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s
 
-// CHECK:      vsm4key4 zmm2, zmm3, zmm4
-// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0xd4]
-               vsm4key4 zmm2, zmm3, zmm4
+// CHECK:      vsm4key4 zmm22, zmm23, zmm24
+// CHECK: encoding: [0x62,0x82,0x46,0x40,0xda,0xf0]
+               vsm4key4 zmm22, zmm23, zmm24
 
-// CHECK:      vsm4key4 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
-// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0x94,0xf4,0x00,0x00,0x00,0x10]
-               vsm4key4 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+// CHECK:      vsm4key4 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa2,0x46,0x40,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10]
+               vsm4key4 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
 
-// CHECK:      vsm4key4 zmm2, zmm3, zmmword ptr [edi + 4*eax + 291]
-// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0x94,0x87,0x23,0x01,0x00,0x00]
-               vsm4key4 zmm2, zmm3, zmmword ptr [edi + 4*eax + 291]
+// CHECK:      vsm4key4 zmm22, zmm23, zmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc2,0x46,0x40,0xda,0xb4,0x80,0x23,0x01,0x00,0x00]
+               vsm4key4 zmm22, zmm23, zmmword ptr [r8 + 4*rax + 291]
 
-// CHECK:      vsm4key4 zmm2, zmm3, zmmword ptr [eax]
-// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0x10]
-               vsm4key4 zmm2, zmm3, zmmword ptr [eax]
+// CHECK:      vsm4key4 zmm22, zmm23, zmmword ptr [rip]
+// CHECK: encoding: [0x62,0xe2,0x46,0x40,0xda,0x35,0x00,0x00,0x00,0x00]
+               vsm4key4 zmm22, zmm23, zmmword ptr [rip]
 
-// CHECK:      vsm4key4 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
-// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0x14,0x6d,0x00,0xf8,0xff,0xff]
-               vsm4key4 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+// CHECK:      vsm4key4 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+// CHECK: encoding: [0x62,0xe2,0x46,0x40,0xda,0x34,0x6d,0x00,0xf8,0xff,0xff]
+               vsm4key4 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
 
-// CHECK:      vsm4key4 zmm2, zmm3, zmmword ptr [ecx + 8128]
-// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0x51,0x7f]
-               vsm4key4 zmm2, zmm3, zmmword ptr [ecx + 8128]
+// CHECK:      vsm4key4 zmm22, zmm23, zmmword ptr [rcx + 8128]
+// CHECK: encoding: [0x62,0xe2,0x46,0x40,0xda,0x71,0x7f]
+               vsm4key4 zmm22, zmm23, zmmword ptr [rcx + 8128]
 
-// CHECK:      vsm4key4 zmm2, zmm3, zmmword ptr [edx - 8192]
-// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0x52,0x80]
-               vsm4key4 zmm2, zmm3, zmmword ptr [edx - 8192]
+// CHECK:      vsm4key4 zmm22, zmm23, zmmword ptr [rdx - 8192]
+// CHECK: encoding: [0x62,0xe2,0x46,0x40,0xda,0x72,0x80]
+               vsm4key4 zmm22, zmm23, zmmword ptr [rdx - 8192]
 
-// CHECK:      vsm4rnds4 zmm2, zmm3, zmm4
-// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0xd4]
-               vsm4rnds4 zmm2, zmm3, zmm4
+// CHECK:      vsm4rnds4 zmm22, zmm23, zmm24
+// CHECK: encoding: [0x62,0x82,0x47,0x40,0xda,0xf0]
+               vsm4rnds4 zmm22, zmm23, zmm24
 
-// CHECK:      vsm4rnds4 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
-// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0x94,0xf4,0x00,0x00,0x00,0x10]
-               vsm4rnds4 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+// CHECK:      vsm4rnds4 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa2,0x47,0x40,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10]
+               vsm4rnds4 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
 
-// CHECK:      vsm4rnds4 zmm2, zmm3, zmmword ptr [edi + 4*eax + 291]
-// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0x94,0x87,0x23,0x01,0x00,0x00]
-               vsm4rnds4 zmm2, zmm3, zmmword ptr [edi + 4*eax + 291]
+// CHECK:      vsm4rnds4 zmm22, zmm23, zmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc2,0x47,0x40,0xda,0xb4,0x80,0x23,0x01,0x00,0x00]
+               vsm4rnds4 zmm22, zmm23, zmmword ptr [r8 + 4*rax + 291]
 
-// CHECK:      vsm4rnds4 zmm2, zmm3, zmmword ptr [eax]
-// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0x10]
-               vsm4rnds4 zmm2, zmm3, zmmword ptr [eax]
+// CHECK:      vsm4rnds4 zmm22, zmm23, zmmword ptr [rip]
+// CHECK: encoding: [0x62,0xe2,0x47,0x40,0xda,0x35,0x00,0x00,0x00,0x00]
+               vsm4rnds4 zmm22, zmm23, zmmword ptr [rip]
 
-// CHECK:      vsm4rnds4 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
-// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0x14,0x6d,0x00,0xf8,0xff,0xff]
-               vsm4rnds4 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+// CHECK:      vsm4rnds4 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+// CHECK: encoding: [0x62,0xe2,0x47,0x40,0xda,0x34,0x6d,0x00,0xf8,0xff,0xff]
+               vsm4rnds4 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
 
-// CHECK:      vsm4rnds4 zmm2, zmm3, zmmword ptr [ecx + 8128]
-// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0x51,0x7f]
-               vsm4rnds4 zmm2, zmm3, zmmword ptr [ecx + 8128]
+// CHECK:      vsm4rnds4 zmm22, zmm23, zmmword ptr [rcx + 8128]
+// CHECK: encoding: [0x62,0xe2,0x47,0x40,0xda,0x71,0x7f]
+               vsm4rnds4 zmm22, zmm23, zmmword ptr [rcx + 8128]
 
-// CHECK:      vsm4rnds4 zmm2, zmm3, zmmword ptr [edx - 8192]
-// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0x52,0x80]
-               vsm4rnds4 zmm2, zmm3, zmmword ptr [edx - 8192]
+// CHECK:      vsm4rnds4 zmm22, zmm23, zmmword ptr [rdx - 8192]
+// CHECK: encoding: [0x62,0xe2,0x47,0x40,0xda,0x72,0x80]
+               vsm4rnds4 zmm22, zmm23, zmmword ptr [rdx - 8192]
 
-// CHECK:      {evex} vsm4key4 ymm2, ymm3, ymm4
-// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0xd4]
-               {evex} vsm4key4 ymm2, ymm3, ymm4
+// CHECK:      vsm4key4 ymm22, ymm23, ymm24
+// CHECK: encoding: [0x62,0x82,0x46,0x20,0xda,0xf0]
+               vsm4key4 ymm22, ymm23, ymm24
 
-// CHECK:      {evex} vsm4key4 xmm2, xmm3, xmm4
-// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0xd4]
-               {evex} vsm4key4 xmm2, xmm3, xmm4
+// CHECK:      vsm4key4 xmm22, xmm23, xmm24
+// CHECK: encoding: [0x62,0x82,0x46,0x00,0xda,0xf0]
+               vsm4key4 xmm22, xmm23, xmm24
 
-// CHECK:      {evex} vsm4key4 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
-// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0x94,0xf4,0x00,0x00,0x00,0x10]
-               {evex} vsm4key4 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+// CHECK:      vsm4key4 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa2,0x46,0x20,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10]
+               vsm4key4 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
 
-// CHECK:      {evex} vsm4key4 ymm2, ymm3, ymmword ptr [edi + 4*eax + 291]
-// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0x94,0x87,0x23,0x01,0x00,0x00]
-               {evex} vsm4key4 ymm2, ymm3, ymmword ptr [edi + 4*eax + 291]
+// CHECK:      vsm4key4 ymm22, ymm23, ymmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc2,0x46,0x20,0xda,0xb4,0x80,0x23,0x01,0x00,0x00]
+               vsm4key4 ymm22, ymm23, ymmword ptr [r8 + 4*rax + 291]
 
-// CHECK:      {evex} vsm4key4 ymm2, ymm3, ymmword ptr [eax]
-// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0x10]
-               {evex} vsm4key4 ymm2, ymm3, ymmword ptr [eax]
+// CHECK:      vsm4key4 ymm22, ymm23, ymmword ptr [rip]
+// CHECK: encoding: [0x62,0xe2,0x46,0x20,0xda,0x35,0x00,0x00,0x00,0x00]
+               vsm4key4 ymm22, ymm23, ymmword ptr [rip]
 
-// CHECK:      {evex} vsm4key4 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
-// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0x14,0x6d,0x00,0xfc,0xff,0xff]
-               {evex} vsm4key4 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+// CHECK:      vsm4key4 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+// CHECK: encoding: [0x62,0xe2,0x46,0x20,0xda,0x34,0x6d,0x00,0xfc,0xff,0xff]
+               vsm4key4 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
 
-// CHECK:      {evex} vsm4key4 ymm2, ymm3, ymmword ptr [ecx + 4064]
-// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0x51,0x7f]
-               {evex} vsm4key4 ymm2, ymm3, ymmword ptr [ecx + 4064]
+// CHECK:      vsm4key4 ymm22, ymm23, ymmword ptr [rcx + 4064]
+// CHECK: encoding: [0x62,0xe2,0x46,0x20,0xda,0x71,0x7f]
+               vsm4key4 ymm22, ymm23, ymmword ptr [rcx + 4064]
 
-// CHECK:      {evex} vsm4key4 ymm2, ymm3, ymmword ptr [edx - 4096]
-// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0x52,0x80]
-               {evex} vsm4key4 ymm2, ymm3, ymmword ptr [edx - 4096]
+// CHECK:      vsm4key4 ymm22, ymm23, ymmword ptr [rdx - 4096]
+// CHECK: encoding: [0x62,0xe2,0x46,0x20,0xda,0x72,0x80]
+               vsm4key4 ymm22, ymm23, ymmword ptr [rdx - 4096]
 
-// CHECK:      {evex} vsm4key4 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
-// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0x94,0xf4,0x00,0x00,0x00,0x10]
-               {evex} vsm4key4 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+// CHECK:      vsm4key4 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa2,0x46,0x00,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10]
+               vsm4key4 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
 
-// CHECK:      {evex} vsm4key4 xmm2, xmm3, xmmword ptr [edi + 4*eax + 291]
-// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0x94,0x87,0x23,0x01,0x00,0x00]
-               {evex} vsm4key4 xmm2, xmm3, xmmword ptr [edi + 4*eax + 291]
+// CHECK:      vsm4key4 xmm22, xmm23, xmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc2,0x46,0x00,0xda,0xb4,0x80,0x23,0x01,0x00,0x00]
+               vsm4key4 xmm22, xmm23, xmmword ptr [r8 + 4*rax + 291]
 
-// CHECK:      {evex} vsm4key4 xmm2, xmm3, xmmword ptr [eax]
-// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0x10]
-               {evex} vsm4key4 xmm2, xmm3, xmmword ptr [eax]
+// CHECK:      vsm4key4 xmm22, xmm23, xmmword ptr [rip]
+// CHECK: encoding: [0x62,0xe2,0x46,0x00,0xda,0x35,0x00,0x00,0x00,0x00]
+               vsm4key4 xmm22, xmm23, xmmword ptr [rip]
 
-// CHECK:      {evex} vsm4key4 xmm2, xmm3, xmmword ptr [2*ebp - 512]
-// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0x14,0x6d,0x00,0xfe,0xff,0xff]
-               {evex} vsm4key4 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+// CHECK:      vsm4key4 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+// CHECK: encoding: [0x62,0xe2,0x46,0x00,0xda,0x34,0x6d,0x00,0xfe,0xff,0xff]
+               vsm4key4 xmm22, xmm23, xmmword ptr [2*rbp - 512]
 
-// CHECK:      {evex} vsm4key4 xmm2, xmm3, xmmword ptr [ecx + 2032]
-// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0x51,0x7f]
-               {evex} vsm4key4 xmm2, xmm3, xmmword ptr [ecx + 2032]
+// CHECK:      vsm4key4 xmm22, xmm23, xmmword ptr [rcx + 2032]
+// CHECK: encoding: [0x62,0xe2,0x46,0x00,0xda,0x71,0x7f]
+               vsm4key4 xmm22, xmm23, xmmword ptr [rcx + 2032]
 
-// CHECK:      {evex} vsm4key4 xmm2, xmm3, xmmword ptr [edx - 2048]
-// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0x52,0x80]
-               {evex} vsm4key4 xmm2, xmm3, xmmword ptr [edx - 2048]
+// CHECK:      vsm4key4 xmm22, xmm23, xmmword ptr [rdx - 2048]
+// CHECK: encoding: [0x62,0xe2,0x46,0x00,0xda,0x72,0x80]
+               vsm4key4 xmm22, xmm23, xmmword ptr [rdx - 2048]
 
-// CHECK:      {evex} vsm4rnds4 ymm2, ymm3, ymm4
-// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0xd4]
-               {evex} vsm4rnds4 ymm2, ymm3, ymm4
+// CHECK:      vsm4rnds4 ymm22, ymm23, ymm24
+// CHECK: encoding: [0x62,0x82,0x47,0x20,0xda,0xf0]
+               vsm4rnds4 ymm22, ymm23, ymm24
 
-// CHECK:      {evex} vsm4rnds4 xmm2, xmm3, xmm4
-// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0xd4]
-               {evex} vsm4rnds4 xmm2, xmm3, xmm4
+// CHECK:      vsm4rnds4 xmm22, xmm23, xmm24
+// CHECK: encoding: [0x62,0x82,0x47,0x00,0xda,0xf0]
+               vsm4rnds4 xmm22, xmm23, xmm24
 
-// CHECK:      {evex} vsm4rnds4 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
-// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0x94,0xf4,0x00,0x00,0x00,0x10]
-               {evex} vsm4rnds4 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+// CHECK:      vsm4rnds4 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa2,0x47,0x20,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10]
+               vsm4rnds4 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
 
-// CHECK:      {evex} vsm4rnds4 ymm2, ymm3, ymmword ptr [edi + 4*eax + 291]
-// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0x94,0x87,0x23,0x01,0x00,0x00]
-               {evex} vsm4rnds4 ymm2, ymm3, ymmword ptr [edi + 4*eax + 291]
+// CHECK:      vsm4rnds4 ymm22, ymm23, ymmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc2,0x47,0x20,0xda,0xb4,0x80,0x23,0x01,0x00,0x00]
+               vsm4rnds4 ymm22, ymm23, ymmword ptr [r8 + 4*rax + 291]
 
-// CHECK:      {evex} vsm4rnds4 ymm2, ymm3, ymmword ptr [eax]
-// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0x10]
-               {evex} vsm4rnds4 ymm2, ymm3, ymmword ptr [eax]
+// CHECK:      vsm4rnds4 ymm22, ymm23, ymmword ptr [rip]
+// CHECK: encoding: [0x62,0xe2,0x47,0x20,0xda,0x35,0x00,0x00,0x00,0x00]
+               vsm4rnds4 ymm22, ymm23, ymmword ptr [rip]
 
-// CHECK:      {evex} vsm4rnds4 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
-// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0x14,0x6d,0x00,0xfc,0xff,0xff]
-               {evex} vsm4rnds4 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+// CHECK:      vsm4rnds4 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+// CHECK: encoding: [0x62,0xe2,0x47,0x20,0xda,0x34,0x6d,0x00,0xfc,0xff,0xff]
+               vsm4rnds4 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
 
-// CHECK:      {evex} vsm4rnds4 ymm2, ymm3, ymmword ptr [ecx + 4064]
-// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0x51,0x7f]
-               {evex} vsm4rnds4 ymm2, ymm3, ymmword ptr [ecx + 4064]
+// CHECK:      vsm4rnds4 ymm22, ymm23, ymmword ptr [rcx + 4064]
+// CHECK: encoding: [0x62,0xe2,0x47,0x20,0xda,0x71,0x7f]
+               vsm4rnds4 ymm22, ymm23, ymmword ptr [rcx + 4064]
 
-// CHECK:      {evex} vsm4rnds4 ymm2, ymm3, ymmword ptr [edx - 4096]
-// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0x52,0x80]
-               {evex} vsm4rnds4 ymm2, ymm3, ymmword ptr [edx - 4096]
+// CHECK:      vsm4rnds4 ymm22, ymm23, ymmword ptr [rdx - 4096]
+// CHECK: encoding: [0x62,0xe2,0x47,0x20,0xda,0x72,0x80]
+               vsm4rnds4 ymm22, ymm23, ymmword ptr [rdx - 4096]
 
-// CHECK:      {evex} vsm4rnds4 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
-// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0x94,0xf4,0x00,0x00,0x00,0x10]
-               {evex} vsm4rnds4 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+// CHECK:      vsm4rnds4 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa2,0x47,0x00,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10]
+               vsm4rnds4 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
 
-// CHECK:      {evex} vsm4rnds4 xmm2, xmm3, xmmword ptr [edi + 4*eax + 291]
-// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0x94,0x87,0x23,0x01,0x00,0x00]
-               {evex} vsm4rnds4 xmm2, xmm3, xmmword ptr [edi + 4*eax + 291]
+// CHECK:      vsm4rnds4 xmm22, xmm23, xmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc2,0x47,0x00,0xda,0xb4,0x80,0x23,0x01,0x00,0x00]
+               vsm4rnds4 xmm22, xmm23, xmmword ptr [r8 + 4*rax + 291]
 
-// CHECK:      {evex} vsm4rnds4 xmm2, xmm3, xmmword ptr [eax]
-// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0x10]
-               {evex} vsm4rnds4 xmm2, xmm3, xmmword ptr [eax]
+// CHECK:      vsm4rnds4 xmm22, xmm23, xmmword ptr [rip]
+// CHECK: encoding: [0x62,0xe2,0x47,0x00,0xda,0x35,0x00,0x00,0x00,0x00]
+               vsm4rnds4 xmm22, xmm23, xmmword ptr [rip]
 
-// CHECK:      {evex} vsm4rnds4 xmm2, xmm3, xmmword ptr [2*ebp - 512]
-// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0x14,0x6d,0x00,0xfe,0xff,0xff]
-               {evex} vsm4rnds4 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+// CHECK:      vsm4rnds4 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+// CHECK: encoding: [0x62,0xe2,0x47,0x00,0xda,0x34,0x6d,0x00,0xfe,0xff,0xff]
+               vsm4rnds4 xmm22, xmm23, xmmword ptr [2*rbp - 512]
 
-// CHECK:      {evex} vsm4rnds4 xmm2, xmm3, xmmword ptr [ecx + 2032]
-// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0x51,0x7f]
-               {evex} vsm4rnds4 xmm2, xmm3, xmmword ptr [ecx + 2032]
+// CHECK:      vsm4rnds4 xmm22, xmm23, xmmword ptr [rcx + 2032]
+// CHECK: encoding: [0x62,0xe2,0x47,0x00,0xda,0x71,0x7f]
+               vsm4rnds4 xmm22, xmm23, xmmword ptr [rcx + 2032]
 
-// CHECK:      {evex} vsm4rnds4 xmm2, xmm3, xmmword ptr [edx - 2048]
-// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0x52,0x80]
-               {evex} vsm4rnds4 xmm2, xmm3, xmmword ptr [edx - 2048]
+// CHECK:      vsm4rnds4 xmm22, xmm23, xmmword ptr [rdx - 2048]
+// CHECK: encoding: [0x62,0xe2,0x47,0x00,0xda,0x72,0x80]
+               vsm4rnds4 xmm22, xmm23, xmmword ptr [rdx - 2048]

>From 39513cdd193f3dc48e7df635b50c70ea51f2adef Mon Sep 17 00:00:00 2001
From: Freddy Ye <freddy.ye at intel.com>
Date: Thu, 24 Oct 2024 15:37:09 +0800
Subject: [PATCH 3/4] address comments

---
 llvm/lib/Target/X86/X86InstrAVX10.td         | 2 +-
 llvm/test/CodeGen/X86/sm4-evex-intrinsics.ll | 8 ++++----
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Target/X86/X86InstrAVX10.td b/llvm/lib/Target/X86/X86InstrAVX10.td
index d6fab07ca2aaf1..d2873e5b1f7056 100644
--- a/llvm/lib/Target/X86/X86InstrAVX10.td
+++ b/llvm/lib/Target/X86/X86InstrAVX10.td
@@ -1651,7 +1651,7 @@ let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
 // SM4(EVEX)
 multiclass avx10_sm4_base<string OpStr> {
   // SM4_Base is in X86InstrSSE.td.
-  let Predicates = [HasSM4, HasAVX10_2] in {
+  let Predicates = [HasSM4, HasAVX10_2], AddedComplexity = 1 in {
     defm Z128 : SM4_Base<OpStr, VR128X, "128", loadv4i32, i128mem>, EVEX_V128;
     defm Z256 : SM4_Base<OpStr, VR256X, "256", loadv8i32, i256mem>, EVEX_V256;
   }
diff --git a/llvm/test/CodeGen/X86/sm4-evex-intrinsics.ll b/llvm/test/CodeGen/X86/sm4-evex-intrinsics.ll
index fc46d3cf23fd41..825a11d66cd452 100644
--- a/llvm/test/CodeGen/X86/sm4-evex-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/sm4-evex-intrinsics.ll
@@ -5,7 +5,7 @@
 define <4 x i32> @test_int_x86_vsm4key4128(<4 x i32> %A, <4 x i32> %B) {
 ; CHECK-LABEL: test_int_x86_vsm4key4128:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsm4key4 %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x7a,0xda,0xc1]
+; CHECK-NEXT:    vsm4key4 %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7a,0xda,0xc1]
 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
   %ret = call <4 x i32> @llvm.x86.vsm4key4128(<4 x i32> %A, <4 x i32> %B)
   ret <4 x i32> %ret
@@ -15,7 +15,7 @@ declare <4 x i32> @llvm.x86.vsm4key4128(<4 x i32> %A, <4 x i32> %B)
 define <8 x i32> @test_int_x86_vsm4key4256(<8 x i32> %A, <8 x i32> %B) {
 ; CHECK-LABEL: test_int_x86_vsm4key4256:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsm4key4 %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7e,0xda,0xc1]
+; CHECK-NEXT:    vsm4key4 %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7e,0xda,0xc1]
 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
   %ret = call <8 x i32> @llvm.x86.vsm4key4256(<8 x i32> %A, <8 x i32> %B)
   ret <8 x i32> %ret
@@ -35,7 +35,7 @@ declare <16 x i32> @llvm.x86.vsm4key4512(<16 x i32> %A, <16 x i32> %B)
 define <4 x i32> @test_int_x86_vsm4rnds4128(<4 x i32> %A, <4 x i32> %B) {
 ; CHECK-LABEL: test_int_x86_vsm4rnds4128:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsm4rnds4 %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x7b,0xda,0xc1]
+; CHECK-NEXT:    vsm4rnds4 %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7b,0xda,0xc1]
 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
   %ret = call <4 x i32> @llvm.x86.vsm4rnds4128(<4 x i32> %A, <4 x i32> %B)
   ret <4 x i32> %ret
@@ -45,7 +45,7 @@ declare <4 x i32> @llvm.x86.vsm4rnds4128(<4 x i32> %A, <4 x i32> %B)
 define <8 x i32> @test_int_x86_vsm4rnds4256(<8 x i32> %A, <8 x i32> %B) {
 ; CHECK-LABEL: test_int_x86_vsm4rnds4256:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsm4rnds4 %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7f,0xda,0xc1]
+; CHECK-NEXT:    vsm4rnds4 %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7f,0xda,0xc1]
 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
   %ret = call <8 x i32> @llvm.x86.vsm4rnds4256(<8 x i32> %A, <8 x i32> %B)
   ret <8 x i32> %ret

>From c0a33abe804bafe11e9dcecb092b924bcd08cdb7 Mon Sep 17 00:00:00 2001
From: Freddy Ye <freddy.ye at intel.com>
Date: Thu, 24 Oct 2024 16:55:11 +0800
Subject: [PATCH 4/4] address comments

---
 clang/lib/Headers/immintrin.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/clang/lib/Headers/immintrin.h b/clang/lib/Headers/immintrin.h
index 1b83dd2162707c..7d922267174279 100644
--- a/clang/lib/Headers/immintrin.h
+++ b/clang/lib/Headers/immintrin.h
@@ -665,9 +665,11 @@ _storebe_i64(void * __P, long long __D) {
 #include <avx10_2_512niintrin.h>
 #include <avx10_2_512satcvtdsintrin.h>
 #include <avx10_2_512satcvtintrin.h>
-#if (defined(__SM4__))
-#include <sm4evexintrin.h>
 #endif
+
+#if !defined(__SCE__) || __has_feature(modules) ||                             \
+    (defined(__AVX10_2_512__) && defined(__SM4__))
+#include <sm4evexintrin.h>
 #endif
 
 #if !defined(__SCE__) || __has_feature(modules) || defined(__ENQCMD__)



More information about the cfe-commits mailing list