[llvm] 5aa1275 - [X86] Support SM4 EVEX version intrinsics/instructions. (#113402)

via llvm-commits llvm-commits at lists.llvm.org
Sun Oct 27 19:46:21 PDT 2024


Author: Freddy Ye
Date: 2024-10-28T10:46:16+08:00
New Revision: 5aa1275d03b679f45f47f29f206292f663afed83

URL: https://github.com/llvm/llvm-project/commit/5aa1275d03b679f45f47f29f206292f663afed83
DIFF: https://github.com/llvm/llvm-project/commit/5aa1275d03b679f45f47f29f206292f663afed83.diff

LOG: [X86] Support SM4 EVEX version intrinsics/instructions. (#113402)

Ref.: https://cdrdv2.intel.com/v1/dl/getContent/671368

Added: 
    clang/lib/Headers/sm4evexintrin.h
    clang/test/CodeGen/X86/sm4-evex-builtins.c
    llvm/test/CodeGen/X86/sm4-evex-intrinsics.ll
    llvm/test/MC/Disassembler/X86/sm4-evex-32.txt
    llvm/test/MC/Disassembler/X86/sm4-evex-64.txt
    llvm/test/MC/X86/sm4-evex-32-att.s
    llvm/test/MC/X86/sm4-evex-32-intel.s
    llvm/test/MC/X86/sm4-evex-64-att.s
    llvm/test/MC/X86/sm4-evex-64-intel.s

Modified: 
    clang/docs/ReleaseNotes.rst
    clang/include/clang/Basic/BuiltinsX86.def
    clang/lib/Headers/CMakeLists.txt
    clang/lib/Headers/immintrin.h
    llvm/docs/ReleaseNotes.md
    llvm/include/llvm/IR/IntrinsicsX86.td
    llvm/lib/Target/X86/X86InstrAVX10.td
    llvm/test/TableGen/x86-fold-tables.inc

Removed: 
    


################################################################################
diff  --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 6a95337815174b..31ee4f7e516fed 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -628,6 +628,10 @@ X86 Support
   * Supported MINMAX intrinsics of ``*_(mask(z)))_minmax(ne)_p[s|d|h|bh]`` and
   ``*_(mask(z)))_minmax_s[s|d|h]``.
 
+- Supported intrinsics for ``SM4 and AVX10.2``.
+  * Supported SM4 intrinsics of ``_mm512_sm4key4_epi32`` and
+  ``_mm512_sm4rnds4_epi32``.
+
 - All intrinsics in adcintrin.h can now be used in constant expressions.
 
 - All intrinsics in adxintrin.h can now be used in constant expressions.

diff  --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def
index 4c6b22cca421ca..4486eb73a11fa6 100644
--- a/clang/include/clang/Basic/BuiltinsX86.def
+++ b/clang/include/clang/Basic/BuiltinsX86.def
@@ -2179,6 +2179,10 @@ TARGET_BUILTIN(__builtin_ia32_vsm4key4256, "V8UiV8UiV8Ui", "nV:256:", "sm4")
 TARGET_BUILTIN(__builtin_ia32_vsm4rnds4128, "V4UiV4UiV4Ui", "nV:128:", "sm4")
 TARGET_BUILTIN(__builtin_ia32_vsm4rnds4256, "V8UiV8UiV8Ui", "nV:256:", "sm4")
 
+// SM4_EVEX
+TARGET_BUILTIN(__builtin_ia32_vsm4key4512, "V16UiV16UiV16Ui", "nV:512:", "avx10.2-512,sm4")
+TARGET_BUILTIN(__builtin_ia32_vsm4rnds4512, "V16UiV16UiV16Ui", "nV:512:", "avx10.2-512,sm4")
+
 // AVX10 MINMAX
 TARGET_BUILTIN(__builtin_ia32_vminmaxnepbf16128, "V8yV8yV8yIi", "nV:128:", "avx10.2-256")
 TARGET_BUILTIN(__builtin_ia32_vminmaxnepbf16256, "V16yV16yV16yIi", "nV:256:", "avx10.2-256")

diff  --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt
index e97953d87a2ff9..0211d1870b30a0 100644
--- a/clang/lib/Headers/CMakeLists.txt
+++ b/clang/lib/Headers/CMakeLists.txt
@@ -243,6 +243,7 @@ set(x86_files
   shaintrin.h
   sm3intrin.h
   sm4intrin.h
+  sm4evexintrin.h
   smmintrin.h
   tbmintrin.h
   tmmintrin.h

diff  --git a/clang/lib/Headers/immintrin.h b/clang/lib/Headers/immintrin.h
index 5f296d0a3324d0..65ad72bc479f49 100644
--- a/clang/lib/Headers/immintrin.h
+++ b/clang/lib/Headers/immintrin.h
@@ -677,6 +677,11 @@ _storebe_i64(void * __P, long long __D) {
 #include <avx10_2_512satcvtintrin.h>
 #endif
 
+#if !defined(__SCE__) || __has_feature(modules) ||                             \
+    (defined(__AVX10_2_512__) && defined(__SM4__))
+#include <sm4evexintrin.h>
+#endif
+
 #if !defined(__SCE__) || __has_feature(modules) || defined(__ENQCMD__)
 #include <enqcmdintrin.h>
 #endif

diff  --git a/clang/lib/Headers/sm4evexintrin.h b/clang/lib/Headers/sm4evexintrin.h
new file mode 100644
index 00000000000000..f6ae0037baea03
--- /dev/null
+++ b/clang/lib/Headers/sm4evexintrin.h
@@ -0,0 +1,32 @@
+/*===--------------- sm4evexintrin.h - SM4 EVEX intrinsics -----------------===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===----------------------------------------------------------------------===
+ */
+#ifndef __IMMINTRIN_H
+#error "Never use <sm4evexintrin.h> directly; include <immintrin.h> instead."
+#endif // __IMMINTRIN_H
+
+#ifndef __SM4EVEXINTRIN_H
+#define __SM4EVEXINTRIN_H
+
+#define __DEFAULT_FN_ATTRS512                                                  \
+  __attribute__((__always_inline__, __nodebug__,                               \
+                 __target__("sm4,avx10.2-512"), __min_vector_width__(512)))
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_sm4key4_epi32(__m512i __A, __m512i __B) {
+  return (__m512i)__builtin_ia32_vsm4key4512((__v16su)__A, (__v16su)__B);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_sm4rnds4_epi32(__m512i __A, __m512i __B) {
+  return (__m512i)__builtin_ia32_vsm4rnds4512((__v16su)__A, (__v16su)__B);
+}
+
+#undef __DEFAULT_FN_ATTRS512
+
+#endif // __SM4EVEXINTRIN_H

diff  --git a/clang/test/CodeGen/X86/sm4-evex-builtins.c b/clang/test/CodeGen/X86/sm4-evex-builtins.c
new file mode 100644
index 00000000000000..0e54bd008d4fb0
--- /dev/null
+++ b/clang/test/CodeGen/X86/sm4-evex-builtins.c
@@ -0,0 +1,19 @@
+// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-- -target-feature +sm4 \
+// RUN: -target-feature +avx10.2-512 -emit-llvm -o - -Wall -Werror | FileCheck %s
+// RUN: %clang_cc1 %s -ffreestanding -triple=i386-- -target-feature +sm4 \
+// RUN: -target-feature +avx10.2-512 -emit-llvm -o - -Wall -Werror | FileCheck %s
+
+#include <immintrin.h>
+#include <stddef.h>
+
+__m512i test_mm512_sm4key4_epi32(__m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_sm4key4_epi32(
+  // CHECK: call <16 x i32> @llvm.x86.vsm4key4512(<16 x i32> %{{.*}}, <16 x i32> %{{.*}})
+  return _mm512_sm4key4_epi32(__A, __B);
+}
+
+__m512i test_mm512_sm4rnds4_epi32(__m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_sm4rnds4_epi32(
+  // CHECK: call <16 x i32> @llvm.x86.vsm4rnds4512(<16 x i32> %{{.*}}, <16 x i32> %{{.*}})
+  return _mm512_sm4rnds4_epi32(__A, __B);
+}

diff  --git a/llvm/docs/ReleaseNotes.md b/llvm/docs/ReleaseNotes.md
index 3cac3e57344dc3..7c7e687e94749e 100644
--- a/llvm/docs/ReleaseNotes.md
+++ b/llvm/docs/ReleaseNotes.md
@@ -219,6 +219,8 @@ Changes to the X86 Backend
 
 * Supported instructions of `MOVRS AND AVX10.2`
 
+* Supported ISA of `SM4(EVEX)`.
+
 Changes to the OCaml bindings
 -----------------------------
 

diff  --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td
index d0083017fb9383..0ecca157077fdc 100644
--- a/llvm/include/llvm/IR/IntrinsicsX86.td
+++ b/llvm/include/llvm/IR/IntrinsicsX86.td
@@ -6099,6 +6099,11 @@ let TargetPrefix = "x86" in {
         DefaultAttrsIntrinsic<[llvm_v8i32_ty],
         [llvm_v8i32_ty, llvm_v8i32_ty],
         [IntrNoMem]>;
+  def int_x86_vsm4key4512
+      : ClangBuiltin<"__builtin_ia32_vsm4key4512">,
+        DefaultAttrsIntrinsic<[llvm_v16i32_ty],
+        [llvm_v16i32_ty, llvm_v16i32_ty],
+        [IntrNoMem]>;
   def int_x86_vsm4rnds4128
       : ClangBuiltin<"__builtin_ia32_vsm4rnds4128">,
         DefaultAttrsIntrinsic<[llvm_v4i32_ty],
@@ -6109,6 +6114,11 @@ let TargetPrefix = "x86" in {
         DefaultAttrsIntrinsic<[llvm_v8i32_ty],
         [llvm_v8i32_ty, llvm_v8i32_ty],
         [IntrNoMem]>;
+  def int_x86_vsm4rnds4512
+      : ClangBuiltin<"__builtin_ia32_vsm4rnds4512">,
+        DefaultAttrsIntrinsic<[llvm_v16i32_ty],
+        [llvm_v16i32_ty, llvm_v16i32_ty],
+        [IntrNoMem]>;
 }
 //===----------------------------------------------------------------------===//
 // RAO-INT intrinsics

diff  --git a/llvm/lib/Target/X86/X86InstrAVX10.td b/llvm/lib/Target/X86/X86InstrAVX10.td
index 9ef2debb57fa00..4d64eb776e09ce 100644
--- a/llvm/lib/Target/X86/X86InstrAVX10.td
+++ b/llvm/lib/Target/X86/X86InstrAVX10.td
@@ -1675,3 +1675,17 @@ defm VMOVRSD : vmovrs_p_vl<0x6f, "vmovrsd", avx512vl_i32_info>,
                           T_MAP5, XS, EVEX_CD8<32, CD8VF>, Sched<[WriteVecLoad]>;
 defm VMOVRSQ : vmovrs_p_vl<0x6f, "vmovrsq", avx512vl_i64_info>,
                           T_MAP5, XS, REX_W, EVEX_CD8<64, CD8VF>, Sched<[WriteVecLoad]>;
+
+// SM4 (EVEX): VSM4KEY4/VSM4RNDS4 at 128-, 256- and 512-bit vector widths.
+multiclass avx10_sm4_base<string OpStr> {
+  // Instantiates SM4_Base (defined in X86InstrSSE.td) once per vector width.
+  let Predicates = [HasSM4, HasAVX10_2], AddedComplexity = 1 in {
+    defm Z128 : SM4_Base<OpStr, VR128X, "128", loadv4i32, i128mem>, EVEX_V128;
+    defm Z256 : SM4_Base<OpStr, VR256X, "256", loadv8i32, i256mem>, EVEX_V256;
+  }
+  let Predicates = [HasSM4, HasAVX10_2_512] in
+    defm Z : SM4_Base<OpStr, VR512, "512", loadv16i32, i512mem>, EVEX_V512;
+}
+
+defm VSM4KEY4 : avx10_sm4_base<"vsm4key4">, T8, XS, EVEX, VVVV;
+defm VSM4RNDS4 : avx10_sm4_base<"vsm4rnds4">, T8, XD, EVEX, VVVV;

diff  --git a/llvm/test/CodeGen/X86/sm4-evex-intrinsics.ll b/llvm/test/CodeGen/X86/sm4-evex-intrinsics.ll
new file mode 100644
index 00000000000000..825a11d66cd452
--- /dev/null
+++ b/llvm/test/CodeGen/X86/sm4-evex-intrinsics.ll
@@ -0,0 +1,64 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-- --show-mc-encoding -mattr=+sm4,+avx10.2-512 | FileCheck %s
+; RUN: llc < %s -verify-machineinstrs -mtriple=i686-- --show-mc-encoding -mattr=+sm4,+avx10.2-512 | FileCheck %s
+
+define <4 x i32> @test_int_x86_vsm4key4128(<4 x i32> %A, <4 x i32> %B) {
+; CHECK-LABEL: test_int_x86_vsm4key4128:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsm4key4 %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7a,0xda,0xc1]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <4 x i32> @llvm.x86.vsm4key4128(<4 x i32> %A, <4 x i32> %B)
+  ret <4 x i32> %ret
+}
+declare <4 x i32> @llvm.x86.vsm4key4128(<4 x i32> %A, <4 x i32> %B)
+
+define <8 x i32> @test_int_x86_vsm4key4256(<8 x i32> %A, <8 x i32> %B) {
+; CHECK-LABEL: test_int_x86_vsm4key4256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsm4key4 %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7e,0xda,0xc1]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <8 x i32> @llvm.x86.vsm4key4256(<8 x i32> %A, <8 x i32> %B)
+  ret <8 x i32> %ret
+}
+declare <8 x i32> @llvm.x86.vsm4key4256(<8 x i32> %A, <8 x i32> %B)
+
+define <16 x i32> @test_int_x86_vsm4key4512(<16 x i32> %A, <16 x i32> %B) {
+; CHECK-LABEL: test_int_x86_vsm4key4512:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsm4key4 %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7e,0x48,0xda,0xc1]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <16 x i32> @llvm.x86.vsm4key4512(<16 x i32> %A, <16 x i32> %B)
+  ret <16 x i32> %ret
+}
+declare <16 x i32> @llvm.x86.vsm4key4512(<16 x i32> %A, <16 x i32> %B)
+
+define <4 x i32> @test_int_x86_vsm4rnds4128(<4 x i32> %A, <4 x i32> %B) {
+; CHECK-LABEL: test_int_x86_vsm4rnds4128:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsm4rnds4 %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7b,0xda,0xc1]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <4 x i32> @llvm.x86.vsm4rnds4128(<4 x i32> %A, <4 x i32> %B)
+  ret <4 x i32> %ret
+}
+declare <4 x i32> @llvm.x86.vsm4rnds4128(<4 x i32> %A, <4 x i32> %B)
+
+define <8 x i32> @test_int_x86_vsm4rnds4256(<8 x i32> %A, <8 x i32> %B) {
+; CHECK-LABEL: test_int_x86_vsm4rnds4256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsm4rnds4 %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7f,0xda,0xc1]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <8 x i32> @llvm.x86.vsm4rnds4256(<8 x i32> %A, <8 x i32> %B)
+  ret <8 x i32> %ret
+}
+declare <8 x i32> @llvm.x86.vsm4rnds4256(<8 x i32> %A, <8 x i32> %B)
+
+define <16 x i32> @test_int_x86_vsm4rnds4512(<16 x i32> %A, <16 x i32> %B) {
+; CHECK-LABEL: test_int_x86_vsm4rnds4512:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsm4rnds4 %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7f,0x48,0xda,0xc1]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <16 x i32> @llvm.x86.vsm4rnds4512(<16 x i32> %A, <16 x i32> %B)
+  ret <16 x i32> %ret
+}
+declare <16 x i32> @llvm.x86.vsm4rnds4512(<16 x i32> %A, <16 x i32> %B)
+

diff  --git a/llvm/test/MC/Disassembler/X86/sm4-evex-32.txt b/llvm/test/MC/Disassembler/X86/sm4-evex-32.txt
new file mode 100644
index 00000000000000..c1cb271a967b13
--- /dev/null
+++ b/llvm/test/MC/Disassembler/X86/sm4-evex-32.txt
@@ -0,0 +1,170 @@
+# RUN: llvm-mc --disassemble %s -triple=i386-unknown-unknown | FileCheck %s --check-prefixes=ATT
+# RUN: llvm-mc --disassemble %s -triple=i386-unknown-unknown -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL
+
+# ATT:        vsm4key4 %zmm4, %zmm3, %zmm2
+# INTEL:      vsm4key4 zmm2, zmm3, zmm4
+0x62,0xf2,0x66,0x48,0xda,0xd4
+
+# ATT:        vsm4key4  268435456(%esp,%esi,8), %zmm3, %zmm2
+# INTEL:      vsm4key4 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf2,0x66,0x48,0xda,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT:        vsm4key4  291(%edi,%eax,4), %zmm3, %zmm2
+# INTEL:      vsm4key4 zmm2, zmm3, zmmword ptr [edi + 4*eax + 291]
+0x62,0xf2,0x66,0x48,0xda,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT:        vsm4key4  (%eax), %zmm3, %zmm2
+# INTEL:      vsm4key4 zmm2, zmm3, zmmword ptr [eax]
+0x62,0xf2,0x66,0x48,0xda,0x10
+
+# ATT:        vsm4key4  -2048(,%ebp,2), %zmm3, %zmm2
+# INTEL:      vsm4key4 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+0x62,0xf2,0x66,0x48,0xda,0x14,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT:        vsm4key4  8128(%ecx), %zmm3, %zmm2
+# INTEL:      vsm4key4 zmm2, zmm3, zmmword ptr [ecx + 8128]
+0x62,0xf2,0x66,0x48,0xda,0x51,0x7f
+
+# ATT:        vsm4key4  -8192(%edx), %zmm3, %zmm2
+# INTEL:      vsm4key4 zmm2, zmm3, zmmword ptr [edx - 8192]
+0x62,0xf2,0x66,0x48,0xda,0x52,0x80
+
+# ATT:        vsm4rnds4 %zmm4, %zmm3, %zmm2
+# INTEL:      vsm4rnds4 zmm2, zmm3, zmm4
+0x62,0xf2,0x67,0x48,0xda,0xd4
+
+# ATT:        vsm4rnds4  268435456(%esp,%esi,8), %zmm3, %zmm2
+# INTEL:      vsm4rnds4 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf2,0x67,0x48,0xda,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT:        vsm4rnds4  291(%edi,%eax,4), %zmm3, %zmm2
+# INTEL:      vsm4rnds4 zmm2, zmm3, zmmword ptr [edi + 4*eax + 291]
+0x62,0xf2,0x67,0x48,0xda,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT:        vsm4rnds4  (%eax), %zmm3, %zmm2
+# INTEL:      vsm4rnds4 zmm2, zmm3, zmmword ptr [eax]
+0x62,0xf2,0x67,0x48,0xda,0x10
+
+# ATT:        vsm4rnds4  -2048(,%ebp,2), %zmm3, %zmm2
+# INTEL:      vsm4rnds4 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+0x62,0xf2,0x67,0x48,0xda,0x14,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT:        vsm4rnds4  8128(%ecx), %zmm3, %zmm2
+# INTEL:      vsm4rnds4 zmm2, zmm3, zmmword ptr [ecx + 8128]
+0x62,0xf2,0x67,0x48,0xda,0x51,0x7f
+
+# ATT:        vsm4rnds4  -8192(%edx), %zmm3, %zmm2
+# INTEL:      vsm4rnds4 zmm2, zmm3, zmmword ptr [edx - 8192]
+0x62,0xf2,0x67,0x48,0xda,0x52,0x80
+
+# ATT:        vsm4key4 %ymm4, %ymm3, %ymm2
+# INTEL:      vsm4key4 ymm2, ymm3, ymm4
+0x62,0xf2,0x66,0x28,0xda,0xd4
+
+# ATT:        vsm4key4 %xmm4, %xmm3, %xmm2
+# INTEL:      vsm4key4 xmm2, xmm3, xmm4
+0x62,0xf2,0x66,0x08,0xda,0xd4
+
+# ATT:        vsm4key4  268435456(%esp,%esi,8), %ymm3, %ymm2
+# INTEL:      vsm4key4 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+0x62,0xf2,0x66,0x28,0xda,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT:        vsm4key4  291(%edi,%eax,4), %ymm3, %ymm2
+# INTEL:      vsm4key4 ymm2, ymm3, ymmword ptr [edi + 4*eax + 291]
+0x62,0xf2,0x66,0x28,0xda,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT:        vsm4key4  (%eax), %ymm3, %ymm2
+# INTEL:      vsm4key4 ymm2, ymm3, ymmword ptr [eax]
+0x62,0xf2,0x66,0x28,0xda,0x10
+
+# ATT:        vsm4key4  -1024(,%ebp,2), %ymm3, %ymm2
+# INTEL:      vsm4key4 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+0x62,0xf2,0x66,0x28,0xda,0x14,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT:        vsm4key4  4064(%ecx), %ymm3, %ymm2
+# INTEL:      vsm4key4 ymm2, ymm3, ymmword ptr [ecx + 4064]
+0x62,0xf2,0x66,0x28,0xda,0x51,0x7f
+
+# ATT:        vsm4key4  -4096(%edx), %ymm3, %ymm2
+# INTEL:      vsm4key4 ymm2, ymm3, ymmword ptr [edx - 4096]
+0x62,0xf2,0x66,0x28,0xda,0x52,0x80
+
+# ATT:        vsm4key4  268435456(%esp,%esi,8), %xmm3, %xmm2
+# INTEL:      vsm4key4 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf2,0x66,0x08,0xda,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT:        vsm4key4  291(%edi,%eax,4), %xmm3, %xmm2
+# INTEL:      vsm4key4 xmm2, xmm3, xmmword ptr [edi + 4*eax + 291]
+0x62,0xf2,0x66,0x08,0xda,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT:        vsm4key4  (%eax), %xmm3, %xmm2
+# INTEL:      vsm4key4 xmm2, xmm3, xmmword ptr [eax]
+0x62,0xf2,0x66,0x08,0xda,0x10
+
+# ATT:        vsm4key4  -512(,%ebp,2), %xmm3, %xmm2
+# INTEL:      vsm4key4 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+0x62,0xf2,0x66,0x08,0xda,0x14,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT:        vsm4key4  2032(%ecx), %xmm3, %xmm2
+# INTEL:      vsm4key4 xmm2, xmm3, xmmword ptr [ecx + 2032]
+0x62,0xf2,0x66,0x08,0xda,0x51,0x7f
+
+# ATT:        vsm4key4  -2048(%edx), %xmm3, %xmm2
+# INTEL:      vsm4key4 xmm2, xmm3, xmmword ptr [edx - 2048]
+0x62,0xf2,0x66,0x08,0xda,0x52,0x80
+
+# ATT:        vsm4rnds4 %ymm4, %ymm3, %ymm2
+# INTEL:      vsm4rnds4 ymm2, ymm3, ymm4
+0x62,0xf2,0x67,0x28,0xda,0xd4
+
+# ATT:        vsm4rnds4 %xmm4, %xmm3, %xmm2
+# INTEL:      vsm4rnds4 xmm2, xmm3, xmm4
+0x62,0xf2,0x67,0x08,0xda,0xd4
+
+# ATT:        vsm4rnds4  268435456(%esp,%esi,8), %ymm3, %ymm2
+# INTEL:      vsm4rnds4 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+0x62,0xf2,0x67,0x28,0xda,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT:        vsm4rnds4  291(%edi,%eax,4), %ymm3, %ymm2
+# INTEL:      vsm4rnds4 ymm2, ymm3, ymmword ptr [edi + 4*eax + 291]
+0x62,0xf2,0x67,0x28,0xda,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT:        vsm4rnds4  (%eax), %ymm3, %ymm2
+# INTEL:      vsm4rnds4 ymm2, ymm3, ymmword ptr [eax]
+0x62,0xf2,0x67,0x28,0xda,0x10
+
+# ATT:        vsm4rnds4  -1024(,%ebp,2), %ymm3, %ymm2
+# INTEL:      vsm4rnds4 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+0x62,0xf2,0x67,0x28,0xda,0x14,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT:        vsm4rnds4  4064(%ecx), %ymm3, %ymm2
+# INTEL:      vsm4rnds4 ymm2, ymm3, ymmword ptr [ecx + 4064]
+0x62,0xf2,0x67,0x28,0xda,0x51,0x7f
+
+# ATT:        vsm4rnds4  -4096(%edx), %ymm3, %ymm2
+# INTEL:      vsm4rnds4 ymm2, ymm3, ymmword ptr [edx - 4096]
+0x62,0xf2,0x67,0x28,0xda,0x52,0x80
+
+# ATT:        vsm4rnds4  268435456(%esp,%esi,8), %xmm3, %xmm2
+# INTEL:      vsm4rnds4 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf2,0x67,0x08,0xda,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT:        vsm4rnds4  291(%edi,%eax,4), %xmm3, %xmm2
+# INTEL:      vsm4rnds4 xmm2, xmm3, xmmword ptr [edi + 4*eax + 291]
+0x62,0xf2,0x67,0x08,0xda,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT:        vsm4rnds4  (%eax), %xmm3, %xmm2
+# INTEL:      vsm4rnds4 xmm2, xmm3, xmmword ptr [eax]
+0x62,0xf2,0x67,0x08,0xda,0x10
+
+# ATT:        vsm4rnds4  -512(,%ebp,2), %xmm3, %xmm2
+# INTEL:      vsm4rnds4 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+0x62,0xf2,0x67,0x08,0xda,0x14,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT:        vsm4rnds4  2032(%ecx), %xmm3, %xmm2
+# INTEL:      vsm4rnds4 xmm2, xmm3, xmmword ptr [ecx + 2032]
+0x62,0xf2,0x67,0x08,0xda,0x51,0x7f
+
+# ATT:        vsm4rnds4  -2048(%edx), %xmm3, %xmm2
+# INTEL:      vsm4rnds4 xmm2, xmm3, xmmword ptr [edx - 2048]
+0x62,0xf2,0x67,0x08,0xda,0x52,0x80

diff  --git a/llvm/test/MC/Disassembler/X86/sm4-evex-64.txt b/llvm/test/MC/Disassembler/X86/sm4-evex-64.txt
new file mode 100644
index 00000000000000..f89f4b5a8c0fb8
--- /dev/null
+++ b/llvm/test/MC/Disassembler/X86/sm4-evex-64.txt
@@ -0,0 +1,170 @@
+# RUN: llvm-mc --disassemble %s -triple=x86_64 | FileCheck %s --check-prefixes=ATT
+# RUN: llvm-mc --disassemble %s -triple=x86_64 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL
+
+# ATT:        vsm4key4 %zmm24, %zmm23, %zmm22
+# INTEL:      vsm4key4 zmm22, zmm23, zmm24
+0x62,0x82,0x46,0x40,0xda,0xf0
+
+# ATT:        vsm4key4  268435456(%rbp,%r14,8), %zmm23, %zmm22
+# INTEL:      vsm4key4 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa2,0x46,0x40,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT:        vsm4key4  291(%r8,%rax,4), %zmm23, %zmm22
+# INTEL:      vsm4key4 zmm22, zmm23, zmmword ptr [r8 + 4*rax + 291]
+0x62,0xc2,0x46,0x40,0xda,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT:        vsm4key4  (%rip), %zmm23, %zmm22
+# INTEL:      vsm4key4 zmm22, zmm23, zmmword ptr [rip]
+0x62,0xe2,0x46,0x40,0xda,0x35,0x00,0x00,0x00,0x00
+
+# ATT:        vsm4key4  -2048(,%rbp,2), %zmm23, %zmm22
+# INTEL:      vsm4key4 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+0x62,0xe2,0x46,0x40,0xda,0x34,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT:        vsm4key4  8128(%rcx), %zmm23, %zmm22
+# INTEL:      vsm4key4 zmm22, zmm23, zmmword ptr [rcx + 8128]
+0x62,0xe2,0x46,0x40,0xda,0x71,0x7f
+
+# ATT:        vsm4key4  -8192(%rdx), %zmm23, %zmm22
+# INTEL:      vsm4key4 zmm22, zmm23, zmmword ptr [rdx - 8192]
+0x62,0xe2,0x46,0x40,0xda,0x72,0x80
+
+# ATT:        vsm4rnds4 %zmm24, %zmm23, %zmm22
+# INTEL:      vsm4rnds4 zmm22, zmm23, zmm24
+0x62,0x82,0x47,0x40,0xda,0xf0
+
+# ATT:        vsm4rnds4  268435456(%rbp,%r14,8), %zmm23, %zmm22
+# INTEL:      vsm4rnds4 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa2,0x47,0x40,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT:        vsm4rnds4  291(%r8,%rax,4), %zmm23, %zmm22
+# INTEL:      vsm4rnds4 zmm22, zmm23, zmmword ptr [r8 + 4*rax + 291]
+0x62,0xc2,0x47,0x40,0xda,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT:        vsm4rnds4  (%rip), %zmm23, %zmm22
+# INTEL:      vsm4rnds4 zmm22, zmm23, zmmword ptr [rip]
+0x62,0xe2,0x47,0x40,0xda,0x35,0x00,0x00,0x00,0x00
+
+# ATT:        vsm4rnds4  -2048(,%rbp,2), %zmm23, %zmm22
+# INTEL:      vsm4rnds4 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+0x62,0xe2,0x47,0x40,0xda,0x34,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT:        vsm4rnds4  8128(%rcx), %zmm23, %zmm22
+# INTEL:      vsm4rnds4 zmm22, zmm23, zmmword ptr [rcx + 8128]
+0x62,0xe2,0x47,0x40,0xda,0x71,0x7f
+
+# ATT:        vsm4rnds4  -8192(%rdx), %zmm23, %zmm22
+# INTEL:      vsm4rnds4 zmm22, zmm23, zmmword ptr [rdx - 8192]
+0x62,0xe2,0x47,0x40,0xda,0x72,0x80
+
+# ATT:        vsm4key4 %ymm24, %ymm23, %ymm22
+# INTEL:      vsm4key4 ymm22, ymm23, ymm24
+0x62,0x82,0x46,0x20,0xda,0xf0
+
+# ATT:        vsm4key4 %xmm24, %xmm23, %xmm22
+# INTEL:      vsm4key4 xmm22, xmm23, xmm24
+0x62,0x82,0x46,0x00,0xda,0xf0
+
+# ATT:        vsm4key4  268435456(%rbp,%r14,8), %ymm23, %ymm22
+# INTEL:      vsm4key4 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa2,0x46,0x20,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT:        vsm4key4  291(%r8,%rax,4), %ymm23, %ymm22
+# INTEL:      vsm4key4 ymm22, ymm23, ymmword ptr [r8 + 4*rax + 291]
+0x62,0xc2,0x46,0x20,0xda,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT:        vsm4key4  (%rip), %ymm23, %ymm22
+# INTEL:      vsm4key4 ymm22, ymm23, ymmword ptr [rip]
+0x62,0xe2,0x46,0x20,0xda,0x35,0x00,0x00,0x00,0x00
+
+# ATT:        vsm4key4  -1024(,%rbp,2), %ymm23, %ymm22
+# INTEL:      vsm4key4 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+0x62,0xe2,0x46,0x20,0xda,0x34,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT:        vsm4key4  4064(%rcx), %ymm23, %ymm22
+# INTEL:      vsm4key4 ymm22, ymm23, ymmword ptr [rcx + 4064]
+0x62,0xe2,0x46,0x20,0xda,0x71,0x7f
+
+# ATT:        vsm4key4  -4096(%rdx), %ymm23, %ymm22
+# INTEL:      vsm4key4 ymm22, ymm23, ymmword ptr [rdx - 4096]
+0x62,0xe2,0x46,0x20,0xda,0x72,0x80
+
+# ATT:        vsm4key4  268435456(%rbp,%r14,8), %xmm23, %xmm22
+# INTEL:      vsm4key4 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa2,0x46,0x00,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT:        vsm4key4  291(%r8,%rax,4), %xmm23, %xmm22
+# INTEL:      vsm4key4 xmm22, xmm23, xmmword ptr [r8 + 4*rax + 291]
+0x62,0xc2,0x46,0x00,0xda,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT:        vsm4key4  (%rip), %xmm23, %xmm22
+# INTEL:      vsm4key4 xmm22, xmm23, xmmword ptr [rip]
+0x62,0xe2,0x46,0x00,0xda,0x35,0x00,0x00,0x00,0x00
+
+# ATT:        vsm4key4  -512(,%rbp,2), %xmm23, %xmm22
+# INTEL:      vsm4key4 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+0x62,0xe2,0x46,0x00,0xda,0x34,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT:        vsm4key4  2032(%rcx), %xmm23, %xmm22
+# INTEL:      vsm4key4 xmm22, xmm23, xmmword ptr [rcx + 2032]
+0x62,0xe2,0x46,0x00,0xda,0x71,0x7f
+
+# ATT:        vsm4key4  -2048(%rdx), %xmm23, %xmm22
+# INTEL:      vsm4key4 xmm22, xmm23, xmmword ptr [rdx - 2048]
+0x62,0xe2,0x46,0x00,0xda,0x72,0x80
+
+# ATT:        vsm4rnds4 %ymm24, %ymm23, %ymm22
+# INTEL:      vsm4rnds4 ymm22, ymm23, ymm24
+0x62,0x82,0x47,0x20,0xda,0xf0
+
+# ATT:        vsm4rnds4 %xmm24, %xmm23, %xmm22
+# INTEL:      vsm4rnds4 xmm22, xmm23, xmm24
+0x62,0x82,0x47,0x00,0xda,0xf0
+
+# ATT:        vsm4rnds4  268435456(%rbp,%r14,8), %ymm23, %ymm22
+# INTEL:      vsm4rnds4 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa2,0x47,0x20,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT:        vsm4rnds4  291(%r8,%rax,4), %ymm23, %ymm22
+# INTEL:      vsm4rnds4 ymm22, ymm23, ymmword ptr [r8 + 4*rax + 291]
+0x62,0xc2,0x47,0x20,0xda,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT:        vsm4rnds4  (%rip), %ymm23, %ymm22
+# INTEL:      vsm4rnds4 ymm22, ymm23, ymmword ptr [rip]
+0x62,0xe2,0x47,0x20,0xda,0x35,0x00,0x00,0x00,0x00
+
+# ATT:        vsm4rnds4  -1024(,%rbp,2), %ymm23, %ymm22
+# INTEL:      vsm4rnds4 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+0x62,0xe2,0x47,0x20,0xda,0x34,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT:        vsm4rnds4  4064(%rcx), %ymm23, %ymm22
+# INTEL:      vsm4rnds4 ymm22, ymm23, ymmword ptr [rcx + 4064]
+0x62,0xe2,0x47,0x20,0xda,0x71,0x7f
+
+# ATT:        vsm4rnds4  -4096(%rdx), %ymm23, %ymm22
+# INTEL:      vsm4rnds4 ymm22, ymm23, ymmword ptr [rdx - 4096]
+0x62,0xe2,0x47,0x20,0xda,0x72,0x80
+
+# ATT:        vsm4rnds4  268435456(%rbp,%r14,8), %xmm23, %xmm22
+# INTEL:      vsm4rnds4 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa2,0x47,0x00,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT:        vsm4rnds4  291(%r8,%rax,4), %xmm23, %xmm22
+# INTEL:      vsm4rnds4 xmm22, xmm23, xmmword ptr [r8 + 4*rax + 291]
+0x62,0xc2,0x47,0x00,0xda,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT:        vsm4rnds4  (%rip), %xmm23, %xmm22
+# INTEL:      vsm4rnds4 xmm22, xmm23, xmmword ptr [rip]
+0x62,0xe2,0x47,0x00,0xda,0x35,0x00,0x00,0x00,0x00
+
+# ATT:        vsm4rnds4  -512(,%rbp,2), %xmm23, %xmm22
+# INTEL:      vsm4rnds4 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+0x62,0xe2,0x47,0x00,0xda,0x34,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT:        vsm4rnds4  2032(%rcx), %xmm23, %xmm22
+# INTEL:      vsm4rnds4 xmm22, xmm23, xmmword ptr [rcx + 2032]
+0x62,0xe2,0x47,0x00,0xda,0x71,0x7f
+
+# ATT:        vsm4rnds4  -2048(%rdx), %xmm23, %xmm22
+# INTEL:      vsm4rnds4 xmm22, xmm23, xmmword ptr [rdx - 2048]
+0x62,0xe2,0x47,0x00,0xda,0x72,0x80

diff  --git a/llvm/test/MC/X86/sm4-evex-32-att.s b/llvm/test/MC/X86/sm4-evex-32-att.s
new file mode 100644
index 00000000000000..de10d95ac74d7b
--- /dev/null
+++ b/llvm/test/MC/X86/sm4-evex-32-att.s
@@ -0,0 +1,169 @@
+// RUN: llvm-mc -triple i686-unknown-unknown --show-encoding %s | FileCheck %s
+
+// CHECK:      vsm4key4 %zmm4, %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0xd4]
+               vsm4key4 %zmm4, %zmm3, %zmm2
+
+// CHECK:      vsm4key4  268435456(%esp,%esi,8), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0x94,0xf4,0x00,0x00,0x00,0x10]
+               vsm4key4  268435456(%esp,%esi,8), %zmm3, %zmm2
+
+// CHECK:      vsm4key4  291(%edi,%eax,4), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0x94,0x87,0x23,0x01,0x00,0x00]
+               vsm4key4  291(%edi,%eax,4), %zmm3, %zmm2
+
+// CHECK:      vsm4key4  (%eax), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0x10]
+               vsm4key4  (%eax), %zmm3, %zmm2
+
+// CHECK:      vsm4key4  -2048(,%ebp,2), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0x14,0x6d,0x00,0xf8,0xff,0xff]
+               vsm4key4  -2048(,%ebp,2), %zmm3, %zmm2
+
+// CHECK:      vsm4key4  8128(%ecx), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0x51,0x7f]
+               vsm4key4  8128(%ecx), %zmm3, %zmm2
+
+// CHECK:      vsm4key4  -8192(%edx), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0x52,0x80]
+               vsm4key4  -8192(%edx), %zmm3, %zmm2
+
+// CHECK:      vsm4rnds4 %zmm4, %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0xd4]
+               vsm4rnds4 %zmm4, %zmm3, %zmm2
+
+// CHECK:      vsm4rnds4  268435456(%esp,%esi,8), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0x94,0xf4,0x00,0x00,0x00,0x10]
+               vsm4rnds4  268435456(%esp,%esi,8), %zmm3, %zmm2
+
+// CHECK:      vsm4rnds4  291(%edi,%eax,4), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0x94,0x87,0x23,0x01,0x00,0x00]
+               vsm4rnds4  291(%edi,%eax,4), %zmm3, %zmm2
+
+// CHECK:      vsm4rnds4  (%eax), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0x10]
+               vsm4rnds4  (%eax), %zmm3, %zmm2
+
+// CHECK:      vsm4rnds4  -2048(,%ebp,2), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0x14,0x6d,0x00,0xf8,0xff,0xff]
+               vsm4rnds4  -2048(,%ebp,2), %zmm3, %zmm2
+
+// CHECK:      vsm4rnds4  8128(%ecx), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0x51,0x7f]
+               vsm4rnds4  8128(%ecx), %zmm3, %zmm2
+
+// CHECK:      vsm4rnds4  -8192(%edx), %zmm3, %zmm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0x52,0x80]
+               vsm4rnds4  -8192(%edx), %zmm3, %zmm2
+
+// CHECK:      {evex} vsm4key4 %ymm4, %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0xd4]
+               {evex} vsm4key4 %ymm4, %ymm3, %ymm2
+
+// CHECK:      {evex} vsm4key4 %xmm4, %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0xd4]
+               {evex} vsm4key4 %xmm4, %xmm3, %xmm2
+
+// CHECK:      {evex} vsm4key4  268435456(%esp,%esi,8), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0x94,0xf4,0x00,0x00,0x00,0x10]
+               {evex} vsm4key4  268435456(%esp,%esi,8), %ymm3, %ymm2
+
+// CHECK:      {evex} vsm4key4  291(%edi,%eax,4), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0x94,0x87,0x23,0x01,0x00,0x00]
+               {evex} vsm4key4  291(%edi,%eax,4), %ymm3, %ymm2
+
+// CHECK:      {evex} vsm4key4  (%eax), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0x10]
+               {evex} vsm4key4  (%eax), %ymm3, %ymm2
+
+// CHECK:      {evex} vsm4key4  -1024(,%ebp,2), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0x14,0x6d,0x00,0xfc,0xff,0xff]
+               {evex} vsm4key4  -1024(,%ebp,2), %ymm3, %ymm2
+
+// CHECK:      {evex} vsm4key4  4064(%ecx), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0x51,0x7f]
+               {evex} vsm4key4  4064(%ecx), %ymm3, %ymm2
+
+// CHECK:      {evex} vsm4key4  -4096(%edx), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0x52,0x80]
+               {evex} vsm4key4  -4096(%edx), %ymm3, %ymm2
+
+// CHECK:      {evex} vsm4key4  268435456(%esp,%esi,8), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0x94,0xf4,0x00,0x00,0x00,0x10]
+               {evex} vsm4key4  268435456(%esp,%esi,8), %xmm3, %xmm2
+
+// CHECK:      {evex} vsm4key4  291(%edi,%eax,4), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0x94,0x87,0x23,0x01,0x00,0x00]
+               {evex} vsm4key4  291(%edi,%eax,4), %xmm3, %xmm2
+
+// CHECK:      {evex} vsm4key4  (%eax), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0x10]
+               {evex} vsm4key4  (%eax), %xmm3, %xmm2
+
+// CHECK:      {evex} vsm4key4  -512(,%ebp,2), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0x14,0x6d,0x00,0xfe,0xff,0xff]
+               {evex} vsm4key4  -512(,%ebp,2), %xmm3, %xmm2
+
+// CHECK:      {evex} vsm4key4  2032(%ecx), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0x51,0x7f]
+               {evex} vsm4key4  2032(%ecx), %xmm3, %xmm2
+
+// CHECK:      {evex} vsm4key4  -2048(%edx), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0x52,0x80]
+               {evex} vsm4key4  -2048(%edx), %xmm3, %xmm2
+
+// CHECK:      {evex} vsm4rnds4 %ymm4, %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0xd4]
+               {evex} vsm4rnds4 %ymm4, %ymm3, %ymm2
+
+// CHECK:      {evex} vsm4rnds4 %xmm4, %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0xd4]
+               {evex} vsm4rnds4 %xmm4, %xmm3, %xmm2
+
+// CHECK:      {evex} vsm4rnds4  268435456(%esp,%esi,8), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0x94,0xf4,0x00,0x00,0x00,0x10]
+               {evex} vsm4rnds4  268435456(%esp,%esi,8), %ymm3, %ymm2
+
+// CHECK:      {evex} vsm4rnds4  291(%edi,%eax,4), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0x94,0x87,0x23,0x01,0x00,0x00]
+               {evex} vsm4rnds4  291(%edi,%eax,4), %ymm3, %ymm2
+
+// CHECK:      {evex} vsm4rnds4  (%eax), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0x10]
+               {evex} vsm4rnds4  (%eax), %ymm3, %ymm2
+
+// CHECK:      {evex} vsm4rnds4  -1024(,%ebp,2), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0x14,0x6d,0x00,0xfc,0xff,0xff]
+               {evex} vsm4rnds4  -1024(,%ebp,2), %ymm3, %ymm2
+
+// CHECK:      {evex} vsm4rnds4  4064(%ecx), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0x51,0x7f]
+               {evex} vsm4rnds4  4064(%ecx), %ymm3, %ymm2
+
+// CHECK:      {evex} vsm4rnds4  -4096(%edx), %ymm3, %ymm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0x52,0x80]
+               {evex} vsm4rnds4  -4096(%edx), %ymm3, %ymm2
+
+// CHECK:      {evex} vsm4rnds4  268435456(%esp,%esi,8), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0x94,0xf4,0x00,0x00,0x00,0x10]
+               {evex} vsm4rnds4  268435456(%esp,%esi,8), %xmm3, %xmm2
+
+// CHECK:      {evex} vsm4rnds4  291(%edi,%eax,4), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0x94,0x87,0x23,0x01,0x00,0x00]
+               {evex} vsm4rnds4  291(%edi,%eax,4), %xmm3, %xmm2
+
+// CHECK:      {evex} vsm4rnds4  (%eax), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0x10]
+               {evex} vsm4rnds4  (%eax), %xmm3, %xmm2
+
+// CHECK:      {evex} vsm4rnds4  -512(,%ebp,2), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0x14,0x6d,0x00,0xfe,0xff,0xff]
+               {evex} vsm4rnds4  -512(,%ebp,2), %xmm3, %xmm2
+
+// CHECK:      {evex} vsm4rnds4  2032(%ecx), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0x51,0x7f]
+               {evex} vsm4rnds4  2032(%ecx), %xmm3, %xmm2
+
+// CHECK:      {evex} vsm4rnds4  -2048(%edx), %xmm3, %xmm2
+// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0x52,0x80]
+               {evex} vsm4rnds4  -2048(%edx), %xmm3, %xmm2

diff  --git a/llvm/test/MC/X86/sm4-evex-32-intel.s b/llvm/test/MC/X86/sm4-evex-32-intel.s
new file mode 100644
index 00000000000000..812fdb13f80913
--- /dev/null
+++ b/llvm/test/MC/X86/sm4-evex-32-intel.s
@@ -0,0 +1,169 @@
+// RUN: llvm-mc -triple i686-unknown-unknown -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s
+
+// CHECK:      vsm4key4 zmm2, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0xd4]
+               vsm4key4 zmm2, zmm3, zmm4
+
+// CHECK:      vsm4key4 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0x94,0xf4,0x00,0x00,0x00,0x10]
+               vsm4key4 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK:      vsm4key4 zmm2, zmm3, zmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0x94,0x87,0x23,0x01,0x00,0x00]
+               vsm4key4 zmm2, zmm3, zmmword ptr [edi + 4*eax + 291]
+
+// CHECK:      vsm4key4 zmm2, zmm3, zmmword ptr [eax]
+// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0x10]
+               vsm4key4 zmm2, zmm3, zmmword ptr [eax]
+
+// CHECK:      vsm4key4 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0x14,0x6d,0x00,0xf8,0xff,0xff]
+               vsm4key4 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+
+// CHECK:      vsm4key4 zmm2, zmm3, zmmword ptr [ecx + 8128]
+// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0x51,0x7f]
+               vsm4key4 zmm2, zmm3, zmmword ptr [ecx + 8128]
+
+// CHECK:      vsm4key4 zmm2, zmm3, zmmword ptr [edx - 8192]
+// CHECK: encoding: [0x62,0xf2,0x66,0x48,0xda,0x52,0x80]
+               vsm4key4 zmm2, zmm3, zmmword ptr [edx - 8192]
+
+// CHECK:      vsm4rnds4 zmm2, zmm3, zmm4
+// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0xd4]
+               vsm4rnds4 zmm2, zmm3, zmm4
+
+// CHECK:      vsm4rnds4 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0x94,0xf4,0x00,0x00,0x00,0x10]
+               vsm4rnds4 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK:      vsm4rnds4 zmm2, zmm3, zmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0x94,0x87,0x23,0x01,0x00,0x00]
+               vsm4rnds4 zmm2, zmm3, zmmword ptr [edi + 4*eax + 291]
+
+// CHECK:      vsm4rnds4 zmm2, zmm3, zmmword ptr [eax]
+// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0x10]
+               vsm4rnds4 zmm2, zmm3, zmmword ptr [eax]
+
+// CHECK:      vsm4rnds4 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0x14,0x6d,0x00,0xf8,0xff,0xff]
+               vsm4rnds4 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+
+// CHECK:      vsm4rnds4 zmm2, zmm3, zmmword ptr [ecx + 8128]
+// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0x51,0x7f]
+               vsm4rnds4 zmm2, zmm3, zmmword ptr [ecx + 8128]
+
+// CHECK:      vsm4rnds4 zmm2, zmm3, zmmword ptr [edx - 8192]
+// CHECK: encoding: [0x62,0xf2,0x67,0x48,0xda,0x52,0x80]
+               vsm4rnds4 zmm2, zmm3, zmmword ptr [edx - 8192]
+
+// CHECK:      {evex} vsm4key4 ymm2, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0xd4]
+               {evex} vsm4key4 ymm2, ymm3, ymm4
+
+// CHECK:      {evex} vsm4key4 xmm2, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0xd4]
+               {evex} vsm4key4 xmm2, xmm3, xmm4
+
+// CHECK:      {evex} vsm4key4 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0x94,0xf4,0x00,0x00,0x00,0x10]
+               {evex} vsm4key4 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+
+// CHECK:      {evex} vsm4key4 ymm2, ymm3, ymmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0x94,0x87,0x23,0x01,0x00,0x00]
+               {evex} vsm4key4 ymm2, ymm3, ymmword ptr [edi + 4*eax + 291]
+
+// CHECK:      {evex} vsm4key4 ymm2, ymm3, ymmword ptr [eax]
+// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0x10]
+               {evex} vsm4key4 ymm2, ymm3, ymmword ptr [eax]
+
+// CHECK:      {evex} vsm4key4 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0x14,0x6d,0x00,0xfc,0xff,0xff]
+               {evex} vsm4key4 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+
+// CHECK:      {evex} vsm4key4 ymm2, ymm3, ymmword ptr [ecx + 4064]
+// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0x51,0x7f]
+               {evex} vsm4key4 ymm2, ymm3, ymmword ptr [ecx + 4064]
+
+// CHECK:      {evex} vsm4key4 ymm2, ymm3, ymmword ptr [edx - 4096]
+// CHECK: encoding: [0x62,0xf2,0x66,0x28,0xda,0x52,0x80]
+               {evex} vsm4key4 ymm2, ymm3, ymmword ptr [edx - 4096]
+
+// CHECK:      {evex} vsm4key4 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0x94,0xf4,0x00,0x00,0x00,0x10]
+               {evex} vsm4key4 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK:      {evex} vsm4key4 xmm2, xmm3, xmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0x94,0x87,0x23,0x01,0x00,0x00]
+               {evex} vsm4key4 xmm2, xmm3, xmmword ptr [edi + 4*eax + 291]
+
+// CHECK:      {evex} vsm4key4 xmm2, xmm3, xmmword ptr [eax]
+// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0x10]
+               {evex} vsm4key4 xmm2, xmm3, xmmword ptr [eax]
+
+// CHECK:      {evex} vsm4key4 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0x14,0x6d,0x00,0xfe,0xff,0xff]
+               {evex} vsm4key4 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+
+// CHECK:      {evex} vsm4key4 xmm2, xmm3, xmmword ptr [ecx + 2032]
+// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0x51,0x7f]
+               {evex} vsm4key4 xmm2, xmm3, xmmword ptr [ecx + 2032]
+
+// CHECK:      {evex} vsm4key4 xmm2, xmm3, xmmword ptr [edx - 2048]
+// CHECK: encoding: [0x62,0xf2,0x66,0x08,0xda,0x52,0x80]
+               {evex} vsm4key4 xmm2, xmm3, xmmword ptr [edx - 2048]
+
+// CHECK:      {evex} vsm4rnds4 ymm2, ymm3, ymm4
+// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0xd4]
+               {evex} vsm4rnds4 ymm2, ymm3, ymm4
+
+// CHECK:      {evex} vsm4rnds4 xmm2, xmm3, xmm4
+// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0xd4]
+               {evex} vsm4rnds4 xmm2, xmm3, xmm4
+
+// CHECK:      {evex} vsm4rnds4 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0x94,0xf4,0x00,0x00,0x00,0x10]
+               {evex} vsm4rnds4 ymm2, ymm3, ymmword ptr [esp + 8*esi + 268435456]
+
+// CHECK:      {evex} vsm4rnds4 ymm2, ymm3, ymmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0x94,0x87,0x23,0x01,0x00,0x00]
+               {evex} vsm4rnds4 ymm2, ymm3, ymmword ptr [edi + 4*eax + 291]
+
+// CHECK:      {evex} vsm4rnds4 ymm2, ymm3, ymmword ptr [eax]
+// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0x10]
+               {evex} vsm4rnds4 ymm2, ymm3, ymmword ptr [eax]
+
+// CHECK:      {evex} vsm4rnds4 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0x14,0x6d,0x00,0xfc,0xff,0xff]
+               {evex} vsm4rnds4 ymm2, ymm3, ymmword ptr [2*ebp - 1024]
+
+// CHECK:      {evex} vsm4rnds4 ymm2, ymm3, ymmword ptr [ecx + 4064]
+// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0x51,0x7f]
+               {evex} vsm4rnds4 ymm2, ymm3, ymmword ptr [ecx + 4064]
+
+// CHECK:      {evex} vsm4rnds4 ymm2, ymm3, ymmword ptr [edx - 4096]
+// CHECK: encoding: [0x62,0xf2,0x67,0x28,0xda,0x52,0x80]
+               {evex} vsm4rnds4 ymm2, ymm3, ymmword ptr [edx - 4096]
+
+// CHECK:      {evex} vsm4rnds4 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0x94,0xf4,0x00,0x00,0x00,0x10]
+               {evex} vsm4rnds4 xmm2, xmm3, xmmword ptr [esp + 8*esi + 268435456]
+
+// CHECK:      {evex} vsm4rnds4 xmm2, xmm3, xmmword ptr [edi + 4*eax + 291]
+// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0x94,0x87,0x23,0x01,0x00,0x00]
+               {evex} vsm4rnds4 xmm2, xmm3, xmmword ptr [edi + 4*eax + 291]
+
+// CHECK:      {evex} vsm4rnds4 xmm2, xmm3, xmmword ptr [eax]
+// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0x10]
+               {evex} vsm4rnds4 xmm2, xmm3, xmmword ptr [eax]
+
+// CHECK:      {evex} vsm4rnds4 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0x14,0x6d,0x00,0xfe,0xff,0xff]
+               {evex} vsm4rnds4 xmm2, xmm3, xmmword ptr [2*ebp - 512]
+
+// CHECK:      {evex} vsm4rnds4 xmm2, xmm3, xmmword ptr [ecx + 2032]
+// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0x51,0x7f]
+               {evex} vsm4rnds4 xmm2, xmm3, xmmword ptr [ecx + 2032]
+
+// CHECK:      {evex} vsm4rnds4 xmm2, xmm3, xmmword ptr [edx - 2048]
+// CHECK: encoding: [0x62,0xf2,0x67,0x08,0xda,0x52,0x80]
+               {evex} vsm4rnds4 xmm2, xmm3, xmmword ptr [edx - 2048]

diff  --git a/llvm/test/MC/X86/sm4-evex-64-att.s b/llvm/test/MC/X86/sm4-evex-64-att.s
new file mode 100644
index 00000000000000..389a29b1189795
--- /dev/null
+++ b/llvm/test/MC/X86/sm4-evex-64-att.s
@@ -0,0 +1,224 @@
+// RUN: llvm-mc -triple x86_64-unknown-unknown --show-encoding %s | FileCheck %s
+
+// CHECK:      vsm4key4 %zmm24, %zmm23, %zmm22
+// CHECK: encoding: [0x62,0x82,0x46,0x40,0xda,0xf0]
+               vsm4key4 %zmm24, %zmm23, %zmm22
+
+// CHECK:      vsm4key4  268435456(%rbp,%r14,8), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xa2,0x46,0x40,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10]
+               vsm4key4  268435456(%rbp,%r14,8), %zmm23, %zmm22
+
+// CHECK:      vsm4key4  291(%r8,%rax,4), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xc2,0x46,0x40,0xda,0xb4,0x80,0x23,0x01,0x00,0x00]
+               vsm4key4  291(%r8,%rax,4), %zmm23, %zmm22
+
+// CHECK:      vsm4key4  (%rip), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe2,0x46,0x40,0xda,0x35,0x00,0x00,0x00,0x00]
+               vsm4key4  (%rip), %zmm23, %zmm22
+
+// CHECK:      vsm4key4  -2048(,%rbp,2), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe2,0x46,0x40,0xda,0x34,0x6d,0x00,0xf8,0xff,0xff]
+               vsm4key4  -2048(,%rbp,2), %zmm23, %zmm22
+
+// CHECK:      vsm4key4  8128(%rcx), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe2,0x46,0x40,0xda,0x71,0x7f]
+               vsm4key4  8128(%rcx), %zmm23, %zmm22
+
+// CHECK:      vsm4key4  -8192(%rdx), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe2,0x46,0x40,0xda,0x72,0x80]
+               vsm4key4  -8192(%rdx), %zmm23, %zmm22
+
+// CHECK:      vsm4rnds4 %zmm24, %zmm23, %zmm22
+// CHECK: encoding: [0x62,0x82,0x47,0x40,0xda,0xf0]
+               vsm4rnds4 %zmm24, %zmm23, %zmm22
+
+// CHECK:      vsm4rnds4  268435456(%rbp,%r14,8), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xa2,0x47,0x40,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10]
+               vsm4rnds4  268435456(%rbp,%r14,8), %zmm23, %zmm22
+
+// CHECK:      vsm4rnds4  291(%r8,%rax,4), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xc2,0x47,0x40,0xda,0xb4,0x80,0x23,0x01,0x00,0x00]
+               vsm4rnds4  291(%r8,%rax,4), %zmm23, %zmm22
+
+// CHECK:      vsm4rnds4  (%rip), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe2,0x47,0x40,0xda,0x35,0x00,0x00,0x00,0x00]
+               vsm4rnds4  (%rip), %zmm23, %zmm22
+
+// CHECK:      vsm4rnds4  -2048(,%rbp,2), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe2,0x47,0x40,0xda,0x34,0x6d,0x00,0xf8,0xff,0xff]
+               vsm4rnds4  -2048(,%rbp,2), %zmm23, %zmm22
+
+// CHECK:      vsm4rnds4  8128(%rcx), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe2,0x47,0x40,0xda,0x71,0x7f]
+               vsm4rnds4  8128(%rcx), %zmm23, %zmm22
+
+// CHECK:      vsm4rnds4  -8192(%rdx), %zmm23, %zmm22
+// CHECK: encoding: [0x62,0xe2,0x47,0x40,0xda,0x72,0x80]
+               vsm4rnds4  -8192(%rdx), %zmm23, %zmm22
+
+// CHECK:      vsm4key4 %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x82,0x46,0x20,0xda,0xf0]
+               vsm4key4 %ymm24, %ymm23, %ymm22
+
+// CHECK:      vsm4key4 %xmm24, %xmm23, %xmm22
+// CHECK: encoding: [0x62,0x82,0x46,0x00,0xda,0xf0]
+               vsm4key4 %xmm24, %xmm23, %xmm22
+
+// CHECK:      vsm4key4  268435456(%rbp,%r14,8), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xa2,0x46,0x20,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10]
+               vsm4key4  268435456(%rbp,%r14,8), %ymm23, %ymm22
+
+// CHECK:      vsm4key4  291(%r8,%rax,4), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xc2,0x46,0x20,0xda,0xb4,0x80,0x23,0x01,0x00,0x00]
+               vsm4key4  291(%r8,%rax,4), %ymm23, %ymm22
+
+// CHECK:      vsm4key4  (%rip), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe2,0x46,0x20,0xda,0x35,0x00,0x00,0x00,0x00]
+               vsm4key4  (%rip), %ymm23, %ymm22
+
+// CHECK:      vsm4key4  -1024(,%rbp,2), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe2,0x46,0x20,0xda,0x34,0x6d,0x00,0xfc,0xff,0xff]
+               vsm4key4  -1024(,%rbp,2), %ymm23, %ymm22
+
+// CHECK:      vsm4key4  4064(%rcx), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe2,0x46,0x20,0xda,0x71,0x7f]
+               vsm4key4  4064(%rcx), %ymm23, %ymm22
+
+// CHECK:      vsm4key4  -4096(%rdx), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe2,0x46,0x20,0xda,0x72,0x80]
+               vsm4key4  -4096(%rdx), %ymm23, %ymm22
+
+// CHECK:      vsm4key4  268435456(%rbp,%r14,8), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xa2,0x46,0x00,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10]
+               vsm4key4  268435456(%rbp,%r14,8), %xmm23, %xmm22
+
+// CHECK:      vsm4key4  291(%r8,%rax,4), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xc2,0x46,0x00,0xda,0xb4,0x80,0x23,0x01,0x00,0x00]
+               vsm4key4  291(%r8,%rax,4), %xmm23, %xmm22
+
+// CHECK:      vsm4key4  (%rip), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe2,0x46,0x00,0xda,0x35,0x00,0x00,0x00,0x00]
+               vsm4key4  (%rip), %xmm23, %xmm22
+
+// CHECK:      vsm4key4  -512(,%rbp,2), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe2,0x46,0x00,0xda,0x34,0x6d,0x00,0xfe,0xff,0xff]
+               vsm4key4  -512(,%rbp,2), %xmm23, %xmm22
+
+// CHECK:      vsm4key4  2032(%rcx), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe2,0x46,0x00,0xda,0x71,0x7f]
+               vsm4key4  2032(%rcx), %xmm23, %xmm22
+
+// CHECK:      vsm4key4  -2048(%rdx), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe2,0x46,0x00,0xda,0x72,0x80]
+               vsm4key4  -2048(%rdx), %xmm23, %xmm22
+// CHECK:      vsm4key4 %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x82,0x46,0x20,0xda,0xf0]
+               vsm4key4 %ymm24, %ymm23, %ymm22
+
+// CHECK:      vsm4key4 %xmm24, %xmm23, %xmm22
+// CHECK: encoding: [0x62,0x82,0x46,0x00,0xda,0xf0]
+               vsm4key4 %xmm24, %xmm23, %xmm22
+
+// CHECK:      vsm4key4  268435456(%rbp,%r14,8), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xa2,0x46,0x20,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10]
+               vsm4key4  268435456(%rbp,%r14,8), %ymm23, %ymm22
+
+// CHECK:      vsm4key4  291(%r8,%rax,4), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xc2,0x46,0x20,0xda,0xb4,0x80,0x23,0x01,0x00,0x00]
+               vsm4key4  291(%r8,%rax,4), %ymm23, %ymm22
+
+// CHECK:      vsm4key4  (%rip), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe2,0x46,0x20,0xda,0x35,0x00,0x00,0x00,0x00]
+               vsm4key4  (%rip), %ymm23, %ymm22
+
+// CHECK:      vsm4key4  -1024(,%rbp,2), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe2,0x46,0x20,0xda,0x34,0x6d,0x00,0xfc,0xff,0xff]
+               vsm4key4  -1024(,%rbp,2), %ymm23, %ymm22
+
+// CHECK:      vsm4key4  4064(%rcx), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe2,0x46,0x20,0xda,0x71,0x7f]
+               vsm4key4  4064(%rcx), %ymm23, %ymm22
+
+// CHECK:      vsm4key4  -4096(%rdx), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe2,0x46,0x20,0xda,0x72,0x80]
+               vsm4key4  -4096(%rdx), %ymm23, %ymm22
+
+// CHECK:      vsm4key4  268435456(%rbp,%r14,8), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xa2,0x46,0x00,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10]
+               vsm4key4  268435456(%rbp,%r14,8), %xmm23, %xmm22
+
+// CHECK:      vsm4key4  291(%r8,%rax,4), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xc2,0x46,0x00,0xda,0xb4,0x80,0x23,0x01,0x00,0x00]
+               vsm4key4  291(%r8,%rax,4), %xmm23, %xmm22
+
+// CHECK:      vsm4key4  (%rip), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe2,0x46,0x00,0xda,0x35,0x00,0x00,0x00,0x00]
+               vsm4key4  (%rip), %xmm23, %xmm22
+
+// CHECK:      vsm4key4  -512(,%rbp,2), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe2,0x46,0x00,0xda,0x34,0x6d,0x00,0xfe,0xff,0xff]
+               vsm4key4  -512(,%rbp,2), %xmm23, %xmm22
+
+// CHECK:      vsm4key4  2032(%rcx), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe2,0x46,0x00,0xda,0x71,0x7f]
+               vsm4key4  2032(%rcx), %xmm23, %xmm22
+
+// CHECK:      vsm4key4  -2048(%rdx), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe2,0x46,0x00,0xda,0x72,0x80]
+               vsm4key4  -2048(%rdx), %xmm23, %xmm22
+
+// CHECK:      vsm4rnds4 %ymm24, %ymm23, %ymm22
+// CHECK: encoding: [0x62,0x82,0x47,0x20,0xda,0xf0]
+               vsm4rnds4 %ymm24, %ymm23, %ymm22
+
+// CHECK:      vsm4rnds4 %xmm24, %xmm23, %xmm22
+// CHECK: encoding: [0x62,0x82,0x47,0x00,0xda,0xf0]
+               vsm4rnds4 %xmm24, %xmm23, %xmm22
+
+// CHECK:      vsm4rnds4  268435456(%rbp,%r14,8), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xa2,0x47,0x20,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10]
+               vsm4rnds4  268435456(%rbp,%r14,8), %ymm23, %ymm22
+
+// CHECK:      vsm4rnds4  291(%r8,%rax,4), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xc2,0x47,0x20,0xda,0xb4,0x80,0x23,0x01,0x00,0x00]
+               vsm4rnds4  291(%r8,%rax,4), %ymm23, %ymm22
+
+// CHECK:      vsm4rnds4  (%rip), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe2,0x47,0x20,0xda,0x35,0x00,0x00,0x00,0x00]
+               vsm4rnds4  (%rip), %ymm23, %ymm22
+
+// CHECK:      vsm4rnds4  -1024(,%rbp,2), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe2,0x47,0x20,0xda,0x34,0x6d,0x00,0xfc,0xff,0xff]
+               vsm4rnds4  -1024(,%rbp,2), %ymm23, %ymm22
+
+// CHECK:      vsm4rnds4  4064(%rcx), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe2,0x47,0x20,0xda,0x71,0x7f]
+               vsm4rnds4  4064(%rcx), %ymm23, %ymm22
+
+// CHECK:      vsm4rnds4  -4096(%rdx), %ymm23, %ymm22
+// CHECK: encoding: [0x62,0xe2,0x47,0x20,0xda,0x72,0x80]
+               vsm4rnds4  -4096(%rdx), %ymm23, %ymm22
+
+// CHECK:      vsm4rnds4  268435456(%rbp,%r14,8), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xa2,0x47,0x00,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10]
+               vsm4rnds4  268435456(%rbp,%r14,8), %xmm23, %xmm22
+
+// CHECK:      vsm4rnds4  291(%r8,%rax,4), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xc2,0x47,0x00,0xda,0xb4,0x80,0x23,0x01,0x00,0x00]
+               vsm4rnds4  291(%r8,%rax,4), %xmm23, %xmm22
+
+// CHECK:      vsm4rnds4  (%rip), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe2,0x47,0x00,0xda,0x35,0x00,0x00,0x00,0x00]
+               vsm4rnds4  (%rip), %xmm23, %xmm22
+
+// CHECK:      vsm4rnds4  -512(,%rbp,2), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe2,0x47,0x00,0xda,0x34,0x6d,0x00,0xfe,0xff,0xff]
+               vsm4rnds4  -512(,%rbp,2), %xmm23, %xmm22
+
+// CHECK:      vsm4rnds4  2032(%rcx), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe2,0x47,0x00,0xda,0x71,0x7f]
+               vsm4rnds4  2032(%rcx), %xmm23, %xmm22
+
+// CHECK:      vsm4rnds4  -2048(%rdx), %xmm23, %xmm22
+// CHECK: encoding: [0x62,0xe2,0x47,0x00,0xda,0x72,0x80]
+               vsm4rnds4  -2048(%rdx), %xmm23, %xmm22

diff  --git a/llvm/test/MC/X86/sm4-evex-64-intel.s b/llvm/test/MC/X86/sm4-evex-64-intel.s
new file mode 100644
index 00000000000000..3cc18cf4178ed8
--- /dev/null
+++ b/llvm/test/MC/X86/sm4-evex-64-intel.s
@@ -0,0 +1,169 @@
+// RUN: llvm-mc -triple x86_64-unknown-unknown -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s
+
+// CHECK:      vsm4key4 zmm22, zmm23, zmm24
+// CHECK: encoding: [0x62,0x82,0x46,0x40,0xda,0xf0]
+               vsm4key4 zmm22, zmm23, zmm24
+
+// CHECK:      vsm4key4 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa2,0x46,0x40,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10]
+               vsm4key4 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK:      vsm4key4 zmm22, zmm23, zmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc2,0x46,0x40,0xda,0xb4,0x80,0x23,0x01,0x00,0x00]
+               vsm4key4 zmm22, zmm23, zmmword ptr [r8 + 4*rax + 291]
+
+// CHECK:      vsm4key4 zmm22, zmm23, zmmword ptr [rip]
+// CHECK: encoding: [0x62,0xe2,0x46,0x40,0xda,0x35,0x00,0x00,0x00,0x00]
+               vsm4key4 zmm22, zmm23, zmmword ptr [rip]
+
+// CHECK:      vsm4key4 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+// CHECK: encoding: [0x62,0xe2,0x46,0x40,0xda,0x34,0x6d,0x00,0xf8,0xff,0xff]
+               vsm4key4 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+
+// CHECK:      vsm4key4 zmm22, zmm23, zmmword ptr [rcx + 8128]
+// CHECK: encoding: [0x62,0xe2,0x46,0x40,0xda,0x71,0x7f]
+               vsm4key4 zmm22, zmm23, zmmword ptr [rcx + 8128]
+
+// CHECK:      vsm4key4 zmm22, zmm23, zmmword ptr [rdx - 8192]
+// CHECK: encoding: [0x62,0xe2,0x46,0x40,0xda,0x72,0x80]
+               vsm4key4 zmm22, zmm23, zmmword ptr [rdx - 8192]
+
+// CHECK:      vsm4rnds4 zmm22, zmm23, zmm24
+// CHECK: encoding: [0x62,0x82,0x47,0x40,0xda,0xf0]
+               vsm4rnds4 zmm22, zmm23, zmm24
+
+// CHECK:      vsm4rnds4 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa2,0x47,0x40,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10]
+               vsm4rnds4 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK:      vsm4rnds4 zmm22, zmm23, zmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc2,0x47,0x40,0xda,0xb4,0x80,0x23,0x01,0x00,0x00]
+               vsm4rnds4 zmm22, zmm23, zmmword ptr [r8 + 4*rax + 291]
+
+// CHECK:      vsm4rnds4 zmm22, zmm23, zmmword ptr [rip]
+// CHECK: encoding: [0x62,0xe2,0x47,0x40,0xda,0x35,0x00,0x00,0x00,0x00]
+               vsm4rnds4 zmm22, zmm23, zmmword ptr [rip]
+
+// CHECK:      vsm4rnds4 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+// CHECK: encoding: [0x62,0xe2,0x47,0x40,0xda,0x34,0x6d,0x00,0xf8,0xff,0xff]
+               vsm4rnds4 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+
+// CHECK:      vsm4rnds4 zmm22, zmm23, zmmword ptr [rcx + 8128]
+// CHECK: encoding: [0x62,0xe2,0x47,0x40,0xda,0x71,0x7f]
+               vsm4rnds4 zmm22, zmm23, zmmword ptr [rcx + 8128]
+
+// CHECK:      vsm4rnds4 zmm22, zmm23, zmmword ptr [rdx - 8192]
+// CHECK: encoding: [0x62,0xe2,0x47,0x40,0xda,0x72,0x80]
+               vsm4rnds4 zmm22, zmm23, zmmword ptr [rdx - 8192]
+
+// CHECK:      vsm4key4 ymm22, ymm23, ymm24
+// CHECK: encoding: [0x62,0x82,0x46,0x20,0xda,0xf0]
+               vsm4key4 ymm22, ymm23, ymm24
+
+// CHECK:      vsm4key4 xmm22, xmm23, xmm24
+// CHECK: encoding: [0x62,0x82,0x46,0x00,0xda,0xf0]
+               vsm4key4 xmm22, xmm23, xmm24
+
+// CHECK:      vsm4key4 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa2,0x46,0x20,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10]
+               vsm4key4 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK:      vsm4key4 ymm22, ymm23, ymmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc2,0x46,0x20,0xda,0xb4,0x80,0x23,0x01,0x00,0x00]
+               vsm4key4 ymm22, ymm23, ymmword ptr [r8 + 4*rax + 291]
+
+// CHECK:      vsm4key4 ymm22, ymm23, ymmword ptr [rip]
+// CHECK: encoding: [0x62,0xe2,0x46,0x20,0xda,0x35,0x00,0x00,0x00,0x00]
+               vsm4key4 ymm22, ymm23, ymmword ptr [rip]
+
+// CHECK:      vsm4key4 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+// CHECK: encoding: [0x62,0xe2,0x46,0x20,0xda,0x34,0x6d,0x00,0xfc,0xff,0xff]
+               vsm4key4 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+
+// CHECK:      vsm4key4 ymm22, ymm23, ymmword ptr [rcx + 4064]
+// CHECK: encoding: [0x62,0xe2,0x46,0x20,0xda,0x71,0x7f]
+               vsm4key4 ymm22, ymm23, ymmword ptr [rcx + 4064]
+
+// CHECK:      vsm4key4 ymm22, ymm23, ymmword ptr [rdx - 4096]
+// CHECK: encoding: [0x62,0xe2,0x46,0x20,0xda,0x72,0x80]
+               vsm4key4 ymm22, ymm23, ymmword ptr [rdx - 4096]
+
+// CHECK:      vsm4key4 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa2,0x46,0x00,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10]
+               vsm4key4 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK:      vsm4key4 xmm22, xmm23, xmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc2,0x46,0x00,0xda,0xb4,0x80,0x23,0x01,0x00,0x00]
+               vsm4key4 xmm22, xmm23, xmmword ptr [r8 + 4*rax + 291]
+
+// CHECK:      vsm4key4 xmm22, xmm23, xmmword ptr [rip]
+// CHECK: encoding: [0x62,0xe2,0x46,0x00,0xda,0x35,0x00,0x00,0x00,0x00]
+               vsm4key4 xmm22, xmm23, xmmword ptr [rip]
+
+// CHECK:      vsm4key4 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+// CHECK: encoding: [0x62,0xe2,0x46,0x00,0xda,0x34,0x6d,0x00,0xfe,0xff,0xff]
+               vsm4key4 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+
+// CHECK:      vsm4key4 xmm22, xmm23, xmmword ptr [rcx + 2032]
+// CHECK: encoding: [0x62,0xe2,0x46,0x00,0xda,0x71,0x7f]
+               vsm4key4 xmm22, xmm23, xmmword ptr [rcx + 2032]
+
+// CHECK:      vsm4key4 xmm22, xmm23, xmmword ptr [rdx - 2048]
+// CHECK: encoding: [0x62,0xe2,0x46,0x00,0xda,0x72,0x80]
+               vsm4key4 xmm22, xmm23, xmmword ptr [rdx - 2048]
+
+// CHECK:      vsm4rnds4 ymm22, ymm23, ymm24
+// CHECK: encoding: [0x62,0x82,0x47,0x20,0xda,0xf0]
+               vsm4rnds4 ymm22, ymm23, ymm24
+
+// CHECK:      vsm4rnds4 xmm22, xmm23, xmm24
+// CHECK: encoding: [0x62,0x82,0x47,0x00,0xda,0xf0]
+               vsm4rnds4 xmm22, xmm23, xmm24
+
+// CHECK:      vsm4rnds4 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa2,0x47,0x20,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10]
+               vsm4rnds4 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK:      vsm4rnds4 ymm22, ymm23, ymmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc2,0x47,0x20,0xda,0xb4,0x80,0x23,0x01,0x00,0x00]
+               vsm4rnds4 ymm22, ymm23, ymmword ptr [r8 + 4*rax + 291]
+
+// CHECK:      vsm4rnds4 ymm22, ymm23, ymmword ptr [rip]
+// CHECK: encoding: [0x62,0xe2,0x47,0x20,0xda,0x35,0x00,0x00,0x00,0x00]
+               vsm4rnds4 ymm22, ymm23, ymmword ptr [rip]
+
+// CHECK:      vsm4rnds4 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+// CHECK: encoding: [0x62,0xe2,0x47,0x20,0xda,0x34,0x6d,0x00,0xfc,0xff,0xff]
+               vsm4rnds4 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+
+// CHECK:      vsm4rnds4 ymm22, ymm23, ymmword ptr [rcx + 4064]
+// CHECK: encoding: [0x62,0xe2,0x47,0x20,0xda,0x71,0x7f]
+               vsm4rnds4 ymm22, ymm23, ymmword ptr [rcx + 4064]
+
+// CHECK:      vsm4rnds4 ymm22, ymm23, ymmword ptr [rdx - 4096]
+// CHECK: encoding: [0x62,0xe2,0x47,0x20,0xda,0x72,0x80]
+               vsm4rnds4 ymm22, ymm23, ymmword ptr [rdx - 4096]
+
+// CHECK:      vsm4rnds4 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+// CHECK: encoding: [0x62,0xa2,0x47,0x00,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10]
+               vsm4rnds4 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+
+// CHECK:      vsm4rnds4 xmm22, xmm23, xmmword ptr [r8 + 4*rax + 291]
+// CHECK: encoding: [0x62,0xc2,0x47,0x00,0xda,0xb4,0x80,0x23,0x01,0x00,0x00]
+               vsm4rnds4 xmm22, xmm23, xmmword ptr [r8 + 4*rax + 291]
+
+// CHECK:      vsm4rnds4 xmm22, xmm23, xmmword ptr [rip]
+// CHECK: encoding: [0x62,0xe2,0x47,0x00,0xda,0x35,0x00,0x00,0x00,0x00]
+               vsm4rnds4 xmm22, xmm23, xmmword ptr [rip]
+
+// CHECK:      vsm4rnds4 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+// CHECK: encoding: [0x62,0xe2,0x47,0x00,0xda,0x34,0x6d,0x00,0xfe,0xff,0xff]
+               vsm4rnds4 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+
+// CHECK:      vsm4rnds4 xmm22, xmm23, xmmword ptr [rcx + 2032]
+// CHECK: encoding: [0x62,0xe2,0x47,0x00,0xda,0x71,0x7f]
+               vsm4rnds4 xmm22, xmm23, xmmword ptr [rcx + 2032]
+
+// CHECK:      vsm4rnds4 xmm22, xmm23, xmmword ptr [rdx - 2048]
+// CHECK: encoding: [0x62,0xe2,0x47,0x00,0xda,0x72,0x80]
+               vsm4rnds4 xmm22, xmm23, xmmword ptr [rdx - 2048]

diff  --git a/llvm/test/TableGen/x86-fold-tables.inc b/llvm/test/TableGen/x86-fold-tables.inc
index 85d9b02ac0cbf1..43c206fa0af698 100644
--- a/llvm/test/TableGen/x86-fold-tables.inc
+++ b/llvm/test/TableGen/x86-fold-tables.inc
@@ -4113,8 +4113,14 @@ static const X86FoldTableEntry Table2[] = {
   {X86::VSHUFPSZrri, X86::VSHUFPSZrmi, 0},
   {X86::VSHUFPSrri, X86::VSHUFPSrmi, 0},
   {X86::VSM4KEY4Yrr, X86::VSM4KEY4Yrm, 0},
+  {X86::VSM4KEY4Z128rr, X86::VSM4KEY4Z128rm, 0},
+  {X86::VSM4KEY4Z256rr, X86::VSM4KEY4Z256rm, 0},
+  {X86::VSM4KEY4Zrr, X86::VSM4KEY4Zrm, 0},
   {X86::VSM4KEY4rr, X86::VSM4KEY4rm, 0},
   {X86::VSM4RNDS4Yrr, X86::VSM4RNDS4Yrm, 0},
+  {X86::VSM4RNDS4Z128rr, X86::VSM4RNDS4Z128rm, 0},
+  {X86::VSM4RNDS4Z256rr, X86::VSM4RNDS4Z256rm, 0},
+  {X86::VSM4RNDS4Zrr, X86::VSM4RNDS4Zrm, 0},
   {X86::VSM4RNDS4rr, X86::VSM4RNDS4rm, 0},
   {X86::VSQRTNEPBF16Z128rkz, X86::VSQRTNEPBF16Z128mkz, 0},
   {X86::VSQRTNEPBF16Z256rkz, X86::VSQRTNEPBF16Z256mkz, 0},


        


More information about the llvm-commits mailing list