[clang] [llvm] [X86] Support SM4 EVEX version intrinsics/instructions. (PR #113402)

via cfe-commits cfe-commits at lists.llvm.org
Tue Oct 22 18:42:54 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-clang

Author: Freddy Ye (FreddyLeaf)

<details>
<summary>Changes</summary>

Ref.: https://cdrdv2.intel.com/v1/dl/getContent/671368


---

Patch is 59.96 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/113402.diff


16 Files Affected:

- (modified) clang/include/clang/Basic/BuiltinsX86.def (+4) 
- (modified) clang/lib/Headers/CMakeLists.txt (+1) 
- (modified) clang/lib/Headers/immintrin.h (+3) 
- (added) clang/lib/Headers/sm4evexintrin.h (+32) 
- (added) clang/test/CodeGen/X86/sm4-evex-builtins.c (+19) 
- (modified) llvm/docs/ReleaseNotes.md (+2) 
- (modified) llvm/include/llvm/IR/IntrinsicsX86.td (+6) 
- (modified) llvm/lib/Target/X86/X86InstrAVX10.td (+20) 
- (added) llvm/test/CodeGen/X86/sm4-evex-intrinsics.ll (+64) 
- (added) llvm/test/MC/Disassembler/X86/sm4-evex-32.txt (+170) 
- (added) llvm/test/MC/Disassembler/X86/sm4-evex-64.txt (+170) 
- (added) llvm/test/MC/X86/sm4-evex-32-att.s (+224) 
- (added) llvm/test/MC/X86/sm4-evex-32-intel.s (+169) 
- (added) llvm/test/MC/X86/sm4-evex-64-att.s (+169) 
- (added) llvm/test/MC/X86/sm4-evex-64-intel.s (+169) 
- (modified) llvm/test/TableGen/x86-fold-tables.inc (+6) 


``````````diff
diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def
index 4c6b22cca421ca..4486eb73a11fa6 100644
--- a/clang/include/clang/Basic/BuiltinsX86.def
+++ b/clang/include/clang/Basic/BuiltinsX86.def
@@ -2179,6 +2179,10 @@ TARGET_BUILTIN(__builtin_ia32_vsm4key4256, "V8UiV8UiV8Ui", "nV:256:", "sm4")
 TARGET_BUILTIN(__builtin_ia32_vsm4rnds4128, "V4UiV4UiV4Ui", "nV:128:", "sm4")
 TARGET_BUILTIN(__builtin_ia32_vsm4rnds4256, "V8UiV8UiV8Ui", "nV:256:", "sm4")
 
+// SM4_EVEX
+TARGET_BUILTIN(__builtin_ia32_vsm4key4512, "V16UiV16UiV16Ui", "nV:512:", "avx10.2-512,sm4")
+TARGET_BUILTIN(__builtin_ia32_vsm4rnds4512, "V16UiV16UiV16Ui", "nV:512:", "avx10.2-512,sm4")
+
 // AVX10 MINMAX
 TARGET_BUILTIN(__builtin_ia32_vminmaxnepbf16128, "V8yV8yV8yIi", "nV:128:", "avx10.2-256")
 TARGET_BUILTIN(__builtin_ia32_vminmaxnepbf16256, "V16yV16yV16yIi", "nV:256:", "avx10.2-256")
diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt
index ff392e7122a448..6a594dad0b67d2 100644
--- a/clang/lib/Headers/CMakeLists.txt
+++ b/clang/lib/Headers/CMakeLists.txt
@@ -241,6 +241,7 @@ set(x86_files
   shaintrin.h
   sm3intrin.h
   sm4intrin.h
+  sm4evexintrin.h
   smmintrin.h
   tbmintrin.h
   tmmintrin.h
diff --git a/clang/lib/Headers/immintrin.h b/clang/lib/Headers/immintrin.h
index 3fbabffa98df20..1b83dd2162707c 100644
--- a/clang/lib/Headers/immintrin.h
+++ b/clang/lib/Headers/immintrin.h
@@ -665,6 +665,9 @@ _storebe_i64(void * __P, long long __D) {
 #include <avx10_2_512niintrin.h>
 #include <avx10_2_512satcvtdsintrin.h>
 #include <avx10_2_512satcvtintrin.h>
+#if (defined(__SM4__))
+#include <sm4evexintrin.h>
+#endif
 #endif
 
 #if !defined(__SCE__) || __has_feature(modules) || defined(__ENQCMD__)
diff --git a/clang/lib/Headers/sm4evexintrin.h b/clang/lib/Headers/sm4evexintrin.h
new file mode 100644
index 00000000000000..f6ae0037baea03
--- /dev/null
+++ b/clang/lib/Headers/sm4evexintrin.h
@@ -0,0 +1,32 @@
+/*===--------------- sm4evexintrin.h - SM4 EVEX intrinsics -----------------===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===----------------------------------------------------------------------===
+ */
+#ifndef __IMMINTRIN_H
+#error "Never use <sm4evexintrin.h> directly; include <immintrin.h> instead."
+#endif // __IMMINTRIN_H
+
+#ifndef __SM4EVEXINTRIN_H
+#define __SM4EVEXINTRIN_H
+
+#define __DEFAULT_FN_ATTRS512                                                  \
+  __attribute__((__always_inline__, __nodebug__,                               \
+                 __target__("sm4,avx10.2-512"), __min_vector_width__(512)))
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_sm4key4_epi32(__m512i __A, __m512i __B) {
+  return (__m512i)__builtin_ia32_vsm4key4512((__v16su)__A, (__v16su)__B);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
+_mm512_sm4rnds4_epi32(__m512i __A, __m512i __B) {
+  return (__m512i)__builtin_ia32_vsm4rnds4512((__v16su)__A, (__v16su)__B);
+}
+
+#undef __DEFAULT_FN_ATTRS512
+
+#endif // __SM4EVEXINTRIN_H
diff --git a/clang/test/CodeGen/X86/sm4-evex-builtins.c b/clang/test/CodeGen/X86/sm4-evex-builtins.c
new file mode 100644
index 00000000000000..0e54bd008d4fb0
--- /dev/null
+++ b/clang/test/CodeGen/X86/sm4-evex-builtins.c
@@ -0,0 +1,19 @@
+// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-- -target-feature +sm4 \
+// RUN: -target-feature +avx10.2-512 -emit-llvm -o - -Wall -Werror | FileCheck %s
+// RUN: %clang_cc1 %s -ffreestanding -triple=i386-- -target-feature +sm4 \
+// RUN: -target-feature +avx10.2-512 -emit-llvm -o - -Wall -Werror | FileCheck %s
+
+#include <immintrin.h>
+#include <stddef.h>
+
+__m512i test_mm512_sm4key4_epi32(__m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_sm4key4_epi32(
+  // CHECK: call <16 x i32> @llvm.x86.vsm4key4512(<16 x i32> %{{.*}}, <16 x i32> %{{.*}})
+  return _mm512_sm4key4_epi32(__A, __B);
+}
+
+__m512i test_mm512_sm4rnds4_epi32(__m512i __A, __m512i __B) {
+  // CHECK-LABEL: @test_mm512_sm4rnds4_epi32(
+  // CHECK: call <16 x i32> @llvm.x86.vsm4rnds4512(<16 x i32> %{{.*}}, <16 x i32> %{{.*}})
+  return _mm512_sm4rnds4_epi32(__A, __B);
+}
diff --git a/llvm/docs/ReleaseNotes.md b/llvm/docs/ReleaseNotes.md
index e5853789c78b63..16764210537689 100644
--- a/llvm/docs/ReleaseNotes.md
+++ b/llvm/docs/ReleaseNotes.md
@@ -196,6 +196,8 @@ Changes to the X86 Backend
 
 * Support ISA of `AVX10.2-256` and `AVX10.2-512`.
 
+* Support ISA of `SM4(EVEX)`.
+
 Changes to the OCaml bindings
 -----------------------------
 
diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td
index 5262e3154ff721..7725bda1f4f598 100644
--- a/llvm/include/llvm/IR/IntrinsicsX86.td
+++ b/llvm/include/llvm/IR/IntrinsicsX86.td
@@ -6109,6 +6109,12 @@ let TargetPrefix = "x86" in {
         DefaultAttrsIntrinsic<[llvm_v8i32_ty],
         [llvm_v8i32_ty, llvm_v8i32_ty],
         [IntrNoMem]>;
+def int_x86_vsm4key4512 : ClangBuiltin<"__builtin_ia32_vsm4key4512">,
+        Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty],
+        [IntrNoMem]>;
+def int_x86_vsm4rnds4512 : ClangBuiltin<"__builtin_ia32_vsm4rnds4512">,
+        Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, llvm_v16i32_ty],
+        [IntrNoMem]>;
 }
 //===----------------------------------------------------------------------===//
 // RAO-INT intrinsics
diff --git a/llvm/lib/Target/X86/X86InstrAVX10.td b/llvm/lib/Target/X86/X86InstrAVX10.td
index 625f2e01d47218..640011f5ed28d7 100644
--- a/llvm/lib/Target/X86/X86InstrAVX10.td
+++ b/llvm/lib/Target/X86/X86InstrAVX10.td
@@ -1647,3 +1647,23 @@ let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
   def : InstAlias<"vmovw.s\t{$src, $dst|$dst, $src}",
                   (VMOVZPWILo2PWIZrr2 VR128X:$dst, VR128X:$src), 0>;
 }
+
+// SM4(EVEX)
+multiclass avx10_sm4_base<string OpStr> {
+  // SM4_Base is in X86InstrSSE.td.
+  let Predicates = [HasSM4, HasAVX10_2] in {
+    defm Z128 : SM4_Base<OpStr, avx512vl_i32_info.info128.RC,
+                "128", avx512vl_i32_info.info128.LdFrag,
+                avx512vl_i32_info.info128.MemOp>, EVEX_V128;
+    defm Z256 : SM4_Base<OpStr, avx512vl_i32_info.info256.RC,
+                "256", avx512vl_i32_info.info256.LdFrag,
+                avx512vl_i32_info.info256.MemOp>, EVEX_V256;
+  }
+  let Predicates = [HasSM4, HasAVX10_2_512] in
+    defm Z : SM4_Base<OpStr, avx512vl_i32_info.info512.RC,
+              "512", avx512vl_i32_info.info512.LdFrag,
+              avx512vl_i32_info.info512.MemOp>, EVEX_V512;
+}
+
+defm VSM4KEY4 : avx10_sm4_base<"vsm4key4">, T8, XS, EVEX, VVVV;
+defm VSM4RNDS4 : avx10_sm4_base<"vsm4rnds4">, T8, XD, EVEX, VVVV;
diff --git a/llvm/test/CodeGen/X86/sm4-evex-intrinsics.ll b/llvm/test/CodeGen/X86/sm4-evex-intrinsics.ll
new file mode 100644
index 00000000000000..fc46d3cf23fd41
--- /dev/null
+++ b/llvm/test/CodeGen/X86/sm4-evex-intrinsics.ll
@@ -0,0 +1,64 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-- --show-mc-encoding -mattr=+sm4,+avx10.2-512 | FileCheck %s
+; RUN: llc < %s -verify-machineinstrs -mtriple=i686-- --show-mc-encoding -mattr=+sm4,+avx10.2-512 | FileCheck %s
+
+define <4 x i32> @test_int_x86_vsm4key4128(<4 x i32> %A, <4 x i32> %B) {
+; CHECK-LABEL: test_int_x86_vsm4key4128:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsm4key4 %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x7a,0xda,0xc1]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <4 x i32> @llvm.x86.vsm4key4128(<4 x i32> %A, <4 x i32> %B)
+  ret <4 x i32> %ret
+}
+declare <4 x i32> @llvm.x86.vsm4key4128(<4 x i32> %A, <4 x i32> %B)
+
+define <8 x i32> @test_int_x86_vsm4key4256(<8 x i32> %A, <8 x i32> %B) {
+; CHECK-LABEL: test_int_x86_vsm4key4256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsm4key4 %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7e,0xda,0xc1]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <8 x i32> @llvm.x86.vsm4key4256(<8 x i32> %A, <8 x i32> %B)
+  ret <8 x i32> %ret
+}
+declare <8 x i32> @llvm.x86.vsm4key4256(<8 x i32> %A, <8 x i32> %B)
+
+define <16 x i32> @test_int_x86_vsm4key4512(<16 x i32> %A, <16 x i32> %B) {
+; CHECK-LABEL: test_int_x86_vsm4key4512:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsm4key4 %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7e,0x48,0xda,0xc1]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <16 x i32> @llvm.x86.vsm4key4512(<16 x i32> %A, <16 x i32> %B)
+  ret <16 x i32> %ret
+}
+declare <16 x i32> @llvm.x86.vsm4key4512(<16 x i32> %A, <16 x i32> %B)
+
+define <4 x i32> @test_int_x86_vsm4rnds4128(<4 x i32> %A, <4 x i32> %B) {
+; CHECK-LABEL: test_int_x86_vsm4rnds4128:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsm4rnds4 %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x7b,0xda,0xc1]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <4 x i32> @llvm.x86.vsm4rnds4128(<4 x i32> %A, <4 x i32> %B)
+  ret <4 x i32> %ret
+}
+declare <4 x i32> @llvm.x86.vsm4rnds4128(<4 x i32> %A, <4 x i32> %B)
+
+define <8 x i32> @test_int_x86_vsm4rnds4256(<8 x i32> %A, <8 x i32> %B) {
+; CHECK-LABEL: test_int_x86_vsm4rnds4256:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsm4rnds4 %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7f,0xda,0xc1]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <8 x i32> @llvm.x86.vsm4rnds4256(<8 x i32> %A, <8 x i32> %B)
+  ret <8 x i32> %ret
+}
+declare <8 x i32> @llvm.x86.vsm4rnds4256(<8 x i32> %A, <8 x i32> %B)
+
+define <16 x i32> @test_int_x86_vsm4rnds4512(<16 x i32> %A, <16 x i32> %B) {
+; CHECK-LABEL: test_int_x86_vsm4rnds4512:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsm4rnds4 %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7f,0x48,0xda,0xc1]
+; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
+  %ret = call <16 x i32> @llvm.x86.vsm4rnds4512(<16 x i32> %A, <16 x i32> %B)
+  ret <16 x i32> %ret
+}
+declare <16 x i32> @llvm.x86.vsm4rnds4512(<16 x i32> %A, <16 x i32> %B)
+
diff --git a/llvm/test/MC/Disassembler/X86/sm4-evex-32.txt b/llvm/test/MC/Disassembler/X86/sm4-evex-32.txt
new file mode 100644
index 00000000000000..f89f4b5a8c0fb8
--- /dev/null
+++ b/llvm/test/MC/Disassembler/X86/sm4-evex-32.txt
@@ -0,0 +1,170 @@
+# RUN: llvm-mc --disassemble %s -triple=x86_64 | FileCheck %s --check-prefixes=ATT
+# RUN: llvm-mc --disassemble %s -triple=x86_64 -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL
+
+# ATT:        vsm4key4 %zmm24, %zmm23, %zmm22
+# INTEL:      vsm4key4 zmm22, zmm23, zmm24
+0x62,0x82,0x46,0x40,0xda,0xf0
+
+# ATT:        vsm4key4  268435456(%rbp,%r14,8), %zmm23, %zmm22
+# INTEL:      vsm4key4 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa2,0x46,0x40,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT:        vsm4key4  291(%r8,%rax,4), %zmm23, %zmm22
+# INTEL:      vsm4key4 zmm22, zmm23, zmmword ptr [r8 + 4*rax + 291]
+0x62,0xc2,0x46,0x40,0xda,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT:        vsm4key4  (%rip), %zmm23, %zmm22
+# INTEL:      vsm4key4 zmm22, zmm23, zmmword ptr [rip]
+0x62,0xe2,0x46,0x40,0xda,0x35,0x00,0x00,0x00,0x00
+
+# ATT:        vsm4key4  -2048(,%rbp,2), %zmm23, %zmm22
+# INTEL:      vsm4key4 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+0x62,0xe2,0x46,0x40,0xda,0x34,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT:        vsm4key4  8128(%rcx), %zmm23, %zmm22
+# INTEL:      vsm4key4 zmm22, zmm23, zmmword ptr [rcx + 8128]
+0x62,0xe2,0x46,0x40,0xda,0x71,0x7f
+
+# ATT:        vsm4key4  -8192(%rdx), %zmm23, %zmm22
+# INTEL:      vsm4key4 zmm22, zmm23, zmmword ptr [rdx - 8192]
+0x62,0xe2,0x46,0x40,0xda,0x72,0x80
+
+# ATT:        vsm4rnds4 %zmm24, %zmm23, %zmm22
+# INTEL:      vsm4rnds4 zmm22, zmm23, zmm24
+0x62,0x82,0x47,0x40,0xda,0xf0
+
+# ATT:        vsm4rnds4  268435456(%rbp,%r14,8), %zmm23, %zmm22
+# INTEL:      vsm4rnds4 zmm22, zmm23, zmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa2,0x47,0x40,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT:        vsm4rnds4  291(%r8,%rax,4), %zmm23, %zmm22
+# INTEL:      vsm4rnds4 zmm22, zmm23, zmmword ptr [r8 + 4*rax + 291]
+0x62,0xc2,0x47,0x40,0xda,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT:        vsm4rnds4  (%rip), %zmm23, %zmm22
+# INTEL:      vsm4rnds4 zmm22, zmm23, zmmword ptr [rip]
+0x62,0xe2,0x47,0x40,0xda,0x35,0x00,0x00,0x00,0x00
+
+# ATT:        vsm4rnds4  -2048(,%rbp,2), %zmm23, %zmm22
+# INTEL:      vsm4rnds4 zmm22, zmm23, zmmword ptr [2*rbp - 2048]
+0x62,0xe2,0x47,0x40,0xda,0x34,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT:        vsm4rnds4  8128(%rcx), %zmm23, %zmm22
+# INTEL:      vsm4rnds4 zmm22, zmm23, zmmword ptr [rcx + 8128]
+0x62,0xe2,0x47,0x40,0xda,0x71,0x7f
+
+# ATT:        vsm4rnds4  -8192(%rdx), %zmm23, %zmm22
+# INTEL:      vsm4rnds4 zmm22, zmm23, zmmword ptr [rdx - 8192]
+0x62,0xe2,0x47,0x40,0xda,0x72,0x80
+
+# ATT:        vsm4key4 %ymm24, %ymm23, %ymm22
+# INTEL:      vsm4key4 ymm22, ymm23, ymm24
+0x62,0x82,0x46,0x20,0xda,0xf0
+
+# ATT:        vsm4key4 %xmm24, %xmm23, %xmm22
+# INTEL:      vsm4key4 xmm22, xmm23, xmm24
+0x62,0x82,0x46,0x00,0xda,0xf0
+
+# ATT:        vsm4key4  268435456(%rbp,%r14,8), %ymm23, %ymm22
+# INTEL:      vsm4key4 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa2,0x46,0x20,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT:        vsm4key4  291(%r8,%rax,4), %ymm23, %ymm22
+# INTEL:      vsm4key4 ymm22, ymm23, ymmword ptr [r8 + 4*rax + 291]
+0x62,0xc2,0x46,0x20,0xda,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT:        vsm4key4  (%rip), %ymm23, %ymm22
+# INTEL:      vsm4key4 ymm22, ymm23, ymmword ptr [rip]
+0x62,0xe2,0x46,0x20,0xda,0x35,0x00,0x00,0x00,0x00
+
+# ATT:        vsm4key4  -1024(,%rbp,2), %ymm23, %ymm22
+# INTEL:      vsm4key4 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+0x62,0xe2,0x46,0x20,0xda,0x34,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT:        vsm4key4  4064(%rcx), %ymm23, %ymm22
+# INTEL:      vsm4key4 ymm22, ymm23, ymmword ptr [rcx + 4064]
+0x62,0xe2,0x46,0x20,0xda,0x71,0x7f
+
+# ATT:        vsm4key4  -4096(%rdx), %ymm23, %ymm22
+# INTEL:      vsm4key4 ymm22, ymm23, ymmword ptr [rdx - 4096]
+0x62,0xe2,0x46,0x20,0xda,0x72,0x80
+
+# ATT:        vsm4key4  268435456(%rbp,%r14,8), %xmm23, %xmm22
+# INTEL:      vsm4key4 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa2,0x46,0x00,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT:        vsm4key4  291(%r8,%rax,4), %xmm23, %xmm22
+# INTEL:      vsm4key4 xmm22, xmm23, xmmword ptr [r8 + 4*rax + 291]
+0x62,0xc2,0x46,0x00,0xda,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT:        vsm4key4  (%rip), %xmm23, %xmm22
+# INTEL:      vsm4key4 xmm22, xmm23, xmmword ptr [rip]
+0x62,0xe2,0x46,0x00,0xda,0x35,0x00,0x00,0x00,0x00
+
+# ATT:        vsm4key4  -512(,%rbp,2), %xmm23, %xmm22
+# INTEL:      vsm4key4 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+0x62,0xe2,0x46,0x00,0xda,0x34,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT:        vsm4key4  2032(%rcx), %xmm23, %xmm22
+# INTEL:      vsm4key4 xmm22, xmm23, xmmword ptr [rcx + 2032]
+0x62,0xe2,0x46,0x00,0xda,0x71,0x7f
+
+# ATT:        vsm4key4  -2048(%rdx), %xmm23, %xmm22
+# INTEL:      vsm4key4 xmm22, xmm23, xmmword ptr [rdx - 2048]
+0x62,0xe2,0x46,0x00,0xda,0x72,0x80
+
+# ATT:        vsm4rnds4 %ymm24, %ymm23, %ymm22
+# INTEL:      vsm4rnds4 ymm22, ymm23, ymm24
+0x62,0x82,0x47,0x20,0xda,0xf0
+
+# ATT:        vsm4rnds4 %xmm24, %xmm23, %xmm22
+# INTEL:      vsm4rnds4 xmm22, xmm23, xmm24
+0x62,0x82,0x47,0x00,0xda,0xf0
+
+# ATT:        vsm4rnds4  268435456(%rbp,%r14,8), %ymm23, %ymm22
+# INTEL:      vsm4rnds4 ymm22, ymm23, ymmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa2,0x47,0x20,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT:        vsm4rnds4  291(%r8,%rax,4), %ymm23, %ymm22
+# INTEL:      vsm4rnds4 ymm22, ymm23, ymmword ptr [r8 + 4*rax + 291]
+0x62,0xc2,0x47,0x20,0xda,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT:        vsm4rnds4  (%rip), %ymm23, %ymm22
+# INTEL:      vsm4rnds4 ymm22, ymm23, ymmword ptr [rip]
+0x62,0xe2,0x47,0x20,0xda,0x35,0x00,0x00,0x00,0x00
+
+# ATT:        vsm4rnds4  -1024(,%rbp,2), %ymm23, %ymm22
+# INTEL:      vsm4rnds4 ymm22, ymm23, ymmword ptr [2*rbp - 1024]
+0x62,0xe2,0x47,0x20,0xda,0x34,0x6d,0x00,0xfc,0xff,0xff
+
+# ATT:        vsm4rnds4  4064(%rcx), %ymm23, %ymm22
+# INTEL:      vsm4rnds4 ymm22, ymm23, ymmword ptr [rcx + 4064]
+0x62,0xe2,0x47,0x20,0xda,0x71,0x7f
+
+# ATT:        vsm4rnds4  -4096(%rdx), %ymm23, %ymm22
+# INTEL:      vsm4rnds4 ymm22, ymm23, ymmword ptr [rdx - 4096]
+0x62,0xe2,0x47,0x20,0xda,0x72,0x80
+
+# ATT:        vsm4rnds4  268435456(%rbp,%r14,8), %xmm23, %xmm22
+# INTEL:      vsm4rnds4 xmm22, xmm23, xmmword ptr [rbp + 8*r14 + 268435456]
+0x62,0xa2,0x47,0x00,0xda,0xb4,0xf5,0x00,0x00,0x00,0x10
+
+# ATT:        vsm4rnds4  291(%r8,%rax,4), %xmm23, %xmm22
+# INTEL:      vsm4rnds4 xmm22, xmm23, xmmword ptr [r8 + 4*rax + 291]
+0x62,0xc2,0x47,0x00,0xda,0xb4,0x80,0x23,0x01,0x00,0x00
+
+# ATT:        vsm4rnds4  (%rip), %xmm23, %xmm22
+# INTEL:      vsm4rnds4 xmm22, xmm23, xmmword ptr [rip]
+0x62,0xe2,0x47,0x00,0xda,0x35,0x00,0x00,0x00,0x00
+
+# ATT:        vsm4rnds4  -512(,%rbp,2), %xmm23, %xmm22
+# INTEL:      vsm4rnds4 xmm22, xmm23, xmmword ptr [2*rbp - 512]
+0x62,0xe2,0x47,0x00,0xda,0x34,0x6d,0x00,0xfe,0xff,0xff
+
+# ATT:        vsm4rnds4  2032(%rcx), %xmm23, %xmm22
+# INTEL:      vsm4rnds4 xmm22, xmm23, xmmword ptr [rcx + 2032]
+0x62,0xe2,0x47,0x00,0xda,0x71,0x7f
+
+# ATT:        vsm4rnds4  -2048(%rdx), %xmm23, %xmm22
+# INTEL:      vsm4rnds4 xmm22, xmm23, xmmword ptr [rdx - 2048]
+0x62,0xe2,0x47,0x00,0xda,0x72,0x80
diff --git a/llvm/test/MC/Disassembler/X86/sm4-evex-64.txt b/llvm/test/MC/Disassembler/X86/sm4-evex-64.txt
new file mode 100644
index 00000000000000..c1cb271a967b13
--- /dev/null
+++ b/llvm/test/MC/Disassembler/X86/sm4-evex-64.txt
@@ -0,0 +1,170 @@
+# RUN: llvm-mc --disassemble %s -triple=i386-unknown-unknown | FileCheck %s --check-prefixes=ATT
+# RUN: llvm-mc --disassemble %s -triple=i386-unknown-unknown -x86-asm-syntax=intel --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL
+
+# ATT:        vsm4key4 %zmm4, %zmm3, %zmm2
+# INTEL:      vsm4key4 zmm2, zmm3, zmm4
+0x62,0xf2,0x66,0x48,0xda,0xd4
+
+# ATT:        vsm4key4  268435456(%esp,%esi,8), %zmm3, %zmm2
+# INTEL:      vsm4key4 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf2,0x66,0x48,0xda,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT:        vsm4key4  291(%edi,%eax,4), %zmm3, %zmm2
+# INTEL:      vsm4key4 zmm2, zmm3, zmmword ptr [edi + 4*eax + 291]
+0x62,0xf2,0x66,0x48,0xda,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT:        vsm4key4  (%eax), %zmm3, %zmm2
+# INTEL:      vsm4key4 zmm2, zmm3, zmmword ptr [eax]
+0x62,0xf2,0x66,0x48,0xda,0x10
+
+# ATT:        vsm4key4  -2048(,%ebp,2), %zmm3, %zmm2
+# INTEL:      vsm4key4 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+0x62,0xf2,0x66,0x48,0xda,0x14,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT:        vsm4key4  8128(%ecx), %zmm3, %zmm2
+# INTEL:      vsm4key4 zmm2, zmm3, zmmword ptr [ecx + 8128]
+0x62,0xf2,0x66,0x48,0xda,0x51,0x7f
+
+# ATT:        vsm4key4  -8192(%edx), %zmm3, %zmm2
+# INTEL:      vsm4key4 zmm2, zmm3, zmmword ptr [edx - 8192]
+0x62,0xf2,0x66,0x48,0xda,0x52,0x80
+
+# ATT:        vsm4rnds4 %zmm4, %zmm3, %zmm2
+# INTEL:      vsm4rnds4 zmm2, zmm3, zmm4
+0x62,0xf2,0x67,0x48,0xda,0xd4
+
+# ATT:        vsm4rnds4  268435456(%esp,%esi,8), %zmm3, %zmm2
+# INTEL:      vsm4rnds4 zmm2, zmm3, zmmword ptr [esp + 8*esi + 268435456]
+0x62,0xf2,0x67,0x48,0xda,0x94,0xf4,0x00,0x00,0x00,0x10
+
+# ATT:        vsm4rnds4  291(%edi,%eax,4), %zmm3, %zmm2
+# INTEL:      vsm4rnds4 zmm2, zmm3, zmmword ptr [edi + 4*eax + 291]
+0x62,0xf2,0x67,0x48,0xda,0x94,0x87,0x23,0x01,0x00,0x00
+
+# ATT:        vsm4rnds4  (%eax), %zmm3, %zmm2
+# INTEL:      vsm4rnds4 zmm2, zmm3, zmmword ptr [eax]
+0x62,0xf2,0x67,0x48,0xda,0x10
+
+# ATT:        vsm4rnds4  -2048(,%ebp,2), %zmm3, %zmm2
+# INTEL:      vsm4rnds4 zmm2, zmm3, zmmword ptr [2*ebp - 2048]
+0x62,0xf2,0x67,0x48,0xda,0x14,0x6d,0x00,0xf8,0xff,0xff
+
+# ATT:        vsm4rnds4  8128(%ecx), %zmm3, %zmm2
+# INTEL:      vsm4rnds4 zmm2, zmm3, zmmword ptr [ecx + 8128]
+0x62,0xf2,0x67,0x48,0xda,0x51,0x7f
+
+# ATT:        vsm4rnds4  -8192(%edx), %zmm3, %zmm2
+# INTEL:      vsm4rnds4 zmm2, zmm3, zmmword ptr [edx - 8192]
+0x62,0xf2,0x67,0x48,0xda,0x52,0x80
+
+# ATT:        vsm4key4 %ymm4, %ymm3, %ymm2
+# INTEL:      vsm4key4 ymm2, ymm3, ymm4
+0x62,0xf2,0x66,0x28,0xda,0xd4
+
+# ATT:        vsm4key4 %xmm4, %xmm3, %xmm2
+# INTEL:      vsm4key4 xmm2, xmm3, xmm4
+0x62,0xf2,0x66,0x08,0xda,0xd4
+
+# ATT:        vsm4key4  268435456(%esp...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/113402


More information about the cfe-commits mailing list