[llvm] [X86] Support lowering for APX Promoted SHA/MOVDIR/CRC32/INVPCID instructions (PR #76786)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Jan 2 23:32:07 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-x86
Author: None (XinWang10)
<details>
<summary>Changes</summary>
R16-R31 were added to the GPRs in https://github.com/llvm/llvm-project/pull/70958.
This patch adds lowering support for the APX-promoted SHA/MOVDIR/CRC32/INVPCID instructions.
RFC: https://discourse.llvm.org/t/rfc-design-for-apx-feature-egpr-and-ndd-support/73031/4
---
Patch is 22.11 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/76786.diff
9 Files Affected:
- (modified) llvm/lib/Target/X86/X86FastISel.cpp (+4-4)
- (modified) llvm/lib/Target/X86/X86InstrSystem.td (+10-3)
- (modified) llvm/lib/Target/X86/X86InstrVMX.td (+4-4)
- (added) llvm/test/CodeGen/X86/apx/cet.ll (+50)
- (added) llvm/test/CodeGen/X86/apx/crc32-fast-isel.ll (+61)
- (added) llvm/test/CodeGen/X86/apx/crc32.ll (+58)
- (added) llvm/test/CodeGen/X86/apx/invpcid.ll (+27)
- (added) llvm/test/CodeGen/X86/apx/movdir.ll (+38)
- (added) llvm/test/CodeGen/X86/apx/sha.ll (+186)
``````````diff
diff --git a/llvm/lib/Target/X86/X86FastISel.cpp b/llvm/lib/Target/X86/X86FastISel.cpp
index 0ba31e173a1a72..8b4ff4c8ed878f 100644
--- a/llvm/lib/Target/X86/X86FastISel.cpp
+++ b/llvm/lib/Target/X86/X86FastISel.cpp
@@ -3047,19 +3047,19 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
default:
llvm_unreachable("Unexpected intrinsic.");
case Intrinsic::x86_sse42_crc32_32_8:
- Opc = X86::CRC32r32r8;
+ Opc = Subtarget->hasEGPR() ? X86::CRC32r32r8_EVEX : X86::CRC32r32r8;
RC = &X86::GR32RegClass;
break;
case Intrinsic::x86_sse42_crc32_32_16:
- Opc = X86::CRC32r32r16;
+ Opc = Subtarget->hasEGPR() ? X86::CRC32r32r16_EVEX : X86::CRC32r32r16;
RC = &X86::GR32RegClass;
break;
case Intrinsic::x86_sse42_crc32_32_32:
- Opc = X86::CRC32r32r32;
+ Opc = Subtarget->hasEGPR() ? X86::CRC32r32r32_EVEX : X86::CRC32r32r32;
RC = &X86::GR32RegClass;
break;
case Intrinsic::x86_sse42_crc32_64_64:
- Opc = X86::CRC32r64r64;
+ Opc = Subtarget->hasEGPR() ? X86::CRC32r64r64_EVEX : X86::CRC32r64r64;
RC = &X86::GR64RegClass;
break;
}
diff --git a/llvm/lib/Target/X86/X86InstrSystem.td b/llvm/lib/Target/X86/X86InstrSystem.td
index 699e5847e63fb9..30530a00809f3f 100644
--- a/llvm/lib/Target/X86/X86InstrSystem.td
+++ b/llvm/lib/Target/X86/X86InstrSystem.td
@@ -695,14 +695,14 @@ def INVPCID32 : I<0x82, MRMSrcMem, (outs), (ins GR32:$src1, i128mem:$src2),
Requires<[Not64BitMode, HasINVPCID]>;
def INVPCID64 : I<0x82, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2),
"invpcid\t{$src2, $src1|$src1, $src2}", []>, T8, PD,
- Requires<[In64BitMode, HasINVPCID]>;
+ Requires<[In64BitMode, HasINVPCID, NoEGPR]>;
def INVPCID64_EVEX : I<0xF2, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2),
"invpcid\t{$src2, $src1|$src1, $src2}", []>,
- EVEX, NoCD8, T_MAP4, XS, Requires<[In64BitMode, HasINVPCID]>;
+ EVEX, NoCD8, T_MAP4, XS, Requires<[In64BitMode, HasINVPCID, HasEGPR]>;
} // SchedRW
-let Predicates = [In64BitMode, HasINVPCID] in {
+let Predicates = [In64BitMode, HasINVPCID, NoEGPR] in {
// The instruction can only use a 64 bit register as the register argument
// in 64 bit mode, while the intrinsic only accepts a 32 bit argument
// corresponding to it.
@@ -714,6 +714,13 @@ let Predicates = [In64BitMode, HasINVPCID] in {
addr:$src2)>;
}
+let Predicates = [In64BitMode, HasINVPCID, HasEGPR] in {
+ def : Pat<(int_x86_invpcid GR32:$src1, addr:$src2),
+ (INVPCID64_EVEX
+ (SUBREG_TO_REG (i64 0), (MOV32rr GR32:$src1), sub_32bit),
+ addr:$src2)>;
+}
+
//===----------------------------------------------------------------------===//
// SMAP Instruction
diff --git a/llvm/lib/Target/X86/X86InstrVMX.td b/llvm/lib/Target/X86/X86InstrVMX.td
index 7cc468fe15ad4e..e6722467897216 100644
--- a/llvm/lib/Target/X86/X86InstrVMX.td
+++ b/llvm/lib/Target/X86/X86InstrVMX.td
@@ -21,10 +21,10 @@ def INVEPT32 : I<0x80, MRMSrcMem, (outs), (ins GR32:$src1, i128mem:$src2),
Requires<[Not64BitMode]>;
def INVEPT64 : I<0x80, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2),
"invept\t{$src2, $src1|$src1, $src2}", []>, T8, PD,
- Requires<[In64BitMode]>;
+ Requires<[In64BitMode, NoEGPR]>;
def INVEPT64_EVEX : I<0xF0, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2),
"invept\t{$src2, $src1|$src1, $src2}", []>,
- EVEX, NoCD8, T_MAP4, XS, Requires<[In64BitMode]>;
+ EVEX, NoCD8, T_MAP4, XS, Requires<[In64BitMode, HasEGPR]>;
// 66 0F 38 81
def INVVPID32 : I<0x81, MRMSrcMem, (outs), (ins GR32:$src1, i128mem:$src2),
@@ -32,10 +32,10 @@ def INVVPID32 : I<0x81, MRMSrcMem, (outs), (ins GR32:$src1, i128mem:$src2),
Requires<[Not64BitMode]>;
def INVVPID64 : I<0x81, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2),
"invvpid\t{$src2, $src1|$src1, $src2}", []>, T8, PD,
- Requires<[In64BitMode]>;
+ Requires<[In64BitMode, NoEGPR]>;
def INVVPID64_EVEX : I<0xF1, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2),
"invvpid\t{$src2, $src1|$src1, $src2}", []>,
- EVEX, NoCD8, T_MAP4, XS, Requires<[In64BitMode]>;
+ EVEX, NoCD8, T_MAP4, XS, Requires<[In64BitMode, HasEGPR]>;
// 0F 01 C1
def VMCALL : I<0x01, MRM_C1, (outs), (ins), "vmcall", []>, TB;
diff --git a/llvm/test/CodeGen/X86/apx/cet.ll b/llvm/test/CodeGen/X86/apx/cet.ll
new file mode 100644
index 00000000000000..98f3844d1ccd19
--- /dev/null
+++ b/llvm/test/CodeGen/X86/apx/cet.ll
@@ -0,0 +1,50 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+shstk,+egpr --show-mc-encoding | FileCheck %s
+
+define void @test_wrssd(i32 %a, ptr %__p) {
+; CHECK-LABEL: test_wrssd:
+; CHECK: ## %bb.0: ## %entry
+; CHECK-NEXT: wrssd %edi, (%rsi) ## encoding: [0x62,0xf4,0x7c,0x08,0x66,0x3e]
+; CHECK-NEXT: retq ## encoding: [0xc3]
+entry:
+ tail call void @llvm.x86.wrssd(i32 %a, ptr %__p)
+ ret void
+}
+
+declare void @llvm.x86.wrssd(i32, ptr)
+
+define void @test_wrssq(i64 %a, ptr %__p) {
+; CHECK-LABEL: test_wrssq:
+; CHECK: ## %bb.0: ## %entry
+; CHECK-NEXT: wrssq %rdi, (%rsi) ## encoding: [0x62,0xf4,0xfc,0x08,0x66,0x3e]
+; CHECK-NEXT: retq ## encoding: [0xc3]
+entry:
+ tail call void @llvm.x86.wrssq(i64 %a, ptr %__p)
+ ret void
+}
+
+declare void @llvm.x86.wrssq(i64, ptr)
+
+define void @test_wrussd(i32 %a, ptr %__p) {
+; CHECK-LABEL: test_wrussd:
+; CHECK: ## %bb.0: ## %entry
+; CHECK-NEXT: wrussd %edi, (%rsi) ## encoding: [0x62,0xf4,0x7d,0x08,0x65,0x3e]
+; CHECK-NEXT: retq ## encoding: [0xc3]
+entry:
+ tail call void @llvm.x86.wrussd(i32 %a, ptr %__p)
+ ret void
+}
+
+declare void @llvm.x86.wrussd(i32, ptr)
+
+define void @test_wrussq(i64 %a, ptr %__p) {
+; CHECK-LABEL: test_wrussq:
+; CHECK: ## %bb.0: ## %entry
+; CHECK-NEXT: wrussq %rdi, (%rsi) ## encoding: [0x62,0xf4,0xfd,0x08,0x65,0x3e]
+; CHECK-NEXT: retq ## encoding: [0xc3]
+entry:
+ tail call void @llvm.x86.wrussq(i64 %a, ptr %__p)
+ ret void
+}
+
+declare void @llvm.x86.wrussq(i64, ptr)
diff --git a/llvm/test/CodeGen/X86/apx/crc32-fast-isel.ll b/llvm/test/CodeGen/X86/apx/crc32-fast-isel.ll
new file mode 100644
index 00000000000000..0b51679ccd7fb5
--- /dev/null
+++ b/llvm/test/CodeGen/X86/apx/crc32-fast-isel.ll
@@ -0,0 +1,61 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -fast-isel -fast-isel-abort=3 -mtriple=x86_64-unknown-unknown -mattr=-sse4.2,+crc32,+egpr --show-mc-encoding | FileCheck %s
+; RUN: llc < %s -fast-isel -fast-isel-abort=3 -mtriple=x86_64-unknown-unknown -mattr=+crc32,+egpr --show-mc-encoding | FileCheck %s
+
+define i32 @test_mm_crc32_u8(i32 %a0, i32 %a1) nounwind {
+; CHECK-LABEL: test_mm_crc32_u8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movl %edi, %eax # encoding: [0x89,0xf8]
+; CHECK-NEXT: crc32b %sil, %eax # encoding: [0x62,0xf4,0x7c,0x08,0xf0,0xc6]
+; CHECK-NEXT: retq # encoding: [0xc3]
+ %trunc = trunc i32 %a1 to i8
+ %res = call i32 @llvm.x86.sse42.crc32.32.8(i32 %a0, i8 %trunc)
+ ret i32 %res
+}
+declare i32 @llvm.x86.sse42.crc32.32.8(i32, i8) nounwind readnone
+
+define i32 @test_mm_crc32_u16(i32 %a0, i32 %a1) nounwind {
+; CHECK-LABEL: test_mm_crc32_u16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movl %edi, %eax # encoding: [0x89,0xf8]
+; CHECK-NEXT: crc32w %si, %eax # encoding: [0x62,0xf4,0x7d,0x08,0xf1,0xc6]
+; CHECK-NEXT: retq # encoding: [0xc3]
+ %trunc = trunc i32 %a1 to i16
+ %res = call i32 @llvm.x86.sse42.crc32.32.16(i32 %a0, i16 %trunc)
+ ret i32 %res
+}
+declare i32 @llvm.x86.sse42.crc32.32.16(i32, i16) nounwind readnone
+
+define i32 @test_mm_crc32_u32(i32 %a0, i32 %a1) nounwind {
+; CHECK-LABEL: test_mm_crc32_u32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movl %edi, %eax # encoding: [0x89,0xf8]
+; CHECK-NEXT: crc32l %esi, %eax # encoding: [0x62,0xf4,0x7c,0x08,0xf1,0xc6]
+; CHECK-NEXT: retq # encoding: [0xc3]
+ %res = call i32 @llvm.x86.sse42.crc32.32.32(i32 %a0, i32 %a1)
+ ret i32 %res
+}
+declare i32 @llvm.x86.sse42.crc32.32.32(i32, i32) nounwind readnone
+
+define i64 @test_mm_crc64_u8(i64 %a0, i32 %a1) nounwind{
+; CHECK-LABEL: test_mm_crc64_u8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: crc32b %sil, %edi # encoding: [0x62,0xf4,0x7c,0x08,0xf0,0xfe]
+; CHECK-NEXT: movl %edi, %eax # encoding: [0x89,0xf8]
+; CHECK-NEXT: retq # encoding: [0xc3]
+ %trunc = trunc i32 %a1 to i8
+ %res = call i64 @llvm.x86.sse42.crc32.64.8(i64 %a0, i8 %trunc)
+ ret i64 %res
+}
+declare i64 @llvm.x86.sse42.crc32.64.8(i64, i8) nounwind readnone
+
+define i64 @test_mm_crc64_u64(i64 %a0, i64 %a1) nounwind{
+; CHECK-LABEL: test_mm_crc64_u64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movq %rdi, %rax # encoding: [0x48,0x89,0xf8]
+; CHECK-NEXT: crc32q %rsi, %rax # encoding: [0x62,0xf4,0xfc,0x08,0xf1,0xc6]
+; CHECK-NEXT: retq # encoding: [0xc3]
+ %res = call i64 @llvm.x86.sse42.crc32.64.64(i64 %a0, i64 %a1)
+ ret i64 %res
+}
+declare i64 @llvm.x86.sse42.crc32.64.64(i64, i64) nounwind readnone
diff --git a/llvm/test/CodeGen/X86/apx/crc32.ll b/llvm/test/CodeGen/X86/apx/crc32.ll
new file mode 100644
index 00000000000000..4bcc4d15cc6b5a
--- /dev/null
+++ b/llvm/test/CodeGen/X86/apx/crc32.ll
@@ -0,0 +1,58 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+crc32,+egpr -show-mc-encoding | FileCheck %s
+
+define i32 @crc32_32_8(i32 %a, i8 %b) nounwind {
+; CHECK-LABEL: crc32_32_8:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: movl %edi, %eax ## encoding: [0x89,0xf8]
+; CHECK-NEXT: crc32b %sil, %eax ## encoding: [0x62,0xf4,0x7c,0x08,0xf0,0xc6]
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ %tmp = call i32 @llvm.x86.sse42.crc32.32.8(i32 %a, i8 %b)
+ ret i32 %tmp
+}
+
+define i32 @crc32_32_16(i32 %a, i16 %b) nounwind {
+; CHECK-LABEL: crc32_32_16:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: movl %edi, %eax ## encoding: [0x89,0xf8]
+; CHECK-NEXT: crc32w %si, %eax ## encoding: [0x62,0xf4,0x7d,0x08,0xf1,0xc6]
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ %tmp = call i32 @llvm.x86.sse42.crc32.32.16(i32 %a, i16 %b)
+ ret i32 %tmp
+}
+
+define i32 @crc32_32_32(i32 %a, i32 %b) nounwind {
+; CHECK-LABEL: crc32_32_32:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: movl %edi, %eax ## encoding: [0x89,0xf8]
+; CHECK-NEXT: crc32l %esi, %eax ## encoding: [0x62,0xf4,0x7c,0x08,0xf1,0xc6]
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ %tmp = call i32 @llvm.x86.sse42.crc32.32.32(i32 %a, i32 %b)
+ ret i32 %tmp
+}
+
+define i64 @crc32_64_8(i64 %a, i8 %b) nounwind {
+; CHECK-LABEL: crc32_64_8:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: movq %rdi, %rax ## encoding: [0x48,0x89,0xf8]
+; CHECK-NEXT: crc32b %sil, %eax ## encoding: [0x62,0xf4,0x7c,0x08,0xf0,0xc6]
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ %tmp = call i64 @llvm.x86.sse42.crc32.64.8(i64 %a, i8 %b)
+ ret i64 %tmp
+}
+
+define i64 @crc32_64_64(i64 %a, i64 %b) nounwind {
+; CHECK-LABEL: crc32_64_64:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: movq %rdi, %rax ## encoding: [0x48,0x89,0xf8]
+; CHECK-NEXT: crc32q %rsi, %rax ## encoding: [0x62,0xf4,0xfc,0x08,0xf1,0xc6]
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ %tmp = call i64 @llvm.x86.sse42.crc32.64.64(i64 %a, i64 %b)
+ ret i64 %tmp
+}
+
+declare i32 @llvm.x86.sse42.crc32.32.8(i32, i8) nounwind
+declare i32 @llvm.x86.sse42.crc32.32.16(i32, i16) nounwind
+declare i32 @llvm.x86.sse42.crc32.32.32(i32, i32) nounwind
+declare i64 @llvm.x86.sse42.crc32.64.8(i64, i8) nounwind
+declare i64 @llvm.x86.sse42.crc32.64.64(i64, i64) nounwind
diff --git a/llvm/test/CodeGen/X86/apx/invpcid.ll b/llvm/test/CodeGen/X86/apx/invpcid.ll
new file mode 100644
index 00000000000000..389895f4921305
--- /dev/null
+++ b/llvm/test/CodeGen/X86/apx/invpcid.ll
@@ -0,0 +1,27 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+invpcid,+egpr --show-mc-encoding | FileCheck %s
+
+define void @test_invpcid(i32 %type, ptr %descriptor) {
+; CHECK-LABEL: test_invpcid:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: movl %edi, %eax # encoding: [0x89,0xf8]
+; CHECK-NEXT: invpcid (%rsi), %rax # encoding: [0x62,0xf4,0x7e,0x08,0xf2,0x06]
+; CHECK-NEXT: retq # encoding: [0xc3]
+entry:
+ call void @llvm.x86.invpcid(i32 %type, ptr %descriptor)
+ ret void
+}
+
+define void @test_invpcid2(ptr readonly %type, ptr %descriptor) {
+; CHECK-LABEL: test_invpcid2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: movl (%rdi), %eax # encoding: [0x8b,0x07]
+; CHECK-NEXT: invpcid (%rsi), %rax # encoding: [0x62,0xf4,0x7e,0x08,0xf2,0x06]
+; CHECK-NEXT: retq # encoding: [0xc3]
+entry:
+ %0 = load i32, ptr %type, align 4
+ tail call void @llvm.x86.invpcid(i32 %0, ptr %descriptor) #1
+ ret void
+}
+
+declare void @llvm.x86.invpcid(i32, ptr)
diff --git a/llvm/test/CodeGen/X86/apx/movdir.ll b/llvm/test/CodeGen/X86/apx/movdir.ll
new file mode 100644
index 00000000000000..06fd7511bc143c
--- /dev/null
+++ b/llvm/test/CodeGen/X86/apx/movdir.ll
@@ -0,0 +1,38 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+movdiri,+movdir64b,+egpr --show-mc-encoding | FileCheck %s
+
+define void @test_movdiri(ptr %p, i32 %v) {
+; CHECK-LABEL: test_movdiri:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: movdiri %esi, (%rdi) # encoding: [0x62,0xf4,0x7c,0x08,0xf9,0x37]
+; CHECK-NEXT: retq # encoding: [0xc3]
+entry:
+ call void @llvm.x86.directstore32(ptr %p, i32 %v)
+ ret void
+}
+
+declare void @llvm.x86.directstore32(ptr, i32)
+
+define void @test_movdiri_64(ptr %p, i64 %v) {
+; CHECK-LABEL: test_movdiri_64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: movdiri %rsi, (%rdi) # encoding: [0x62,0xf4,0xfc,0x08,0xf9,0x37]
+; CHECK-NEXT: retq # encoding: [0xc3]
+entry:
+ call void @llvm.x86.directstore64(ptr %p, i64 %v)
+ ret void
+}
+
+declare void @llvm.x86.directstore64(ptr, i64)
+
+define void @test_movdir64b(ptr %dst, ptr %src) {
+; CHECK-LABEL: test_movdir64b:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: movdir64b (%rsi), %rdi # encoding: [0x62,0xf4,0x7d,0x08,0xf8,0x3e]
+; CHECK-NEXT: retq # encoding: [0xc3]
+entry:
+ call void @llvm.x86.movdir64b(ptr %dst, ptr %src)
+ ret void
+}
+
+declare void @llvm.x86.movdir64b(ptr, ptr)
diff --git a/llvm/test/CodeGen/X86/apx/sha.ll b/llvm/test/CodeGen/X86/apx/sha.ll
new file mode 100644
index 00000000000000..088ee61a97f4ea
--- /dev/null
+++ b/llvm/test/CodeGen/X86/apx/sha.ll
@@ -0,0 +1,186 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc < %s -mattr=+sha,+egpr -mtriple=x86_64-unknown-unknown --show-mc-encoding | FileCheck %s
+
+declare <4 x i32> @llvm.x86.sha1rnds4(<4 x i32>, <4 x i32>, i8) nounwind readnone
+
+define <4 x i32> @test_sha1rnds4rr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable {
+; CHECK-LABEL: test_sha1rnds4rr:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: sha1rnds4 $3, %xmm1, %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xd4,0xc1,0x03]
+; CHECK-NEXT: retq # encoding: [0xc3]
+entry:
+ %0 = tail call <4 x i32> @llvm.x86.sha1rnds4(<4 x i32> %a, <4 x i32> %b, i8 3)
+ ret <4 x i32> %0
+}
+
+define <4 x i32> @test_sha1rnds4rm(<4 x i32> %a, ptr %b) nounwind uwtable {
+; CHECK-LABEL: test_sha1rnds4rm:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: sha1rnds4 $3, (%rdi), %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xd4,0x07,0x03]
+; CHECK-NEXT: retq # encoding: [0xc3]
+entry:
+ %0 = load <4 x i32>, ptr %b
+ %1 = tail call <4 x i32> @llvm.x86.sha1rnds4(<4 x i32> %a, <4 x i32> %0, i8 3)
+ ret <4 x i32> %1
+}
+
+declare <4 x i32> @llvm.x86.sha1nexte(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <4 x i32> @test_sha1nexterr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable {
+; CHECK-LABEL: test_sha1nexterr:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: sha1nexte %xmm1, %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xd8,0xc1]
+; CHECK-NEXT: retq # encoding: [0xc3]
+entry:
+ %0 = tail call <4 x i32> @llvm.x86.sha1nexte(<4 x i32> %a, <4 x i32> %b)
+ ret <4 x i32> %0
+}
+
+define <4 x i32> @test_sha1nexterm(<4 x i32> %a, ptr %b) nounwind uwtable {
+; CHECK-LABEL: test_sha1nexterm:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: sha1nexte (%rdi), %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xd8,0x07]
+; CHECK-NEXT: retq # encoding: [0xc3]
+entry:
+ %0 = load <4 x i32>, ptr %b
+ %1 = tail call <4 x i32> @llvm.x86.sha1nexte(<4 x i32> %a, <4 x i32> %0)
+ ret <4 x i32> %1
+}
+
+declare <4 x i32> @llvm.x86.sha1msg1(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <4 x i32> @test_sha1msg1rr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable {
+; CHECK-LABEL: test_sha1msg1rr:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: sha1msg1 %xmm1, %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xd9,0xc1]
+; CHECK-NEXT: retq # encoding: [0xc3]
+entry:
+ %0 = tail call <4 x i32> @llvm.x86.sha1msg1(<4 x i32> %a, <4 x i32> %b)
+ ret <4 x i32> %0
+}
+
+define <4 x i32> @test_sha1msg1rm(<4 x i32> %a, ptr %b) nounwind uwtable {
+; CHECK-LABEL: test_sha1msg1rm:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: sha1msg1 (%rdi), %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xd9,0x07]
+; CHECK-NEXT: retq # encoding: [0xc3]
+entry:
+ %0 = load <4 x i32>, ptr %b
+ %1 = tail call <4 x i32> @llvm.x86.sha1msg1(<4 x i32> %a, <4 x i32> %0)
+ ret <4 x i32> %1
+}
+
+declare <4 x i32> @llvm.x86.sha1msg2(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <4 x i32> @test_sha1msg2rr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable {
+; CHECK-LABEL: test_sha1msg2rr:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: sha1msg2 %xmm1, %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xda,0xc1]
+; CHECK-NEXT: retq # encoding: [0xc3]
+entry:
+ %0 = tail call <4 x i32> @llvm.x86.sha1msg2(<4 x i32> %a, <4 x i32> %b)
+ ret <4 x i32> %0
+}
+
+define <4 x i32> @test_sha1msg2rm(<4 x i32> %a, ptr %b) nounwind uwtable {
+; CHECK-LABEL: test_sha1msg2rm:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: sha1msg2 (%rdi), %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xda,0x07]
+; CHECK-NEXT: retq # encoding: [0xc3]
+entry:
+ %0 = load <4 x i32>, ptr %b
+ %1 = tail call <4 x i32> @llvm.x86.sha1msg2(<4 x i32> %a, <4 x i32> %0)
+ ret <4 x i32> %1
+}
+
+declare <4 x i32> @llvm.x86.sha256rnds2(<4 x i32>, <4 x i32>, <4 x i32>) nounwind readnone
+
+define <4 x i32> @test_sha256rnds2rr(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) nounwind uwtable {
+;
+; CHECK-LABEL: test_sha256rnds2rr:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: movaps %xmm0, %xmm3 # encoding: [0x0f,0x28,0xd8]
+; CHECK-NEXT: movaps %xmm2, %xmm0 # encoding: [0x0f,0x28,0xc2]
+; CHECK-NEXT: sha256rnds2 %xmm0, %xmm1, %xmm3 # encoding: [0x62,0xf4,0x7c,0x08,0xdb,0xd9]
+; CHECK-NEXT: movaps %xmm3, %xmm0 # encoding: [0x0f,0x28,0xc3]
+; CHECK-NEXT: retq # encoding: [0xc3]
+entry:
+ %0 = tail call <4 x i32> @llvm.x86.sha256rnds2(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c)
+ ret <4 x i32> %0
+}
+
+define <4 x i32> @test_sha256rnds2rm(<4 x i32> %a, ptr %b, <4 x i32> %c) nounwind uwtable {
+;
+; CHECK-LABEL: test_sha256rnds2rm:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: movaps %xmm0, %xmm2 # encoding: [0x0f,0x28,0xd0]
+; CHECK-NEXT: movaps %xmm1, %xmm0 # encoding: [0x0f,0x28,0xc1]
+; CHECK-NEXT: sha256rnds2 %xmm0, (%rdi), %xmm2 # encoding: [0x62,0xf4,0x7c,0x08,0xdb,0x17]
+; CHECK-NEXT: movaps %xmm2, %xmm0 # encoding: [0x0f,0x28,0xc2]
+; CHECK-NEXT: retq # encoding: [0xc3]
+entry:
+ %0 = load <4 x i32>, ptr %b
+ %1 = tail call <4 x i32> @llvm.x86.sha256rnds2(<4 x i32> %a, <4 x i32> %0, <4 x i32> %c)
+ ret <4 x i32> %1
+}
+
+declare <4 x i32> @llvm.x86.sha256msg1(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <4 x i32> @test_sha256msg1rr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable {
+; CHECK-LABEL: test_sha256msg1rr:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: sha256msg1 %xmm1, %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xdc,0xc1]
+; CHECK-NEXT: retq # encoding: [0xc3]
+ent...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/76786
More information about the llvm-commits
mailing list