[llvm] [X86] Support lowering for APX Promoted SHA/MOVDIR/CRC32/INVPCID instructions (PR #76786)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Jan 2 23:21:55 PST 2024
https://github.com/XinWang10 updated https://github.com/llvm/llvm-project/pull/76786
>From c0c6ab1f2a91fa96a119eac8e1bb3bf0deb8dfdc Mon Sep 17 00:00:00 2001
From: "Wang, Xin10" <xin10.wang at intel.com>
Date: Tue, 2 Jan 2024 23:10:13 -0800
Subject: [PATCH 1/2] Support Lowering for APX Promoted
SHA/MOVDIR/CRC32/INVPCID instructions
---
llvm/lib/Target/X86/X86FastISel.cpp | 8 +-
llvm/lib/Target/X86/X86InstrSystem.td | 13 +-
llvm/lib/Target/X86/X86InstrVMX.td | 8 +-
llvm/test/CodeGen/X86/apx/cet.ll | 50 +++++
llvm/test/CodeGen/X86/apx/crc32-fast-isel.ll | 61 ++++++
llvm/test/CodeGen/X86/apx/crc32.ll | 58 ++++++
llvm/test/CodeGen/X86/apx/invpcid.ll | 27 +++
llvm/test/CodeGen/X86/apx/movdir.ll | 38 ++++
llvm/test/CodeGen/X86/apx/sha.ll | 186 +++++++++++++++++++
9 files changed, 438 insertions(+), 11 deletions(-)
create mode 100644 llvm/test/CodeGen/X86/apx/cet.ll
create mode 100644 llvm/test/CodeGen/X86/apx/crc32-fast-isel.ll
create mode 100644 llvm/test/CodeGen/X86/apx/crc32.ll
create mode 100644 llvm/test/CodeGen/X86/apx/invpcid.ll
create mode 100644 llvm/test/CodeGen/X86/apx/movdir.ll
create mode 100644 llvm/test/CodeGen/X86/apx/sha.ll
diff --git a/llvm/lib/Target/X86/X86FastISel.cpp b/llvm/lib/Target/X86/X86FastISel.cpp
index 0ba31e173a1a72..3658af785c24ec 100644
--- a/llvm/lib/Target/X86/X86FastISel.cpp
+++ b/llvm/lib/Target/X86/X86FastISel.cpp
@@ -3047,19 +3047,19 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
default:
llvm_unreachable("Unexpected intrinsic.");
case Intrinsic::x86_sse42_crc32_32_8:
- Opc = X86::CRC32r32r8;
+ Opc = Subtarget->hasCRC32() ? X86::CRC32r32r8_EVEX : X86::CRC32r32r8;
RC = &X86::GR32RegClass;
break;
case Intrinsic::x86_sse42_crc32_32_16:
- Opc = X86::CRC32r32r16;
+ Opc = Subtarget->hasCRC32() ? X86::CRC32r32r16_EVEX : X86::CRC32r32r16;
RC = &X86::GR32RegClass;
break;
case Intrinsic::x86_sse42_crc32_32_32:
- Opc = X86::CRC32r32r32;
+ Opc = Subtarget->hasCRC32() ? X86::CRC32r32r32_EVEX : X86::CRC32r32r32;
RC = &X86::GR32RegClass;
break;
case Intrinsic::x86_sse42_crc32_64_64:
- Opc = X86::CRC32r64r64;
+ Opc = Subtarget->hasCRC32() ? X86::CRC32r64r64_EVEX : X86::CRC32r64r64;
RC = &X86::GR64RegClass;
break;
}
diff --git a/llvm/lib/Target/X86/X86InstrSystem.td b/llvm/lib/Target/X86/X86InstrSystem.td
index 699e5847e63fb9..30530a00809f3f 100644
--- a/llvm/lib/Target/X86/X86InstrSystem.td
+++ b/llvm/lib/Target/X86/X86InstrSystem.td
@@ -695,14 +695,14 @@ def INVPCID32 : I<0x82, MRMSrcMem, (outs), (ins GR32:$src1, i128mem:$src2),
Requires<[Not64BitMode, HasINVPCID]>;
def INVPCID64 : I<0x82, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2),
"invpcid\t{$src2, $src1|$src1, $src2}", []>, T8, PD,
- Requires<[In64BitMode, HasINVPCID]>;
+ Requires<[In64BitMode, HasINVPCID, NoEGPR]>;
def INVPCID64_EVEX : I<0xF2, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2),
"invpcid\t{$src2, $src1|$src1, $src2}", []>,
- EVEX, NoCD8, T_MAP4, XS, Requires<[In64BitMode, HasINVPCID]>;
+ EVEX, NoCD8, T_MAP4, XS, Requires<[In64BitMode, HasINVPCID, HasEGPR]>;
} // SchedRW
-let Predicates = [In64BitMode, HasINVPCID] in {
+let Predicates = [In64BitMode, HasINVPCID, NoEGPR] in {
// The instruction can only use a 64 bit register as the register argument
// in 64 bit mode, while the intrinsic only accepts a 32 bit argument
// corresponding to it.
@@ -714,6 +714,13 @@ let Predicates = [In64BitMode, HasINVPCID] in {
addr:$src2)>;
}
+let Predicates = [In64BitMode, HasINVPCID, HasEGPR] in {
+ def : Pat<(int_x86_invpcid GR32:$src1, addr:$src2),
+ (INVPCID64_EVEX
+ (SUBREG_TO_REG (i64 0), (MOV32rr GR32:$src1), sub_32bit),
+ addr:$src2)>;
+}
+
//===----------------------------------------------------------------------===//
// SMAP Instruction
diff --git a/llvm/lib/Target/X86/X86InstrVMX.td b/llvm/lib/Target/X86/X86InstrVMX.td
index 7cc468fe15ad4e..e6722467897216 100644
--- a/llvm/lib/Target/X86/X86InstrVMX.td
+++ b/llvm/lib/Target/X86/X86InstrVMX.td
@@ -21,10 +21,10 @@ def INVEPT32 : I<0x80, MRMSrcMem, (outs), (ins GR32:$src1, i128mem:$src2),
Requires<[Not64BitMode]>;
def INVEPT64 : I<0x80, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2),
"invept\t{$src2, $src1|$src1, $src2}", []>, T8, PD,
- Requires<[In64BitMode]>;
+ Requires<[In64BitMode, NoEGPR]>;
def INVEPT64_EVEX : I<0xF0, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2),
"invept\t{$src2, $src1|$src1, $src2}", []>,
- EVEX, NoCD8, T_MAP4, XS, Requires<[In64BitMode]>;
+ EVEX, NoCD8, T_MAP4, XS, Requires<[In64BitMode, HasEGPR]>;
// 66 0F 38 81
def INVVPID32 : I<0x81, MRMSrcMem, (outs), (ins GR32:$src1, i128mem:$src2),
@@ -32,10 +32,10 @@ def INVVPID32 : I<0x81, MRMSrcMem, (outs), (ins GR32:$src1, i128mem:$src2),
Requires<[Not64BitMode]>;
def INVVPID64 : I<0x81, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2),
"invvpid\t{$src2, $src1|$src1, $src2}", []>, T8, PD,
- Requires<[In64BitMode]>;
+ Requires<[In64BitMode, NoEGPR]>;
def INVVPID64_EVEX : I<0xF1, MRMSrcMem, (outs), (ins GR64:$src1, i128mem:$src2),
"invvpid\t{$src2, $src1|$src1, $src2}", []>,
- EVEX, NoCD8, T_MAP4, XS, Requires<[In64BitMode]>;
+ EVEX, NoCD8, T_MAP4, XS, Requires<[In64BitMode, HasEGPR]>;
// 0F 01 C1
def VMCALL : I<0x01, MRM_C1, (outs), (ins), "vmcall", []>, TB;
diff --git a/llvm/test/CodeGen/X86/apx/cet.ll b/llvm/test/CodeGen/X86/apx/cet.ll
new file mode 100644
index 00000000000000..98f3844d1ccd19
--- /dev/null
+++ b/llvm/test/CodeGen/X86/apx/cet.ll
@@ -0,0 +1,50 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+shstk,+egpr --show-mc-encoding | FileCheck %s
+
+define void @test_wrssd(i32 %a, ptr %__p) {
+; CHECK-LABEL: test_wrssd:
+; CHECK: ## %bb.0: ## %entry
+; CHECK-NEXT: wrssd %edi, (%rsi) ## encoding: [0x62,0xf4,0x7c,0x08,0x66,0x3e]
+; CHECK-NEXT: retq ## encoding: [0xc3]
+entry:
+ tail call void @llvm.x86.wrssd(i32 %a, ptr %__p)
+ ret void
+}
+
+declare void @llvm.x86.wrssd(i32, ptr)
+
+define void @test_wrssq(i64 %a, ptr %__p) {
+; CHECK-LABEL: test_wrssq:
+; CHECK: ## %bb.0: ## %entry
+; CHECK-NEXT: wrssq %rdi, (%rsi) ## encoding: [0x62,0xf4,0xfc,0x08,0x66,0x3e]
+; CHECK-NEXT: retq ## encoding: [0xc3]
+entry:
+ tail call void @llvm.x86.wrssq(i64 %a, ptr %__p)
+ ret void
+}
+
+declare void @llvm.x86.wrssq(i64, ptr)
+
+define void @test_wrussd(i32 %a, ptr %__p) {
+; CHECK-LABEL: test_wrussd:
+; CHECK: ## %bb.0: ## %entry
+; CHECK-NEXT: wrussd %edi, (%rsi) ## encoding: [0x62,0xf4,0x7d,0x08,0x65,0x3e]
+; CHECK-NEXT: retq ## encoding: [0xc3]
+entry:
+ tail call void @llvm.x86.wrussd(i32 %a, ptr %__p)
+ ret void
+}
+
+declare void @llvm.x86.wrussd(i32, ptr)
+
+define void @test_wrussq(i64 %a, ptr %__p) {
+; CHECK-LABEL: test_wrussq:
+; CHECK: ## %bb.0: ## %entry
+; CHECK-NEXT: wrussq %rdi, (%rsi) ## encoding: [0x62,0xf4,0xfd,0x08,0x65,0x3e]
+; CHECK-NEXT: retq ## encoding: [0xc3]
+entry:
+ tail call void @llvm.x86.wrussq(i64 %a, ptr %__p)
+ ret void
+}
+
+declare void @llvm.x86.wrussq(i64, ptr)
diff --git a/llvm/test/CodeGen/X86/apx/crc32-fast-isel.ll b/llvm/test/CodeGen/X86/apx/crc32-fast-isel.ll
new file mode 100644
index 00000000000000..0b51679ccd7fb5
--- /dev/null
+++ b/llvm/test/CodeGen/X86/apx/crc32-fast-isel.ll
@@ -0,0 +1,61 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -fast-isel -fast-isel-abort=3 -mtriple=x86_64-unknown-unknown -mattr=-sse4.2,+crc32,+egpr --show-mc-encoding | FileCheck %s
+; RUN: llc < %s -fast-isel -fast-isel-abort=3 -mtriple=x86_64-unknown-unknown -mattr=+crc32,+egpr --show-mc-encoding | FileCheck %s
+
+define i32 @test_mm_crc32_u8(i32 %a0, i32 %a1) nounwind {
+; CHECK-LABEL: test_mm_crc32_u8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movl %edi, %eax # encoding: [0x89,0xf8]
+; CHECK-NEXT: crc32b %sil, %eax # encoding: [0x62,0xf4,0x7c,0x08,0xf0,0xc6]
+; CHECK-NEXT: retq # encoding: [0xc3]
+ %trunc = trunc i32 %a1 to i8
+ %res = call i32 @llvm.x86.sse42.crc32.32.8(i32 %a0, i8 %trunc)
+ ret i32 %res
+}
+declare i32 @llvm.x86.sse42.crc32.32.8(i32, i8) nounwind readnone
+
+define i32 @test_mm_crc32_u16(i32 %a0, i32 %a1) nounwind {
+; CHECK-LABEL: test_mm_crc32_u16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movl %edi, %eax # encoding: [0x89,0xf8]
+; CHECK-NEXT: crc32w %si, %eax # encoding: [0x62,0xf4,0x7d,0x08,0xf1,0xc6]
+; CHECK-NEXT: retq # encoding: [0xc3]
+ %trunc = trunc i32 %a1 to i16
+ %res = call i32 @llvm.x86.sse42.crc32.32.16(i32 %a0, i16 %trunc)
+ ret i32 %res
+}
+declare i32 @llvm.x86.sse42.crc32.32.16(i32, i16) nounwind readnone
+
+define i32 @test_mm_crc32_u32(i32 %a0, i32 %a1) nounwind {
+; CHECK-LABEL: test_mm_crc32_u32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movl %edi, %eax # encoding: [0x89,0xf8]
+; CHECK-NEXT: crc32l %esi, %eax # encoding: [0x62,0xf4,0x7c,0x08,0xf1,0xc6]
+; CHECK-NEXT: retq # encoding: [0xc3]
+ %res = call i32 @llvm.x86.sse42.crc32.32.32(i32 %a0, i32 %a1)
+ ret i32 %res
+}
+declare i32 @llvm.x86.sse42.crc32.32.32(i32, i32) nounwind readnone
+
+define i64 @test_mm_crc64_u8(i64 %a0, i32 %a1) nounwind{
+; CHECK-LABEL: test_mm_crc64_u8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: crc32b %sil, %edi # encoding: [0x62,0xf4,0x7c,0x08,0xf0,0xfe]
+; CHECK-NEXT: movl %edi, %eax # encoding: [0x89,0xf8]
+; CHECK-NEXT: retq # encoding: [0xc3]
+ %trunc = trunc i32 %a1 to i8
+ %res = call i64 @llvm.x86.sse42.crc32.64.8(i64 %a0, i8 %trunc)
+ ret i64 %res
+}
+declare i64 @llvm.x86.sse42.crc32.64.8(i64, i8) nounwind readnone
+
+define i64 @test_mm_crc64_u64(i64 %a0, i64 %a1) nounwind{
+; CHECK-LABEL: test_mm_crc64_u64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movq %rdi, %rax # encoding: [0x48,0x89,0xf8]
+; CHECK-NEXT: crc32q %rsi, %rax # encoding: [0x62,0xf4,0xfc,0x08,0xf1,0xc6]
+; CHECK-NEXT: retq # encoding: [0xc3]
+ %res = call i64 @llvm.x86.sse42.crc32.64.64(i64 %a0, i64 %a1)
+ ret i64 %res
+}
+declare i64 @llvm.x86.sse42.crc32.64.64(i64, i64) nounwind readnone
diff --git a/llvm/test/CodeGen/X86/apx/crc32.ll b/llvm/test/CodeGen/X86/apx/crc32.ll
new file mode 100644
index 00000000000000..4bcc4d15cc6b5a
--- /dev/null
+++ b/llvm/test/CodeGen/X86/apx/crc32.ll
@@ -0,0 +1,58 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+crc32,+egpr -show-mc-encoding | FileCheck %s
+
+define i32 @crc32_32_8(i32 %a, i8 %b) nounwind {
+; CHECK-LABEL: crc32_32_8:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: movl %edi, %eax ## encoding: [0x89,0xf8]
+; CHECK-NEXT: crc32b %sil, %eax ## encoding: [0x62,0xf4,0x7c,0x08,0xf0,0xc6]
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ %tmp = call i32 @llvm.x86.sse42.crc32.32.8(i32 %a, i8 %b)
+ ret i32 %tmp
+}
+
+define i32 @crc32_32_16(i32 %a, i16 %b) nounwind {
+; CHECK-LABEL: crc32_32_16:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: movl %edi, %eax ## encoding: [0x89,0xf8]
+; CHECK-NEXT: crc32w %si, %eax ## encoding: [0x62,0xf4,0x7d,0x08,0xf1,0xc6]
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ %tmp = call i32 @llvm.x86.sse42.crc32.32.16(i32 %a, i16 %b)
+ ret i32 %tmp
+}
+
+define i32 @crc32_32_32(i32 %a, i32 %b) nounwind {
+; CHECK-LABEL: crc32_32_32:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: movl %edi, %eax ## encoding: [0x89,0xf8]
+; CHECK-NEXT: crc32l %esi, %eax ## encoding: [0x62,0xf4,0x7c,0x08,0xf1,0xc6]
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ %tmp = call i32 @llvm.x86.sse42.crc32.32.32(i32 %a, i32 %b)
+ ret i32 %tmp
+}
+
+define i64 @crc32_64_8(i64 %a, i8 %b) nounwind {
+; CHECK-LABEL: crc32_64_8:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: movq %rdi, %rax ## encoding: [0x48,0x89,0xf8]
+; CHECK-NEXT: crc32b %sil, %eax ## encoding: [0x62,0xf4,0x7c,0x08,0xf0,0xc6]
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ %tmp = call i64 @llvm.x86.sse42.crc32.64.8(i64 %a, i8 %b)
+ ret i64 %tmp
+}
+
+define i64 @crc32_64_64(i64 %a, i64 %b) nounwind {
+; CHECK-LABEL: crc32_64_64:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: movq %rdi, %rax ## encoding: [0x48,0x89,0xf8]
+; CHECK-NEXT: crc32q %rsi, %rax ## encoding: [0x62,0xf4,0xfc,0x08,0xf1,0xc6]
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ %tmp = call i64 @llvm.x86.sse42.crc32.64.64(i64 %a, i64 %b)
+ ret i64 %tmp
+}
+
+declare i32 @llvm.x86.sse42.crc32.32.8(i32, i8) nounwind
+declare i32 @llvm.x86.sse42.crc32.32.16(i32, i16) nounwind
+declare i32 @llvm.x86.sse42.crc32.32.32(i32, i32) nounwind
+declare i64 @llvm.x86.sse42.crc32.64.8(i64, i8) nounwind
+declare i64 @llvm.x86.sse42.crc32.64.64(i64, i64) nounwind
diff --git a/llvm/test/CodeGen/X86/apx/invpcid.ll b/llvm/test/CodeGen/X86/apx/invpcid.ll
new file mode 100644
index 00000000000000..389895f4921305
--- /dev/null
+++ b/llvm/test/CodeGen/X86/apx/invpcid.ll
@@ -0,0 +1,27 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+invpcid,+egpr --show-mc-encoding | FileCheck %s
+
+define void @test_invpcid(i32 %type, ptr %descriptor) {
+; CHECK-LABEL: test_invpcid:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: movl %edi, %eax # encoding: [0x89,0xf8]
+; CHECK-NEXT: invpcid (%rsi), %rax # encoding: [0x62,0xf4,0x7e,0x08,0xf2,0x06]
+; CHECK-NEXT: retq # encoding: [0xc3]
+entry:
+ call void @llvm.x86.invpcid(i32 %type, ptr %descriptor)
+ ret void
+}
+
+define void @test_invpcid2(ptr readonly %type, ptr %descriptor) {
+; CHECK-LABEL: test_invpcid2:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: movl (%rdi), %eax # encoding: [0x8b,0x07]
+; CHECK-NEXT: invpcid (%rsi), %rax # encoding: [0x62,0xf4,0x7e,0x08,0xf2,0x06]
+; CHECK-NEXT: retq # encoding: [0xc3]
+entry:
+ %0 = load i32, ptr %type, align 4
+ tail call void @llvm.x86.invpcid(i32 %0, ptr %descriptor) #1
+ ret void
+}
+
+declare void @llvm.x86.invpcid(i32, ptr)
diff --git a/llvm/test/CodeGen/X86/apx/movdir.ll b/llvm/test/CodeGen/X86/apx/movdir.ll
new file mode 100644
index 00000000000000..06fd7511bc143c
--- /dev/null
+++ b/llvm/test/CodeGen/X86/apx/movdir.ll
@@ -0,0 +1,38 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+movdiri,+movdir64b,+egpr --show-mc-encoding | FileCheck %s
+
+define void @test_movdiri(ptr %p, i32 %v) {
+; CHECK-LABEL: test_movdiri:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: movdiri %esi, (%rdi) # encoding: [0x62,0xf4,0x7c,0x08,0xf9,0x37]
+; CHECK-NEXT: retq # encoding: [0xc3]
+entry:
+ call void @llvm.x86.directstore32(ptr %p, i32 %v)
+ ret void
+}
+
+declare void @llvm.x86.directstore32(ptr, i32)
+
+define void @test_movdiri_64(ptr %p, i64 %v) {
+; CHECK-LABEL: test_movdiri_64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: movdiri %rsi, (%rdi) # encoding: [0x62,0xf4,0xfc,0x08,0xf9,0x37]
+; CHECK-NEXT: retq # encoding: [0xc3]
+entry:
+ call void @llvm.x86.directstore64(ptr %p, i64 %v)
+ ret void
+}
+
+declare void @llvm.x86.directstore64(ptr, i64)
+
+define void @test_movdir64b(ptr %dst, ptr %src) {
+; CHECK-LABEL: test_movdir64b:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: movdir64b (%rsi), %rdi # encoding: [0x62,0xf4,0x7d,0x08,0xf8,0x3e]
+; CHECK-NEXT: retq # encoding: [0xc3]
+entry:
+ call void @llvm.x86.movdir64b(ptr %dst, ptr %src)
+ ret void
+}
+
+declare void @llvm.x86.movdir64b(ptr, ptr)
diff --git a/llvm/test/CodeGen/X86/apx/sha.ll b/llvm/test/CodeGen/X86/apx/sha.ll
new file mode 100644
index 00000000000000..088ee61a97f4ea
--- /dev/null
+++ b/llvm/test/CodeGen/X86/apx/sha.ll
@@ -0,0 +1,186 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc < %s -mattr=+sha,+egpr -mtriple=x86_64-unknown-unknown --show-mc-encoding | FileCheck %s
+
+declare <4 x i32> @llvm.x86.sha1rnds4(<4 x i32>, <4 x i32>, i8) nounwind readnone
+
+define <4 x i32> @test_sha1rnds4rr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable {
+; CHECK-LABEL: test_sha1rnds4rr:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: sha1rnds4 $3, %xmm1, %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xd4,0xc1,0x03]
+; CHECK-NEXT: retq # encoding: [0xc3]
+entry:
+ %0 = tail call <4 x i32> @llvm.x86.sha1rnds4(<4 x i32> %a, <4 x i32> %b, i8 3)
+ ret <4 x i32> %0
+}
+
+define <4 x i32> @test_sha1rnds4rm(<4 x i32> %a, ptr %b) nounwind uwtable {
+; CHECK-LABEL: test_sha1rnds4rm:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: sha1rnds4 $3, (%rdi), %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xd4,0x07,0x03]
+; CHECK-NEXT: retq # encoding: [0xc3]
+entry:
+ %0 = load <4 x i32>, ptr %b
+ %1 = tail call <4 x i32> @llvm.x86.sha1rnds4(<4 x i32> %a, <4 x i32> %0, i8 3)
+ ret <4 x i32> %1
+}
+
+declare <4 x i32> @llvm.x86.sha1nexte(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <4 x i32> @test_sha1nexterr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable {
+; CHECK-LABEL: test_sha1nexterr:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: sha1nexte %xmm1, %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xd8,0xc1]
+; CHECK-NEXT: retq # encoding: [0xc3]
+entry:
+ %0 = tail call <4 x i32> @llvm.x86.sha1nexte(<4 x i32> %a, <4 x i32> %b)
+ ret <4 x i32> %0
+}
+
+define <4 x i32> @test_sha1nexterm(<4 x i32> %a, ptr %b) nounwind uwtable {
+; CHECK-LABEL: test_sha1nexterm:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: sha1nexte (%rdi), %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xd8,0x07]
+; CHECK-NEXT: retq # encoding: [0xc3]
+entry:
+ %0 = load <4 x i32>, ptr %b
+ %1 = tail call <4 x i32> @llvm.x86.sha1nexte(<4 x i32> %a, <4 x i32> %0)
+ ret <4 x i32> %1
+}
+
+declare <4 x i32> @llvm.x86.sha1msg1(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <4 x i32> @test_sha1msg1rr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable {
+; CHECK-LABEL: test_sha1msg1rr:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: sha1msg1 %xmm1, %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xd9,0xc1]
+; CHECK-NEXT: retq # encoding: [0xc3]
+entry:
+ %0 = tail call <4 x i32> @llvm.x86.sha1msg1(<4 x i32> %a, <4 x i32> %b)
+ ret <4 x i32> %0
+}
+
+define <4 x i32> @test_sha1msg1rm(<4 x i32> %a, ptr %b) nounwind uwtable {
+; CHECK-LABEL: test_sha1msg1rm:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: sha1msg1 (%rdi), %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xd9,0x07]
+; CHECK-NEXT: retq # encoding: [0xc3]
+entry:
+ %0 = load <4 x i32>, ptr %b
+ %1 = tail call <4 x i32> @llvm.x86.sha1msg1(<4 x i32> %a, <4 x i32> %0)
+ ret <4 x i32> %1
+}
+
+declare <4 x i32> @llvm.x86.sha1msg2(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <4 x i32> @test_sha1msg2rr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable {
+; CHECK-LABEL: test_sha1msg2rr:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: sha1msg2 %xmm1, %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xda,0xc1]
+; CHECK-NEXT: retq # encoding: [0xc3]
+entry:
+ %0 = tail call <4 x i32> @llvm.x86.sha1msg2(<4 x i32> %a, <4 x i32> %b)
+ ret <4 x i32> %0
+}
+
+define <4 x i32> @test_sha1msg2rm(<4 x i32> %a, ptr %b) nounwind uwtable {
+; CHECK-LABEL: test_sha1msg2rm:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: sha1msg2 (%rdi), %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xda,0x07]
+; CHECK-NEXT: retq # encoding: [0xc3]
+entry:
+ %0 = load <4 x i32>, ptr %b
+ %1 = tail call <4 x i32> @llvm.x86.sha1msg2(<4 x i32> %a, <4 x i32> %0)
+ ret <4 x i32> %1
+}
+
+declare <4 x i32> @llvm.x86.sha256rnds2(<4 x i32>, <4 x i32>, <4 x i32>) nounwind readnone
+
+define <4 x i32> @test_sha256rnds2rr(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) nounwind uwtable {
+;
+; CHECK-LABEL: test_sha256rnds2rr:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: movaps %xmm0, %xmm3 # encoding: [0x0f,0x28,0xd8]
+; CHECK-NEXT: movaps %xmm2, %xmm0 # encoding: [0x0f,0x28,0xc2]
+; CHECK-NEXT: sha256rnds2 %xmm0, %xmm1, %xmm3 # encoding: [0x62,0xf4,0x7c,0x08,0xdb,0xd9]
+; CHECK-NEXT: movaps %xmm3, %xmm0 # encoding: [0x0f,0x28,0xc3]
+; CHECK-NEXT: retq # encoding: [0xc3]
+entry:
+ %0 = tail call <4 x i32> @llvm.x86.sha256rnds2(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c)
+ ret <4 x i32> %0
+}
+
+define <4 x i32> @test_sha256rnds2rm(<4 x i32> %a, ptr %b, <4 x i32> %c) nounwind uwtable {
+;
+; CHECK-LABEL: test_sha256rnds2rm:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: movaps %xmm0, %xmm2 # encoding: [0x0f,0x28,0xd0]
+; CHECK-NEXT: movaps %xmm1, %xmm0 # encoding: [0x0f,0x28,0xc1]
+; CHECK-NEXT: sha256rnds2 %xmm0, (%rdi), %xmm2 # encoding: [0x62,0xf4,0x7c,0x08,0xdb,0x17]
+; CHECK-NEXT: movaps %xmm2, %xmm0 # encoding: [0x0f,0x28,0xc2]
+; CHECK-NEXT: retq # encoding: [0xc3]
+entry:
+ %0 = load <4 x i32>, ptr %b
+ %1 = tail call <4 x i32> @llvm.x86.sha256rnds2(<4 x i32> %a, <4 x i32> %0, <4 x i32> %c)
+ ret <4 x i32> %1
+}
+
+declare <4 x i32> @llvm.x86.sha256msg1(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <4 x i32> @test_sha256msg1rr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable {
+; CHECK-LABEL: test_sha256msg1rr:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: sha256msg1 %xmm1, %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xdc,0xc1]
+; CHECK-NEXT: retq # encoding: [0xc3]
+entry:
+ %0 = tail call <4 x i32> @llvm.x86.sha256msg1(<4 x i32> %a, <4 x i32> %b)
+ ret <4 x i32> %0
+}
+
+define <4 x i32> @test_sha256msg1rm(<4 x i32> %a, ptr %b) nounwind uwtable {
+; CHECK-LABEL: test_sha256msg1rm:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: sha256msg1 (%rdi), %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xdc,0x07]
+; CHECK-NEXT: retq # encoding: [0xc3]
+entry:
+ %0 = load <4 x i32>, ptr %b
+ %1 = tail call <4 x i32> @llvm.x86.sha256msg1(<4 x i32> %a, <4 x i32> %0)
+ ret <4 x i32> %1
+}
+
+declare <4 x i32> @llvm.x86.sha256msg2(<4 x i32>, <4 x i32>) nounwind readnone
+
+define <4 x i32> @test_sha256msg2rr(<4 x i32> %a, <4 x i32> %b) nounwind uwtable {
+; CHECK-LABEL: test_sha256msg2rr:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: sha256msg2 %xmm1, %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xdd,0xc1]
+; CHECK-NEXT: retq # encoding: [0xc3]
+entry:
+ %0 = tail call <4 x i32> @llvm.x86.sha256msg2(<4 x i32> %a, <4 x i32> %b)
+ ret <4 x i32> %0
+}
+
+define <4 x i32> @test_sha256msg2rm(<4 x i32> %a, ptr %b) nounwind uwtable {
+; CHECK-LABEL: test_sha256msg2rm:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: sha256msg2 (%rdi), %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xdd,0x07]
+; CHECK-NEXT: retq # encoding: [0xc3]
+entry:
+ %0 = load <4 x i32>, ptr %b
+ %1 = tail call <4 x i32> @llvm.x86.sha256msg2(<4 x i32> %a, <4 x i32> %0)
+ ret <4 x i32> %1
+}
+
+; Make sure we don't forget that sha instructions have no VEX equivalents and thus don't zero YMM/ZMM.
+define <8 x i32> @test_sha1rnds4_zero_extend(<4 x i32> %a, ptr %b) nounwind uwtable {
+;
+; CHECK-LABEL: test_sha1rnds4_zero_extend:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: sha1rnds4 $3, (%rdi), %xmm0 # encoding: [0x62,0xf4,0x7c,0x08,0xd4,0x07,0x03]
+; CHECK-NEXT: xorps %xmm1, %xmm1 # encoding: [0x0f,0x57,0xc9]
+; CHECK-NEXT: retq # encoding: [0xc3]
+entry:
+ %0 = load <4 x i32>, ptr %b
+ %1 = tail call <4 x i32> @llvm.x86.sha1rnds4(<4 x i32> %a, <4 x i32> %0, i8 3)
+ %2 = shufflevector <4 x i32> %1, <4 x i32> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ ret <8 x i32> %2
+}
>From b6d60b595e3784df18042a858206f28f7e5a1082 Mon Sep 17 00:00:00 2001
From: "Wang, Xin10" <xin10.wang at intel.com>
Date: Tue, 2 Jan 2024 23:21:46 -0800
Subject: [PATCH 2/2] Fix FastISel CRC32 opcode selection: check hasEGPR() instead of hasCRC32()
---
llvm/lib/Target/X86/X86FastISel.cpp | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Target/X86/X86FastISel.cpp b/llvm/lib/Target/X86/X86FastISel.cpp
index 3658af785c24ec..8b4ff4c8ed878f 100644
--- a/llvm/lib/Target/X86/X86FastISel.cpp
+++ b/llvm/lib/Target/X86/X86FastISel.cpp
@@ -3047,19 +3047,19 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
default:
llvm_unreachable("Unexpected intrinsic.");
case Intrinsic::x86_sse42_crc32_32_8:
- Opc = Subtarget->hasCRC32() ? X86::CRC32r32r8_EVEX : X86::CRC32r32r8;
+ Opc = Subtarget->hasEGPR() ? X86::CRC32r32r8_EVEX : X86::CRC32r32r8;
RC = &X86::GR32RegClass;
break;
case Intrinsic::x86_sse42_crc32_32_16:
- Opc = Subtarget->hasCRC32() ? X86::CRC32r32r16_EVEX : X86::CRC32r32r16;
+ Opc = Subtarget->hasEGPR() ? X86::CRC32r32r16_EVEX : X86::CRC32r32r16;
RC = &X86::GR32RegClass;
break;
case Intrinsic::x86_sse42_crc32_32_32:
- Opc = Subtarget->hasCRC32() ? X86::CRC32r32r32_EVEX : X86::CRC32r32r32;
+ Opc = Subtarget->hasEGPR() ? X86::CRC32r32r32_EVEX : X86::CRC32r32r32;
RC = &X86::GR32RegClass;
break;
case Intrinsic::x86_sse42_crc32_64_64:
- Opc = Subtarget->hasCRC32() ? X86::CRC32r64r64_EVEX : X86::CRC32r64r64;
+ Opc = Subtarget->hasEGPR() ? X86::CRC32r64r64_EVEX : X86::CRC32r64r64;
RC = &X86::GR64RegClass;
break;
}
More information about the llvm-commits mailing list