[llvm] 5368c10 - [X86][FastISel] Handle CRC32 intrinsics
Alexis Engelke via llvm-commits
llvm-commits at lists.llvm.org
Fri Apr 21 04:40:54 PDT 2023
Author: Alexis Engelke
Date: 2023-04-21T13:40:48+02:00
New Revision: 5368c1065f0eaefc442247134ba3b2e98c97fda7
URL: https://github.com/llvm/llvm-project/commit/5368c1065f0eaefc442247134ba3b2e98c97fda7
DIFF: https://github.com/llvm/llvm-project/commit/5368c1065f0eaefc442247134ba3b2e98c97fda7.diff
LOG: [X86][FastISel] Handle CRC32 intrinsics
Some applications make heavy use of the crc32 operation (e.g., as part
of a hash function), so having a FastISel path avoids fallbacks to
SelectionDAG and improves compile times, in our case by ~1.5%.
Reviewed By: pengfei
Differential Revision: https://reviews.llvm.org/D148023
Added:
llvm/test/CodeGen/X86/crc32-intrinsics-fast-isel-x86.ll
Modified:
llvm/lib/Target/X86/X86FastISel.cpp
llvm/test/CodeGen/X86/crc32-intrinsics-fast-isel-x86_64.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86FastISel.cpp b/llvm/lib/Target/X86/X86FastISel.cpp
index ade4ff61762a..e414a3d717bc 100644
--- a/llvm/lib/Target/X86/X86FastISel.cpp
+++ b/llvm/lib/Target/X86/X86FastISel.cpp
@@ -3030,6 +3030,58 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
updateValueMap(II, ResultReg);
return true;
}
+ case Intrinsic::x86_sse42_crc32_32_8:
+ case Intrinsic::x86_sse42_crc32_32_16:
+ case Intrinsic::x86_sse42_crc32_32_32:
+ case Intrinsic::x86_sse42_crc32_64_64: {
+ if (!Subtarget->hasCRC32())
+ return false;
+
+ Type *RetTy = II->getCalledFunction()->getReturnType();
+
+ MVT VT;
+ if (!isTypeLegal(RetTy, VT))
+ return false;
+
+ unsigned Opc;
+ const TargetRegisterClass *RC = nullptr;
+
+ switch (II->getIntrinsicID()) {
+ default:
+ llvm_unreachable("Unexpected intrinsic.");
+ case Intrinsic::x86_sse42_crc32_32_8:
+ Opc = X86::CRC32r32r8;
+ RC = &X86::GR32RegClass;
+ break;
+ case Intrinsic::x86_sse42_crc32_32_16:
+ Opc = X86::CRC32r32r16;
+ RC = &X86::GR32RegClass;
+ break;
+ case Intrinsic::x86_sse42_crc32_32_32:
+ Opc = X86::CRC32r32r32;
+ RC = &X86::GR32RegClass;
+ break;
+ case Intrinsic::x86_sse42_crc32_64_64:
+ Opc = X86::CRC32r64r64;
+ RC = &X86::GR64RegClass;
+ break;
+ }
+
+ const Value *LHS = II->getArgOperand(0);
+ const Value *RHS = II->getArgOperand(1);
+
+ Register LHSReg = getRegForValue(LHS);
+ Register RHSReg = getRegForValue(RHS);
+ if (!LHSReg || !RHSReg)
+ return false;
+
+ Register ResultReg = fastEmitInst_rr(Opc, RC, LHSReg, RHSReg);
+ if (!ResultReg)
+ return false;
+
+ updateValueMap(II, ResultReg);
+ return true;
+ }
}
}
diff --git a/llvm/test/CodeGen/X86/crc32-intrinsics-fast-isel-x86.ll b/llvm/test/CodeGen/X86/crc32-intrinsics-fast-isel-x86.ll
new file mode 100644
index 000000000000..056d79f379fd
--- /dev/null
+++ b/llvm/test/CodeGen/X86/crc32-intrinsics-fast-isel-x86.ll
@@ -0,0 +1,68 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --no-generate-body-for-unused-prefixes
+; i686 uses -fast-isel-abort=1 only as argument lowering is not supported, so check that FastISel didn't miss the call.
+; RUN: llc < %s -fast-isel -pass-remarks-missed=sdagisel -mtriple=i686-unknown-unknown -mattr=+crc32 2>&1 >/dev/null | FileCheck %s -check-prefix=STDERR-X86 -allow-empty
+; RUN: llc < %s -fast-isel -fast-isel-abort=1 -mtriple=i686-unknown-unknown -mattr=-sse4.2,+crc32 | FileCheck %s -check-prefix=X86
+; RUN: llc < %s -fast-isel -fast-isel-abort=1 -mtriple=i686-unknown-unknown -mattr=+crc32 | FileCheck %s -check-prefix=X86
+; RUN: llc < %s -fast-isel -fast-isel-abort=3 -mtriple=x86_64-unknown-unknown -mattr=-sse4.2,+crc32 | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -fast-isel -fast-isel-abort=3 -mtriple=x86_64-unknown-unknown -mattr=+crc32 | FileCheck %s -check-prefix=X64
+
+; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/sse42-builtins.c
+
+; STDERR-X86-NOT: FastISel missed call: %res = call i32 @llvm.x86.sse42.crc32
+
+; Note: %a1 is i32 as FastISel can't handle i8/i16 arguments.
+define i32 @test_mm_crc32_u8(i32 %a0, i32 %a1) nounwind {
+; X86-LABEL: test_mm_crc32_u8:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: crc32b %cl, %eax
+; X86-NEXT: retl
+;
+; X64-LABEL: test_mm_crc32_u8:
+; X64: # %bb.0:
+; X64-NEXT: movl %edi, %eax
+; X64-NEXT: crc32b %sil, %eax
+; X64-NEXT: retq
+ %trunc = trunc i32 %a1 to i8
+ %res = call i32 @llvm.x86.sse42.crc32.32.8(i32 %a0, i8 %trunc)
+ ret i32 %res
+}
+declare i32 @llvm.x86.sse42.crc32.32.8(i32, i8) nounwind readnone
+
+; Note: %a1 is i32 as FastISel can't handle i8/i16 arguments.
+define i32 @test_mm_crc32_u16(i32 %a0, i32 %a1) nounwind {
+; X86-LABEL: test_mm_crc32_u16:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: crc32w %cx, %eax
+; X86-NEXT: retl
+;
+; X64-LABEL: test_mm_crc32_u16:
+; X64: # %bb.0:
+; X64-NEXT: movl %edi, %eax
+; X64-NEXT: crc32w %si, %eax
+; X64-NEXT: retq
+ %trunc = trunc i32 %a1 to i16
+ %res = call i32 @llvm.x86.sse42.crc32.32.16(i32 %a0, i16 %trunc)
+ ret i32 %res
+}
+declare i32 @llvm.x86.sse42.crc32.32.16(i32, i16) nounwind readnone
+
+define i32 @test_mm_crc32_u32(i32 %a0, i32 %a1) nounwind {
+; X86-LABEL: test_mm_crc32_u32:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: crc32l {{[0-9]+}}(%esp), %eax
+; X86-NEXT: retl
+;
+; X64-LABEL: test_mm_crc32_u32:
+; X64: # %bb.0:
+; X64-NEXT: movl %edi, %eax
+; X64-NEXT: crc32l %esi, %eax
+; X64-NEXT: retq
+ %res = call i32 @llvm.x86.sse42.crc32.32.32(i32 %a0, i32 %a1)
+ ret i32 %res
+}
+declare i32 @llvm.x86.sse42.crc32.32.32(i32, i32) nounwind readnone
diff --git a/llvm/test/CodeGen/X86/crc32-intrinsics-fast-isel-x86_64.ll b/llvm/test/CodeGen/X86/crc32-intrinsics-fast-isel-x86_64.ll
index 5c2944495256..e0ec432b3854 100644
--- a/llvm/test/CodeGen/X86/crc32-intrinsics-fast-isel-x86_64.ll
+++ b/llvm/test/CodeGen/X86/crc32-intrinsics-fast-isel-x86_64.ll
@@ -1,16 +1,18 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=-sse4.2,+crc32 | FileCheck %s
-; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+crc32 | FileCheck %s
+; RUN: llc < %s -fast-isel -fast-isel-abort=3 -mtriple=x86_64-unknown-unknown -mattr=-sse4.2,+crc32 | FileCheck %s
+; RUN: llc < %s -fast-isel -fast-isel-abort=3 -mtriple=x86_64-unknown-unknown -mattr=+crc32 | FileCheck %s
; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/sse42-builtins.c
-define i64 @test_mm_crc64_u8(i64 %a0, i8 %a1) nounwind{
+; Note: %a1 is i32 as FastISel can't handle i8/i16 arguments.
+define i64 @test_mm_crc64_u8(i64 %a0, i32 %a1) nounwind{
; CHECK-LABEL: test_mm_crc64_u8:
; CHECK: # %bb.0:
; CHECK-NEXT: crc32b %sil, %edi
; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: retq
- %res = call i64 @llvm.x86.sse42.crc32.64.8(i64 %a0, i8 %a1)
+ %trunc = trunc i32 %a1 to i8
+ %res = call i64 @llvm.x86.sse42.crc32.64.8(i64 %a0, i8 %trunc)
ret i64 %res
}
declare i64 @llvm.x86.sse42.crc32.64.8(i64, i8) nounwind readnone
More information about the llvm-commits mailing list