[llvm] 5368c10 - [X86][FastISel] Handle CRC32 intrinsics

Alexis Engelke via llvm-commits llvm-commits at lists.llvm.org
Fri Apr 21 04:40:54 PDT 2023


Author: Alexis Engelke
Date: 2023-04-21T13:40:48+02:00
New Revision: 5368c1065f0eaefc442247134ba3b2e98c97fda7

URL: https://github.com/llvm/llvm-project/commit/5368c1065f0eaefc442247134ba3b2e98c97fda7
DIFF: https://github.com/llvm/llvm-project/commit/5368c1065f0eaefc442247134ba3b2e98c97fda7.diff

LOG: [X86][FastISel] Handle CRC32 intrinsics

Some applications make heavy use of the crc32 operation (e.g., as part
of a hash function), so having a FastISel path avoids fallbacks to
SelectionDAG and improves compile times, in our case by ~1.5%.

Reviewed By: pengfei

Differential Revision: https://reviews.llvm.org/D148023

Added: 
    llvm/test/CodeGen/X86/crc32-intrinsics-fast-isel-x86.ll

Modified: 
    llvm/lib/Target/X86/X86FastISel.cpp
    llvm/test/CodeGen/X86/crc32-intrinsics-fast-isel-x86_64.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86FastISel.cpp b/llvm/lib/Target/X86/X86FastISel.cpp
index ade4ff61762a..e414a3d717bc 100644
--- a/llvm/lib/Target/X86/X86FastISel.cpp
+++ b/llvm/lib/Target/X86/X86FastISel.cpp
@@ -3030,6 +3030,58 @@ bool X86FastISel::fastLowerIntrinsicCall(const IntrinsicInst *II) {
     updateValueMap(II, ResultReg);
     return true;
   }
+  case Intrinsic::x86_sse42_crc32_32_8:
+  case Intrinsic::x86_sse42_crc32_32_16:
+  case Intrinsic::x86_sse42_crc32_32_32:
+  case Intrinsic::x86_sse42_crc32_64_64: {
+    if (!Subtarget->hasCRC32())
+      return false;
+
+    Type *RetTy = II->getCalledFunction()->getReturnType();
+
+    MVT VT;
+    if (!isTypeLegal(RetTy, VT))
+      return false;
+
+    unsigned Opc;
+    const TargetRegisterClass *RC = nullptr;
+
+    switch (II->getIntrinsicID()) {
+    default:
+      llvm_unreachable("Unexpected intrinsic.");
+    case Intrinsic::x86_sse42_crc32_32_8:
+      Opc = X86::CRC32r32r8;
+      RC = &X86::GR32RegClass;
+      break;
+    case Intrinsic::x86_sse42_crc32_32_16:
+      Opc = X86::CRC32r32r16;
+      RC = &X86::GR32RegClass;
+      break;
+    case Intrinsic::x86_sse42_crc32_32_32:
+      Opc = X86::CRC32r32r32;
+      RC = &X86::GR32RegClass;
+      break;
+    case Intrinsic::x86_sse42_crc32_64_64:
+      Opc = X86::CRC32r64r64;
+      RC = &X86::GR64RegClass;
+      break;
+    }
+
+    const Value *LHS = II->getArgOperand(0);
+    const Value *RHS = II->getArgOperand(1);
+
+    Register LHSReg = getRegForValue(LHS);
+    Register RHSReg = getRegForValue(RHS);
+    if (!LHSReg || !RHSReg)
+      return false;
+
+    Register ResultReg = fastEmitInst_rr(Opc, RC, LHSReg, RHSReg);
+    if (!ResultReg)
+      return false;
+
+    updateValueMap(II, ResultReg);
+    return true;
+  }
   }
 }
 

diff  --git a/llvm/test/CodeGen/X86/crc32-intrinsics-fast-isel-x86.ll b/llvm/test/CodeGen/X86/crc32-intrinsics-fast-isel-x86.ll
new file mode 100644
index 000000000000..056d79f379fd
--- /dev/null
+++ b/llvm/test/CodeGen/X86/crc32-intrinsics-fast-isel-x86.ll
@@ -0,0 +1,68 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --no-generate-body-for-unused-prefixes
+; i686 uses -fast-isel-abort=1 only as argument lowering is not supported, so check that FastISel didn't miss the call.
+; RUN: llc < %s -fast-isel -pass-remarks-missed=sdagisel -mtriple=i686-unknown-unknown -mattr=+crc32 2>&1 >/dev/null | FileCheck %s -check-prefix=STDERR-X86 -allow-empty
+; RUN: llc < %s -fast-isel -fast-isel-abort=1 -mtriple=i686-unknown-unknown -mattr=-sse4.2,+crc32 | FileCheck %s -check-prefix=X86
+; RUN: llc < %s -fast-isel -fast-isel-abort=1 -mtriple=i686-unknown-unknown -mattr=+crc32 | FileCheck %s -check-prefix=X86
+; RUN: llc < %s -fast-isel -fast-isel-abort=3 -mtriple=x86_64-unknown-unknown -mattr=-sse4.2,+crc32 | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -fast-isel -fast-isel-abort=3 -mtriple=x86_64-unknown-unknown -mattr=+crc32 | FileCheck %s -check-prefix=X64
+
+; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/sse42-builtins.c
+
+; STDERR-X86-NOT: FastISel missed call:   %res = call i32 @llvm.x86.sse42.crc32
+
+; Note: %a1 is i32 as FastISel can't handle i8/i16 arguments.
+define i32 @test_mm_crc32_u8(i32 %a0, i32 %a1) nounwind {
+; X86-LABEL: test_mm_crc32_u8:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    crc32b %cl, %eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: test_mm_crc32_u8:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    crc32b %sil, %eax
+; X64-NEXT:    retq
+  %trunc = trunc i32 %a1 to i8
+  %res = call i32 @llvm.x86.sse42.crc32.32.8(i32 %a0, i8 %trunc)
+  ret i32 %res
+}
+declare i32 @llvm.x86.sse42.crc32.32.8(i32, i8) nounwind readnone
+
+; Note: %a1 is i32 as FastISel can't handle i8/i16 arguments.
+define i32 @test_mm_crc32_u16(i32 %a0, i32 %a1) nounwind {
+; X86-LABEL: test_mm_crc32_u16:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    crc32w %cx, %eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: test_mm_crc32_u16:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    crc32w %si, %eax
+; X64-NEXT:    retq
+  %trunc = trunc i32 %a1 to i16
+  %res = call i32 @llvm.x86.sse42.crc32.32.16(i32 %a0, i16 %trunc)
+  ret i32 %res
+}
+declare i32 @llvm.x86.sse42.crc32.32.16(i32, i16) nounwind readnone
+
+define i32 @test_mm_crc32_u32(i32 %a0, i32 %a1) nounwind {
+; X86-LABEL: test_mm_crc32_u32:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    crc32l {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: test_mm_crc32_u32:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    crc32l %esi, %eax
+; X64-NEXT:    retq
+  %res = call i32 @llvm.x86.sse42.crc32.32.32(i32 %a0, i32 %a1)
+  ret i32 %res
+}
+declare i32 @llvm.x86.sse42.crc32.32.32(i32, i32) nounwind readnone

diff  --git a/llvm/test/CodeGen/X86/crc32-intrinsics-fast-isel-x86_64.ll b/llvm/test/CodeGen/X86/crc32-intrinsics-fast-isel-x86_64.ll
index 5c2944495256..e0ec432b3854 100644
--- a/llvm/test/CodeGen/X86/crc32-intrinsics-fast-isel-x86_64.ll
+++ b/llvm/test/CodeGen/X86/crc32-intrinsics-fast-isel-x86_64.ll
@@ -1,16 +1,18 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=-sse4.2,+crc32 | FileCheck %s
-; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+crc32 | FileCheck %s
+; RUN: llc < %s -fast-isel -fast-isel-abort=3 -mtriple=x86_64-unknown-unknown -mattr=-sse4.2,+crc32 | FileCheck %s
+; RUN: llc < %s -fast-isel -fast-isel-abort=3 -mtriple=x86_64-unknown-unknown -mattr=+crc32 | FileCheck %s
 
 ; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/sse42-builtins.c
 
-define i64 @test_mm_crc64_u8(i64 %a0, i8 %a1) nounwind{
+; Note: %a1 is i32 as FastISel can't handle i8/i16 arguments.
+define i64 @test_mm_crc64_u8(i64 %a0, i32 %a1) nounwind{
 ; CHECK-LABEL: test_mm_crc64_u8:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    crc32b %sil, %edi
 ; CHECK-NEXT:    movl %edi, %eax
 ; CHECK-NEXT:    retq
-  %res = call i64 @llvm.x86.sse42.crc32.64.8(i64 %a0, i8 %a1)
+  %trunc = trunc i32 %a1 to i8
+  %res = call i64 @llvm.x86.sse42.crc32.64.8(i64 %a0, i8 %trunc)
   ret i64 %res
 }
 declare i64 @llvm.x86.sse42.crc32.64.8(i64, i8) nounwind readnone


        


More information about the llvm-commits mailing list