[llvm] 1978b15 - [X86] Add support for llvm.clmul.i64 on 32-bit targets (#176449)
via llvm-commits
llvm-commits at lists.llvm.org
Sat Jan 17 07:32:10 PST 2026
Author: Simon Pilgrim
Date: 2026-01-17T15:32:05Z
New Revision: 1978b15e0c3bf1dc67501fbf4ecd352aaa2fa10c
URL: https://github.com/llvm/llvm-project/commit/1978b15e0c3bf1dc67501fbf4ecd352aaa2fa10c
DIFF: https://github.com/llvm/llvm-project/commit/1978b15e0c3bf1dc67501fbf4ecd352aaa2fa10c.diff
LOG: [X86] Add support for llvm.clmul.i64 on 32-bit targets (#176449)
Add ReplaceNodeResults expansion for llvm.clmul.i64 on 32-bit x86 targets.
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/clmul-x86.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index ef94c198558c7..640d9c5f5bc07 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -1156,10 +1156,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::XOR, MVT::i128, Custom);
if (Subtarget.hasPCLMUL()) {
- if (Subtarget.is64Bit()) {
- setOperationAction(ISD::CLMUL, MVT::i64, Custom);
- setOperationAction(ISD::CLMULH, MVT::i64, Custom);
- }
+ setOperationAction(ISD::CLMUL, MVT::i64, Custom);
+ setOperationAction(ISD::CLMULH, MVT::i64, Custom);
setOperationAction(ISD::CLMUL, MVT::i32, Custom);
setOperationAction(ISD::CLMUL, MVT::i16, Custom);
setOperationAction(ISD::CLMUL, MVT::i8, Custom);
@@ -34121,6 +34119,10 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
SelectionDAG &DAG) const {
SDLoc dl(N);
unsigned Opc = N->getOpcode();
+ bool NoImplicitFloatOps =
+ DAG.getMachineFunction().getFunction().hasFnAttribute(
+ Attribute::NoImplicitFloat);
+
switch (Opc) {
default:
#ifndef NDEBUG
@@ -34194,9 +34196,6 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
return;
}
// Use a v2i64 if possible.
- bool NoImplicitFloatOps =
- DAG.getMachineFunction().getFunction().hasFnAttribute(
- Attribute::NoImplicitFloat);
if (isTypeLegal(MVT::v2i64) && !NoImplicitFloatOps) {
SDValue Wide =
DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, N->getOperand(0));
@@ -34348,6 +34347,24 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
Results.push_back(Res);
return;
}
+ case ISD::CLMUL:
+ case ISD::CLMULH: {
+ assert(Subtarget.hasPCLMUL() && "PCLMUL required for CLMUL lowering");
+ assert(N->getValueType(0) == MVT::i64 && "Unexpected VT!");
+ if (NoImplicitFloatOps)
+ return;
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ bool IsHigh = Opc == ISD::CLMULH;
+ SDValue Res =
+ DAG.getNode(X86ISD::PCLMULQDQ, dl, MVT::v2i64,
+ DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, LHS),
+ DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, RHS),
+ DAG.getTargetConstant(0, dl, MVT::i8));
+ Res = DAG.getExtractVectorElt(dl, MVT::i64, Res, IsHigh ? 1 : 0);
+ Results.push_back(Res);
+ return;
+ }
// We might have generated v2f32 FMIN/FMAX operations. Widen them to v4f32.
case X86ISD::FMINC:
case X86ISD::FMIN:
@@ -35148,9 +35165,6 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
assert(
(N->getValueType(0) == MVT::i64 || N->getValueType(0) == MVT::i128) &&
"Unexpected VT!");
- bool NoImplicitFloatOps =
- DAG.getMachineFunction().getFunction().hasFnAttribute(
- Attribute::NoImplicitFloat);
if (!Subtarget.useSoftFloat() && !NoImplicitFloatOps) {
auto *Node = cast<AtomicSDNode>(N);
diff --git a/llvm/test/CodeGen/X86/clmul-x86.ll b/llvm/test/CodeGen/X86/clmul-x86.ll
index c9a7cde29e30c..cac2625f66fa1 100644
--- a/llvm/test/CodeGen/X86/clmul-x86.ll
+++ b/llvm/test/CodeGen/X86/clmul-x86.ll
@@ -18,76 +18,12 @@ define i32 @clmul_i32(i32 %a, i32 %b) nounwind {
define i64 @clmul_i64(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: clmul_i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; CHECK-NEXT: movd %ecx, %xmm0
-; CHECK-NEXT: bswapl %ecx
-; CHECK-NEXT: movl %ecx, %edx
-; CHECK-NEXT: andl $252645135, %edx # imm = 0xF0F0F0F
-; CHECK-NEXT: shll $4, %edx
-; CHECK-NEXT: shrl $4, %ecx
-; CHECK-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
-; CHECK-NEXT: orl %edx, %ecx
-; CHECK-NEXT: movl %ecx, %edx
-; CHECK-NEXT: andl $858993459, %edx # imm = 0x33333333
-; CHECK-NEXT: shrl $2, %ecx
-; CHECK-NEXT: andl $858993459, %ecx # imm = 0x33333333
-; CHECK-NEXT: leal (%ecx,%edx,4), %ecx
-; CHECK-NEXT: movl %ecx, %edx
-; CHECK-NEXT: andl $1431655765, %edx # imm = 0x55555555
-; CHECK-NEXT: shrl %ecx
-; CHECK-NEXT: andl $1431655765, %ecx # imm = 0x55555555
-; CHECK-NEXT: leal (%ecx,%edx,2), %ecx
-; CHECK-NEXT: movd %ecx, %xmm2
-; CHECK-NEXT: movd %eax, %xmm1
-; CHECK-NEXT: bswapl %eax
-; CHECK-NEXT: movl %eax, %ecx
-; CHECK-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
-; CHECK-NEXT: shll $4, %ecx
-; CHECK-NEXT: shrl $4, %eax
-; CHECK-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F
-; CHECK-NEXT: orl %ecx, %eax
-; CHECK-NEXT: movl %eax, %ecx
-; CHECK-NEXT: andl $858993459, %ecx # imm = 0x33333333
-; CHECK-NEXT: shrl $2, %eax
-; CHECK-NEXT: andl $858993459, %eax # imm = 0x33333333
-; CHECK-NEXT: leal (%eax,%ecx,4), %eax
-; CHECK-NEXT: movl %eax, %ecx
-; CHECK-NEXT: andl $1431655765, %ecx # imm = 0x55555555
-; CHECK-NEXT: shrl %eax
-; CHECK-NEXT: andl $1431655765, %eax # imm = 0x55555555
-; CHECK-NEXT: leal (%eax,%ecx,2), %eax
-; CHECK-NEXT: movd %eax, %xmm3
-; CHECK-NEXT: pclmulqdq $0, %xmm2, %xmm3
-; CHECK-NEXT: movd %xmm3, %eax
-; CHECK-NEXT: bswapl %eax
-; CHECK-NEXT: movl %eax, %ecx
-; CHECK-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
-; CHECK-NEXT: shll $4, %ecx
-; CHECK-NEXT: shrl $4, %eax
-; CHECK-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F
-; CHECK-NEXT: orl %ecx, %eax
-; CHECK-NEXT: movl %eax, %ecx
-; CHECK-NEXT: andl $858993459, %ecx # imm = 0x33333333
-; CHECK-NEXT: shrl $2, %eax
-; CHECK-NEXT: andl $858993459, %eax # imm = 0x33333333
-; CHECK-NEXT: leal (%eax,%ecx,4), %eax
-; CHECK-NEXT: movl %eax, %ecx
-; CHECK-NEXT: andl $1431655765, %ecx # imm = 0x55555555
-; CHECK-NEXT: shrl %eax
-; CHECK-NEXT: andl $1431655764, %eax # imm = 0x55555554
-; CHECK-NEXT: leal (%eax,%ecx,2), %eax
-; CHECK-NEXT: shrl %eax
-; CHECK-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; CHECK-NEXT: pclmulqdq $0, %xmm0, %xmm2
-; CHECK-NEXT: movd %xmm2, %ecx
-; CHECK-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; CHECK-NEXT: pclmulqdq $0, %xmm1, %xmm2
-; CHECK-NEXT: movd %xmm2, %edx
-; CHECK-NEXT: xorl %ecx, %edx
-; CHECK-NEXT: xorl %eax, %edx
+; CHECK-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
; CHECK-NEXT: pclmulqdq $0, %xmm0, %xmm1
; CHECK-NEXT: movd %xmm1, %eax
+; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
+; CHECK-NEXT: movd %xmm0, %edx
; CHECK-NEXT: retl
%res = call i64 @llvm.clmul.i64(i64 %a, i64 %b)
ret i64 %res
@@ -96,103 +32,30 @@ define i64 @clmul_i64(i64 %a, i64 %b) nounwind {
define i64 @clmulr_i64(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: clmulr_i64:
; CHECK: # %bb.0:
+; CHECK-NEXT: pushl %edi
; CHECK-NEXT: pushl %esi
-; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; CHECK-NEXT: bswapl %ecx
-; CHECK-NEXT: movl %ecx, %edx
-; CHECK-NEXT: andl $252645135, %edx # imm = 0xF0F0F0F
-; CHECK-NEXT: shll $4, %edx
-; CHECK-NEXT: shrl $4, %ecx
-; CHECK-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
-; CHECK-NEXT: orl %edx, %ecx
-; CHECK-NEXT: movl %ecx, %edx
-; CHECK-NEXT: andl $858993459, %edx # imm = 0x33333333
-; CHECK-NEXT: shrl $2, %ecx
-; CHECK-NEXT: andl $858993459, %ecx # imm = 0x33333333
-; CHECK-NEXT: leal (%ecx,%edx,4), %ecx
-; CHECK-NEXT: movl %ecx, %edx
-; CHECK-NEXT: andl $1431655765, %edx # imm = 0x55555555
-; CHECK-NEXT: shrl %ecx
-; CHECK-NEXT: andl $1431655765, %ecx # imm = 0x55555555
-; CHECK-NEXT: leal (%ecx,%edx,2), %ecx
-; CHECK-NEXT: movd %ecx, %xmm0
-; CHECK-NEXT: bswapl %ecx
-; CHECK-NEXT: movl %ecx, %edx
-; CHECK-NEXT: andl $252645135, %edx # imm = 0xF0F0F0F
-; CHECK-NEXT: shll $4, %edx
-; CHECK-NEXT: shrl $4, %ecx
-; CHECK-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
-; CHECK-NEXT: orl %edx, %ecx
-; CHECK-NEXT: movl %ecx, %edx
-; CHECK-NEXT: andl $858993459, %edx # imm = 0x33333333
-; CHECK-NEXT: shrl $2, %ecx
-; CHECK-NEXT: andl $858993459, %ecx # imm = 0x33333333
-; CHECK-NEXT: leal (%ecx,%edx,4), %ecx
-; CHECK-NEXT: movl %ecx, %edx
-; CHECK-NEXT: andl $1431655765, %edx # imm = 0x55555555
-; CHECK-NEXT: shrl %ecx
-; CHECK-NEXT: andl $1431655765, %ecx # imm = 0x55555555
-; CHECK-NEXT: leal (%ecx,%edx,2), %ecx
-; CHECK-NEXT: movd %ecx, %xmm2
-; CHECK-NEXT: bswapl %eax
-; CHECK-NEXT: movl %eax, %ecx
-; CHECK-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
-; CHECK-NEXT: shll $4, %ecx
-; CHECK-NEXT: shrl $4, %eax
-; CHECK-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F
-; CHECK-NEXT: orl %ecx, %eax
-; CHECK-NEXT: movl %eax, %ecx
-; CHECK-NEXT: andl $858993459, %ecx # imm = 0x33333333
-; CHECK-NEXT: shrl $2, %eax
-; CHECK-NEXT: andl $858993459, %eax # imm = 0x33333333
-; CHECK-NEXT: leal (%eax,%ecx,4), %eax
-; CHECK-NEXT: movl %eax, %ecx
-; CHECK-NEXT: andl $1431655765, %ecx # imm = 0x55555555
-; CHECK-NEXT: shrl %eax
-; CHECK-NEXT: andl $1431655765, %eax # imm = 0x55555555
-; CHECK-NEXT: leal (%eax,%ecx,2), %eax
-; CHECK-NEXT: movd %eax, %xmm1
-; CHECK-NEXT: bswapl %eax
-; CHECK-NEXT: movl %eax, %ecx
-; CHECK-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
-; CHECK-NEXT: shll $4, %ecx
-; CHECK-NEXT: shrl $4, %eax
-; CHECK-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F
-; CHECK-NEXT: orl %ecx, %eax
-; CHECK-NEXT: movl %eax, %ecx
-; CHECK-NEXT: andl $858993459, %ecx # imm = 0x33333333
-; CHECK-NEXT: shrl $2, %eax
-; CHECK-NEXT: andl $858993459, %eax # imm = 0x33333333
-; CHECK-NEXT: leal (%eax,%ecx,4), %eax
-; CHECK-NEXT: movl %eax, %ecx
-; CHECK-NEXT: andl $1431655765, %ecx # imm = 0x55555555
-; CHECK-NEXT: shrl %eax
-; CHECK-NEXT: andl $1431655765, %eax # imm = 0x55555555
-; CHECK-NEXT: leal (%eax,%ecx,2), %eax
-; CHECK-NEXT: movd %eax, %xmm3
-; CHECK-NEXT: pclmulqdq $0, %xmm2, %xmm3
-; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT: movd %xmm3, %ecx
-; CHECK-NEXT: bswapl %ecx
-; CHECK-NEXT: movl %ecx, %esi
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %esi
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx
+; CHECK-NEXT: bswapl %esi
+; CHECK-NEXT: movl %esi, %edi
+; CHECK-NEXT: andl $252645135, %edi # imm = 0xF0F0F0F
+; CHECK-NEXT: shll $4, %edi
+; CHECK-NEXT: shrl $4, %esi
; CHECK-NEXT: andl $252645135, %esi # imm = 0xF0F0F0F
-; CHECK-NEXT: shll $4, %esi
-; CHECK-NEXT: shrl $4, %ecx
-; CHECK-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
-; CHECK-NEXT: orl %esi, %ecx
-; CHECK-NEXT: movl %ecx, %esi
+; CHECK-NEXT: orl %edi, %esi
+; CHECK-NEXT: movl %esi, %edi
+; CHECK-NEXT: andl $858993459, %edi # imm = 0x33333333
+; CHECK-NEXT: shrl $2, %esi
; CHECK-NEXT: andl $858993459, %esi # imm = 0x33333333
-; CHECK-NEXT: shrl $2, %ecx
-; CHECK-NEXT: andl $858993459, %ecx # imm = 0x33333333
-; CHECK-NEXT: leal (%ecx,%esi,4), %ecx
-; CHECK-NEXT: movl %ecx, %esi
+; CHECK-NEXT: leal (%esi,%edi,4), %esi
+; CHECK-NEXT: movl %esi, %edi
+; CHECK-NEXT: andl $1431655765, %edi # imm = 0x55555555
+; CHECK-NEXT: shrl %esi
; CHECK-NEXT: andl $1431655765, %esi # imm = 0x55555555
-; CHECK-NEXT: shrl %ecx
-; CHECK-NEXT: andl $1431655764, %ecx # imm = 0x55555554
-; CHECK-NEXT: leal (%ecx,%esi,2), %ecx
-; CHECK-NEXT: shrl %ecx
+; CHECK-NEXT: leal (%esi,%edi,2), %esi
+; CHECK-NEXT: movd %esi, %xmm1
; CHECK-NEXT: bswapl %edx
; CHECK-NEXT: movl %edx, %esi
; CHECK-NEXT: andl $252645135, %esi # imm = 0xF0F0F0F
@@ -210,101 +73,8 @@ define i64 @clmulr_i64(i64 %a, i64 %b) nounwind {
; CHECK-NEXT: shrl %edx
; CHECK-NEXT: andl $1431655765, %edx # imm = 0x55555555
; CHECK-NEXT: leal (%edx,%esi,2), %edx
-; CHECK-NEXT: movd %edx, %xmm2
-; CHECK-NEXT: pclmulqdq $0, %xmm0, %xmm2
-; CHECK-NEXT: movd %xmm2, %edx
-; CHECK-NEXT: bswapl %eax
-; CHECK-NEXT: movl %eax, %esi
-; CHECK-NEXT: andl $252645135, %esi # imm = 0xF0F0F0F
-; CHECK-NEXT: shll $4, %esi
-; CHECK-NEXT: shrl $4, %eax
-; CHECK-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F
-; CHECK-NEXT: orl %esi, %eax
-; CHECK-NEXT: movl %eax, %esi
-; CHECK-NEXT: andl $858993459, %esi # imm = 0x33333333
-; CHECK-NEXT: shrl $2, %eax
-; CHECK-NEXT: andl $858993459, %eax # imm = 0x33333333
-; CHECK-NEXT: leal (%eax,%esi,4), %eax
-; CHECK-NEXT: movl %eax, %esi
-; CHECK-NEXT: andl $1431655765, %esi # imm = 0x55555555
-; CHECK-NEXT: shrl %eax
-; CHECK-NEXT: andl $1431655765, %eax # imm = 0x55555555
-; CHECK-NEXT: leal (%eax,%esi,2), %eax
-; CHECK-NEXT: movd %eax, %xmm2
-; CHECK-NEXT: pclmulqdq $0, %xmm1, %xmm2
-; CHECK-NEXT: movd %xmm2, %eax
-; CHECK-NEXT: xorl %edx, %eax
-; CHECK-NEXT: xorl %ecx, %eax
-; CHECK-NEXT: bswapl %eax
-; CHECK-NEXT: movl %eax, %ecx
-; CHECK-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
-; CHECK-NEXT: shll $4, %ecx
-; CHECK-NEXT: shrl $4, %eax
-; CHECK-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F
-; CHECK-NEXT: orl %ecx, %eax
-; CHECK-NEXT: movl %eax, %ecx
-; CHECK-NEXT: andl $858993459, %ecx # imm = 0x33333333
-; CHECK-NEXT: shrl $2, %eax
-; CHECK-NEXT: andl $858993459, %eax # imm = 0x33333333
-; CHECK-NEXT: leal (%eax,%ecx,4), %eax
-; CHECK-NEXT: movl %eax, %ecx
-; CHECK-NEXT: andl $1431655765, %ecx # imm = 0x55555555
-; CHECK-NEXT: shrl %eax
-; CHECK-NEXT: andl $1431655765, %eax # imm = 0x55555555
-; CHECK-NEXT: leal (%eax,%ecx,2), %eax
-; CHECK-NEXT: pclmulqdq $0, %xmm0, %xmm1
-; CHECK-NEXT: movd %xmm1, %ecx
-; CHECK-NEXT: bswapl %ecx
-; CHECK-NEXT: movl %ecx, %edx
-; CHECK-NEXT: andl $252645135, %edx # imm = 0xF0F0F0F
-; CHECK-NEXT: shll $4, %edx
-; CHECK-NEXT: shrl $4, %ecx
-; CHECK-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
-; CHECK-NEXT: orl %edx, %ecx
-; CHECK-NEXT: movl %ecx, %edx
-; CHECK-NEXT: andl $858993459, %edx # imm = 0x33333333
-; CHECK-NEXT: shrl $2, %ecx
-; CHECK-NEXT: andl $858993459, %ecx # imm = 0x33333333
-; CHECK-NEXT: leal (%ecx,%edx,4), %ecx
-; CHECK-NEXT: movl %ecx, %edx
-; CHECK-NEXT: andl $1431655765, %edx # imm = 0x55555555
-; CHECK-NEXT: shrl %ecx
-; CHECK-NEXT: andl $1431655765, %ecx # imm = 0x55555555
-; CHECK-NEXT: leal (%ecx,%edx,2), %edx
-; CHECK-NEXT: popl %esi
-; CHECK-NEXT: retl
- %a.ext = zext i64 %a to i128
- %b.ext = zext i64 %b to i128
- %clmul = call i128 @llvm.clmul.i128(i128 %a.ext, i128 %b.ext)
- %res.ext = lshr i128 %clmul, 63
- %res = trunc i128 %res.ext to i64
- ret i64 %res
-}
-
-define i64 @clmulh_i64(i64 %a, i64 %b) nounwind {
-; CHECK-LABEL: clmulh_i64:
-; CHECK: # %bb.0:
-; CHECK-NEXT: pushl %esi
-; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; CHECK-NEXT: bswapl %ecx
-; CHECK-NEXT: movl %ecx, %edx
-; CHECK-NEXT: andl $252645135, %edx # imm = 0xF0F0F0F
-; CHECK-NEXT: shll $4, %edx
-; CHECK-NEXT: shrl $4, %ecx
-; CHECK-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
-; CHECK-NEXT: orl %edx, %ecx
-; CHECK-NEXT: movl %ecx, %edx
-; CHECK-NEXT: andl $858993459, %edx # imm = 0x33333333
-; CHECK-NEXT: shrl $2, %ecx
-; CHECK-NEXT: andl $858993459, %ecx # imm = 0x33333333
-; CHECK-NEXT: leal (%ecx,%edx,4), %ecx
-; CHECK-NEXT: movl %ecx, %edx
-; CHECK-NEXT: andl $1431655765, %edx # imm = 0x55555555
-; CHECK-NEXT: shrl %ecx
-; CHECK-NEXT: andl $1431655765, %ecx # imm = 0x55555555
-; CHECK-NEXT: leal (%ecx,%edx,2), %ecx
-; CHECK-NEXT: movd %ecx, %xmm0
+; CHECK-NEXT: movd %edx, %xmm0
+; CHECK-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-NEXT: bswapl %ecx
; CHECK-NEXT: movl %ecx, %edx
; CHECK-NEXT: andl $252645135, %edx # imm = 0xF0F0F0F
@@ -341,6 +111,9 @@ define i64 @clmulh_i64(i64 %a, i64 %b) nounwind {
; CHECK-NEXT: andl $1431655765, %eax # imm = 0x55555555
; CHECK-NEXT: leal (%eax,%ecx,2), %eax
; CHECK-NEXT: movd %eax, %xmm1
+; CHECK-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; CHECK-NEXT: pclmulqdq $0, %xmm0, %xmm1
+; CHECK-NEXT: movd %xmm1, %eax
; CHECK-NEXT: bswapl %eax
; CHECK-NEXT: movl %eax, %ecx
; CHECK-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
@@ -357,72 +130,9 @@ define i64 @clmulh_i64(i64 %a, i64 %b) nounwind {
; CHECK-NEXT: andl $1431655765, %ecx # imm = 0x55555555
; CHECK-NEXT: shrl %eax
; CHECK-NEXT: andl $1431655765, %eax # imm = 0x55555555
-; CHECK-NEXT: leal (%eax,%ecx,2), %eax
-; CHECK-NEXT: movd %eax, %xmm3
-; CHECK-NEXT: pclmulqdq $0, %xmm2, %xmm3
-; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx
-; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT: movd %xmm3, %ecx
-; CHECK-NEXT: bswapl %ecx
-; CHECK-NEXT: movl %ecx, %esi
-; CHECK-NEXT: andl $252645135, %esi # imm = 0xF0F0F0F
-; CHECK-NEXT: shll $4, %esi
-; CHECK-NEXT: shrl $4, %ecx
-; CHECK-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
-; CHECK-NEXT: orl %esi, %ecx
-; CHECK-NEXT: movl %ecx, %esi
-; CHECK-NEXT: andl $858993459, %esi # imm = 0x33333333
-; CHECK-NEXT: shrl $2, %ecx
-; CHECK-NEXT: andl $858993459, %ecx # imm = 0x33333333
-; CHECK-NEXT: leal (%ecx,%esi,4), %ecx
-; CHECK-NEXT: movl %ecx, %esi
-; CHECK-NEXT: andl $1431655765, %esi # imm = 0x55555555
-; CHECK-NEXT: shrl %ecx
-; CHECK-NEXT: andl $1431655764, %ecx # imm = 0x55555554
-; CHECK-NEXT: leal (%ecx,%esi,2), %ecx
-; CHECK-NEXT: shrl %ecx
-; CHECK-NEXT: bswapl %edx
-; CHECK-NEXT: movl %edx, %esi
-; CHECK-NEXT: andl $252645135, %esi # imm = 0xF0F0F0F
-; CHECK-NEXT: shll $4, %esi
-; CHECK-NEXT: shrl $4, %edx
-; CHECK-NEXT: andl $252645135, %edx # imm = 0xF0F0F0F
-; CHECK-NEXT: orl %esi, %edx
-; CHECK-NEXT: movl %edx, %esi
-; CHECK-NEXT: andl $858993459, %esi # imm = 0x33333333
-; CHECK-NEXT: shrl $2, %edx
-; CHECK-NEXT: andl $858993459, %edx # imm = 0x33333333
-; CHECK-NEXT: leal (%edx,%esi,4), %edx
-; CHECK-NEXT: movl %edx, %esi
-; CHECK-NEXT: andl $1431655765, %esi # imm = 0x55555555
-; CHECK-NEXT: shrl %edx
-; CHECK-NEXT: andl $1431655765, %edx # imm = 0x55555555
-; CHECK-NEXT: leal (%edx,%esi,2), %edx
-; CHECK-NEXT: movd %edx, %xmm2
-; CHECK-NEXT: pclmulqdq $0, %xmm0, %xmm2
-; CHECK-NEXT: movd %xmm2, %edx
-; CHECK-NEXT: bswapl %eax
-; CHECK-NEXT: movl %eax, %esi
-; CHECK-NEXT: andl $252645135, %esi # imm = 0xF0F0F0F
-; CHECK-NEXT: shll $4, %esi
-; CHECK-NEXT: shrl $4, %eax
-; CHECK-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F
-; CHECK-NEXT: orl %esi, %eax
-; CHECK-NEXT: movl %eax, %esi
-; CHECK-NEXT: andl $858993459, %esi # imm = 0x33333333
-; CHECK-NEXT: shrl $2, %eax
-; CHECK-NEXT: andl $858993459, %eax # imm = 0x33333333
-; CHECK-NEXT: leal (%eax,%esi,4), %eax
-; CHECK-NEXT: movl %eax, %esi
-; CHECK-NEXT: andl $1431655765, %esi # imm = 0x55555555
-; CHECK-NEXT: shrl %eax
-; CHECK-NEXT: andl $1431655765, %eax # imm = 0x55555555
-; CHECK-NEXT: leal (%eax,%esi,2), %eax
-; CHECK-NEXT: movd %eax, %xmm2
-; CHECK-NEXT: pclmulqdq $0, %xmm1, %xmm2
-; CHECK-NEXT: movd %xmm2, %eax
-; CHECK-NEXT: xorl %edx, %eax
-; CHECK-NEXT: xorl %ecx, %eax
+; CHECK-NEXT: leal (%eax,%ecx,2), %edx
+; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
+; CHECK-NEXT: movd %xmm0, %eax
; CHECK-NEXT: bswapl %eax
; CHECK-NEXT: movl %eax, %ecx
; CHECK-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
@@ -438,30 +148,29 @@ define i64 @clmulh_i64(i64 %a, i64 %b) nounwind {
; CHECK-NEXT: movl %eax, %ecx
; CHECK-NEXT: andl $1431655765, %ecx # imm = 0x55555555
; CHECK-NEXT: shrl %eax
-; CHECK-NEXT: andl $1431655764, %eax # imm = 0x55555554
+; CHECK-NEXT: andl $1431655765, %eax # imm = 0x55555555
; CHECK-NEXT: leal (%eax,%ecx,2), %eax
-; CHECK-NEXT: pclmulqdq $0, %xmm0, %xmm1
-; CHECK-NEXT: movd %xmm1, %ecx
-; CHECK-NEXT: bswapl %ecx
-; CHECK-NEXT: movl %ecx, %edx
-; CHECK-NEXT: andl $252645135, %edx # imm = 0xF0F0F0F
-; CHECK-NEXT: shll $4, %edx
-; CHECK-NEXT: shrl $4, %ecx
-; CHECK-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
-; CHECK-NEXT: orl %edx, %ecx
-; CHECK-NEXT: movl %ecx, %edx
-; CHECK-NEXT: andl $858993459, %edx # imm = 0x33333333
-; CHECK-NEXT: shrl $2, %ecx
-; CHECK-NEXT: andl $858993459, %ecx # imm = 0x33333333
-; CHECK-NEXT: leal (%ecx,%edx,4), %ecx
-; CHECK-NEXT: movl %ecx, %edx
-; CHECK-NEXT: shrl %edx
-; CHECK-NEXT: shrdl $1, %edx, %eax
-; CHECK-NEXT: andl $1431655765, %ecx # imm = 0x55555555
-; CHECK-NEXT: andl $1431655765, %edx # imm = 0x55555555
-; CHECK-NEXT: leal (%edx,%ecx,2), %edx
-; CHECK-NEXT: shrl %edx
; CHECK-NEXT: popl %esi
+; CHECK-NEXT: popl %edi
+; CHECK-NEXT: retl
+ %a.ext = zext i64 %a to i128
+ %b.ext = zext i64 %b to i128
+ %clmul = call i128 @llvm.clmul.i128(i128 %a.ext, i128 %b.ext)
+ %res.ext = lshr i128 %clmul, 63
+ %res = trunc i128 %res.ext to i64
+ ret i64 %res
+}
+
+define i64 @clmulh_i64(i64 %a, i64 %b) nounwind {
+; CHECK-LABEL: clmulh_i64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
+; CHECK-NEXT: pclmulqdq $0, %xmm0, %xmm1
+; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
+; CHECK-NEXT: movd %xmm0, %eax
+; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm1[3,3,3,3]
+; CHECK-NEXT: movd %xmm0, %edx
; CHECK-NEXT: retl
%a.ext = zext i64 %a to i128
%b.ext = zext i64 %b to i128
More information about the llvm-commits
mailing list