[llvm] 1978b15 - [X86] Add support for llvm.clmul.i64 on 32-bit targets (#176449)

via llvm-commits llvm-commits at lists.llvm.org
Sat Jan 17 07:32:10 PST 2026


Author: Simon Pilgrim
Date: 2026-01-17T15:32:05Z
New Revision: 1978b15e0c3bf1dc67501fbf4ecd352aaa2fa10c

URL: https://github.com/llvm/llvm-project/commit/1978b15e0c3bf1dc67501fbf4ecd352aaa2fa10c
DIFF: https://github.com/llvm/llvm-project/commit/1978b15e0c3bf1dc67501fbf4ecd352aaa2fa10c.diff

LOG: [X86] Add support for llvm.clmul.i64 on 32-bit targets (#176449)

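Add a ReplaceNodeResults expansion for llvm.clmul.i64 (and the matching
CLMULH node) on 32-bit x86 targets with PCLMUL. The i64 operands are placed
in v2i64 vectors, multiplied with PCLMULQDQ, and the low (CLMUL) or high
(CLMULH) i64 element is extracted. The expansion is skipped for functions
with the noimplicitfloat attribute.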

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86ISelLowering.cpp
    llvm/test/CodeGen/X86/clmul-x86.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index ef94c198558c7..640d9c5f5bc07 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -1156,10 +1156,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     setOperationAction(ISD::XOR, MVT::i128, Custom);
 
     if (Subtarget.hasPCLMUL()) {
-      if (Subtarget.is64Bit()) {
-        setOperationAction(ISD::CLMUL, MVT::i64, Custom);
-        setOperationAction(ISD::CLMULH, MVT::i64, Custom);
-      }
+      setOperationAction(ISD::CLMUL, MVT::i64, Custom);
+      setOperationAction(ISD::CLMULH, MVT::i64, Custom);
       setOperationAction(ISD::CLMUL, MVT::i32, Custom);
       setOperationAction(ISD::CLMUL, MVT::i16, Custom);
       setOperationAction(ISD::CLMUL, MVT::i8, Custom);
@@ -34121,6 +34119,10 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
                                            SelectionDAG &DAG) const {
   SDLoc dl(N);
   unsigned Opc = N->getOpcode();
+  bool NoImplicitFloatOps =
+      DAG.getMachineFunction().getFunction().hasFnAttribute(
+          Attribute::NoImplicitFloat);
+
   switch (Opc) {
   default:
 #ifndef NDEBUG
@@ -34194,9 +34196,6 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
       return;
     }
     // Use a v2i64 if possible.
-    bool NoImplicitFloatOps =
-        DAG.getMachineFunction().getFunction().hasFnAttribute(
-            Attribute::NoImplicitFloat);
     if (isTypeLegal(MVT::v2i64) && !NoImplicitFloatOps) {
       SDValue Wide =
           DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, N->getOperand(0));
@@ -34348,6 +34347,24 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
     Results.push_back(Res);
     return;
   }
+  case ISD::CLMUL:
+  case ISD::CLMULH: {
+    assert(Subtarget.hasPCLMUL() && "PCLMUL required for CLMUL lowering");
+    assert(N->getValueType(0) == MVT::i64 && "Unexpected VT!");
+    if (NoImplicitFloatOps)
+      return;
+    SDValue LHS = N->getOperand(0);
+    SDValue RHS = N->getOperand(1);
+    bool IsHigh = Opc == ISD::CLMULH;
+    SDValue Res =
+        DAG.getNode(X86ISD::PCLMULQDQ, dl, MVT::v2i64,
+                    DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, LHS),
+                    DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, RHS),
+                    DAG.getTargetConstant(0, dl, MVT::i8));
+    Res = DAG.getExtractVectorElt(dl, MVT::i64, Res, IsHigh ? 1 : 0);
+    Results.push_back(Res);
+    return;
+  }
   // We might have generated v2f32 FMIN/FMAX operations. Widen them to v4f32.
   case X86ISD::FMINC:
   case X86ISD::FMIN:
@@ -35148,9 +35165,6 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N,
     assert(
         (N->getValueType(0) == MVT::i64 || N->getValueType(0) == MVT::i128) &&
         "Unexpected VT!");
-    bool NoImplicitFloatOps =
-        DAG.getMachineFunction().getFunction().hasFnAttribute(
-            Attribute::NoImplicitFloat);
     if (!Subtarget.useSoftFloat() && !NoImplicitFloatOps) {
       auto *Node = cast<AtomicSDNode>(N);
 

diff  --git a/llvm/test/CodeGen/X86/clmul-x86.ll b/llvm/test/CodeGen/X86/clmul-x86.ll
index c9a7cde29e30c..cac2625f66fa1 100644
--- a/llvm/test/CodeGen/X86/clmul-x86.ll
+++ b/llvm/test/CodeGen/X86/clmul-x86.ll
@@ -18,76 +18,12 @@ define i32 @clmul_i32(i32 %a, i32 %b) nounwind {
 define i64 @clmul_i64(i64 %a, i64 %b) nounwind {
 ; CHECK-LABEL: clmul_i64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; CHECK-NEXT:    movd %ecx, %xmm0
-; CHECK-NEXT:    bswapl %ecx
-; CHECK-NEXT:    movl %ecx, %edx
-; CHECK-NEXT:    andl $252645135, %edx # imm = 0xF0F0F0F
-; CHECK-NEXT:    shll $4, %edx
-; CHECK-NEXT:    shrl $4, %ecx
-; CHECK-NEXT:    andl $252645135, %ecx # imm = 0xF0F0F0F
-; CHECK-NEXT:    orl %edx, %ecx
-; CHECK-NEXT:    movl %ecx, %edx
-; CHECK-NEXT:    andl $858993459, %edx # imm = 0x33333333
-; CHECK-NEXT:    shrl $2, %ecx
-; CHECK-NEXT:    andl $858993459, %ecx # imm = 0x33333333
-; CHECK-NEXT:    leal (%ecx,%edx,4), %ecx
-; CHECK-NEXT:    movl %ecx, %edx
-; CHECK-NEXT:    andl $1431655765, %edx # imm = 0x55555555
-; CHECK-NEXT:    shrl %ecx
-; CHECK-NEXT:    andl $1431655765, %ecx # imm = 0x55555555
-; CHECK-NEXT:    leal (%ecx,%edx,2), %ecx
-; CHECK-NEXT:    movd %ecx, %xmm2
-; CHECK-NEXT:    movd %eax, %xmm1
-; CHECK-NEXT:    bswapl %eax
-; CHECK-NEXT:    movl %eax, %ecx
-; CHECK-NEXT:    andl $252645135, %ecx # imm = 0xF0F0F0F
-; CHECK-NEXT:    shll $4, %ecx
-; CHECK-NEXT:    shrl $4, %eax
-; CHECK-NEXT:    andl $252645135, %eax # imm = 0xF0F0F0F
-; CHECK-NEXT:    orl %ecx, %eax
-; CHECK-NEXT:    movl %eax, %ecx
-; CHECK-NEXT:    andl $858993459, %ecx # imm = 0x33333333
-; CHECK-NEXT:    shrl $2, %eax
-; CHECK-NEXT:    andl $858993459, %eax # imm = 0x33333333
-; CHECK-NEXT:    leal (%eax,%ecx,4), %eax
-; CHECK-NEXT:    movl %eax, %ecx
-; CHECK-NEXT:    andl $1431655765, %ecx # imm = 0x55555555
-; CHECK-NEXT:    shrl %eax
-; CHECK-NEXT:    andl $1431655765, %eax # imm = 0x55555555
-; CHECK-NEXT:    leal (%eax,%ecx,2), %eax
-; CHECK-NEXT:    movd %eax, %xmm3
-; CHECK-NEXT:    pclmulqdq $0, %xmm2, %xmm3
-; CHECK-NEXT:    movd %xmm3, %eax
-; CHECK-NEXT:    bswapl %eax
-; CHECK-NEXT:    movl %eax, %ecx
-; CHECK-NEXT:    andl $252645135, %ecx # imm = 0xF0F0F0F
-; CHECK-NEXT:    shll $4, %ecx
-; CHECK-NEXT:    shrl $4, %eax
-; CHECK-NEXT:    andl $252645135, %eax # imm = 0xF0F0F0F
-; CHECK-NEXT:    orl %ecx, %eax
-; CHECK-NEXT:    movl %eax, %ecx
-; CHECK-NEXT:    andl $858993459, %ecx # imm = 0x33333333
-; CHECK-NEXT:    shrl $2, %eax
-; CHECK-NEXT:    andl $858993459, %eax # imm = 0x33333333
-; CHECK-NEXT:    leal (%eax,%ecx,4), %eax
-; CHECK-NEXT:    movl %eax, %ecx
-; CHECK-NEXT:    andl $1431655765, %ecx # imm = 0x55555555
-; CHECK-NEXT:    shrl %eax
-; CHECK-NEXT:    andl $1431655764, %eax # imm = 0x55555554
-; CHECK-NEXT:    leal (%eax,%ecx,2), %eax
-; CHECK-NEXT:    shrl %eax
-; CHECK-NEXT:    movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; CHECK-NEXT:    pclmulqdq $0, %xmm0, %xmm2
-; CHECK-NEXT:    movd %xmm2, %ecx
-; CHECK-NEXT:    movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
-; CHECK-NEXT:    pclmulqdq $0, %xmm1, %xmm2
-; CHECK-NEXT:    movd %xmm2, %edx
-; CHECK-NEXT:    xorl %ecx, %edx
-; CHECK-NEXT:    xorl %eax, %edx
+; CHECK-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
 ; CHECK-NEXT:    pclmulqdq $0, %xmm0, %xmm1
 ; CHECK-NEXT:    movd %xmm1, %eax
+; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
+; CHECK-NEXT:    movd %xmm0, %edx
 ; CHECK-NEXT:    retl
   %res = call i64 @llvm.clmul.i64(i64 %a, i64 %b)
   ret i64 %res
@@ -96,103 +32,30 @@ define i64 @clmul_i64(i64 %a, i64 %b) nounwind {
 define i64 @clmulr_i64(i64 %a, i64 %b) nounwind {
 ; CHECK-LABEL: clmulr_i64:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    pushl %edi
 ; CHECK-NEXT:    pushl %esi
-; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; CHECK-NEXT:    bswapl %ecx
-; CHECK-NEXT:    movl %ecx, %edx
-; CHECK-NEXT:    andl $252645135, %edx # imm = 0xF0F0F0F
-; CHECK-NEXT:    shll $4, %edx
-; CHECK-NEXT:    shrl $4, %ecx
-; CHECK-NEXT:    andl $252645135, %ecx # imm = 0xF0F0F0F
-; CHECK-NEXT:    orl %edx, %ecx
-; CHECK-NEXT:    movl %ecx, %edx
-; CHECK-NEXT:    andl $858993459, %edx # imm = 0x33333333
-; CHECK-NEXT:    shrl $2, %ecx
-; CHECK-NEXT:    andl $858993459, %ecx # imm = 0x33333333
-; CHECK-NEXT:    leal (%ecx,%edx,4), %ecx
-; CHECK-NEXT:    movl %ecx, %edx
-; CHECK-NEXT:    andl $1431655765, %edx # imm = 0x55555555
-; CHECK-NEXT:    shrl %ecx
-; CHECK-NEXT:    andl $1431655765, %ecx # imm = 0x55555555
-; CHECK-NEXT:    leal (%ecx,%edx,2), %ecx
-; CHECK-NEXT:    movd %ecx, %xmm0
-; CHECK-NEXT:    bswapl %ecx
-; CHECK-NEXT:    movl %ecx, %edx
-; CHECK-NEXT:    andl $252645135, %edx # imm = 0xF0F0F0F
-; CHECK-NEXT:    shll $4, %edx
-; CHECK-NEXT:    shrl $4, %ecx
-; CHECK-NEXT:    andl $252645135, %ecx # imm = 0xF0F0F0F
-; CHECK-NEXT:    orl %edx, %ecx
-; CHECK-NEXT:    movl %ecx, %edx
-; CHECK-NEXT:    andl $858993459, %edx # imm = 0x33333333
-; CHECK-NEXT:    shrl $2, %ecx
-; CHECK-NEXT:    andl $858993459, %ecx # imm = 0x33333333
-; CHECK-NEXT:    leal (%ecx,%edx,4), %ecx
-; CHECK-NEXT:    movl %ecx, %edx
-; CHECK-NEXT:    andl $1431655765, %edx # imm = 0x55555555
-; CHECK-NEXT:    shrl %ecx
-; CHECK-NEXT:    andl $1431655765, %ecx # imm = 0x55555555
-; CHECK-NEXT:    leal (%ecx,%edx,2), %ecx
-; CHECK-NEXT:    movd %ecx, %xmm2
-; CHECK-NEXT:    bswapl %eax
-; CHECK-NEXT:    movl %eax, %ecx
-; CHECK-NEXT:    andl $252645135, %ecx # imm = 0xF0F0F0F
-; CHECK-NEXT:    shll $4, %ecx
-; CHECK-NEXT:    shrl $4, %eax
-; CHECK-NEXT:    andl $252645135, %eax # imm = 0xF0F0F0F
-; CHECK-NEXT:    orl %ecx, %eax
-; CHECK-NEXT:    movl %eax, %ecx
-; CHECK-NEXT:    andl $858993459, %ecx # imm = 0x33333333
-; CHECK-NEXT:    shrl $2, %eax
-; CHECK-NEXT:    andl $858993459, %eax # imm = 0x33333333
-; CHECK-NEXT:    leal (%eax,%ecx,4), %eax
-; CHECK-NEXT:    movl %eax, %ecx
-; CHECK-NEXT:    andl $1431655765, %ecx # imm = 0x55555555
-; CHECK-NEXT:    shrl %eax
-; CHECK-NEXT:    andl $1431655765, %eax # imm = 0x55555555
-; CHECK-NEXT:    leal (%eax,%ecx,2), %eax
-; CHECK-NEXT:    movd %eax, %xmm1
-; CHECK-NEXT:    bswapl %eax
-; CHECK-NEXT:    movl %eax, %ecx
-; CHECK-NEXT:    andl $252645135, %ecx # imm = 0xF0F0F0F
-; CHECK-NEXT:    shll $4, %ecx
-; CHECK-NEXT:    shrl $4, %eax
-; CHECK-NEXT:    andl $252645135, %eax # imm = 0xF0F0F0F
-; CHECK-NEXT:    orl %ecx, %eax
-; CHECK-NEXT:    movl %eax, %ecx
-; CHECK-NEXT:    andl $858993459, %ecx # imm = 0x33333333
-; CHECK-NEXT:    shrl $2, %eax
-; CHECK-NEXT:    andl $858993459, %eax # imm = 0x33333333
-; CHECK-NEXT:    leal (%eax,%ecx,4), %eax
-; CHECK-NEXT:    movl %eax, %ecx
-; CHECK-NEXT:    andl $1431655765, %ecx # imm = 0x55555555
-; CHECK-NEXT:    shrl %eax
-; CHECK-NEXT:    andl $1431655765, %eax # imm = 0x55555555
-; CHECK-NEXT:    leal (%eax,%ecx,2), %eax
-; CHECK-NEXT:    movd %eax, %xmm3
-; CHECK-NEXT:    pclmulqdq $0, %xmm2, %xmm3
-; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT:    movd %xmm3, %ecx
-; CHECK-NEXT:    bswapl %ecx
-; CHECK-NEXT:    movl %ecx, %esi
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; CHECK-NEXT:    bswapl %esi
+; CHECK-NEXT:    movl %esi, %edi
+; CHECK-NEXT:    andl $252645135, %edi # imm = 0xF0F0F0F
+; CHECK-NEXT:    shll $4, %edi
+; CHECK-NEXT:    shrl $4, %esi
 ; CHECK-NEXT:    andl $252645135, %esi # imm = 0xF0F0F0F
-; CHECK-NEXT:    shll $4, %esi
-; CHECK-NEXT:    shrl $4, %ecx
-; CHECK-NEXT:    andl $252645135, %ecx # imm = 0xF0F0F0F
-; CHECK-NEXT:    orl %esi, %ecx
-; CHECK-NEXT:    movl %ecx, %esi
+; CHECK-NEXT:    orl %edi, %esi
+; CHECK-NEXT:    movl %esi, %edi
+; CHECK-NEXT:    andl $858993459, %edi # imm = 0x33333333
+; CHECK-NEXT:    shrl $2, %esi
 ; CHECK-NEXT:    andl $858993459, %esi # imm = 0x33333333
-; CHECK-NEXT:    shrl $2, %ecx
-; CHECK-NEXT:    andl $858993459, %ecx # imm = 0x33333333
-; CHECK-NEXT:    leal (%ecx,%esi,4), %ecx
-; CHECK-NEXT:    movl %ecx, %esi
+; CHECK-NEXT:    leal (%esi,%edi,4), %esi
+; CHECK-NEXT:    movl %esi, %edi
+; CHECK-NEXT:    andl $1431655765, %edi # imm = 0x55555555
+; CHECK-NEXT:    shrl %esi
 ; CHECK-NEXT:    andl $1431655765, %esi # imm = 0x55555555
-; CHECK-NEXT:    shrl %ecx
-; CHECK-NEXT:    andl $1431655764, %ecx # imm = 0x55555554
-; CHECK-NEXT:    leal (%ecx,%esi,2), %ecx
-; CHECK-NEXT:    shrl %ecx
+; CHECK-NEXT:    leal (%esi,%edi,2), %esi
+; CHECK-NEXT:    movd %esi, %xmm1
 ; CHECK-NEXT:    bswapl %edx
 ; CHECK-NEXT:    movl %edx, %esi
 ; CHECK-NEXT:    andl $252645135, %esi # imm = 0xF0F0F0F
@@ -210,101 +73,8 @@ define i64 @clmulr_i64(i64 %a, i64 %b) nounwind {
 ; CHECK-NEXT:    shrl %edx
 ; CHECK-NEXT:    andl $1431655765, %edx # imm = 0x55555555
 ; CHECK-NEXT:    leal (%edx,%esi,2), %edx
-; CHECK-NEXT:    movd %edx, %xmm2
-; CHECK-NEXT:    pclmulqdq $0, %xmm0, %xmm2
-; CHECK-NEXT:    movd %xmm2, %edx
-; CHECK-NEXT:    bswapl %eax
-; CHECK-NEXT:    movl %eax, %esi
-; CHECK-NEXT:    andl $252645135, %esi # imm = 0xF0F0F0F
-; CHECK-NEXT:    shll $4, %esi
-; CHECK-NEXT:    shrl $4, %eax
-; CHECK-NEXT:    andl $252645135, %eax # imm = 0xF0F0F0F
-; CHECK-NEXT:    orl %esi, %eax
-; CHECK-NEXT:    movl %eax, %esi
-; CHECK-NEXT:    andl $858993459, %esi # imm = 0x33333333
-; CHECK-NEXT:    shrl $2, %eax
-; CHECK-NEXT:    andl $858993459, %eax # imm = 0x33333333
-; CHECK-NEXT:    leal (%eax,%esi,4), %eax
-; CHECK-NEXT:    movl %eax, %esi
-; CHECK-NEXT:    andl $1431655765, %esi # imm = 0x55555555
-; CHECK-NEXT:    shrl %eax
-; CHECK-NEXT:    andl $1431655765, %eax # imm = 0x55555555
-; CHECK-NEXT:    leal (%eax,%esi,2), %eax
-; CHECK-NEXT:    movd %eax, %xmm2
-; CHECK-NEXT:    pclmulqdq $0, %xmm1, %xmm2
-; CHECK-NEXT:    movd %xmm2, %eax
-; CHECK-NEXT:    xorl %edx, %eax
-; CHECK-NEXT:    xorl %ecx, %eax
-; CHECK-NEXT:    bswapl %eax
-; CHECK-NEXT:    movl %eax, %ecx
-; CHECK-NEXT:    andl $252645135, %ecx # imm = 0xF0F0F0F
-; CHECK-NEXT:    shll $4, %ecx
-; CHECK-NEXT:    shrl $4, %eax
-; CHECK-NEXT:    andl $252645135, %eax # imm = 0xF0F0F0F
-; CHECK-NEXT:    orl %ecx, %eax
-; CHECK-NEXT:    movl %eax, %ecx
-; CHECK-NEXT:    andl $858993459, %ecx # imm = 0x33333333
-; CHECK-NEXT:    shrl $2, %eax
-; CHECK-NEXT:    andl $858993459, %eax # imm = 0x33333333
-; CHECK-NEXT:    leal (%eax,%ecx,4), %eax
-; CHECK-NEXT:    movl %eax, %ecx
-; CHECK-NEXT:    andl $1431655765, %ecx # imm = 0x55555555
-; CHECK-NEXT:    shrl %eax
-; CHECK-NEXT:    andl $1431655765, %eax # imm = 0x55555555
-; CHECK-NEXT:    leal (%eax,%ecx,2), %eax
-; CHECK-NEXT:    pclmulqdq $0, %xmm0, %xmm1
-; CHECK-NEXT:    movd %xmm1, %ecx
-; CHECK-NEXT:    bswapl %ecx
-; CHECK-NEXT:    movl %ecx, %edx
-; CHECK-NEXT:    andl $252645135, %edx # imm = 0xF0F0F0F
-; CHECK-NEXT:    shll $4, %edx
-; CHECK-NEXT:    shrl $4, %ecx
-; CHECK-NEXT:    andl $252645135, %ecx # imm = 0xF0F0F0F
-; CHECK-NEXT:    orl %edx, %ecx
-; CHECK-NEXT:    movl %ecx, %edx
-; CHECK-NEXT:    andl $858993459, %edx # imm = 0x33333333
-; CHECK-NEXT:    shrl $2, %ecx
-; CHECK-NEXT:    andl $858993459, %ecx # imm = 0x33333333
-; CHECK-NEXT:    leal (%ecx,%edx,4), %ecx
-; CHECK-NEXT:    movl %ecx, %edx
-; CHECK-NEXT:    andl $1431655765, %edx # imm = 0x55555555
-; CHECK-NEXT:    shrl %ecx
-; CHECK-NEXT:    andl $1431655765, %ecx # imm = 0x55555555
-; CHECK-NEXT:    leal (%ecx,%edx,2), %edx
-; CHECK-NEXT:    popl %esi
-; CHECK-NEXT:    retl
-  %a.ext = zext i64 %a to i128
-  %b.ext = zext i64 %b to i128
-  %clmul = call i128 @llvm.clmul.i128(i128 %a.ext, i128 %b.ext)
-  %res.ext = lshr i128 %clmul, 63
-  %res = trunc i128 %res.ext to i64
-  ret i64 %res
-}
-
-define i64 @clmulh_i64(i64 %a, i64 %b) nounwind {
-; CHECK-LABEL: clmulh_i64:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    pushl %esi
-; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; CHECK-NEXT:    bswapl %ecx
-; CHECK-NEXT:    movl %ecx, %edx
-; CHECK-NEXT:    andl $252645135, %edx # imm = 0xF0F0F0F
-; CHECK-NEXT:    shll $4, %edx
-; CHECK-NEXT:    shrl $4, %ecx
-; CHECK-NEXT:    andl $252645135, %ecx # imm = 0xF0F0F0F
-; CHECK-NEXT:    orl %edx, %ecx
-; CHECK-NEXT:    movl %ecx, %edx
-; CHECK-NEXT:    andl $858993459, %edx # imm = 0x33333333
-; CHECK-NEXT:    shrl $2, %ecx
-; CHECK-NEXT:    andl $858993459, %ecx # imm = 0x33333333
-; CHECK-NEXT:    leal (%ecx,%edx,4), %ecx
-; CHECK-NEXT:    movl %ecx, %edx
-; CHECK-NEXT:    andl $1431655765, %edx # imm = 0x55555555
-; CHECK-NEXT:    shrl %ecx
-; CHECK-NEXT:    andl $1431655765, %ecx # imm = 0x55555555
-; CHECK-NEXT:    leal (%ecx,%edx,2), %ecx
-; CHECK-NEXT:    movd %ecx, %xmm0
+; CHECK-NEXT:    movd %edx, %xmm0
+; CHECK-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
 ; CHECK-NEXT:    bswapl %ecx
 ; CHECK-NEXT:    movl %ecx, %edx
 ; CHECK-NEXT:    andl $252645135, %edx # imm = 0xF0F0F0F
@@ -341,6 +111,9 @@ define i64 @clmulh_i64(i64 %a, i64 %b) nounwind {
 ; CHECK-NEXT:    andl $1431655765, %eax # imm = 0x55555555
 ; CHECK-NEXT:    leal (%eax,%ecx,2), %eax
 ; CHECK-NEXT:    movd %eax, %xmm1
+; CHECK-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
+; CHECK-NEXT:    pclmulqdq $0, %xmm0, %xmm1
+; CHECK-NEXT:    movd %xmm1, %eax
 ; CHECK-NEXT:    bswapl %eax
 ; CHECK-NEXT:    movl %eax, %ecx
 ; CHECK-NEXT:    andl $252645135, %ecx # imm = 0xF0F0F0F
@@ -357,72 +130,9 @@ define i64 @clmulh_i64(i64 %a, i64 %b) nounwind {
 ; CHECK-NEXT:    andl $1431655765, %ecx # imm = 0x55555555
 ; CHECK-NEXT:    shrl %eax
 ; CHECK-NEXT:    andl $1431655765, %eax # imm = 0x55555555
-; CHECK-NEXT:    leal (%eax,%ecx,2), %eax
-; CHECK-NEXT:    movd %eax, %xmm3
-; CHECK-NEXT:    pclmulqdq $0, %xmm2, %xmm3
-; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; CHECK-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT:    movd %xmm3, %ecx
-; CHECK-NEXT:    bswapl %ecx
-; CHECK-NEXT:    movl %ecx, %esi
-; CHECK-NEXT:    andl $252645135, %esi # imm = 0xF0F0F0F
-; CHECK-NEXT:    shll $4, %esi
-; CHECK-NEXT:    shrl $4, %ecx
-; CHECK-NEXT:    andl $252645135, %ecx # imm = 0xF0F0F0F
-; CHECK-NEXT:    orl %esi, %ecx
-; CHECK-NEXT:    movl %ecx, %esi
-; CHECK-NEXT:    andl $858993459, %esi # imm = 0x33333333
-; CHECK-NEXT:    shrl $2, %ecx
-; CHECK-NEXT:    andl $858993459, %ecx # imm = 0x33333333
-; CHECK-NEXT:    leal (%ecx,%esi,4), %ecx
-; CHECK-NEXT:    movl %ecx, %esi
-; CHECK-NEXT:    andl $1431655765, %esi # imm = 0x55555555
-; CHECK-NEXT:    shrl %ecx
-; CHECK-NEXT:    andl $1431655764, %ecx # imm = 0x55555554
-; CHECK-NEXT:    leal (%ecx,%esi,2), %ecx
-; CHECK-NEXT:    shrl %ecx
-; CHECK-NEXT:    bswapl %edx
-; CHECK-NEXT:    movl %edx, %esi
-; CHECK-NEXT:    andl $252645135, %esi # imm = 0xF0F0F0F
-; CHECK-NEXT:    shll $4, %esi
-; CHECK-NEXT:    shrl $4, %edx
-; CHECK-NEXT:    andl $252645135, %edx # imm = 0xF0F0F0F
-; CHECK-NEXT:    orl %esi, %edx
-; CHECK-NEXT:    movl %edx, %esi
-; CHECK-NEXT:    andl $858993459, %esi # imm = 0x33333333
-; CHECK-NEXT:    shrl $2, %edx
-; CHECK-NEXT:    andl $858993459, %edx # imm = 0x33333333
-; CHECK-NEXT:    leal (%edx,%esi,4), %edx
-; CHECK-NEXT:    movl %edx, %esi
-; CHECK-NEXT:    andl $1431655765, %esi # imm = 0x55555555
-; CHECK-NEXT:    shrl %edx
-; CHECK-NEXT:    andl $1431655765, %edx # imm = 0x55555555
-; CHECK-NEXT:    leal (%edx,%esi,2), %edx
-; CHECK-NEXT:    movd %edx, %xmm2
-; CHECK-NEXT:    pclmulqdq $0, %xmm0, %xmm2
-; CHECK-NEXT:    movd %xmm2, %edx
-; CHECK-NEXT:    bswapl %eax
-; CHECK-NEXT:    movl %eax, %esi
-; CHECK-NEXT:    andl $252645135, %esi # imm = 0xF0F0F0F
-; CHECK-NEXT:    shll $4, %esi
-; CHECK-NEXT:    shrl $4, %eax
-; CHECK-NEXT:    andl $252645135, %eax # imm = 0xF0F0F0F
-; CHECK-NEXT:    orl %esi, %eax
-; CHECK-NEXT:    movl %eax, %esi
-; CHECK-NEXT:    andl $858993459, %esi # imm = 0x33333333
-; CHECK-NEXT:    shrl $2, %eax
-; CHECK-NEXT:    andl $858993459, %eax # imm = 0x33333333
-; CHECK-NEXT:    leal (%eax,%esi,4), %eax
-; CHECK-NEXT:    movl %eax, %esi
-; CHECK-NEXT:    andl $1431655765, %esi # imm = 0x55555555
-; CHECK-NEXT:    shrl %eax
-; CHECK-NEXT:    andl $1431655765, %eax # imm = 0x55555555
-; CHECK-NEXT:    leal (%eax,%esi,2), %eax
-; CHECK-NEXT:    movd %eax, %xmm2
-; CHECK-NEXT:    pclmulqdq $0, %xmm1, %xmm2
-; CHECK-NEXT:    movd %xmm2, %eax
-; CHECK-NEXT:    xorl %edx, %eax
-; CHECK-NEXT:    xorl %ecx, %eax
+; CHECK-NEXT:    leal (%eax,%ecx,2), %edx
+; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
+; CHECK-NEXT:    movd %xmm0, %eax
 ; CHECK-NEXT:    bswapl %eax
 ; CHECK-NEXT:    movl %eax, %ecx
 ; CHECK-NEXT:    andl $252645135, %ecx # imm = 0xF0F0F0F
@@ -438,30 +148,29 @@ define i64 @clmulh_i64(i64 %a, i64 %b) nounwind {
 ; CHECK-NEXT:    movl %eax, %ecx
 ; CHECK-NEXT:    andl $1431655765, %ecx # imm = 0x55555555
 ; CHECK-NEXT:    shrl %eax
-; CHECK-NEXT:    andl $1431655764, %eax # imm = 0x55555554
+; CHECK-NEXT:    andl $1431655765, %eax # imm = 0x55555555
 ; CHECK-NEXT:    leal (%eax,%ecx,2), %eax
-; CHECK-NEXT:    pclmulqdq $0, %xmm0, %xmm1
-; CHECK-NEXT:    movd %xmm1, %ecx
-; CHECK-NEXT:    bswapl %ecx
-; CHECK-NEXT:    movl %ecx, %edx
-; CHECK-NEXT:    andl $252645135, %edx # imm = 0xF0F0F0F
-; CHECK-NEXT:    shll $4, %edx
-; CHECK-NEXT:    shrl $4, %ecx
-; CHECK-NEXT:    andl $252645135, %ecx # imm = 0xF0F0F0F
-; CHECK-NEXT:    orl %edx, %ecx
-; CHECK-NEXT:    movl %ecx, %edx
-; CHECK-NEXT:    andl $858993459, %edx # imm = 0x33333333
-; CHECK-NEXT:    shrl $2, %ecx
-; CHECK-NEXT:    andl $858993459, %ecx # imm = 0x33333333
-; CHECK-NEXT:    leal (%ecx,%edx,4), %ecx
-; CHECK-NEXT:    movl %ecx, %edx
-; CHECK-NEXT:    shrl %edx
-; CHECK-NEXT:    shrdl $1, %edx, %eax
-; CHECK-NEXT:    andl $1431655765, %ecx # imm = 0x55555555
-; CHECK-NEXT:    andl $1431655765, %edx # imm = 0x55555555
-; CHECK-NEXT:    leal (%edx,%ecx,2), %edx
-; CHECK-NEXT:    shrl %edx
 ; CHECK-NEXT:    popl %esi
+; CHECK-NEXT:    popl %edi
+; CHECK-NEXT:    retl
+  %a.ext = zext i64 %a to i128
+  %b.ext = zext i64 %b to i128
+  %clmul = call i128 @llvm.clmul.i128(i128 %a.ext, i128 %b.ext)
+  %res.ext = lshr i128 %clmul, 63
+  %res = trunc i128 %res.ext to i64
+  ret i64 %res
+}
+
+define i64 @clmulh_i64(i64 %a, i64 %b) nounwind {
+; CHECK-LABEL: clmulh_i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
+; CHECK-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
+; CHECK-NEXT:    pclmulqdq $0, %xmm0, %xmm1
+; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
+; CHECK-NEXT:    movd %xmm0, %eax
+; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm1[3,3,3,3]
+; CHECK-NEXT:    movd %xmm0, %edx
 ; CHECK-NEXT:    retl
   %a.ext = zext i64 %a to i128
   %b.ext = zext i64 %b to i128


        


More information about the llvm-commits mailing list