[llvm] r347185 - [X86] Use a pcmpgt with 0 instead of psrad 31, to fill elements with the sign bit in v4i32 MULH lowering.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Sun Nov 18 23:22:26 PST 2018


Author: ctopper
Date: Sun Nov 18 23:22:26 2018
New Revision: 347185

URL: http://llvm.org/viewvc/llvm-project?rev=347185&view=rev
Log:
[X86] Use a pcmpgt with 0 instead of psrad 31, to fill elements with the sign bit in v4i32 MULH lowering.

The shift requires a copy to avoid clobbering a register. Comparing with 0 instead uses an xor to produce a zero register, which is then overwritten with the compare result. This still requires 2 instructions, but should be one byte shorter since it doesn't need to encode an immediate.

Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/test/CodeGen/X86/vector-idiv-sdiv-128.ll
    llvm/trunk/test/CodeGen/X86/vector-idiv.ll

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=347185&r1=347184&r2=347185&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sun Nov 18 23:22:26 2018
@@ -23591,11 +23591,11 @@ static SDValue LowerMULH(SDValue Op, con
     // If we have a signed multiply but no PMULDQ fix up the result of an
     // unsigned multiply.
     if (IsSigned && !Subtarget.hasSSE41()) {
-      SDValue ShAmt = DAG.getConstant(31, dl, VT);
+      SDValue Zero = DAG.getConstant(0, dl, VT);
       SDValue T1 = DAG.getNode(ISD::AND, dl, VT,
-                               DAG.getNode(ISD::SRA, dl, VT, A, ShAmt), B);
+                               DAG.getSetCC(dl, VT, Zero, A, ISD::SETGT), B);
       SDValue T2 = DAG.getNode(ISD::AND, dl, VT,
-                               DAG.getNode(ISD::SRA, dl, VT, B, ShAmt), A);
+                               DAG.getSetCC(dl, VT, Zero, B, ISD::SETGT), A);
 
       SDValue Fixup = DAG.getNode(ISD::ADD, dl, VT, T1, T2);
       Res = DAG.getNode(ISD::SUB, dl, VT, Res, Fixup);

Modified: llvm/trunk/test/CodeGen/X86/vector-idiv-sdiv-128.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-idiv-sdiv-128.ll?rev=347185&r1=347184&r2=347185&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-idiv-sdiv-128.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-idiv-sdiv-128.ll Sun Nov 18 23:22:26 2018
@@ -86,8 +86,8 @@ define <4 x i32> @test_div7_4i32(<4 x i3
 ; SSE2-NEXT:    pmuludq %xmm2, %xmm3
 ; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,3,2,3]
 ; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
-; SSE2-NEXT:    movdqa %xmm0, %xmm3
-; SSE2-NEXT:    psrad $31, %xmm3
+; SSE2-NEXT:    pxor %xmm3, %xmm3
+; SSE2-NEXT:    pcmpgtd %xmm0, %xmm3
 ; SSE2-NEXT:    pand %xmm2, %xmm3
 ; SSE2-NEXT:    paddd %xmm0, %xmm3
 ; SSE2-NEXT:    psubd %xmm3, %xmm1
@@ -386,8 +386,8 @@ define <4 x i32> @test_rem7_4i32(<4 x i3
 ; SSE2-NEXT:    pmuludq %xmm1, %xmm3
 ; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,3,2,3]
 ; SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
-; SSE2-NEXT:    movdqa %xmm0, %xmm3
-; SSE2-NEXT:    psrad $31, %xmm3
+; SSE2-NEXT:    pxor %xmm3, %xmm3
+; SSE2-NEXT:    pcmpgtd %xmm0, %xmm3
 ; SSE2-NEXT:    pand %xmm1, %xmm3
 ; SSE2-NEXT:    paddd %xmm0, %xmm3
 ; SSE2-NEXT:    psubd %xmm3, %xmm2

Modified: llvm/trunk/test/CodeGen/X86/vector-idiv.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-idiv.ll?rev=347185&r1=347184&r2=347185&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-idiv.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-idiv.ll Sun Nov 18 23:22:26 2018
@@ -24,19 +24,19 @@ define <4 x i32> @PR20355(<4 x i32> %a)
 ; SSE2-LABEL: PR20355:
 ; SSE2:       # %bb.0: # %entry
 ; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [1431655766,1431655766,1431655766,1431655766]
-; SSE2-NEXT:    movdqa %xmm0, %xmm2
+; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
+; SSE2-NEXT:    pxor %xmm3, %xmm3
+; SSE2-NEXT:    pcmpgtd %xmm0, %xmm3
+; SSE2-NEXT:    pmuludq %xmm1, %xmm0
+; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm0[1,3,2,3]
 ; SSE2-NEXT:    pmuludq %xmm1, %xmm2
-; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
-; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
-; SSE2-NEXT:    pmuludq %xmm1, %xmm3
-; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,3,2,3]
-; SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1]
-; SSE2-NEXT:    psrad $31, %xmm0
-; SSE2-NEXT:    pand %xmm1, %xmm0
-; SSE2-NEXT:    psubd %xmm0, %xmm2
-; SSE2-NEXT:    movdqa %xmm2, %xmm0
+; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,3,2,3]
+; SSE2-NEXT:    punpckldq {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1]
+; SSE2-NEXT:    pand %xmm1, %xmm3
+; SSE2-NEXT:    psubd %xmm3, %xmm4
+; SSE2-NEXT:    movdqa %xmm4, %xmm0
 ; SSE2-NEXT:    psrld $31, %xmm0
-; SSE2-NEXT:    paddd %xmm2, %xmm0
+; SSE2-NEXT:    paddd %xmm4, %xmm0
 ; SSE2-NEXT:    retq
 ;
 ; SSE41-LABEL: PR20355:




More information about the llvm-commits mailing list