[llvm] r251378 - [x86] replace integer logic ops with packed SSE FP logic ops

Mon Oct 26 18:28:07 PDT 2015

Author: spatel
Date: Mon Oct 26 20:28:07 2015
New Revision: 251378

URL: http://llvm.org/viewvc/llvm-project?rev=251378&view=rev
Log:
[x86] replace integer logic ops with packed SSE FP logic ops

If we have an operand to a bitwise logic op that's already in
an XMM register and the result is going to be sent to an XMM
register, then use an SSE logic op to avoid moves between the
integer and vector register files.

Related commits:
http://reviews.llvm.org/rL248395
http://reviews.llvm.org/rL248399
http://reviews.llvm.org/rL248404
http://reviews.llvm.org/rL248409
http://reviews.llvm.org/rL248415

This should solve PR22428:
https://llvm.org/bugs/show_bug.cgi?id=22428


Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/test/CodeGen/X86/fp-logic.ll

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=251378&r1=251377&r2=251378&view=diff
==============================================================================

--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Mon Oct 26 20:28:07 2015
@@ -23123,7 +23123,8 @@ static SDValue XFormVExtractWithShuffleI
                      EltNo);
 }
 
-static SDValue PerformBITCASTCombine(SDNode *N, SelectionDAG &DAG) {
+static SDValue PerformBITCASTCombine(SDNode *N, SelectionDAG &DAG,
+                                     const X86Subtarget *Subtarget) {
   SDValue N0 = N->getOperand(0);
   EVT VT = N->getValueType(0);
 
@@ -23139,6 +23140,29 @@ static SDValue PerformBITCASTCombine(SDN
       return DAG.getNode(X86ISD::MMX_MOVW2D, SDLoc(N00), VT, N00);
   }
 
+  // Convert a bitcasted integer logic operation that has one bitcasted
+  // floating-point operand and one constant operand into a floating-point
+  // logic operation. This may create a load of the constant, but that is
+  // cheaper than materializing the constant in an integer register and
+  // transferring it to an SSE register or transferring the SSE operand to
+  // integer register and back.
+  unsigned FPOpcode;
+  switch (N0.getOpcode()) {
+    case ISD::AND: FPOpcode = X86ISD::FAND; break;
+    case ISD::OR:  FPOpcode = X86ISD::FOR;  break;
+    case ISD::XOR: FPOpcode = X86ISD::FXOR; break;
+    default: return SDValue();
+  }
+  if (((Subtarget->hasSSE1() && VT == MVT::f32) ||
+       (Subtarget->hasSSE2() && VT == MVT::f64)) &&
+      isa<ConstantSDNode>(N0.getOperand(1)) &&
+      N0.getOperand(0).getOpcode() == ISD::BITCAST &&
+      N0.getOperand(0).getOperand(0).getValueType() == VT) {
+    SDValue N000 = N0.getOperand(0).getOperand(0);
+    SDValue FPConst = DAG.getBitcast(VT, N0.getOperand(1));
+    return DAG.getNode(FPOpcode, SDLoc(N0), VT, N000, FPConst);
+  }
+
   return SDValue();
 }
 
@@ -26635,7 +26659,7 @@ SDValue X86TargetLowering::PerformDAGCom
   case ISD::SELECT:
   case X86ISD::SHRUNKBLEND:
     return PerformSELECTCombine(N, DAG, DCI, Subtarget);
-  case ISD::BITCAST:        return PerformBITCASTCombine(N, DAG);
+  case ISD::BITCAST:        return PerformBITCASTCombine(N, DAG, Subtarget);
   case X86ISD::CMOV:        return PerformCMOVCombine(N, DAG, DCI, Subtarget);
   case ISD::ADD:            return PerformAddCombine(N, DAG, Subtarget);
   case ISD::SUB:            return PerformSubCombine(N, DAG, Subtarget);

Modified: llvm/trunk/test/CodeGen/X86/fp-logic.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fp-logic.ll?rev=251378&r1=251377&r2=251378&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/fp-logic.ll (original)
+++ llvm/trunk/test/CodeGen/X86/fp-logic.ll Mon Oct 26 20:28:07 2015
@@ -110,9 +110,8 @@ define float @f6(float %x, i32 %y) {
 define float @f7(float %x) {
 ; CHECK-LABEL: f7:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    movd %xmm0, %eax
-; CHECK-NEXT:    andl $3, %eax
-; CHECK-NEXT:    movd %eax, %xmm0
+; CHECK-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; CHECK-NEXT:    andps %xmm1, %xmm0
 ; CHECK-NEXT:    retq
 
   %bc1 = bitcast float %x to i32
@@ -126,9 +125,8 @@ define float @f7(float %x) {
 define float @f8(float %x) {
 ; CHECK-LABEL: f8:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    movd %xmm0, %eax
-; CHECK-NEXT:    andl $4, %eax
-; CHECK-NEXT:    movd %eax, %xmm0
+; CHECK-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; CHECK-NEXT:    andps %xmm1, %xmm0
 ; CHECK-NEXT:    retq
 
   %bc1 = bitcast float %x to i32
@@ -196,9 +194,8 @@ define float @xor(float %x, float %y) {
 define float @f7_or(float %x) {
 ; CHECK-LABEL: f7_or:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    movd %xmm0, %eax
-; CHECK-NEXT:    orl $3, %eax
-; CHECK-NEXT:    movd %eax, %xmm0
+; CHECK-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; CHECK-NEXT:    orps %xmm1, %xmm0
 ; CHECK-NEXT:    retq
 
   %bc1 = bitcast float %x to i32
@@ -210,9 +207,8 @@ define float @f7_or(float %x) {
 define float @f7_xor(float %x) {
 ; CHECK-LABEL: f7_xor:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    movd %xmm0, %eax
-; CHECK-NEXT:    xorl $3, %eax
-; CHECK-NEXT:    movd %eax, %xmm0
+; CHECK-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; CHECK-NEXT:    xorps %xmm1, %xmm0
 ; CHECK-NEXT:    retq
 
   %bc1 = bitcast float %x to i32
@@ -239,9 +235,8 @@ define double @doubles(double %x, double
 define double @f7_double(double %x) {
 ; CHECK-LABEL: f7_double:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    movd %xmm0, %rax
-; CHECK-NEXT:    andl $3, %eax
-; CHECK-NEXT:    movd %rax, %xmm0
+; CHECK-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
+; CHECK-NEXT:    andpd %xmm1, %xmm0
 ; CHECK-NEXT:    retq
 
   %bc1 = bitcast double %x to i64
@@ -257,9 +252,8 @@ define double @f7_double(double %x) {
 define float @movmsk(float %x) {
 ; CHECK-LABEL: movmsk:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    movmskps %xmm0, %eax
-; CHECK-NEXT:    shll $31, %eax
-; CHECK-NEXT:    movd %eax, %xmm0
+; CHECK-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; CHECK-NEXT:    andps %xmm1, %xmm0
 ; CHECK-NEXT:    retq
 
   %bc1 = bitcast float %x to i32