[llvm] 0e8e731 - [X86] Allow combineVectorCompareAndMaskUnaryOp to handle 'all-bits' general case
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Sat May 9 06:53:50 PDT 2020
Author: Simon Pilgrim
Date: 2020-05-09T14:53:25+01:00
New Revision: 0e8e731449d8fbfa6ba4523de928b4812ef9f100
URL: https://github.com/llvm/llvm-project/commit/0e8e731449d8fbfa6ba4523de928b4812ef9f100
DIFF: https://github.com/llvm/llvm-project/commit/0e8e731449d8fbfa6ba4523de928b4812ef9f100.diff
LOG: [X86] Allow combineVectorCompareAndMaskUnaryOp to handle 'all-bits' general case
For the sint_to_fp(and(X,C)) -> and(X,sint_to_fp(C)) fold, allow combineVectorCompareAndMaskUnaryOp to match any X that ComputeNumSignBits says is all-bits, not just SETCC.
Noticed while investigating mask promotion issues in PR45808
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/x86-setcc-int-to-fp-combine.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index f608d8b88362..7df01883ef90 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -45564,10 +45564,11 @@ static SDValue combineBrCond(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+// TODO: Could we move this to DAGCombine?
static SDValue combineVectorCompareAndMaskUnaryOp(SDNode *N,
SelectionDAG &DAG) {
- // Take advantage of vector comparisons producing 0 or -1 in each lane to
- // optimize away operation when it's from a constant.
+ // Take advantage of vector comparisons (etc.) producing 0 or -1 in each lane
+ // to optimize away operation when it's from a constant.
//
// The general transformation is:
// UNARYOP(AND(VECTOR_CMP(x,y), constant)) -->
@@ -45579,10 +45580,10 @@ static SDValue combineVectorCompareAndMaskUnaryOp(SDNode *N,
// aren't the same.
EVT VT = N->getValueType(0);
bool IsStrict = N->isStrictFPOpcode();
+ unsigned NumEltBits = VT.getScalarSizeInBits();
SDValue Op0 = N->getOperand(IsStrict ? 1 : 0);
- if (!VT.isVector() || Op0->getOpcode() != ISD::AND ||
- Op0->getOperand(0)->getOpcode() != ISD::SETCC ||
- VT.getSizeInBits() != Op0.getValueSizeInBits())
+ if (!VT.isVector() || Op0.getOpcode() != ISD::AND ||
+ DAG.ComputeNumSignBits(Op0.getOperand(0)) != NumEltBits)
return SDValue();
// Now check that the other operand of the AND is a constant. We could
diff --git a/llvm/test/CodeGen/X86/x86-setcc-int-to-fp-combine.ll b/llvm/test/CodeGen/X86/x86-setcc-int-to-fp-combine.ll
index 0274a8b45826..365ae9a61c27 100644
--- a/llvm/test/CodeGen/X86/x86-setcc-int-to-fp-combine.ll
+++ b/llvm/test/CodeGen/X86/x86-setcc-int-to-fp-combine.ll
@@ -97,17 +97,16 @@ define void @foo4(<4 x float>* noalias %result) nounwind {
ret void
}
-; TODO: Test when we're masking against a sign extended setcc.
+; Test when we're masking against a sign extended setcc.
define <4 x float> @foo5(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: LCPI5_0:
-; CHECK-NEXT: .long 1 ## 0x1
+; CHECK-NEXT: .long 1065353216 ## 0x3f800000
; CHECK-NEXT: .long 0 ## 0x0
-; CHECK-NEXT: .long 1 ## 0x1
+; CHECK-NEXT: .long 1065353216 ## 0x3f800000
; CHECK-NEXT: .long 0 ## 0x0
; CHECK: ## %bb.0:
; CHECK-NEXT: pcmpgtd %xmm1, %xmm0
; CHECK-NEXT: pand {{.*}}(%rip), %xmm0
-; CHECK-NEXT: cvtdq2ps %xmm0, %xmm0
; CHECK-NEXT: retq
%1 = icmp sgt <4 x i32> %a0, %a1
%2 = sext <4 x i1> %1 to <4 x i32>
@@ -116,12 +115,12 @@ define <4 x float> @foo5(<4 x i32> %a0, <4 x i32> %a1) {
ret <4 x float> %4
}
-; TODO: Test when we're masking against mask arithmetic, not the setcc's directly.
+; Test when we're masking against mask arithmetic, not the setcc's directly.
define <4 x float> @foo6(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: LCPI6_0:
-; CHECK-NEXT: .long 1 ## 0x1
+; CHECK-NEXT: .long 1065353216 ## 0x3f800000
; CHECK-NEXT: .long 0 ## 0x0
-; CHECK-NEXT: .long 1 ## 0x1
+; CHECK-NEXT: .long 1065353216 ## 0x3f800000
; CHECK-NEXT: .long 0 ## 0x0
; CHECK: ## %bb.0:
; CHECK-NEXT: movdqa %xmm0, %xmm2
@@ -130,7 +129,6 @@ define <4 x float> @foo6(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-NEXT: pcmpgtd %xmm1, %xmm0
; CHECK-NEXT: pand %xmm2, %xmm0
; CHECK-NEXT: pand {{.*}}(%rip), %xmm0
-; CHECK-NEXT: cvtdq2ps %xmm0, %xmm0
; CHECK-NEXT: retq
%1 = icmp sgt <4 x i32> %a0, %a1
%2 = icmp sgt <4 x i32> %a0, zeroinitializer
@@ -139,4 +137,4 @@ define <4 x float> @foo6(<4 x i32> %a0, <4 x i32> %a1) {
%5 = and <4 x i32> %4, <i32 1, i32 0, i32 1, i32 0>
%6 = uitofp <4 x i32> %5 to <4 x float>
ret <4 x float> %6
-}
\ No newline at end of file
+}
More information about the llvm-commits mailing list