[llvm] r374902 - [DAGCombiner] fold select-of-constants based on sign-bit test

Tue Oct 15 08:23:58 PDT 2019

Author: spatel
Date: Tue Oct 15 08:23:57 2019
New Revision: 374902

URL: http://llvm.org/viewvc/llvm-project?rev=374902&view=rev
Log:
[DAGCombiner] fold select-of-constants based on sign-bit test

Examples:
  i32 X > -1 ? C1 : -1 --> (X >>s 31) | C1
  i8 X < 0 ? C1 : 0 --> (X >>s 7) & C1

This is a small generalization of a fold requested in PR43650:
https://bugs.llvm.org/show_bug.cgi?id=43650

The sign-bit of the condition operand can be used as a mask for the true operand:
https://rise4fun.com/Alive/paT

Note that we already handle some of the patterns (isNegative + scalar) because
there's an over-specialized, yet over-reaching fold for that in foldSelectCCToShiftAnd().
It doesn't use any TLI hooks, so I can't easily rip out that code even though we're
duplicating part of it here. This fold is guarded by TLI.convertSelectOfConstantsToMath(),
so it should not cause problems for targets that prefer select over shift.

Also worth noting: I thought we could generalize this further to include the case where
the true operand of the select is not constant, but Alive says that may allow poison to
pass through where it does not in the original select form of the code.

Differential Revision: https://reviews.llvm.org/D68949

Modified:
    llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
    llvm/trunk/test/CodeGen/X86/select-sra.ll

Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=374902&r1=374901&r2=374902&view=diff
==============================================================================

--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Tue Oct 15 08:23:57 2019
@@ -8173,6 +8173,43 @@ static SDValue combineMinNumMaxNum(const
   }
 }
 
+/// If a (v)select has a condition value that is a sign-bit test, try to smear
+/// the condition operand sign-bit across the value width and use it as a mask.
+static SDValue foldSelectOfConstantsUsingSra(SDNode *N, SelectionDAG &DAG) {
+  SDValue Cond = N->getOperand(0);
+  SDValue C1 = N->getOperand(1);
+  SDValue C2 = N->getOperand(2);
+  assert(isConstantOrConstantVector(C1) && isConstantOrConstantVector(C2) &&
+         "Expected select-of-constants");
+
+  EVT VT = N->getValueType(0);
+  if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse() ||
+      VT != Cond.getOperand(0).getValueType())
+    return SDValue();
+
+  // The inverted-condition + commuted-select variants of these patterns are
+  // canonicalized to these forms in IR.
+  SDValue X = Cond.getOperand(0);
+  SDValue CondC = Cond.getOperand(1);
+  ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
+  if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(CondC) &&
+      isAllOnesOrAllOnesSplat(C2)) {
+    // i32 X > -1 ? C1 : -1 --> (X >>s 31) | C1
+    SDLoc DL(N);
+    SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
+    SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
+    return DAG.getNode(ISD::OR, DL, VT, Sra, C1);
+  }
+  if (CC == ISD::SETLT && isNullOrNullSplat(CondC) && isNullOrNullSplat(C2)) {
+    // i8 X < 0 ? C1 : 0 --> (X >>s 7) & C1
+    SDLoc DL(N);
+    SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
+    SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
+    return DAG.getNode(ISD::AND, DL, VT, Sra, C1);
+  }
+  return SDValue();
+}
+
 SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
   SDValue Cond = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
@@ -8248,6 +8285,9 @@ SDValue DAGCombiner::foldSelectOfConstan
         SDValue ShAmtC = DAG.getConstant(C1Val.exactLogBase2(), DL, VT);
         return DAG.getNode(ISD::SHL, DL, VT, Cond, ShAmtC);
       }
+
+      if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG))
+        return V;
     }
 
     return SDValue();
@@ -8623,6 +8663,9 @@ SDValue DAGCombiner::foldVSelectOfConsta
     return DAG.getNode(ISD::SHL, DL, VT, ZextCond, ShAmtC);
   }
 
+  if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG))
+    return V;
+
   // The general case for select-of-constants:
   // vselect <N x i1> Cond, C1, C2 --> xor (and (sext Cond), (C1^C2)), C2
   // ...but that only makes sense if a vselect is slower than 2 logic ops, so

Modified: llvm/trunk/test/CodeGen/X86/select-sra.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/select-sra.ll?rev=374902&r1=374901&r2=374902&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/select-sra.ll (original)
+++ llvm/trunk/test/CodeGen/X86/select-sra.ll Tue Oct 15 08:23:57 2019
@@ -4,10 +4,9 @@
 define i8 @isnonneg_i8(i8 %x) {
 ; CHECK-LABEL: isnonneg_i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    testb %dil, %dil
-; CHECK-NEXT:    movl $42, %ecx
-; CHECK-NEXT:    movl $255, %eax
-; CHECK-NEXT:    cmovnsl %ecx, %eax
+; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    sarb $7, %al
+; CHECK-NEXT:    orb $42, %al
 ; CHECK-NEXT:    # kill: def $al killed $al killed $eax
 ; CHECK-NEXT:    retq
   %cond = icmp sgt i8 %x, -1
@@ -18,10 +17,9 @@ define i8 @isnonneg_i8(i8 %x) {
 define i16 @isnonneg_i16(i16 %x) {
 ; CHECK-LABEL: isnonneg_i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    testw %di, %di
-; CHECK-NEXT:    movl $542, %ecx # imm = 0x21E
-; CHECK-NEXT:    movl $65535, %eax # imm = 0xFFFF
-; CHECK-NEXT:    cmovnsl %ecx, %eax
+; CHECK-NEXT:    movswl %di, %eax
+; CHECK-NEXT:    sarl $15, %eax
+; CHECK-NEXT:    orl $542, %eax # imm = 0x21E
 ; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
 ; CHECK-NEXT:    retq
   %cond = icmp sgt i16 %x, -1
@@ -32,10 +30,9 @@ define i16 @isnonneg_i16(i16 %x) {
 define i32 @isnonneg_i32(i32 %x) {
 ; CHECK-LABEL: isnonneg_i32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    testl %edi, %edi
-; CHECK-NEXT:    movl $-42, %ecx
-; CHECK-NEXT:    movl $-1, %eax
-; CHECK-NEXT:    cmovnsl %ecx, %eax
+; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    sarl $31, %eax
+; CHECK-NEXT:    orl $-42, %eax
 ; CHECK-NEXT:    retq
   %cond = icmp sgt i32 %x, -1
   %r = select i1 %cond, i32 -42, i32 -1
@@ -45,10 +42,9 @@ define i32 @isnonneg_i32(i32 %x) {
 define i64 @isnonneg_i64(i64 %x) {
 ; CHECK-LABEL: isnonneg_i64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    testq %rdi, %rdi
-; CHECK-NEXT:    movl $2342342, %ecx # imm = 0x23BDC6
-; CHECK-NEXT:    movq $-1, %rax
-; CHECK-NEXT:    cmovnsq %rcx, %rax
+; CHECK-NEXT:    movq %rdi, %rax
+; CHECK-NEXT:    sarq $63, %rax
+; CHECK-NEXT:    orq $2342342, %rax # imm = 0x23BDC6
 ; CHECK-NEXT:    retq
   %cond = icmp sgt i64 %x, -1
   %r = select i1 %cond, i64 2342342, i64 -1
@@ -58,10 +54,10 @@ define i64 @isnonneg_i64(i64 %x) {
 define <16 x i8> @isnonneg_v16i8(<16 x i8> %x) {
 ; CHECK-LABEL: isnonneg_v16i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    pcmpeqd %xmm1, %xmm1
-; CHECK-NEXT:    pcmpgtb %xmm1, %xmm0
-; CHECK-NEXT:    pxor %xmm1, %xmm0
-; CHECK-NEXT:    por {{.*}}(%rip), %xmm0
+; CHECK-NEXT:    pxor %xmm1, %xmm1
+; CHECK-NEXT:    pcmpgtb %xmm0, %xmm1
+; CHECK-NEXT:    por {{.*}}(%rip), %xmm1
+; CHECK-NEXT:    movdqa %xmm1, %xmm0
 ; CHECK-NEXT:    retq
   %cond = icmp sgt <16 x i8> %x, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
   %r = select <16 x i1> %cond, <16 x i8> <i8 12, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42, i8 42>, <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
@@ -71,9 +67,7 @@ define <16 x i8> @isnonneg_v16i8(<16 x i
 define <8 x i16> @isnonneg_v8i16(<8 x i16> %x) {
 ; CHECK-LABEL: isnonneg_v8i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    pcmpeqd %xmm1, %xmm1
-; CHECK-NEXT:    pcmpgtw %xmm1, %xmm0
-; CHECK-NEXT:    pxor %xmm1, %xmm0
+; CHECK-NEXT:    psraw $15, %xmm0
 ; CHECK-NEXT:    por {{.*}}(%rip), %xmm0
 ; CHECK-NEXT:    retq
   %cond = icmp sgt <8 x i16> %x, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
@@ -84,9 +78,7 @@ define <8 x i16> @isnonneg_v8i16(<8 x i1
 define <4 x i32> @isnonneg_v4i32(<4 x i32> %x) {
 ; CHECK-LABEL: isnonneg_v4i32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    pcmpeqd %xmm1, %xmm1
-; CHECK-NEXT:    pcmpgtd %xmm1, %xmm0
-; CHECK-NEXT:    pxor %xmm1, %xmm0
+; CHECK-NEXT:    psrad $31, %xmm0
 ; CHECK-NEXT:    por {{.*}}(%rip), %xmm0
 ; CHECK-NEXT:    retq
   %cond = icmp sgt <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -97,18 +89,8 @@ define <4 x i32> @isnonneg_v4i32(<4 x i3
 define <2 x i64> @isnonneg_v2i64(<2 x i64> %x) {
 ; CHECK-LABEL: isnonneg_v2i64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    pxor {{.*}}(%rip), %xmm0
-; CHECK-NEXT:    movdqa {{.*#+}} xmm1 = [18446744071562067967,18446744071562067967]
-; CHECK-NEXT:    movdqa %xmm0, %xmm2
-; CHECK-NEXT:    pcmpgtd %xmm1, %xmm2
-; CHECK-NEXT:    pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
-; CHECK-NEXT:    pcmpeqd %xmm1, %xmm0
+; CHECK-NEXT:    psrad $31, %xmm0
 ; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
-; CHECK-NEXT:    pand %xmm3, %xmm0
-; CHECK-NEXT:    pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
-; CHECK-NEXT:    por %xmm0, %xmm1
-; CHECK-NEXT:    pcmpeqd %xmm0, %xmm0
-; CHECK-NEXT:    pxor %xmm1, %xmm0
 ; CHECK-NEXT:    por {{.*}}(%rip), %xmm0
 ; CHECK-NEXT:    retq
   %cond = icmp sgt <2 x i64> %x, <i64 -1, i64 -1>
@@ -182,10 +164,8 @@ define <16 x i8> @isneg_v16i8(<16 x i8>
 define <8 x i16> @isneg_v8i16(<8 x i16> %x) {
 ; CHECK-LABEL: isneg_v8i16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    pxor %xmm1, %xmm1
-; CHECK-NEXT:    pcmpgtw %xmm0, %xmm1
-; CHECK-NEXT:    pand {{.*}}(%rip), %xmm1
-; CHECK-NEXT:    movdqa %xmm1, %xmm0
+; CHECK-NEXT:    psraw $15, %xmm0
+; CHECK-NEXT:    pand {{.*}}(%rip), %xmm0
 ; CHECK-NEXT:    retq
   %cond = icmp slt <8 x i16> %x, zeroinitializer
   %r = select <8 x i1> %cond, <8 x i16> <i16 1, i16 542, i16 542, i16 542, i16 542, i16 542, i16 542, i16 1>, <8 x i16> zeroinitializer
@@ -195,10 +175,8 @@ define <8 x i16> @isneg_v8i16(<8 x i16>
 define <4 x i32> @isneg_v4i32(<4 x i32> %x) {
 ; CHECK-LABEL: isneg_v4i32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    pxor %xmm1, %xmm1
-; CHECK-NEXT:    pcmpgtd %xmm0, %xmm1
-; CHECK-NEXT:    pand {{.*}}(%rip), %xmm1
-; CHECK-NEXT:    movdqa %xmm1, %xmm0
+; CHECK-NEXT:    psrad $31, %xmm0
+; CHECK-NEXT:    pand {{.*}}(%rip), %xmm0
 ; CHECK-NEXT:    retq
   %cond = icmp slt <4 x i32> %x, zeroinitializer
   %r = select <4 x i1> %cond, <4 x i32> <i32 0, i32 42, i32 -42, i32 1>, <4 x i32> zeroinitializer
@@ -208,15 +186,8 @@ define <4 x i32> @isneg_v4i32(<4 x i32>
 define <2 x i64> @isneg_v2i64(<2 x i64> %x) {
 ; CHECK-LABEL: isneg_v2i64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movdqa {{.*#+}} xmm1 = [2147483648,2147483648]
-; CHECK-NEXT:    pxor %xmm1, %xmm0
-; CHECK-NEXT:    movdqa %xmm1, %xmm2
-; CHECK-NEXT:    pcmpgtd %xmm0, %xmm2
-; CHECK-NEXT:    pcmpeqd %xmm1, %xmm0
-; CHECK-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
-; CHECK-NEXT:    pand %xmm2, %xmm1
-; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
-; CHECK-NEXT:    por %xmm1, %xmm0
+; CHECK-NEXT:    psrad $31, %xmm0
+; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
 ; CHECK-NEXT:    pand {{.*}}(%rip), %xmm0
 ; CHECK-NEXT:    retq
   %cond = icmp slt <2 x i64> %x, zeroinitializer