[llvm] 7fce332 - [SDAG] allow vector types for select->logic folds
Sanjay Patel via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 2 06:30:04 PST 2021
Author: Sanjay Patel
Date: 2021-03-02T09:25:10-05:00
New Revision: 7fce3322a28303b864d912d8ac198f49e61f9f52
URL: https://github.com/llvm/llvm-project/commit/7fce3322a28303b864d912d8ac198f49e61f9f52
DIFF: https://github.com/llvm/llvm-project/commit/7fce3322a28303b864d912d8ac198f49e61f9f52.diff
LOG: [SDAG] allow vector types for select->logic folds
This prepares codegen for a change that will remove the identical
folds from IR because they are not poison-safe. See
D93065 / D97360
for details.
We already support scalar types generically, and there are various
target-specific transforms that overlap the vector folds. For example,
x86 recognizes the 'and' patterns but not the 'or' patterns. We can end
up with one extra instruction in that case, but I think that is still
preferred over the blendv alternative that loads a constant vector.
If this is not optimal, then it should be fixed with a later transform
(this change is not expected to result in any regressions because
InstCombine currently does the same thing).
Removing custom code and supporting undefs in constant-pattern-matching
can be follow-up changes.
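As a rough illustration (a sketch with a hypothetical test function, not
taken from this patch), a vector select whose true operand is an all-ones
splat can now be lowered as a plain 'or' of the condition and the false
operand, mirroring the existing scalar fold:

define <4 x i1> @select_true_splat_to_or(<4 x i32> %x, <4 x i32> %y, <4 x i1> %f) {
  ; The compare produces a <4 x i1> condition; because the true operand is
  ; an all-ones splat, the combine can rewrite the (v)select as: or %c, %f.
  %c = icmp eq <4 x i32> %x, %y
  %r = select <4 x i1> %c, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i1> %f
  ret <4 x i1> %r
}

The same reasoning covers the and/bic/orn forms exercised by the updated
tests below.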
Differential Revision: https://reviews.llvm.org/D97730
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/test/CodeGen/AArch64/select-with-and-or.ll
llvm/test/CodeGen/X86/select-with-and-or.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 12cd83806802..39003d632f93 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -9302,31 +9302,32 @@ SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
}
static SDValue foldBoolSelectToLogic(SDNode *N, SelectionDAG &DAG) {
- assert(N->getOpcode() == ISD::SELECT && "Expected a select");
+ assert((N->getOpcode() == ISD::SELECT || N->getOpcode() == ISD::VSELECT) &&
+ "Expected a (v)select");
SDValue Cond = N->getOperand(0);
SDValue T = N->getOperand(1), F = N->getOperand(2);
EVT VT = N->getValueType(0);
- if (VT != Cond.getValueType() || VT != MVT::i1)
+ if (VT != Cond.getValueType() || VT.getScalarSizeInBits() != 1)
return SDValue();
// select Cond, Cond, F --> or Cond, F
// select Cond, 1, F --> or Cond, F
- if (Cond == T || isOneConstant(T))
+ if (Cond == T || isOneOrOneSplat(T))
return DAG.getNode(ISD::OR, SDLoc(N), VT, Cond, F);
// select Cond, T, Cond --> and Cond, T
// select Cond, T, 0 --> and Cond, T
- if (Cond == F || isNullConstant(F))
+ if (Cond == F || isNullOrNullSplat(F))
return DAG.getNode(ISD::AND, SDLoc(N), VT, Cond, T);
// select Cond, T, 1 --> or (not Cond), T
- if (isOneConstant(F)) {
+ if (isOneOrOneSplat(F)) {
SDValue NotCond = DAG.getNOT(SDLoc(N), Cond, VT);
return DAG.getNode(ISD::OR, SDLoc(N), VT, NotCond, T);
}
// select Cond, 0, F --> and (not Cond), F
- if (isNullConstant(T)) {
+ if (isNullOrNullSplat(T)) {
SDValue NotCond = DAG.getNOT(SDLoc(N), Cond, VT);
return DAG.getNode(ISD::AND, SDLoc(N), VT, NotCond, F);
}
@@ -9788,6 +9789,9 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
if (SDValue V = DAG.simplifySelect(N0, N1, N2))
return V;
+ if (SDValue V = foldBoolSelectToLogic(N, DAG))
+ return V;
+
// vselect (not Cond), N1, N2 -> vselect Cond, N2, N1
if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false))
return DAG.getSelect(DL, VT, F, N2, N1);
diff --git a/llvm/test/CodeGen/AArch64/select-with-and-or.ll b/llvm/test/CodeGen/AArch64/select-with-and-or.ll
index 168371b61ecf..0cf84fac227c 100644
--- a/llvm/test/CodeGen/AArch64/select-with-and-or.ll
+++ b/llvm/test/CodeGen/AArch64/select-with-and-or.ll
@@ -66,7 +66,7 @@ define <4 x i1> @and_vec(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z, <4 x i32> %w)
; CHECK: // %bb.0:
; CHECK-NEXT: cmeq v0.4s, v0.4s, v1.4s
; CHECK-NEXT: cmgt v1.4s, v2.4s, v3.4s
-; CHECK-NEXT: and v0.16b, v1.16b, v0.16b
+; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: xtn v0.4h, v0.4s
; CHECK-NEXT: ret
%a = icmp eq <4 x i32> %x, %y
@@ -80,10 +80,8 @@ define <4 x i1> @or_vec(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z, <4 x i32> %w)
; CHECK: // %bb.0:
; CHECK-NEXT: cmeq v0.4s, v0.4s, v1.4s
; CHECK-NEXT: cmgt v1.4s, v2.4s, v3.4s
+; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT: xtn v0.4h, v0.4s
-; CHECK-NEXT: xtn v1.4h, v1.4s
-; CHECK-NEXT: movi v2.4h, #1
-; CHECK-NEXT: bsl v0.8b, v2.8b, v1.8b
; CHECK-NEXT: ret
%a = icmp eq <4 x i32> %x, %y
%b = icmp sgt <4 x i32> %z, %w
@@ -96,9 +94,8 @@ define <4 x i1> @and_not_vec(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z, <4 x i32>
; CHECK: // %bb.0:
; CHECK-NEXT: cmeq v0.4s, v0.4s, v1.4s
; CHECK-NEXT: cmgt v1.4s, v2.4s, v3.4s
+; CHECK-NEXT: bic v0.16b, v1.16b, v0.16b
; CHECK-NEXT: xtn v0.4h, v0.4s
-; CHECK-NEXT: xtn v1.4h, v1.4s
-; CHECK-NEXT: bic v0.8b, v1.8b, v0.8b
; CHECK-NEXT: ret
%a = icmp eq <4 x i32> %x, %y
%b = icmp sgt <4 x i32> %z, %w
@@ -111,12 +108,8 @@ define <4 x i1> @or_not_vec(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z, <4 x i32>
; CHECK: // %bb.0:
; CHECK-NEXT: cmeq v0.4s, v0.4s, v1.4s
; CHECK-NEXT: cmgt v1.4s, v2.4s, v3.4s
-; CHECK-NEXT: movi v2.4h, #1
-; CHECK-NEXT: xtn v3.4h, v0.4s
-; CHECK-NEXT: and v0.16b, v1.16b, v0.16b
+; CHECK-NEXT: orn v0.16b, v1.16b, v0.16b
; CHECK-NEXT: xtn v0.4h, v0.4s
-; CHECK-NEXT: bic v1.8b, v2.8b, v3.8b
-; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b
; CHECK-NEXT: ret
%a = icmp eq <4 x i32> %x, %y
%b = icmp sgt <4 x i32> %z, %w
diff --git a/llvm/test/CodeGen/X86/select-with-and-or.ll b/llvm/test/CodeGen/X86/select-with-and-or.ll
index 4c05445ae44c..4793c8131bee 100644
--- a/llvm/test/CodeGen/X86/select-with-and-or.ll
+++ b/llvm/test/CodeGen/X86/select-with-and-or.ll
@@ -233,7 +233,7 @@ define <4 x i1> @or_vec(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z, <4 x i32> %w)
; CHECK: # %bb.0:
; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vpcmpgtd %xmm3, %xmm2, %xmm1
-; CHECK-NEXT: vblendvps %xmm0, {{.*}}(%rip), %xmm1, %xmm0
+; CHECK-NEXT: vpor %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%a = icmp eq <4 x i32> %x, %y
%b = icmp sgt <4 x i32> %z, %w
@@ -244,9 +244,9 @@ define <4 x i1> @or_vec(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z, <4 x i32> %w)
define <4 x i1> @and_not_vec(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z, <4 x i32> %w) {
; CHECK-LABEL: and_not_vec:
; CHECK: # %bb.0:
-; CHECK-NEXT: vpcmpgtd %xmm3, %xmm2, %xmm2
; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
-; CHECK-NEXT: vpandn %xmm2, %xmm0, %xmm0
+; CHECK-NEXT: vpcmpgtd %xmm3, %xmm2, %xmm1
+; CHECK-NEXT: vpandn %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%a = icmp eq <4 x i32> %x, %y
%b = icmp sgt <4 x i32> %z, %w
@@ -258,9 +258,10 @@ define <4 x i1> @or_not_vec(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z, <4 x i32>
; CHECK-LABEL: or_not_vec:
; CHECK: # %bb.0:
; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vpxor %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vpcmpgtd %xmm3, %xmm2, %xmm1
-; CHECK-NEXT: vmovaps {{.*#+}} xmm2 = [1,1,1,1]
-; CHECK-NEXT: vblendvps %xmm0, %xmm1, %xmm2, %xmm0
+; CHECK-NEXT: vpor %xmm1, %xmm0, %xmm0
; CHECK-NEXT: retq
%a = icmp eq <4 x i32> %x, %y
%b = icmp sgt <4 x i32> %z, %w