[llvm] r235922 - [AArch64] Also combine vector selects fed by non-i1 SETCCs.

Mon Apr 27 14:43:12 PDT 2015

Author: ab
Date: Mon Apr 27 16:43:12 2015
New Revision: 235922

URL: http://llvm.org/viewvc/llvm-project?rev=235922&view=rev
Log:
[AArch64] Also combine vector selects fed by non-i1 SETCCs.

After legalization, scalar SETCC has an i32 result type on AArch64.
The i1 requirement seems too conservative; replace it with an assert.

This also means that we can now run after legalization. That should also
be fine, since the ops legalizer runs again after each combine, and
all the types created have the same sizes as the (legal) inputs.

Exposed by r235917; while there, make its tests more robust (bsl also uses
the register it defines).

Modified:
    llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp
    llvm/trunk/test/CodeGen/AArch64/arm64-neon-select_cc.ll
    llvm/trunk/test/CodeGen/AArch64/arm64-neon-v1i1-setcc.ll

Modified: llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp?rev=235922&r1=235921&r2=235922&view=diff
==============================================================================

--- llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp Mon Apr 27 16:43:12 2015
@@ -8644,13 +8644,21 @@ static SDValue performVSelectCombine(SDN
 /// the compare-mask instructions rather than going via NZCV, even if LHS and
 /// RHS are really scalar. This replaces any scalar setcc in the above pattern
 /// with a vector one followed by a DUP shuffle on the result.
-static SDValue performSelectCombine(SDNode *N, SelectionDAG &DAG) {
+static SDValue performSelectCombine(SDNode *N,
+                                    TargetLowering::DAGCombinerInfo &DCI) {
+  SelectionDAG &DAG = DCI.DAG;
   SDValue N0 = N->getOperand(0);
   EVT ResVT = N->getValueType(0);
 
-  if (N0.getOpcode() != ISD::SETCC || N0.getValueType() != MVT::i1)
+  if (N0.getOpcode() != ISD::SETCC)
     return SDValue();
 
+  // Make sure the SETCC result is either i1 (initial DAG), or i32, the lowered
+  // scalar SetCCResultType. We also don't expect vectors, because we assume
+  // that selects fed by vector SETCCs are canonicalized to VSELECT.
+  assert((N0.getValueType() == MVT::i1 || N0.getValueType() == MVT::i32) &&
+         "Scalar-SETCC feeding SELECT has unexpected result type!");
+
   // If NumMaskElts == 0, the comparison is larger than select result. The
   // largest real NEON comparison is 64-bits per lane, which means the result is
   // at most 32-bits and an illegal vector. Just bail out for now.
@@ -8674,6 +8682,10 @@ static SDValue performSelectCombine(SDNo
   if (CCVT.getSizeInBits() != ResVT.getSizeInBits())
     return SDValue();
 
+  // Make sure we didn't create illegal types, if we're not supposed to.
+  assert(DCI.isBeforeLegalize() ||
+         DAG.getTargetLoweringInfo().isTypeLegal(SrcVT));
+
   // First perform a vector comparison, where lane 0 is the one we're interested
   // in.
   SDLoc DL(N0);
@@ -8721,7 +8733,7 @@ SDValue AArch64TargetLowering::PerformDA
   case ISD::CONCAT_VECTORS:
     return performConcatVectorsCombine(N, DCI, DAG);
   case ISD::SELECT:
-    return performSelectCombine(N, DAG);
+    return performSelectCombine(N, DCI);
   case ISD::VSELECT:
     return performVSelectCombine(N, DCI.DAG);
   case ISD::STORE:

Modified: llvm/trunk/test/CodeGen/AArch64/arm64-neon-select_cc.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/arm64-neon-select_cc.ll?rev=235922&r1=235921&r2=235922&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/arm64-neon-select_cc.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/arm64-neon-select_cc.ll Mon Apr 27 16:43:12 2015
@@ -225,9 +225,9 @@ define <2 x i32> @test_select_cc_v2i32_i
 define <3 x float> @test_select_cc_v3f32_fcmp_f32(<3 x float> %a, <3 x float> %b, float %c1, float %c2) #0 {
 ; CHECK-LABEL: test_select_cc_v3f32_fcmp_f32:
 ; CHECK-NEXT: fcmeq [[MASK:v[0-9]+]].4s, v2.4s, v3.4s
-; CHECK-NEXT: dup [[VMASK:v[0-9]+]].4s, [[MASK]].s[0]
-; CHECK-NEXT: bsl [[RES:v[0-9]+]].16b, v0.16b, v1.16b
-; CHECK-NEXT: mov v0.16b, [[RES]].16b
+; CHECK-NEXT: dup [[DUPMASK:v[0-9]+]].4s, [[MASK]].s[0]
+; CHECK-NEXT: bsl [[DUPMASK:v[0-9]+]].16b, v0.16b, v1.16b
+; CHECK-NEXT: mov v0.16b, [[DUPMASK]].16b
 ; CHECK-NEXT: ret
   %cc = fcmp oeq float %c1, %c2
   %r = select i1 %cc, <3 x float> %a, <3 x float> %b
@@ -236,12 +236,10 @@ define <3 x float> @test_select_cc_v3f32
 
 define <3 x float> @test_select_cc_v3f32_fcmp_f64(<3 x float> %a, <3 x float> %b, double %c1, double %c2) #0 {
 ; CHECK-LABEL: test_select_cc_v3f32_fcmp_f64:
-; CHECK-NEXT: fcmp d2, d3
-; CHECK-NEXT: movn [[N0:w[0-9]+]], #0
-; CHECK-NEXT: csel [[MASK:w[0-9]+]], [[N0]], wzr, eq
-; CHECK-NEXT: dup [[VMASK:v[0-9]+]].4s, [[MASK]]
-; CHECK-NEXT: bsl [[RES:v[0-9]+]].16b, v0.16b, v1.16b
-; CHECK-NEXT: mov v0.16b, [[RES]].16b
+; CHECK-NEXT: fcmeq [[MASK:v[0-9]+]].2d, v2.2d, v3.2d
+; CHECK-NEXT: dup [[DUPMASK:v[0-9]+]].2d, [[MASK]].d[0]
+; CHECK-NEXT: bsl [[DUPMASK:v[0-9]+]].16b, v0.16b, v1.16b
+; CHECK-NEXT: mov v0.16b, [[DUPMASK]].16b
 ; CHECK-NEXT: ret
   %cc = fcmp oeq double %c1, %c2
   %r = select i1 %cc, <3 x float> %a, <3 x float> %b

Modified: llvm/trunk/test/CodeGen/AArch64/arm64-neon-v1i1-setcc.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/arm64-neon-v1i1-setcc.ll?rev=235922&r1=235921&r2=235922&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/AArch64/arm64-neon-v1i1-setcc.ll (original)
+++ llvm/trunk/test/CodeGen/AArch64/arm64-neon-v1i1-setcc.ll Mon Apr 27 16:43:12 2015
@@ -50,9 +50,15 @@ define <1 x double> @test_select_v1i1_2(
   ret <1 x double> %res
 }
 
+; For v1i64, it's not clear which of the vector or scalar compare is better.
+; Let's stick to the vector form, like for all other vector selects fed by a
+; scalar setcc.  If anything, it exposes more ILP.
 define <1 x i64> @test_select_v1i1_3(i64 %lhs, i64 %rhs, <1 x i64> %v3) {
 ; CHECK-LABEL: test_select_v1i1_3:
-; CHECK: cmp {{x[0-9]+}}, {{x[0-9]+}}
+; CHECK: fmov d{{[0-9]+}}, x{{[0-9]+}}
+; CHECK: fmov d{{[0-9]+}}, x{{[0-9]+}}
+; CHECK: cmeq d{{[0-9]+}}, d{{[0-9]+}}, d{{[0-9]+}}
+; CHECK: bic v{{[0-9]+}}.8b, v{{[0-9]+}}.8b, v{{[0-9]+}}.8b
   %tst = icmp eq i64 %lhs, %rhs
   %evil = insertelement <1 x i1> undef, i1 %tst, i32 0
   %res = select <1 x i1> %evil, <1 x i64> zeroinitializer, <1 x i64> %v3