[llvm] r325949 - [X86] Add DAG combine to remove (and X, 1) from in front of a v1i1 scalar to vector.

Fri Feb 23 12:13:43 PST 2018

Author: ctopper
Date: Fri Feb 23 12:13:42 2018
New Revision: 325949

URL: http://llvm.org/viewvc/llvm-project?rev=325949&view=rev
Log:
[X86] Add DAG combine to remove (and X, 1) from in front of a v1i1 scalar to vector.

These can be created by type legalization promoting the inputs to select to match scalar boolean contents.

We were trying to pattern match them away during isel, but it's better to just remove them from the DAG.

I've cleaned up some patterns to not check for this 'and' anymore. But I suspect this has also opened up opportunities for pattern removal.

Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/lib/Target/X86/X86InstrAVX512.td
    llvm/trunk/test/CodeGen/X86/sse-scalar-fp-arith.ll

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=325949&r1=325948&r2=325949&view=diff
==============================================================================

--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Fri Feb 23 12:13:42 2018
@@ -1653,6 +1653,7 @@ X86TargetLowering::X86TargetLowering(con
 
   // We have target-specific dag combine patterns for the following nodes:
   setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
+  setTargetDAGCombine(ISD::SCALAR_TO_VECTOR);
   setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
   setTargetDAGCombine(ISD::INSERT_SUBVECTOR);
   setTargetDAGCombine(ISD::EXTRACT_SUBVECTOR);
@@ -38042,11 +38043,30 @@ static SDValue combineExtractSubvector(S
   return SDValue();
 }
 
+static SDValue combineScalarToVector(SDNode *N, SelectionDAG &DAG) {
+  EVT VT = N->getValueType(0);
+  SDValue Src = N->getOperand(0);
+
+  // If this is a scalar to vector to v1i1 from an AND with 1, bypass the and.
+  // This occurs frequently in our masked scalar intrinsic code and our
+  // floating point select lowering with AVX512.
+  // TODO: SimplifyDemandedBits instead?
+  if (VT == MVT::v1i1 && Src.getOpcode() == ISD::AND && Src.hasOneUse())
+    if (auto *C = dyn_cast<ConstantSDNode>(Src.getOperand(1)))
+      if (C->getAPIntValue().isOneValue())
+        return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), MVT::v1i1,
+                           Src.getOperand(0));
+
+  return SDValue();
+}
+
 SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
                                              DAGCombinerInfo &DCI) const {
   SelectionDAG &DAG = DCI.DAG;
   switch (N->getOpcode()) {
   default: break;
+  case ISD::SCALAR_TO_VECTOR:
+    return combineScalarToVector(N, DAG);
   case ISD::EXTRACT_VECTOR_ELT:
   case X86ISD::PEXTRW:
   case X86ISD::PEXTRB:

Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=325949&r1=325948&r2=325949&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Fri Feb 23 12:13:42 2018
@@ -3870,7 +3870,7 @@ multiclass avx512_move_scalar_lowering<s
 
 def : Pat<(_.VT (OpNode _.RC:$src0,
                         (_.VT (scalar_to_vector
-                                  (_.EltVT (X86selects (scalar_to_vector (and (i8 (trunc GR32:$mask)), (i8 1))),
+                                  (_.EltVT (X86selects (scalar_to_vector (i8 (trunc GR32:$mask))),
                                                        (_.EltVT _.FRC:$src1),
                                                        (_.EltVT _.FRC:$src2))))))),
           (!cast<Instruction>(InstrStr#rrk)
@@ -3881,7 +3881,7 @@ def : Pat<(_.VT (OpNode _.RC:$src0,
 
 def : Pat<(_.VT (OpNode _.RC:$src0,
                         (_.VT (scalar_to_vector
-                                  (_.EltVT (X86selects (scalar_to_vector (and (i8 (trunc GR32:$mask)), (i8 1))),
+                                  (_.EltVT (X86selects (scalar_to_vector (i8 (trunc GR32:$mask))),
                                                        (_.EltVT _.FRC:$src1),
                                                        (_.EltVT ZeroFP))))))),
           (!cast<Instruction>(InstrStr#rrkz)
@@ -3993,7 +3993,7 @@ defm : avx512_load_scalar_lowering_subre
 defm : avx512_load_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
                    (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;
 
-def : Pat<(f32 (X86selects (scalar_to_vector (and GR8:$mask, (i8 1))),
+def : Pat<(f32 (X86selects (scalar_to_vector GR8:$mask),
                            (f32 FR32X:$src1), (f32 FR32X:$src2))),
           (COPY_TO_REGCLASS
             (VMOVSSZrrk (COPY_TO_REGCLASS FR32X:$src2, VR128X),
@@ -4007,7 +4007,7 @@ def : Pat<(f32 (X86selects VK1WM:$mask,
            VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),
            (COPY_TO_REGCLASS FR32X:$src1, VR128X)), FR32X)>;
 
-def : Pat<(f64 (X86selects (scalar_to_vector (and GR8:$mask, (i8 1))),
+def : Pat<(f64 (X86selects (scalar_to_vector GR8:$mask),
                            (f64 FR64X:$src1), (f64 FR64X:$src2))),
           (COPY_TO_REGCLASS
             (VMOVSDZrrk (COPY_TO_REGCLASS FR64X:$src2, VR128X),

Modified: llvm/trunk/test/CodeGen/X86/sse-scalar-fp-arith.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sse-scalar-fp-arith.ll?rev=325949&r1=325948&r2=325949&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sse-scalar-fp-arith.ll (original)
+++ llvm/trunk/test/CodeGen/X86/sse-scalar-fp-arith.ll Fri Feb 23 12:13:42 2018
@@ -1117,9 +1117,8 @@ define <4 x float> @add_ss_mask(<4 x flo
 ;
 ; AVX512-LABEL: add_ss_mask:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    vaddss %xmm1, %xmm0, %xmm1
 ; AVX512-NEXT:    kmovw %edi, %k1
-; AVX512-NEXT:    vmovss %xmm1, %xmm0, %xmm2 {%k1}
+; AVX512-NEXT:    vaddss %xmm1, %xmm0, %xmm2 {%k1}
 ; AVX512-NEXT:    vmovaps %xmm2, %xmm0
 ; AVX512-NEXT:    retq
   %1 = extractelement <4 x float> %a, i64 0
@@ -1172,9 +1171,8 @@ define <2 x double> @add_sd_mask(<2 x do
 ;
 ; AVX512-LABEL: add_sd_mask:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    vaddsd %xmm1, %xmm0, %xmm1
 ; AVX512-NEXT:    kmovw %edi, %k1
-; AVX512-NEXT:    vmovsd %xmm1, %xmm0, %xmm2 {%k1}
+; AVX512-NEXT:    vaddsd %xmm1, %xmm0, %xmm2 {%k1}
 ; AVX512-NEXT:    vmovapd %xmm2, %xmm0
 ; AVX512-NEXT:    retq
   %1 = extractelement <2 x double> %a, i64 0