[llvm] r353325 - [SystemZ] Wait with VGBM selection until after DAGCombine2.

Wed Feb 6 10:59:19 PST 2019

Author: jonpa
Date: Wed Feb  6 10:59:19 2019
New Revision: 353325

URL: http://llvm.org/viewvc/llvm-project?rev=353325&view=rev
Log:
[SystemZ]  Wait with VGBM selection until after DAGCombine2.

Don't lower BUILD_VECTORs to BYTE_MASK, but instead expose the BUILD_VECTORs
to the DAGCombiner and select them to VGBM in Select(). This allows the
DAGCombiner to understand the constant vector values.

For floating point, only all-zeros vectors are now generated with VGBM, as it
turned out to be somewhat complicated to handle any arbitrary constants,
while in practice this is very rare and hardly needed.

The SystemZ ISD opcodes z_byte_mask, z_vzero and z_vones have been removed.

Review: Ulrich Weigand
https://reviews.llvm.org/D57152

Added:
    llvm/trunk/test/CodeGen/SystemZ/buildvector-00.ll
Modified:
    llvm/trunk/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
    llvm/trunk/lib/Target/SystemZ/SystemZISelLowering.cpp
    llvm/trunk/lib/Target/SystemZ/SystemZISelLowering.h
    llvm/trunk/lib/Target/SystemZ/SystemZInstrVector.td
    llvm/trunk/lib/Target/SystemZ/SystemZOperators.td
    llvm/trunk/test/CodeGen/SystemZ/vec-const-05.ll
    llvm/trunk/test/CodeGen/SystemZ/vec-const-06.ll

Modified: llvm/trunk/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp?rev=353325&r1=353324&r2=353325&view=diff
==============================================================================

--- llvm/trunk/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp (original)
+++ llvm/trunk/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp Wed Feb  6 10:59:19 2019
@@ -11,6 +11,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "SystemZTargetMachine.h"
+#include "SystemZISelLowering.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/CodeGen/SelectionDAGISel.h"
 #include "llvm/Support/Debug.h"
@@ -1525,6 +1526,20 @@ void SystemZDAGToDAGISel::Select(SDNode
     }
     break;
   }
+
+  case ISD::BUILD_VECTOR: {
+    auto *BVN = cast<BuildVectorSDNode>(Node);
+    SDLoc DL(Node);
+    EVT VT = Node->getValueType(0);
+    uint64_t Mask = 0;
+    if (SystemZTargetLowering::tryBuildVectorByteMask(BVN, Mask)) {
+      SDNode *Res = CurDAG->getMachineNode(SystemZ::VGBM, DL, VT,
+                                CurDAG->getTargetConstant(Mask, DL, MVT::i32));
+      ReplaceNode(Node, Res);
+      return;
+    }
+    break;
+  }
 
   case ISD::STORE: {
     if (tryFoldLoadStoreIntoMemOperand(Node))

Modified: llvm/trunk/lib/Target/SystemZ/SystemZISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/SystemZ/SystemZISelLowering.cpp?rev=353325&r1=353324&r2=353325&view=diff
==============================================================================
--- llvm/trunk/lib/Target/SystemZ/SystemZISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/SystemZ/SystemZISelLowering.cpp Wed Feb  6 10:59:19 2019
@@ -2510,9 +2510,8 @@ SDValue SystemZTargetLowering::lowerVect
     break;
   }
   if (Invert) {
-    SDValue Mask = DAG.getNode(SystemZISD::BYTE_MASK, DL, MVT::v16i8,
-                               DAG.getConstant(65535, DL, MVT::i32));
-    Mask = DAG.getNode(ISD::BITCAST, DL, VT, Mask);
+    SDValue Mask =
+      DAG.getSplatBuildVector(VT, DL, DAG.getConstant(-1, DL, MVT::i64));
     Cmp = DAG.getNode(ISD::XOR, DL, VT, Cmp, Mask);
   }
   return Cmp;
@@ -3330,14 +3329,14 @@ SDValue SystemZTargetLowering::lowerCTPO
       break;
     }
     case 32: {
-      SDValue Tmp = DAG.getNode(SystemZISD::BYTE_MASK, DL, MVT::v16i8,
-                                DAG.getConstant(0, DL, MVT::i32));
+      SDValue Tmp = DAG.getSplatBuildVector(MVT::v16i8, DL,
+                                            DAG.getConstant(0, DL, MVT::i32));
       Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
       break;
     }
     case 64: {
-      SDValue Tmp = DAG.getNode(SystemZISD::BYTE_MASK, DL, MVT::v16i8,
-                                DAG.getConstant(0, DL, MVT::i32));
+      SDValue Tmp = DAG.getSplatBuildVector(MVT::v16i8, DL,
+                                            DAG.getConstant(0, DL, MVT::i32));
       Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::v4i32, Op, Tmp);
       Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
       break;
@@ -4259,10 +4258,10 @@ static SDValue joinDwords(SelectionDAG &
   return DAG.getNode(SystemZISD::JOIN_DWORDS, DL, MVT::v2i64, Op0, Op1);
 }
 
-// Try to represent constant BUILD_VECTOR node BVN using a
-// SystemZISD::BYTE_MASK-style mask.  Store the mask value in Mask
-// on success.
-static bool tryBuildVectorByteMask(BuildVectorSDNode *BVN, uint64_t &Mask) {
+// Try to represent constant BUILD_VECTOR node BVN using a BYTE MASK style
+// mask.  Store the mask value in Mask on success.
+bool SystemZTargetLowering::
+tryBuildVectorByteMask(BuildVectorSDNode *BVN, uint64_t &Mask) {
   EVT ElemVT = BVN->getValueType(0).getVectorElementType();
   unsigned BytesPerElement = ElemVT.getStoreSize();
   for (unsigned I = 0, E = BVN->getNumOperands(); I != E; ++I) {
@@ -4541,13 +4540,11 @@ SDValue SystemZTargetLowering::lowerBUIL
     // Try using VECTOR GENERATE BYTE MASK.  This is the architecturally-
     // preferred way of creating all-zero and all-one vectors so give it
     // priority over other methods below.
-    uint64_t Mask = 0;
-    if (tryBuildVectorByteMask(BVN, Mask)) {
-      SDValue Op = DAG.getNode(
-          SystemZISD::BYTE_MASK, DL, MVT::v16i8,
-          DAG.getConstant(Mask, DL, MVT::i32, false, true /*isOpaque*/));
-      return DAG.getNode(ISD::BITCAST, DL, VT, Op);
-    }
+    uint64_t Mask;
+    if (ISD::isBuildVectorAllZeros(Op.getNode()) ||
+        ISD::isBuildVectorAllOnes(Op.getNode()) ||
+        (VT.isInteger() && tryBuildVectorByteMask(BVN, Mask)))
+      return Op;
 
     // Try using some form of replication.
     APInt SplatBits, SplatUndef;
@@ -5027,7 +5024,6 @@ const char *SystemZTargetLowering::getTa
     OPCODE(TBEGIN);
     OPCODE(TBEGIN_NOFLOAT);
     OPCODE(TEND);
-    OPCODE(BYTE_MASK);
     OPCODE(ROTATE_MASK);
     OPCODE(REPLICATE);
     OPCODE(JOIN_DWORDS);
@@ -5339,8 +5335,7 @@ SDValue SystemZTargetLowering::combineME
   SDValue Op1 = N->getOperand(1);
   if (Op0.getOpcode() == ISD::BITCAST)
     Op0 = Op0.getOperand(0);
-  if (Op0.getOpcode() == SystemZISD::BYTE_MASK &&
-      cast<ConstantSDNode>(Op0.getOperand(0))->getZExtValue() == 0) {
+  if (ISD::isBuildVectorAllZeros(Op0.getNode())) {
     // (z_merge_* 0, 0) -> 0.  This is mostly useful for using VLLEZF
     // for v4f32.
     if (Op1 == N->getOperand(0))

Modified: llvm/trunk/lib/Target/SystemZ/SystemZISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/SystemZ/SystemZISelLowering.h?rev=353325&r1=353324&r2=353325&view=diff
==============================================================================
--- llvm/trunk/lib/Target/SystemZ/SystemZISelLowering.h (original)
+++ llvm/trunk/lib/Target/SystemZ/SystemZISelLowering.h Wed Feb  6 10:59:19 2019
@@ -161,10 +161,6 @@ enum NodeType : unsigned {
   // Transaction end.  Just the chain operand.  Returns CC value and chain.
   TEND,
 
-  // Create a vector constant by filling byte N of the result with bit
-  // 15-N of the single operand.
-  BYTE_MASK,
-
   // Create a vector constant by replicating an element-sized RISBG-style mask.
   // The first operand specifies the starting set bit and the second operand
   // specifies the ending set bit.  Both operands count from the MSB of the
@@ -515,6 +511,8 @@ public:
     return true;
   }
 
+  static bool tryBuildVectorByteMask(BuildVectorSDNode *BVN, uint64_t &Mask);
+
 private:
   const SystemZSubtarget &Subtarget;
 

Modified: llvm/trunk/lib/Target/SystemZ/SystemZInstrVector.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/SystemZ/SystemZInstrVector.td?rev=353325&r1=353324&r2=353325&view=diff
==============================================================================
--- llvm/trunk/lib/Target/SystemZ/SystemZInstrVector.td (original)
+++ llvm/trunk/lib/Target/SystemZ/SystemZInstrVector.td Wed Feb  6 10:59:19 2019
@@ -60,7 +60,7 @@ let Predicates = [FeatureVector] in {
     // Generate byte mask.
     def VZERO : InherentVRIa<"vzero", 0xE744, 0>;
     def VONE  : InherentVRIa<"vone", 0xE744, 0xffff>;
-    def VGBM  : UnaryVRIa<"vgbm", 0xE744, z_byte_mask, v128b, imm32zx16>;
+    def VGBM  : UnaryVRIa<"vgbm", 0xE744, null_frag, v128b, imm32zx16>;
 
     // Generate mask.
     def VGM  : BinaryVRIbGeneric<"vgm", 0xE746>;

Modified: llvm/trunk/lib/Target/SystemZ/SystemZOperators.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/SystemZ/SystemZOperators.td?rev=353325&r1=353324&r2=353325&view=diff
==============================================================================
--- llvm/trunk/lib/Target/SystemZ/SystemZOperators.td (original)
+++ llvm/trunk/lib/Target/SystemZ/SystemZOperators.td Wed Feb  6 10:59:19 2019
@@ -286,7 +286,6 @@ def z_vector_insert     : SDNode<"ISD::I
                                  SDT_ZInsertVectorElt>;
 def z_vector_extract    : SDNode<"ISD::EXTRACT_VECTOR_ELT",
                                  SDT_ZExtractVectorElt>;
-def z_byte_mask         : SDNode<"SystemZISD::BYTE_MASK", SDT_ZReplicate>;
 def z_rotate_mask       : SDNode<"SystemZISD::ROTATE_MASK", SDT_ZRotateMask>;
 def z_replicate         : SDNode<"SystemZISD::REPLICATE", SDT_ZReplicate>;
 def z_join_dwords       : SDNode<"SystemZISD::JOIN_DWORDS", SDT_ZJoinDwords>;
@@ -708,10 +707,6 @@ class shiftop<SDPatternOperator operator
              [(operator node:$val, node:$count),
               (operator node:$val, (and node:$count, imm32bottom6set))]>;
 
-// Vector representation of all-zeros and all-ones.
-def z_vzero : PatFrag<(ops), (bitconvert (v16i8 (z_byte_mask (i32 0))))>;
-def z_vones : PatFrag<(ops), (bitconvert (v16i8 (z_byte_mask (i32 65535))))>;
-
 // Load a scalar and replicate it in all elements of a vector.
 class z_replicate_load<ValueType scalartype, SDPatternOperator load>
   : PatFrag<(ops node:$addr),
@@ -739,13 +734,13 @@ def z_vlef64 : z_vle<f64, load>;
 // zeroed vector.
 class z_vllez<ValueType scalartype, SDPatternOperator load, int index>
   : PatFrag<(ops node:$addr),
-            (z_vector_insert (z_vzero),
+            (z_vector_insert (immAllZerosV),
                              (scalartype (load node:$addr)), (i32 index))>;
 def z_vllezi8  : z_vllez<i32, anyextloadi8, 7>;
 def z_vllezi16 : z_vllez<i32, anyextloadi16, 3>;
 def z_vllezi32 : z_vllez<i32, load, 1>;
 def z_vllezi64 : PatFrags<(ops node:$addr),
-                          [(z_vector_insert (z_vzero),
+                          [(z_vector_insert (immAllZerosV),
                                             (i64 (load node:$addr)), (i32 0)),
                            (z_join_dwords (i64 (load node:$addr)), (i64 0))]>;
 // We use high merges to form a v4f32 from four f32s.  Propagating zero
@@ -758,11 +753,12 @@ def z_vllezf32 : PatFrag<(ops node:$addr
                              (bitconvert
                               (v4f32 (scalar_to_vector
                                       (f32 (load node:$addr)))))))),
-                          (v2i64 (z_vzero)))>;
+                          (v2i64
+                           (bitconvert (v4f32 (immAllZerosV)))))>;
 def z_vllezf64 : PatFrag<(ops node:$addr),
                          (z_merge_high
                           (v2f64 (scalar_to_vector (f64 (load node:$addr)))),
-                          (z_vzero))>;
+                          (immAllZerosV))>;
 
 // Similarly for the high element of a zeroed vector.
 def z_vllezli32 : z_vllez<i32, load, 0>;
@@ -773,8 +769,9 @@ def z_vllezlf32 : PatFrag<(ops node:$add
                              (z_merge_high
                               (v4f32 (scalar_to_vector
                                       (f32 (load node:$addr)))),
-                              (v4f32 (z_vzero))))),
-                           (v2i64 (z_vzero)))>;
+                              (v4f32 (immAllZerosV))))),
+                           (v2i64
+                            (bitconvert (v4f32 (immAllZerosV)))))>;
 
 // Store one element of a vector.
 class z_vste<ValueType scalartype, SDPatternOperator store>
@@ -789,16 +786,16 @@ def z_vstef32 : z_vste<f32, store>;
 def z_vstef64 : z_vste<f64, store>;
 
 // Arithmetic negation on vectors.
-def z_vneg : PatFrag<(ops node:$x), (sub (z_vzero), node:$x)>;
+def z_vneg : PatFrag<(ops node:$x), (sub (immAllZerosV), node:$x)>;
 
 // Bitwise negation on vectors.
-def z_vnot : PatFrag<(ops node:$x), (xor node:$x, (z_vones))>;
+def z_vnot : PatFrag<(ops node:$x), (xor node:$x, (immAllOnesV))>;
 
 // Signed "integer greater than zero" on vectors.
-def z_vicmph_zero : PatFrag<(ops node:$x), (z_vicmph node:$x, (z_vzero))>;
+def z_vicmph_zero : PatFrag<(ops node:$x), (z_vicmph node:$x, (immAllZerosV))>;
 
 // Signed "integer less than zero" on vectors.
-def z_vicmpl_zero : PatFrag<(ops node:$x), (z_vicmph (z_vzero), node:$x)>;
+def z_vicmpl_zero : PatFrag<(ops node:$x), (z_vicmph (immAllZerosV), node:$x)>;
 
 // Integer absolute on vectors.
 class z_viabs<int shift>

Added: llvm/trunk/test/CodeGen/SystemZ/buildvector-00.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/SystemZ/buildvector-00.ll?rev=353325&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/SystemZ/buildvector-00.ll (added)
+++ llvm/trunk/test/CodeGen/SystemZ/buildvector-00.ll Wed Feb  6 10:59:19 2019
@@ -0,0 +1,36 @@
+; Test that the dag combiner can understand that some vector operands are
+; all-zeros and then optimize the logical operations.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+define void @f1() {
+; CHECK-LABEL: f1:
+; CHECK: vno
+; CHECK-NOT: vno
+
+bb:
+  %tmp = shufflevector <2 x i64> undef, <2 x i64> undef, <2 x i32> zeroinitializer
+  br label %bb1
+
+bb1:                                              ; preds = %bb
+  %tmp2 = load i64, i64* undef, align 8
+  %tmp3 = insertelement <2 x i64> undef, i64 %tmp2, i32 1
+  %tmp4 = icmp ne <2 x i64> undef, zeroinitializer
+  %tmp5 = xor <2 x i1> %tmp4, zeroinitializer
+  %tmp6 = xor <2 x i1> zeroinitializer, %tmp5
+  %tmp7 = and <2 x i64> %tmp3, %tmp
+  %tmp8 = icmp ne <2 x i64> %tmp7, zeroinitializer
+  %tmp9 = xor <2 x i1> zeroinitializer, %tmp8
+  %tmp10 = icmp ne <2 x i64> undef, zeroinitializer
+  %tmp11 = xor <2 x i1> %tmp10, %tmp9
+  %tmp12 = and <2 x i1> %tmp6, %tmp11
+  %tmp13 = extractelement <2 x i1> %tmp12, i32 0
+  br i1 %tmp13, label %bb14, label %bb15
+
+bb14:                                             ; preds = %bb1
+  store i64 undef, i64* undef, align 8
+  br label %bb15
+
+bb15:                                             ; preds = %bb14, %bb1
+  unreachable
+}

Modified: llvm/trunk/test/CodeGen/SystemZ/vec-const-05.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/SystemZ/vec-const-05.ll?rev=353325&r1=353324&r2=353325&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/SystemZ/vec-const-05.ll (original)
+++ llvm/trunk/test/CodeGen/SystemZ/vec-const-05.ll Wed Feb  6 10:59:19 2019
@@ -1,63 +1,28 @@
-; Test vector byte masks, v4f32 version.
+; Test vector byte masks, v4f32 version. Only all-zero vectors are handled.
 ;
 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
 
 ; Test an all-zeros vector.
-define <4 x float> @f1() {
-; CHECK-LABEL: f1:
+define <4 x float> @f0() {
+; CHECK-LABEL: f0:
 ; CHECK: vgbm %v24, 0
 ; CHECK: br %r14
   ret <4 x float> zeroinitializer
 }
 
-; Test an all-ones vector.
-define <4 x float> @f2() {
-; CHECK-LABEL: f2:
-; CHECK: vgbm %v24, 65535
-; CHECK: br %r14
-  ret <4 x float> <float 0xffffffffe0000000, float 0xffffffffe0000000,
-                   float 0xffffffffe0000000, float 0xffffffffe0000000>
-}
-
-; Test a mixed vector (mask 0xc731).
-define <4 x float> @f3() {
-; CHECK-LABEL: f3:
-; CHECK: vgbm %v24, 50993
-; CHECK: br %r14
-  ret <4 x float> <float 0xffffe00000000000, float 0x381fffffe0000000,
-                   float 0x379fffe000000000, float 0x371fe00000000000>
-}
-
-; Test that undefs are treated as zero (mask 0xc031).
-define <4 x float> @f4() {
-; CHECK-LABEL: f4:
-; CHECK: vgbm %v24, 49201
-; CHECK: br %r14
-  ret <4 x float> <float 0xffffe00000000000, float undef,
-                   float 0x379fffe000000000, float 0x371fe00000000000>
-}
-
-; Test that we don't use VGBM if one of the bytes is not 0 or 0xff.
-define <4 x float> @f5() {
-; CHECK-LABEL: f5:
-; CHECK-NOT: vgbm
+; Test that undefs are treated as zero.
+define <4 x float> @f1() {
+; CHECK-LABEL: f1:
+; CHECK: vgbm %v24, 0
 ; CHECK: br %r14
-  ret <4 x float> <float 0xffffe00000000000, float 0x381fffffc0000000,
-                   float 0x379fffe000000000, float 0x371fe00000000000>
+  ret <4 x float> <float zeroinitializer, float undef,
+                   float zeroinitializer, float undef>
 }
 
 ; Test an all-zeros v2f32 that gets promoted to v4f32.
-define <2 x float> @f6() {
-; CHECK-LABEL: f6:
+define <2 x float> @f2() {
+; CHECK-LABEL: f2:
 ; CHECK: vgbm %v24, 0
 ; CHECK: br %r14
   ret <2 x float> zeroinitializer
 }
-
-; Test a mixed v2f32 that gets promoted to v4f32 (mask 0xc700).
-define <2 x float> @f7() {
-; CHECK-LABEL: f7:
-; CHECK: vgbm %v24, 50944
-; CHECK: br %r14
-  ret <2 x float> <float 0xffffe00000000000, float 0x381fffffe0000000>
-}

Modified: llvm/trunk/test/CodeGen/SystemZ/vec-const-06.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/SystemZ/vec-const-06.ll?rev=353325&r1=353324&r2=353325&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/SystemZ/vec-const-06.ll (original)
+++ llvm/trunk/test/CodeGen/SystemZ/vec-const-06.ll Wed Feb  6 10:59:19 2019
@@ -1,43 +1,19 @@
-; Test vector byte masks, v2f64 version.
+; Test vector byte masks, v2f64 version. Only all-zero vectors are handled.
 ;
 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
 
 ; Test an all-zeros vector.
-define <2 x double> @f1() {
-; CHECK-LABEL: f1:
+define <2 x double> @f0() {
+; CHECK-LABEL: f0:
 ; CHECK: vgbm %v24, 0
 ; CHECK: br %r14
   ret <2 x double> zeroinitializer
 }
 
-; Test an all-ones vector.
-define <2 x double> @f2() {
-; CHECK-LABEL: f2:
-; CHECK: vgbm %v24, 65535
-; CHECK: br %r14
-  ret <2 x double> <double 0xffffffffffffffff, double 0xffffffffffffffff>
-}
-
-; Test a mixed vector (mask 0x8c76).
-define <2 x double> @f3() {
-; CHECK-LABEL: f3:
-; CHECK: vgbm %v24, 35958
-; CHECK: br %r14
-  ret <2 x double> <double 0xff000000ffff0000, double 0x00ffffff00ffff00>
-}
-
-; Test that undefs are treated as zero (mask 0x8c00).
-define <2 x double> @f4() {
-; CHECK-LABEL: f4:
-; CHECK: vgbm %v24, 35840
-; CHECK: br %r14
-  ret <2 x double> <double 0xff000000ffff0000, double undef>
-}
-
-; Test that we don't use VGBM if one of the bytes is not 0 or 0xff.
-define <2 x double> @f5() {
-; CHECK-LABEL: f5:
-; CHECK-NOT: vgbm
+; Test that undefs are treated as zero.
+define <2 x double> @f1() {
+; CHECK-LABEL: f1:
+; CHECK: vgbm %v24, 0
 ; CHECK: br %r14
-  ret <2 x double> <double 0xfe000000ffff0000, double 0x00ffffff00ffff00>
+  ret <2 x double> <double zeroinitializer, double undef>
 }