[llvm] r346746 - [SystemZ] Increase the number of VLREPs

Jonas Paulsson via llvm-commits llvm-commits at lists.llvm.org
Tue Nov 13 00:37:09 PST 2018


Author: jonpa
Date: Tue Nov 13 00:37:09 2018
New Revision: 346746

URL: http://llvm.org/viewvc/llvm-project?rev=346746&view=rev
Log:
[SystemZ] Increase the number of VLREPs

If a loaded value is replicated, it is best to combine the two operations
into a single VLREP (load and replicate) instruction. However, instruction
selection (isel) will not produce a VLREP if the load has other users as well.

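This patch handles that case by rewriting the other users of the load to use
element 0 of the REPLICATE (extracted with EXTRACT_VECTOR_ELT) instead of the
loaded value. The load then has the REPLICATE node as its only user, and we
get a VLREP. The transformation is applied only to floating-point loads, to
avoid extracting the element to a GPR.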

Review: Ulrich Weigand
https://reviews.llvm.org/D54264

Added:
    llvm/trunk/test/CodeGen/SystemZ/vec-move-21.ll
    llvm/trunk/test/CodeGen/SystemZ/vec-move-22.ll
Modified:
    llvm/trunk/lib/Target/SystemZ/SystemZISelLowering.cpp
    llvm/trunk/lib/Target/SystemZ/SystemZISelLowering.h

Modified: llvm/trunk/lib/Target/SystemZ/SystemZISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/SystemZ/SystemZISelLowering.cpp?rev=346746&r1=346745&r2=346746&view=diff
==============================================================================
--- llvm/trunk/lib/Target/SystemZ/SystemZISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/SystemZ/SystemZISelLowering.cpp Tue Nov 13 00:37:09 2018
@@ -523,6 +523,7 @@ SystemZTargetLowering::SystemZTargetLowe
   setTargetDAGCombine(ISD::ZERO_EXTEND);
   setTargetDAGCombine(ISD::SIGN_EXTEND);
   setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
+  setTargetDAGCombine(ISD::LOAD);
   setTargetDAGCombine(ISD::STORE);
   setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
   setTargetDAGCombine(ISD::FP_ROUND);
@@ -5368,6 +5369,46 @@ SDValue SystemZTargetLowering::combineME
   return SDValue();
 }
 
+SDValue SystemZTargetLowering::combineLOAD(
+    SDNode *N, DAGCombinerInfo &DCI) const {
+  SelectionDAG &DAG = DCI.DAG;
+  EVT LdVT = N->getValueType(0);
+  if (LdVT.isVector() || LdVT.isInteger())
+    return SDValue();
+  // Transform a scalar load that is REPLICATEd as well as having other
+  // use(s) to the form where the other use(s) use the first element of the
+  // REPLICATE instead of the load. Otherwise instruction selection will not
+  // produce a VLREP. Avoid extracting to a GPR, so only do this for floating
+  // point loads.
+
+  SDValue Replicate;
+  SmallVector<SDNode*, 8> OtherUses;
+  for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
+       UI != UE; ++UI) {
+    if (UI->getOpcode() == SystemZISD::REPLICATE) {
+      if (Replicate)
+        return SDValue(); // Should never happen
+      Replicate = SDValue(*UI, 0);
+    }
+    else if (UI.getUse().getResNo() == 0)
+      OtherUses.push_back(*UI);
+  }
+  if (!Replicate || OtherUses.empty())
+    return SDValue();
+
+  SDLoc DL(N);
+  SDValue Extract0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, LdVT,
+                              Replicate, DAG.getConstant(0, DL, MVT::i32));
+  // Update uses of the loaded Value while preserving old chains.
+  for (SDNode *U : OtherUses) {
+    SmallVector<SDValue, 8> Ops;
+    for (SDValue Op : U->ops())
+      Ops.push_back((Op.getNode() == N && Op.getResNo() == 0) ? Extract0 : Op);
+    DAG.UpdateNodeOperands(U, Ops);
+  }
+  return SDValue(N, 0);
+}
+
 SDValue SystemZTargetLowering::combineSTORE(
     SDNode *N, DAGCombinerInfo &DCI) const {
   SelectionDAG &DAG = DCI.DAG;
@@ -5699,6 +5740,7 @@ SDValue SystemZTargetLowering::PerformDA
   case ISD::SIGN_EXTEND_INREG:  return combineSIGN_EXTEND_INREG(N, DCI);
   case SystemZISD::MERGE_HIGH:
   case SystemZISD::MERGE_LOW:   return combineMERGE(N, DCI);
+  case ISD::LOAD:               return combineLOAD(N, DCI);
   case ISD::STORE:              return combineSTORE(N, DCI);
   case ISD::EXTRACT_VECTOR_ELT: return combineEXTRACT_VECTOR_ELT(N, DCI);
   case SystemZISD::JOIN_DWORDS: return combineJOIN_DWORDS(N, DCI);

Modified: llvm/trunk/lib/Target/SystemZ/SystemZISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/SystemZ/SystemZISelLowering.h?rev=346746&r1=346745&r2=346746&view=diff
==============================================================================
--- llvm/trunk/lib/Target/SystemZ/SystemZISelLowering.h (original)
+++ llvm/trunk/lib/Target/SystemZ/SystemZISelLowering.h Tue Nov 13 00:37:09 2018
@@ -587,6 +587,7 @@ private:
   SDValue combineSIGN_EXTEND(SDNode *N, DAGCombinerInfo &DCI) const;
   SDValue combineSIGN_EXTEND_INREG(SDNode *N, DAGCombinerInfo &DCI) const;
   SDValue combineMERGE(SDNode *N, DAGCombinerInfo &DCI) const;
+  SDValue combineLOAD(SDNode *N, DAGCombinerInfo &DCI) const;
   SDValue combineSTORE(SDNode *N, DAGCombinerInfo &DCI) const;
   SDValue combineEXTRACT_VECTOR_ELT(SDNode *N, DAGCombinerInfo &DCI) const;
   SDValue combineJOIN_DWORDS(SDNode *N, DAGCombinerInfo &DCI) const;

Added: llvm/trunk/test/CodeGen/SystemZ/vec-move-21.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/SystemZ/vec-move-21.ll?rev=346746&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/SystemZ/vec-move-21.ll (added)
+++ llvm/trunk/test/CodeGen/SystemZ/vec-move-21.ll Tue Nov 13 00:37:09 2018
@@ -0,0 +1,56 @@
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+; Test that a replicate of a load gets folded to vlrep also in cases where
+; the load has multiple users.
+
+; CHECK-NOT: vrep
+
+
+define double @fun(double* %Vsrc, <2 x double> %T) {
+entry:
+  %Vgep1 = getelementptr double, double* %Vsrc, i64 0
+  %Vld1 = load double, double* %Vgep1
+  %Vgep2 = getelementptr double, double* %Vsrc, i64 1
+  %Vld2 = load double, double* %Vgep2
+  %Vgep3 = getelementptr double, double* %Vsrc, i64 2
+  %Vld3 = load double, double* %Vgep3
+  %Vgep4 = getelementptr double, double* %Vsrc, i64 3
+  %Vld4 = load double, double* %Vgep4
+  %Vgep5 = getelementptr double, double* %Vsrc, i64 4
+  %Vld5 = load double, double* %Vgep5
+  %Vgep6 = getelementptr double, double* %Vsrc, i64 5
+  %Vld6 = load double, double* %Vgep6
+
+  %V19 = insertelement <2 x double> undef, double %Vld1, i32 0
+  %V20 = shufflevector <2 x double> %V19, <2 x double> undef, <2 x i32> zeroinitializer
+  %V21 = insertelement <2 x double> undef, double %Vld4, i32 0
+  %V22 = insertelement <2 x double> %V21, double %Vld5, i32 1
+  %V23 = fmul <2 x double> %V20, %V22
+  %V24 = fadd <2 x double> %T, %V23
+  %V25 = insertelement <2 x double> %V19, double %Vld2, i32 1
+  %V26 = insertelement <2 x double> undef, double %Vld6, i32 0
+  %V27 = insertelement <2 x double> %V26, double %Vld6, i32 1
+  %V28 = fmul <2 x double> %V25, %V27
+  %V29 = fadd <2 x double> %T, %V28
+  %V30 = insertelement <2 x double> undef, double %Vld2, i32 0
+  %V31 = shufflevector <2 x double> %V30, <2 x double> undef, <2 x i32> zeroinitializer
+  %V32 = insertelement <2 x double> undef, double %Vld5, i32 0
+  %V33 = insertelement <2 x double> %V32, double %Vld6, i32 1
+  %V34 = fmul <2 x double> %V31, %V33
+  %V35 = fadd <2 x double> %T, %V34
+  %V36 = insertelement <2 x double> undef, double %Vld3, i32 0
+  %V37 = shufflevector <2 x double> %V36, <2 x double> undef, <2 x i32> zeroinitializer
+  %V38 = fmul <2 x double> %V37, %V22
+  %V39 = fadd <2 x double> %T, %V38
+  %Vmul37 = fmul double %Vld3, %Vld6
+  %Vadd38 = fadd double %Vmul37, %Vmul37
+
+  %VA0 = fadd <2 x double> %V24, %V29
+  %VA1 = fadd <2 x double> %VA0, %V35
+  %VA2 = fadd <2 x double> %VA1, %V39
+
+  %VE0 = extractelement <2 x double> %VA2, i32 0
+  %VS1 = fadd double %VE0, %Vadd38
+
+  ret double %VS1
+}

Added: llvm/trunk/test/CodeGen/SystemZ/vec-move-22.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/SystemZ/vec-move-22.ll?rev=346746&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/SystemZ/vec-move-22.ll (added)
+++ llvm/trunk/test/CodeGen/SystemZ/vec-move-22.ll Tue Nov 13 00:37:09 2018
@@ -0,0 +1,15 @@
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+; Test that a loaded value which is used both in a vector and scalar context
+; is not transformed to a vlrep + vlgvg.
+
+; CHECK-NOT: vlrep
+
+define void @fun(i64 %arg, i64** %Addr, <2 x i64*>* %Dst) {
+  %tmp10 = load i64*, i64** %Addr
+  store i64 %arg, i64* %tmp10
+  %tmp12 = insertelement <2 x i64*> undef, i64* %tmp10, i32 0
+  %tmp13 = insertelement <2 x i64*> %tmp12, i64* %tmp10, i32 1
+  store <2 x i64*> %tmp13, <2 x i64*>* %Dst
+  ret void
+}




More information about the llvm-commits mailing list