[llvm] r349746 - [SystemZ] Make better use of VLDEB
Ulrich Weigand via llvm-commits
llvm-commits at lists.llvm.org
Thu Dec 20 04:59:05 PST 2018
Author: uweigand
Date: Thu Dec 20 04:59:05 2018
New Revision: 349746
URL: http://llvm.org/viewvc/llvm-project?rev=349746&view=rev
Log:
[SystemZ] Make better use of VLDEB
We already have special code (DAG combine support for FP_ROUND)
to recognize cases where we an use a vector version of VLEDB to
perform two floating-point truncates in parallel, but equivalent
support for VLEDB (vector floating-point extends) has been
missing so far. This patch adds corresponding DAG combine
support for FP_EXTEND.
Modified:
llvm/trunk/lib/Target/SystemZ/SystemZISelLowering.cpp
llvm/trunk/lib/Target/SystemZ/SystemZISelLowering.h
llvm/trunk/test/CodeGen/SystemZ/vec-conv-02.ll
Modified: llvm/trunk/lib/Target/SystemZ/SystemZISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/SystemZ/SystemZISelLowering.cpp?rev=349746&r1=349745&r2=349746&view=diff
==============================================================================
--- llvm/trunk/lib/Target/SystemZ/SystemZISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/SystemZ/SystemZISelLowering.cpp Thu Dec 20 04:59:05 2018
@@ -527,6 +527,7 @@ SystemZTargetLowering::SystemZTargetLowe
setTargetDAGCombine(ISD::STORE);
setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
setTargetDAGCombine(ISD::FP_ROUND);
+ setTargetDAGCombine(ISD::FP_EXTEND);
setTargetDAGCombine(ISD::BSWAP);
setTargetDAGCombine(ISD::SDIV);
setTargetDAGCombine(ISD::UDIV);
@@ -5485,7 +5486,7 @@ SDValue SystemZTargetLowering::combineFP
// (fpround (extract_vector_elt X 0))
// (fpround (extract_vector_elt X 1)) ->
// (extract_vector_elt (VROUND X) 0)
- // (extract_vector_elt (VROUND X) 1)
+ // (extract_vector_elt (VROUND X) 2)
//
// This is a special case since the target doesn't really support v2f32s.
SelectionDAG &DAG = DCI.DAG;
@@ -5527,6 +5528,53 @@ SDValue SystemZTargetLowering::combineFP
return SDValue();
}
+SDValue SystemZTargetLowering::combineFP_EXTEND(
+ SDNode *N, DAGCombinerInfo &DCI) const {
+ // (fpextend (extract_vector_elt X 0))
+ // (fpextend (extract_vector_elt X 2)) ->
+ // (extract_vector_elt (VEXTEND X) 0)
+ // (extract_vector_elt (VEXTEND X) 1)
+ //
+ // This is a special case since the target doesn't really support v2f32s.
+ SelectionDAG &DAG = DCI.DAG;
+ SDValue Op0 = N->getOperand(0);
+ if (N->getValueType(0) == MVT::f64 &&
+ Op0.hasOneUse() &&
+ Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ Op0.getOperand(0).getValueType() == MVT::v4f32 &&
+ Op0.getOperand(1).getOpcode() == ISD::Constant &&
+ cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue() == 0) {
+ SDValue Vec = Op0.getOperand(0);
+ for (auto *U : Vec->uses()) {
+ if (U != Op0.getNode() &&
+ U->hasOneUse() &&
+ U->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ U->getOperand(0) == Vec &&
+ U->getOperand(1).getOpcode() == ISD::Constant &&
+ cast<ConstantSDNode>(U->getOperand(1))->getZExtValue() == 2) {
+ SDValue OtherExtend = SDValue(*U->use_begin(), 0);
+ if (OtherExtend.getOpcode() == ISD::FP_EXTEND &&
+ OtherExtend.getOperand(0) == SDValue(U, 0) &&
+ OtherExtend.getValueType() == MVT::f64) {
+ SDValue VExtend = DAG.getNode(SystemZISD::VEXTEND, SDLoc(N),
+ MVT::v2f64, Vec);
+ DCI.AddToWorklist(VExtend.getNode());
+ SDValue Extract1 =
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f64,
+ VExtend, DAG.getConstant(1, SDLoc(U), MVT::i32));
+ DCI.AddToWorklist(Extract1.getNode());
+ DAG.ReplaceAllUsesOfValueWith(OtherExtend, Extract1);
+ SDValue Extract0 =
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f64,
+ VExtend, DAG.getConstant(0, SDLoc(Op0), MVT::i32));
+ return Extract0;
+ }
+ }
+ }
+ }
+ return SDValue();
+}
+
SDValue SystemZTargetLowering::combineBSWAP(
SDNode *N, DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
@@ -5745,6 +5793,7 @@ SDValue SystemZTargetLowering::PerformDA
case ISD::EXTRACT_VECTOR_ELT: return combineEXTRACT_VECTOR_ELT(N, DCI);
case SystemZISD::JOIN_DWORDS: return combineJOIN_DWORDS(N, DCI);
case ISD::FP_ROUND: return combineFP_ROUND(N, DCI);
+ case ISD::FP_EXTEND: return combineFP_EXTEND(N, DCI);
case ISD::BSWAP: return combineBSWAP(N, DCI);
case SystemZISD::BR_CCMASK: return combineBR_CCMASK(N, DCI);
case SystemZISD::SELECT_CCMASK: return combineSELECT_CCMASK(N, DCI);
Modified: llvm/trunk/lib/Target/SystemZ/SystemZISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/SystemZ/SystemZISelLowering.h?rev=349746&r1=349745&r2=349746&view=diff
==============================================================================
--- llvm/trunk/lib/Target/SystemZ/SystemZISelLowering.h (original)
+++ llvm/trunk/lib/Target/SystemZ/SystemZISelLowering.h Thu Dec 20 04:59:05 2018
@@ -592,6 +592,7 @@ private:
SDValue combineEXTRACT_VECTOR_ELT(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineJOIN_DWORDS(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineFP_ROUND(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue combineFP_EXTEND(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineBSWAP(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineBR_CCMASK(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineSELECT_CCMASK(SDNode *N, DAGCombinerInfo &DCI) const;
Modified: llvm/trunk/test/CodeGen/SystemZ/vec-conv-02.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/SystemZ/vec-conv-02.ll?rev=349746&r1=349745&r2=349746&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/SystemZ/vec-conv-02.ll (original)
+++ llvm/trunk/test/CodeGen/SystemZ/vec-conv-02.ll Thu Dec 20 04:59:05 2018
@@ -15,19 +15,30 @@ define void @f1(<2 x double> %val, <2 x
; Test conversion of an f64 in a vector register to an f32.
define float @f2(<2 x double> %vec) {
; CHECK-LABEL: f2:
-; CHECK: wledb %f0, %v24
+; CHECK: wledb %f0, %v24, 0, 0
; CHECK: br %r14
%scalar = extractelement <2 x double> %vec, i32 0
%ret = fptrunc double %scalar to float
ret float %ret
}
-; Test conversion of an f32 in a vector register to an f64.
-define double @f3(<4 x float> %vec) {
+; Test cases where even elements of a v4f32 are converted to f64s.
+define <2 x double> @f3(<4 x float> %vec) {
; CHECK-LABEL: f3:
+; CHECK: vldeb %v24, {{%v[0-9]+}}
+; CHECK: br %r14
+ %shuffle = shufflevector <4 x float> %vec, <4 x float> undef, <2 x i32> <i32 0, i32 2>
+ %res = fpext <2 x float> %shuffle to <2 x double>
+ ret <2 x double> %res
+}
+
+; Test conversion of an f32 in a vector register to an f64.
+define double @f4(<4 x float> %vec) {
+; CHECK-LABEL: f4:
; CHECK: wldeb %f0, %v24
; CHECK: br %r14
%scalar = extractelement <4 x float> %vec, i32 0
%ret = fpext float %scalar to double
ret double %ret
}
+
More information about the llvm-commits
mailing list