[llvm] 132f25b - [SystemZ] Avoid scalarization of [SU]INT_TO_FP ISD-nodes.
Jonas Paulsson via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 16 05:10:00 PDT 2020
Author: Jonas Paulsson
Date: 2020-03-16T13:07:42+01:00
New Revision: 132f25bcca2e4cdf89df7eb1eff8051dec381e0c
URL: https://github.com/llvm/llvm-project/commit/132f25bcca2e4cdf89df7eb1eff8051dec381e0c
DIFF: https://github.com/llvm/llvm-project/commit/132f25bcca2e4cdf89df7eb1eff8051dec381e0c.diff
LOG: [SystemZ] Avoid scalarization of [SU]INT_TO_FP ISD-nodes.
The type legalizer will scalarize vector conversions from integer to floating
point if the source element size is less than that of the result.
This is avoided now by inserting a zero/sign-extension of the source vector
before type legalization.
Review: Ulrich Weigand
Differential revision: https://reviews.llvm.org/D75978
Added:
llvm/test/CodeGen/SystemZ/vec-move-23.ll
Modified:
llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
llvm/lib/Target/SystemZ/SystemZISelLowering.h
Removed:
################################################################################
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index ddced5a42103..66da118dd825 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -641,6 +641,8 @@ SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &TM,
setTargetDAGCombine(ISD::FP_ROUND);
setTargetDAGCombine(ISD::STRICT_FP_ROUND);
setTargetDAGCombine(ISD::FP_EXTEND);
+ setTargetDAGCombine(ISD::SINT_TO_FP);
+ setTargetDAGCombine(ISD::UINT_TO_FP);
setTargetDAGCombine(ISD::STRICT_FP_EXTEND);
setTargetDAGCombine(ISD::BSWAP);
setTargetDAGCombine(ISD::SDIV);
@@ -6081,6 +6083,32 @@ SDValue SystemZTargetLowering::combineFP_EXTEND(
return SDValue();
}
+/// Target DAG combine for [SU]INT_TO_FP nodes with a vector result.
+///
+/// Runs only before type legalization. When the result is a vector whose
+/// elements are wider than the integer source elements, the source is first
+/// widened with ZERO_EXTEND (for UINT_TO_FP) or SIGN_EXTEND (for SINT_TO_FP)
+/// to an integer vector with the result's element width and element count,
+/// so that the conversion itself no longer changes the element size.
+///
+/// \param N   The SINT_TO_FP or UINT_TO_FP node being combined.
+/// \param DCI Combiner state; supplies the current combine level and the DAG.
+/// \returns The replacement conversion node, or an empty SDValue if the node
+///          is left unchanged.
+SDValue SystemZTargetLowering::combineINT_TO_FP(
+    SDNode *N, DAGCombinerInfo &DCI) const {
+  if (DCI.Level != BeforeLegalizeTypes)
+    return SDValue();
+  SelectionDAG &DAG = DCI.DAG;
+  unsigned Opcode = N->getOpcode();
+  EVT OutVT = N->getValueType(0);
+  SDValue Op = N->getOperand(0);
+  unsigned OutScalarBits = OutVT.getScalarSizeInBits();
+  unsigned InScalarBits = Op->getValueType(0).getScalarSizeInBits();
+
+  // Insert an extension before type-legalization to avoid scalarization, e.g.:
+  // v2f64 = uint_to_fp v2i16
+  // =>
+  // v2f64 = uint_to_fp (v2i64 zero_extend v2i16)
+  //
+  // NOTE(review): results with elements wider than 64 bits (e.g. fp128) are
+  // deliberately left alone, on the assumption that widening the source to
+  // such an integer element type cannot help vector selection - confirm.
+  if (OutVT.isVector() && OutScalarBits > InScalarBits &&
+      OutScalarBits <= 64) {
+    // Build the extended type as an EVT. MVT::getVectorVT() only covers the
+    // fixed table of simple types and returns an invalid type for an element
+    // count that has no simple vector type; EVT::getVectorVT() can describe
+    // any element count.
+    LLVMContext &Ctx = *DAG.getContext();
+    EVT ExtVT = EVT::getVectorVT(Ctx, EVT::getIntegerVT(Ctx, OutScalarBits),
+                                 OutVT.getVectorNumElements());
+    unsigned ExtOpcode =
+        (Opcode == ISD::UINT_TO_FP ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND);
+    SDLoc DL(N);
+    SDValue ExtOp = DAG.getNode(ExtOpcode, DL, ExtVT, Op);
+    return DAG.getNode(Opcode, DL, OutVT, ExtOp);
+  }
+  return SDValue();
+}
+
SDValue SystemZTargetLowering::combineBSWAP(
SDNode *N, DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
@@ -6408,6 +6436,8 @@ SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::FP_ROUND: return combineFP_ROUND(N, DCI);
case ISD::STRICT_FP_EXTEND:
case ISD::FP_EXTEND: return combineFP_EXTEND(N, DCI);
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP: return combineINT_TO_FP(N, DCI);
case ISD::BSWAP: return combineBSWAP(N, DCI);
case SystemZISD::BR_CCMASK: return combineBR_CCMASK(N, DCI);
case SystemZISD::SELECT_CCMASK: return combineSELECT_CCMASK(N, DCI);
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.h b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
index 739377ed0f95..f482b7baa19f 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.h
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.h
@@ -642,6 +642,7 @@ class SystemZTargetLowering : public TargetLowering {
SDValue combineJOIN_DWORDS(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineFP_ROUND(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineFP_EXTEND(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue combineINT_TO_FP(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineBSWAP(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineBR_CCMASK(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineSELECT_CCMASK(SDNode *N, DAGCombinerInfo &DCI) const;
diff --git a/llvm/test/CodeGen/SystemZ/vec-move-23.ll b/llvm/test/CodeGen/SystemZ/vec-move-23.ll
new file mode 100644
index 000000000000..430e879bcc06
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/vec-move-23.ll
@@ -0,0 +1,131 @@
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s -check-prefixes=CHECK,Z14
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z15 | FileCheck %s -check-prefixes=CHECK,Z15
+;
+; Check that int-to-fp conversions from a narrower type get a vector extension.
+
+; Signed <2 x i8> to <2 x double>. The checks expect three vector unpacks
+; (vuphb, vuphh, vuphf) feeding a single vcdgb, with no per-element conversion.
+define void @fun0(<2 x i8> %Src, <2 x double>* %Dst) {
+; CHECK-LABEL: fun0:
+; CHECK: vuphb %v0, %v24
+; CHECK-NEXT: vuphh %v0, %v0
+; CHECK-NEXT: vuphf %v0, %v0
+; CHECK-NEXT: vcdgb %v0, %v0, 0, 0
+; CHECK-NEXT: vst %v0, 0(%r2), 3
+; CHECK-NEXT: br %r14
+ %c = sitofp <2 x i8> %Src to <2 x double>
+ store <2 x double> %c, <2 x double>* %Dst
+ ret void
+}
+
+; Signed <2 x i16> to <2 x double>. The checks expect two vector unpacks
+; (vuphh, vuphf) feeding a single vcdgb.
+define void @fun1(<2 x i16> %Src, <2 x double>* %Dst) {
+; CHECK-LABEL: fun1:
+; CHECK: vuphh %v0, %v24
+; CHECK-NEXT: vuphf %v0, %v0
+; CHECK-NEXT: vcdgb %v0, %v0, 0, 0
+; CHECK-NEXT: vst %v0, 0(%r2), 3
+; CHECK-NEXT: br %r14
+ %c = sitofp <2 x i16> %Src to <2 x double>
+ store <2 x double> %c, <2 x double>* %Dst
+ ret void
+}
+
+; Signed <2 x i32> to <2 x double>. The checks expect one vector unpack (vuphf)
+; feeding a single vcdgb.
+define void @fun2(<2 x i32> %Src, <2 x double>* %Dst) {
+; CHECK-LABEL: fun2:
+; CHECK: vuphf %v0, %v24
+; CHECK-NEXT: vcdgb %v0, %v0, 0, 0
+; CHECK-NEXT: vst %v0, 0(%r2), 3
+; CHECK-NEXT: br %r14
+ %c = sitofp <2 x i32> %Src to <2 x double>
+ store <2 x double> %c, <2 x double>* %Dst
+ ret void
+}
+
+; Signed <4 x i16> to <4 x float>. Both CPU levels are expected to widen the
+; source with one vuphh. With -mcpu=z14 the checks still show element-wise
+; cefbr conversions (elements extracted with vlgvf and merged back with
+; vmrhf/vmrhg); with -mcpu=z15 a single vcefb is expected.
+define void @fun3(<4 x i16> %Src, <4 x float>* %Dst) {
+; CHECK-LABEL: fun3:
+
+; Z14: vuphh %v0, %v24
+; Z14-NEXT: vlgvf %r0, %v0, 3
+; Z14-NEXT: cefbr %f1, %r0
+; Z14-NEXT: vlgvf %r0, %v0, 2
+; Z14-NEXT: cefbr %f2, %r0
+; Z14-NEXT: vlgvf %r0, %v0, 1
+; Z14-NEXT: vmrhf %v1, %v2, %v1
+; Z14-NEXT: cefbr %f2, %r0
+; Z14-NEXT: vlgvf %r0, %v0, 0
+; Z14-NEXT: cefbr %f0, %r0
+; Z14-NEXT: vmrhf %v0, %v0, %v2
+; Z14-NEXT: vmrhg %v0, %v0, %v1
+; Z14-NEXT: vst %v0, 0(%r2), 3
+; Z14-NEXT: br %r14
+
+; Z15: vuphh %v0, %v24
+; Z15-NEXT: vcefb %v0, %v0, 0, 0
+; Z15-NEXT: vst %v0, 0(%r2), 3
+; Z15-NEXT: br %r14
+ %c = sitofp <4 x i16> %Src to <4 x float>
+ store <4 x float> %c, <4 x float>* %Dst
+ ret void
+}
+
+; Unsigned <2 x i8> to <2 x double>. The checks expect three vector unpacks
+; (vuplhb, vuplhh, vuplhf) feeding a single vcdlgb.
+define void @fun4(<2 x i8> %Src, <2 x double>* %Dst) {
+; CHECK-LABEL: fun4:
+; CHECK: vuplhb %v0, %v24
+; CHECK-NEXT: vuplhh %v0, %v0
+; CHECK-NEXT: vuplhf %v0, %v0
+; CHECK-NEXT: vcdlgb %v0, %v0, 0, 0
+; CHECK-NEXT: vst %v0, 0(%r2), 3
+; CHECK-NEXT: br %r14
+ %c = uitofp <2 x i8> %Src to <2 x double>
+ store <2 x double> %c, <2 x double>* %Dst
+ ret void
+}
+
+; Unsigned <2 x i16> to <2 x double>. The checks expect two vector unpacks
+; (vuplhh, vuplhf) feeding a single vcdlgb.
+define void @fun5(<2 x i16> %Src, <2 x double>* %Dst) {
+; CHECK-LABEL: fun5:
+; CHECK: vuplhh %v0, %v24
+; CHECK-NEXT: vuplhf %v0, %v0
+; CHECK-NEXT: vcdlgb %v0, %v0, 0, 0
+; CHECK-NEXT: vst %v0, 0(%r2), 3
+; CHECK-NEXT: br %r14
+ %c = uitofp <2 x i16> %Src to <2 x double>
+ store <2 x double> %c, <2 x double>* %Dst
+ ret void
+}
+
+; Unsigned <2 x i32> to <2 x double>. The checks expect one vector unpack
+; (vuplhf) feeding a single vcdlgb.
+define void @fun6(<2 x i32> %Src, <2 x double>* %Dst) {
+; CHECK-LABEL: fun6:
+; CHECK: vuplhf %v0, %v24
+; CHECK-NEXT: vcdlgb %v0, %v0, 0, 0
+; CHECK-NEXT: vst %v0, 0(%r2), 3
+; CHECK-NEXT: br %r14
+ %c = uitofp <2 x i32> %Src to <2 x double>
+ store <2 x double> %c, <2 x double>* %Dst
+ ret void
+}
+
+; Unsigned <4 x i16> to <4 x float>. Both CPU levels are expected to widen the
+; source with one vuplhh. With -mcpu=z14 the checks still show element-wise
+; celfbr conversions (elements extracted with vlgvf and merged back with
+; vmrhf/vmrhg); with -mcpu=z15 a single vcelfb is expected.
+define void @fun7(<4 x i16> %Src, <4 x float>* %Dst) {
+; CHECK-LABEL: fun7:
+
+; Z14: vuplhh %v0, %v24
+; Z14-NEXT: vlgvf %r0, %v0, 3
+; Z14-NEXT: celfbr %f1, 0, %r0, 0
+; Z14-NEXT: vlgvf %r0, %v0, 2
+; Z14-NEXT: celfbr %f2, 0, %r0, 0
+; Z14-NEXT: vlgvf %r0, %v0, 1
+; Z14-NEXT: vmrhf %v1, %v2, %v1
+; Z14-NEXT: celfbr %f2, 0, %r0, 0
+; Z14-NEXT: vlgvf %r0, %v0, 0
+; Z14-NEXT: celfbr %f0, 0, %r0, 0
+; Z14-NEXT: vmrhf %v0, %v0, %v2
+; Z14-NEXT: vmrhg %v0, %v0, %v1
+; Z14-NEXT: vst %v0, 0(%r2), 3
+; Z14-NEXT: br %r14
+
+; Z15: vuplhh %v0, %v24
+; Z15-NEXT: vcelfb %v0, %v0, 0, 0
+; Z15-NEXT: vst %v0, 0(%r2), 3
+; Z15-NEXT: br %r14
+ %c = uitofp <4 x i16> %Src to <4 x float>
+ store <4 x float> %c, <4 x float>* %Dst
+ ret void
+}
+
More information about the llvm-commits
mailing list