[llvm] 4ca1fbe - [SelectionDAG] Make WidenVecRes_Convert work for scalable vectors.
Sander de Smalen via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 22 02:58:58 PDT 2021
Author: Sander de Smalen
Date: 2021-09-22T10:58:38+01:00
New Revision: 4ca1fbe361860976646ad09da26757bf32563145
URL: https://github.com/llvm/llvm-project/commit/4ca1fbe361860976646ad09da26757bf32563145
DIFF: https://github.com/llvm/llvm-project/commit/4ca1fbe361860976646ad09da26757bf32563145.diff
LOG: [SelectionDAG] Make WidenVecRes_Convert work for scalable vectors.
Most of the code wasn't yet scalable-safe, although conceptually
most of it just works for scalable vectors. This change makes the
algorithm work on ElementCount where appropriate, and leaves the
fixed-width-only code to use `getFixedNumElements`.
Reviewed By: david-arm
Differential Revision: https://reviews.llvm.org/D110058
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
llvm/test/CodeGen/AArch64/sve-fcvt.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 6dee032cd081c..5b658eea372ab 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -3623,7 +3623,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
SDLoc DL(N);
EVT WidenVT = TLI.getTypeToTransformTo(Ctx, N->getValueType(0));
- unsigned WidenNumElts = WidenVT.getVectorNumElements();
+ ElementCount WidenEC = WidenVT.getVectorElementCount();
EVT InVT = InOp.getValueType();
@@ -3643,14 +3643,14 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
}
EVT InEltVT = InVT.getVectorElementType();
- EVT InWidenVT = EVT::getVectorVT(Ctx, InEltVT, WidenNumElts);
- unsigned InVTNumElts = InVT.getVectorNumElements();
+ EVT InWidenVT = EVT::getVectorVT(Ctx, InEltVT, WidenEC);
+ ElementCount InVTEC = InVT.getVectorElementCount();
if (getTypeAction(InVT) == TargetLowering::TypeWidenVector) {
InOp = GetWidenedVector(N->getOperand(0));
InVT = InOp.getValueType();
- InVTNumElts = InVT.getVectorNumElements();
- if (InVTNumElts == WidenNumElts) {
+ InVTEC = InVT.getVectorElementCount();
+ if (InVTEC == WidenEC) {
if (N->getNumOperands() == 1)
return DAG.getNode(Opcode, DL, WidenVT, InOp);
return DAG.getNode(Opcode, DL, WidenVT, InOp, N->getOperand(1), Flags);
@@ -3674,9 +3674,10 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
// it an illegal type that might lead to repeatedly splitting the input
// and then widening it. To avoid this, we widen the input only if
// it results in a legal type.
- if (WidenNumElts % InVTNumElts == 0) {
+ if (WidenEC.isKnownMultipleOf(InVTEC.getKnownMinValue())) {
// Widen the input and call convert on the widened input vector.
- unsigned NumConcat = WidenNumElts/InVTNumElts;
+ unsigned NumConcat =
+ WidenEC.getKnownMinValue() / InVTEC.getKnownMinValue();
SmallVector<SDValue, 16> Ops(NumConcat, DAG.getUNDEF(InVT));
Ops[0] = InOp;
SDValue InVec = DAG.getNode(ISD::CONCAT_VECTORS, DL, InWidenVT, Ops);
@@ -3685,7 +3686,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
return DAG.getNode(Opcode, DL, WidenVT, InVec, N->getOperand(1), Flags);
}
- if (InVTNumElts % WidenNumElts == 0) {
+ if (InVTEC.isKnownMultipleOf(WidenEC.getKnownMinValue())) {
SDValue InVal = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InWidenVT, InOp,
DAG.getVectorIdxConstant(0, DL));
// Extract the input and convert the shorten input vector.
@@ -3697,7 +3698,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
// Otherwise unroll into some nasty scalar code and rebuild the vector.
EVT EltVT = WidenVT.getVectorElementType();
- SmallVector<SDValue, 16> Ops(WidenNumElts, DAG.getUNDEF(EltVT));
+ SmallVector<SDValue, 16> Ops(WidenEC.getFixedValue(), DAG.getUNDEF(EltVT));
// Use the original element count so we don't do more scalar opts than
// necessary.
unsigned MinElts = N->getValueType(0).getVectorNumElements();
diff --git a/llvm/test/CodeGen/AArch64/sve-fcvt.ll b/llvm/test/CodeGen/AArch64/sve-fcvt.ll
index bffd79278a80c..8b102679c9ddb 100644
--- a/llvm/test/CodeGen/AArch64/sve-fcvt.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fcvt.ll
@@ -15,6 +15,16 @@ define <vscale x 2 x float> @fcvts_nxv2f16(<vscale x 2 x half> %a) {
ret <vscale x 2 x float> %res
}
+define <vscale x 3 x float> @fcvts_nxv3f16(<vscale x 3 x half> %a) {
+; CHECK-LABEL: fcvts_nxv3f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: fcvt z0.s, p0/m, z0.h
+; CHECK-NEXT: ret
+ %res = fpext <vscale x 3 x half> %a to <vscale x 3 x float>
+ ret <vscale x 3 x float> %res
+}
+
define <vscale x 4 x float> @fcvts_nxv4f16(<vscale x 4 x half> %a) {
; CHECK-LABEL: fcvts_nxv4f16:
; CHECK: // %bb.0:
@@ -59,6 +69,16 @@ define <vscale x 2 x half> @fcvth_nxv2f32(<vscale x 2 x float> %a) {
ret <vscale x 2 x half> %res
}
+define <vscale x 3 x half> @fcvth_nxv3f32(<vscale x 3 x float> %a) {
+; CHECK-LABEL: fcvth_nxv3f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: fcvt z0.h, p0/m, z0.s
+; CHECK-NEXT: ret
+ %res = fptrunc <vscale x 3 x float> %a to <vscale x 3 x half>
+ ret <vscale x 3 x half> %res
+}
+
define <vscale x 4 x half> @fcvth_nxv4f32(<vscale x 4 x float> %a) {
; CHECK-LABEL: fcvth_nxv4f32:
; CHECK: // %bb.0:
@@ -143,6 +163,16 @@ define <vscale x 4 x i16> @fcvtzs_h_nxv4f32(<vscale x 4 x float> %a) {
ret <vscale x 4 x i16> %res
}
+define <vscale x 7 x i16> @fcvtzs_h_nxv7f16(<vscale x 7 x half> %a) {
+; CHECK-LABEL: fcvtzs_h_nxv7f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: fcvtzs z0.h, p0/m, z0.h
+; CHECK-NEXT: ret
+ %res = fptosi <vscale x 7 x half> %a to <vscale x 7 x i16>
+ ret <vscale x 7 x i16> %res
+}
+
define <vscale x 8 x i16> @fcvtzs_h_nxv8f16(<vscale x 8 x half> %a) {
; CHECK-LABEL: fcvtzs_h_nxv8f16:
; CHECK: // %bb.0:
@@ -193,6 +223,16 @@ define <vscale x 4 x i32> @fcvtzs_s_nxv4f16(<vscale x 4 x half> %a) {
ret <vscale x 4 x i32> %res
}
+define <vscale x 3 x i32> @fcvtzs_s_nxv3f16(<vscale x 3 x half> %a) {
+; CHECK-LABEL: fcvtzs_s_nxv3f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.h
+; CHECK-NEXT: ret
+ %res = fptosi <vscale x 3 x half> %a to <vscale x 3 x i32>
+ ret <vscale x 3 x i32> %res
+}
+
define <vscale x 4 x i32> @fcvtzs_s_nxv4f32(<vscale x 4 x float> %a) {
; CHECK-LABEL: fcvtzs_s_nxv4f32:
; CHECK: // %bb.0:
@@ -289,6 +329,16 @@ define <vscale x 4 x i16> @fcvtzu_h_nxv4f32(<vscale x 4 x float> %a) {
ret <vscale x 4 x i16> %res
}
+define <vscale x 7 x i16> @fcvtzu_h_nxv7f16(<vscale x 7 x half> %a) {
+; CHECK-LABEL: fcvtzu_h_nxv7f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: fcvtzu z0.h, p0/m, z0.h
+; CHECK-NEXT: ret
+ %res = fptoui <vscale x 7 x half> %a to <vscale x 7 x i16>
+ ret <vscale x 7 x i16> %res
+}
+
define <vscale x 8 x i16> @fcvtzu_h_nxv8f16(<vscale x 8 x half> %a) {
; CHECK-LABEL: fcvtzu_h_nxv8f16:
; CHECK: // %bb.0:
@@ -329,6 +379,26 @@ define <vscale x 2 x i32> @fcvtzu_s_nxv2f64(<vscale x 2 x double> %a) {
ret <vscale x 2 x i32> %res
}
+define <vscale x 3 x i32> @fcvtzu_s_nxv3f16(<vscale x 3 x half> %a) {
+; CHECK-LABEL: fcvtzu_s_nxv3f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: fcvtzu z0.s, p0/m, z0.h
+; CHECK-NEXT: ret
+ %res = fptoui <vscale x 3 x half> %a to <vscale x 3 x i32>
+ ret <vscale x 3 x i32> %res
+}
+
+define <vscale x 3 x i32> @fcvtzu_s_nxv3f32(<vscale x 3 x float> %a) {
+; CHECK-LABEL: fcvtzu_s_nxv3f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: fcvtzu z0.s, p0/m, z0.s
+; CHECK-NEXT: ret
+ %res = fptoui <vscale x 3 x float> %a to <vscale x 3 x i32>
+ ret <vscale x 3 x i32> %res
+}
+
define <vscale x 4 x i32> @fcvtzu_s_nxv4f16(<vscale x 4 x half> %a) {
; CHECK-LABEL: fcvtzu_s_nxv4f16:
; CHECK: // %bb.0:
@@ -422,6 +492,27 @@ define <vscale x 2 x half> @scvtf_h_nxv2i64(<vscale x 2 x i64> %a) {
ret <vscale x 2 x half> %res
}
+define <vscale x 3 x half> @scvtf_h_nxv3i1(<vscale x 3 x i1> %a) {
+; CHECK-LABEL: scvtf_h_nxv3i1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z0.s, p0/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: scvtf z0.h, p0/m, z0.s
+; CHECK-NEXT: ret
+ %res = sitofp <vscale x 3 x i1> %a to <vscale x 3 x half>
+ ret <vscale x 3 x half> %res
+}
+
+define <vscale x 3 x half> @scvtf_h_nxv3i16(<vscale x 3 x i16> %a) {
+; CHECK-LABEL: scvtf_h_nxv3i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: scvtf z0.h, p0/m, z0.h
+; CHECK-NEXT: ret
+ %res = sitofp <vscale x 3 x i16> %a to <vscale x 3 x half>
+ ret <vscale x 3 x half> %res
+}
+
define <vscale x 4 x half> @scvtf_h_nxv4i1(<vscale x 4 x i1> %a) {
; CHECK-LABEL: scvtf_h_nxv4i1:
; CHECK: // %bb.0:
@@ -453,6 +544,27 @@ define <vscale x 4 x half> @scvtf_h_nxv4i32(<vscale x 4 x i32> %a) {
ret <vscale x 4 x half> %res
}
+define <vscale x 7 x half> @scvtf_h_nxv7i1(<vscale x 7 x i1> %a) {
+; CHECK-LABEL: scvtf_h_nxv7i1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z0.h, p0/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: scvtf z0.h, p0/m, z0.h
+; CHECK-NEXT: ret
+ %res = sitofp <vscale x 7 x i1> %a to <vscale x 7 x half>
+ ret <vscale x 7 x half> %res
+}
+
+define <vscale x 7 x half> @scvtf_h_nxv7i16(<vscale x 7 x i16> %a) {
+; CHECK-LABEL: scvtf_h_nxv7i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: scvtf z0.h, p0/m, z0.h
+; CHECK-NEXT: ret
+ %res = sitofp <vscale x 7 x i16> %a to <vscale x 7 x half>
+ ret <vscale x 7 x half> %res
+}
+
define <vscale x 8 x half> @scvtf_h_nxv8i1(<vscale x 8 x i1> %a) {
; CHECK-LABEL: scvtf_h_nxv8i1:
; CHECK: // %bb.0:
@@ -505,6 +617,27 @@ define <vscale x 2 x float> @scvtf_s_nxv2i64(<vscale x 2 x i64> %a) {
ret <vscale x 2 x float> %res
}
+define <vscale x 3 x float> @scvtf_s_nxv3i1(<vscale x 3 x i1> %a) {
+; CHECK-LABEL: scvtf_s_nxv3i1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z0.s, p0/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: scvtf z0.s, p0/m, z0.s
+; CHECK-NEXT: ret
+ %res = sitofp <vscale x 3 x i1> %a to <vscale x 3 x float>
+ ret <vscale x 3 x float> %res
+}
+
+define <vscale x 3 x float> @scvtf_s_nxv3i32(<vscale x 3 x i32> %a) {
+; CHECK-LABEL: scvtf_s_nxv3i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: scvtf z0.s, p0/m, z0.s
+; CHECK-NEXT: ret
+ %res = sitofp <vscale x 3 x i32> %a to <vscale x 3 x float>
+ ret <vscale x 3 x float> %res
+}
+
define <vscale x 4 x float> @scvtf_s_nxv4i1(<vscale x 4 x i1> %a) {
; CHECK-LABEL: scvtf_s_nxv4i1:
; CHECK: // %bb.0:
@@ -600,6 +733,37 @@ define <vscale x 2 x half> @ucvtf_h_nxv2i64(<vscale x 2 x i64> %a) {
ret <vscale x 2 x half> %res
}
+define <vscale x 3 x half> @ucvtf_h_nxv3i1(<vscale x 3 x i1> %a) {
+; CHECK-LABEL: ucvtf_h_nxv3i1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov z0.s, p0/z, #1 // =0x1
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: ucvtf z0.h, p0/m, z0.s
+; CHECK-NEXT: ret
+ %res = uitofp <vscale x 3 x i1> %a to <vscale x 3 x half>
+ ret <vscale x 3 x half> %res
+}
+
+define <vscale x 3 x half> @ucvtf_h_nxv3i16(<vscale x 3 x i16> %a) {
+; CHECK-LABEL: ucvtf_h_nxv3i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: ucvtf z0.h, p0/m, z0.h
+; CHECK-NEXT: ret
+ %res = uitofp <vscale x 3 x i16> %a to <vscale x 3 x half>
+ ret <vscale x 3 x half> %res
+}
+
+define <vscale x 3 x half> @ucvtf_h_nxv3i32(<vscale x 3 x i32> %a) {
+; CHECK-LABEL: ucvtf_h_nxv3i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: ucvtf z0.h, p0/m, z0.s
+; CHECK-NEXT: ret
+ %res = uitofp <vscale x 3 x i32> %a to <vscale x 3 x half>
+ ret <vscale x 3 x half> %res
+}
+
define <vscale x 4 x half> @ucvtf_h_nxv4i1(<vscale x 4 x i1> %a) {
; CHECK-LABEL: ucvtf_h_nxv4i1:
; CHECK: // %bb.0:
More information about the llvm-commits mailing list