[llvm] c5ec00e - [TargetLowering] Improve legalization of scalable vector types

Wed May 12 08:40:51 PDT 2021

Author: Fraser Cormack
Date: 2021-05-12T16:33:07+01:00
New Revision: c5ec00e62b0e7b91eb07e25441c7ed38227f5bf3

URL: https://github.com/llvm/llvm-project/commit/c5ec00e62b0e7b91eb07e25441c7ed38227f5bf3
DIFF: https://github.com/llvm/llvm-project/commit/c5ec00e62b0e7b91eb07e25441c7ed38227f5bf3.diff

LOG: [TargetLowering] Improve legalization of scalable vector types

This patch extends the vector type-conversion and legalization capabilities of
scalable vector types.

Firstly, `vscale x 1` types now behave more like the corresponding `vscale x
2+` types. This enables the integer promotion legalization of extended scalable
types, such as the promotion of `<vscale x 1 x i5>` to `<vscale x 1 x i8>`.

These `vscale x 1` types are also now better handled by
`getVectorTypeBreakdown`, where what looks like older handling for 1-element
fixed-length vector types was spuriously updated to include scalable types.

Widening of scalable types is now better supported, by using `INSERT_SUBVECTOR`
to insert the smaller scalable vector "value" type into the wider scalable
vector "part" type. This allows AArch64 to pass and return `vscale x 1` types
by value by widening.

There are still cases where we are unable to legalize `vscale x 1` types, such
as where expansion would require splitting the vector in two.

Reviewed By: sdesmalen

Differential Revision: https://reviews.llvm.org/D102073

Added: 
    llvm/test/CodeGen/AArch64/sve-widen-scalable-vectortype.ll
    llvm/test/CodeGen/RISCV/rvv/legalize-scalable-vectortype.ll

Modified: 
    llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
    llvm/lib/CodeGen/TargetLoweringBase.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index a8db972de10a2..4b2d811a1fa04 100644

--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -607,30 +607,39 @@ static void getCopyToParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val,
     std::reverse(Parts, Parts + OrigNumParts);
 }
 
-static SDValue widenVectorToPartType(SelectionDAG &DAG,
-                                     SDValue Val, const SDLoc &DL, EVT PartVT) {
-  if (!PartVT.isFixedLengthVector())
+static SDValue widenVectorToPartType(SelectionDAG &DAG, SDValue Val,
+                                     const SDLoc &DL, EVT PartVT) {
+  if (!PartVT.isVector())
     return SDValue();
 
   EVT ValueVT = Val.getValueType();
-  unsigned PartNumElts = PartVT.getVectorNumElements();
-  unsigned ValueNumElts = ValueVT.getVectorNumElements();
-  if (PartNumElts > ValueNumElts &&
-      PartVT.getVectorElementType() == ValueVT.getVectorElementType()) {
-    EVT ElementVT = PartVT.getVectorElementType();
-    // Vector widening case, e.g. <2 x float> -> <4 x float>.  Shuffle in
-    // undef elements.
-    SmallVector<SDValue, 16> Ops;
-    DAG.ExtractVectorElements(Val, Ops);
-    SDValue EltUndef = DAG.getUNDEF(ElementVT);
-    for (unsigned i = ValueNumElts, e = PartNumElts; i != e; ++i)
-      Ops.push_back(EltUndef);
-
-    // FIXME: Use CONCAT for 2x -> 4x.
-    return DAG.getBuildVector(PartVT, DL, Ops);
-  }
+  ElementCount PartNumElts = PartVT.getVectorElementCount();
+  ElementCount ValueNumElts = ValueVT.getVectorElementCount();
+
+  // We only support widening vectors with equivalent element types and
+  // fixed/scalable properties. If a target needs to widen a fixed-length type
+  // to a scalable one, it should be possible to use INSERT_SUBVECTOR below.
+  if (ElementCount::isKnownLE(PartNumElts, ValueNumElts) ||
+      PartNumElts.isScalable() != ValueNumElts.isScalable() ||
+      PartVT.getVectorElementType() != ValueVT.getVectorElementType())
+    return SDValue();
 
-  return SDValue();
+  // Widening a scalable vector to another scalable vector is done by inserting
+  // the vector into a larger undef one.
+  if (PartNumElts.isScalable())
+    return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, PartVT, DAG.getUNDEF(PartVT),
+                       Val, DAG.getVectorIdxConstant(0, DL));
+
+  EVT ElementVT = PartVT.getVectorElementType();
+  // Vector widening case, e.g. <2 x float> -> <4 x float>.  Shuffle in
+  // undef elements.
+  SmallVector<SDValue, 16> Ops;
+  DAG.ExtractVectorElements(Val, Ops);
+  SDValue EltUndef = DAG.getUNDEF(ElementVT);
+  Ops.append((PartNumElts - ValueNumElts).getFixedValue(), EltUndef);
+
+  // FIXME: Use CONCAT for 2x -> 4x.
+  return DAG.getBuildVector(PartVT, DL, Ops);
 }
 
 /// getCopyToPartsVector - Create a series of nodes that contain the specified

diff  --git a/llvm/lib/CodeGen/TargetLoweringBase.cpp b/llvm/lib/CodeGen/TargetLoweringBase.cpp
index 644480d26fac0..02eb9a50391d8 100644
--- a/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -976,9 +976,6 @@ TargetLoweringBase::getTypeConversion(LLVMContext &Context, EVT VT) const {
   if (NumElts.isScalar())
     return LegalizeKind(TypeScalarizeVector, EltVT);
 
-  if (VT.getVectorElementCount() == ElementCount::getScalable(1))
-    report_fatal_error("Cannot legalize this vector");
-
   // Try to widen vector elements until the element type is a power of two and
   // promote it to a legal type later on, for example:
   // <3 x i8> -> <4 x i8> -> <4 x i32>
@@ -996,9 +993,12 @@ TargetLoweringBase::getTypeConversion(LLVMContext &Context, EVT VT) const {
 
     // If type is to be expanded, split the vector.
     //  <4 x i140> -> <2 x i140>
-    if (LK.first == TypeExpandInteger)
+    if (LK.first == TypeExpandInteger) {
+      if (VT.getVectorElementCount() == ElementCount::getScalable(1))
+        report_fatal_error("Cannot legalize this scalable vector");
       return LegalizeKind(TypeSplitVector,
                           VT.getHalfNumVectorElementsVT(Context));
+    }
 
     // Promote the integer element types until a legal vector type is found
     // or until the element integer type is too big. If a legal type was not
@@ -1057,6 +1057,9 @@ TargetLoweringBase::getTypeConversion(LLVMContext &Context, EVT VT) const {
     return LegalizeKind(TypeWidenVector, NVT);
   }
 
+  if (VT.getVectorElementCount() == ElementCount::getScalable(1))
+    report_fatal_error("Cannot legalize this vector");
+
   // Vectors with illegal element types are expanded.
   EVT NVT = EVT::getVectorVT(Context, EltVT,
                              VT.getVectorElementCount().divideCoefficientBy(2));
@@ -1497,10 +1500,10 @@ MVT::SimpleValueType TargetLoweringBase::getCmpLibcallReturnType() const {
 /// This method returns the number of registers needed, and the VT for each
 /// register.  It also returns the VT and quantity of the intermediate values
 /// before they are promoted/expanded.
-unsigned TargetLoweringBase::getVectorTypeBreakdown(LLVMContext &Context, EVT VT,
-                                                EVT &IntermediateVT,
-                                                unsigned &NumIntermediates,
-                                                MVT &RegisterVT) const {
+unsigned TargetLoweringBase::getVectorTypeBreakdown(LLVMContext &Context,
+                                                    EVT VT, EVT &IntermediateVT,
+                                                    unsigned &NumIntermediates,
+                                                    MVT &RegisterVT) const {
   ElementCount EltCnt = VT.getVectorElementCount();
 
   // If there is a wider vector type with the same element type as this one,
@@ -1509,7 +1512,7 @@ unsigned TargetLoweringBase::getVectorTypeBreakdown(LLVMContext &Context, EVT VT
   // This handles things like <2 x float> -> <4 x float> and
   // <4 x i1> -> <4 x i32>.
   LegalizeTypeAction TA = getTypeAction(Context, VT);
-  if (EltCnt.getKnownMinValue() != 1 &&
+  if (!EltCnt.isScalar() &&
       (TA == TypeWidenVector || TA == TypePromoteInteger)) {
     EVT RegisterEVT = getTypeToTransformTo(Context, VT);
     if (isTypeLegal(RegisterEVT)) {

diff  --git a/llvm/test/CodeGen/AArch64/sve-widen-scalable-vectortype.ll b/llvm/test/CodeGen/AArch64/sve-widen-scalable-vectortype.ll
new file mode 100644
index 0000000000000..0675944c6d07e
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-widen-scalable-vectortype.ll
@@ -0,0 +1,18 @@
+; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve -asm-verbose=0 < %s
+
+; Test that scalable vectors that are smaller than the legal vector size can be
+; properly widened to part vectors.
+
+;
+; Vectors that need widening
+;
+
+; For now, just check that these don't crash during legalization. Widening of
+; scalable-vector INSERT_SUBVECTOR and EXTRACT_SUBVECTOR is not yet available.
+define <vscale x 1 x i32> @widen_1i32(<vscale x 1 x i32> %illegal) nounwind {
+  ret <vscale x 1 x i32> %illegal
+}
+
+define <vscale x 1 x double> @widen_1f64(<vscale x 1 x double> %illegal) nounwind {
+  ret <vscale x 1 x double> %illegal
+}

diff  --git a/llvm/test/CodeGen/RISCV/rvv/legalize-scalable-vectortype.ll b/llvm/test/CodeGen/RISCV/rvv/legalize-scalable-vectortype.ll
new file mode 100644
index 0000000000000..a3b8f8cb8dfaf
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/legalize-scalable-vectortype.ll
@@ -0,0 +1,27 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-v -verify-machineinstrs < %s | FileCheck %s
+
+define <vscale x 4 x i5> @trunc_nxv4i32_to_nxv4i5(<vscale x 4 x i32> %a) {
+; CHECK-LABEL: trunc_nxv4i32_to_nxv4i5:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16,m1,ta,mu
+; CHECK-NEXT:    vnsrl.wi v25, v8, 0
+; CHECK-NEXT:    vsetvli a0, zero, e8,mf2,ta,mu
+; CHECK-NEXT:    vnsrl.wi v8, v25, 0
+; CHECK-NEXT:    ret
+  %v = trunc <vscale x 4 x i32> %a to <vscale x 4 x i5>
+  ret <vscale x 4 x i5> %v
+}
+
+define <vscale x 1 x i5> @trunc_nxv1i32_to_nxv1i5(<vscale x 1 x i32> %a) {
+; CHECK-LABEL: trunc_nxv1i32_to_nxv1i5:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16,mf4,ta,mu
+; CHECK-NEXT:    vnsrl.wi v25, v8, 0
+; CHECK-NEXT:    vsetvli a0, zero, e8,mf8,ta,mu
+; CHECK-NEXT:    vnsrl.wi v8, v25, 0
+; CHECK-NEXT:    ret
+  %v = trunc <vscale x 1 x i32> %a to <vscale x 1 x i5>
+  ret <vscale x 1 x i5> %v
+}