[llvm] 6238b8e - [LegalizeTypes] Factor in vscale_range when widening insert_subvector

Mon Aug 14 01:58:21 PDT 2023

Author: Luke Lau
Date: 2023-08-14T09:58:15+01:00
New Revision: 6238b8ea6399e485a197b5962c033974ed8fca37

URL: https://github.com/llvm/llvm-project/commit/6238b8ea6399e485a197b5962c033974ed8fca37
DIFF: https://github.com/llvm/llvm-project/commit/6238b8ea6399e485a197b5962c033974ed8fca37.diff

LOG: [LegalizeTypes] Factor in vscale_range when widening insert_subvector

Currently when widening operands for insert_subvector nodes, we check
first that the indices are valid by seeing if the subvector is
statically known to be smaller than or equal to the in-place vector.

However if we're inserting a fixed subvector into a scalable vector we rely on
the minimum vector length of the latter. This patch extends the widening logic
to also take into account the minimum vscale from the vscale_range attribute,
so we can handle more scenarios where we know the scalable vector is large
enough to contain the subvector.

Fixes https://github.com/llvm/llvm-project/issues/63437

Reviewed By: craig.topper

Differential Revision: https://reviews.llvm.org/D153519

Added: 
    

Modified: 
    llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
    llvm/test/CodeGen/RISCV/rvv/insert-subvector.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 8c117c1c74dc78..93194c3cdb25dd 100644

--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -6317,8 +6317,30 @@ SDValue DAGTypeLegalizer::WidenVecOp_INSERT_SUBVECTOR(SDNode *N) {
   if (getTypeAction(SubVec.getValueType()) == TargetLowering::TypeWidenVector)
     SubVec = GetWidenedVector(SubVec);
 
-  if (SubVec.getValueType().knownBitsLE(VT) && InVec.isUndef() &&
-      N->getConstantOperandVal(2) == 0)
+  EVT SubVT = SubVec.getValueType();
+
+  // Whether or not all the elements of the widened SubVec will be inserted into
+  // valid indices of VT.
+  bool IndicesValid = false;
+  // If we statically know that VT can fit SubVT, the indices are valid.
+  if (VT.knownBitsGE(SubVT))
+    IndicesValid = true;
+  else if (VT.isScalableVector() && SubVT.isFixedLengthVector()) {
+    // Otherwise, if we're inserting a fixed vector into a scalable vector and
+    // we know the minimum vscale we can work out if it's valid ourselves.
+    Attribute Attr = DAG.getMachineFunction().getFunction().getFnAttribute(
+        Attribute::VScaleRange);
+    if (Attr.isValid()) {
+      unsigned VScaleMin = Attr.getVScaleRangeMin();
+      if (VT.getSizeInBits().getKnownMinValue() * VScaleMin >=
+          SubVT.getFixedSizeInBits())
+        IndicesValid = true;
+    }
+  }
+
+  // We need to make sure that the indices are still valid, otherwise we might
+  // widen what was previously well-defined to something undefined.
+  if (IndicesValid && InVec.isUndef() && N->getConstantOperandVal(2) == 0)
     return DAG.getNode(ISD::INSERT_SUBVECTOR, SDLoc(N), VT, InVec, SubVec,
                        N->getOperand(2));
 

diff  --git a/llvm/test/CodeGen/RISCV/rvv/insert-subvector.ll b/llvm/test/CodeGen/RISCV/rvv/insert-subvector.ll
index 72fce35c1a957d..950c55ba1cf174 100644
--- a/llvm/test/CodeGen/RISCV/rvv/insert-subvector.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/insert-subvector.ll
@@ -495,6 +495,18 @@ define void @insert_nxv8i64_nxv16i64_hi(<vscale x 8 x i64> %sv0, <vscale x 16 x
   ret void
 }
 
+; We should be able to widen the <3 x i64> subvector to a <4 x i64> here because
+; we know that the minimum vscale is 2
+define <vscale x 2 x i64> @insert_nxv2i64_nxv3i64(<3 x i64> %sv) #0 {
+; CHECK-LABEL: insert_nxv2i64_nxv3i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ret
+  %vec = call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v3i64(<vscale x 2 x i64> undef, <3 x i64> %sv, i64 0)
+  ret <vscale x 2 x i64> %vec
+}
+
+attributes #0 = { vscale_range(2,1024) }
+
 declare <vscale x 4 x i1> @llvm.vector.insert.nxv1i1.nxv4i1(<vscale x 4 x i1>, <vscale x 1 x i1>, i64)
 declare <vscale x 32 x i1> @llvm.vector.insert.nxv8i1.nxv32i1(<vscale x 32 x i1>, <vscale x 8 x i1>, i64)
 
@@ -512,3 +524,5 @@ declare <vscale x 16 x i32> @llvm.vector.insert.nxv1i32.nxv16i32(<vscale x 16 x
 declare <vscale x 16 x i32> @llvm.vector.insert.nxv2i32.nxv16i32(<vscale x 16 x i32>, <vscale x 2 x i32>, i64 %idx)
 declare <vscale x 16 x i32> @llvm.vector.insert.nxv4i32.nxv16i32(<vscale x 16 x i32>, <vscale x 4 x i32>, i64 %idx)
 declare <vscale x 16 x i32> @llvm.vector.insert.nxv8i32.nxv16i32(<vscale x 16 x i32>, <vscale x 8 x i32>, i64 %idx)
+
+declare <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v3i64(<vscale x 2 x i64>, <3 x i64>, i64 %idx)