[llvm] [RISCV] Fix crash when lowering fixed length insert_subvector into undef at 0 (PR #67535)

Wed Sep 27 03:09:05 PDT 2023

llvmbot wrote:




@llvm/pr-subscribers-backend-risc-v

<details>
<summary>Changes</summary>

This fixes a crash seen in https://github.com/openxla/iree/issues/15038 and
elsewhere. We were reducing the LMUL for inserts into undef at 0 without
inserting it back into the original LMUL at the end. But we don't actually
perform the slidedown in this path, so we can just skip reducing LMUL here.


---
Full diff: https://github.com/llvm/llvm-project/pull/67535.diff


2 Files Affected:

- (modified) llvm/lib/Target/RISCV/RISCVISelLowering.cpp (+9-4) 
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll (+18-11) 


``````````diff

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 2b8e5aeeb86405a..bab7d2070733382 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -8664,6 +8664,14 @@ SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
       Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
     }
 
+    if (OrigIdx == 0 && Vec.isUndef() && VecVT.isFixedLengthVector()) {
+      SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT,
+                           DAG.getUNDEF(ContainerVT), SubVec,
+                           DAG.getConstant(0, DL, XLenVT));
+      SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
+      return DAG.getBitcast(Op.getValueType(), SubVec);
+    }
+
     // Shrink down Vec so we're performing the slideup on a smaller LMUL.
     unsigned LastIdx = OrigIdx + SubVecVT.getVectorNumElements() - 1;
     MVT OrigContainerVT = ContainerVT;
@@ -8678,10 +8686,7 @@ SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
     SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT,
                          DAG.getUNDEF(ContainerVT), SubVec,
                          DAG.getConstant(0, DL, XLenVT));
-    if (OrigIdx == 0 && Vec.isUndef() && VecVT.isFixedLengthVector()) {
-      SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
-      return DAG.getBitcast(Op.getValueType(), SubVec);
-    }
+
     SDValue Mask =
         getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
     // Set the vector length to only the number of elements we care about. Note
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll
index 6a9212ed309a8ef..a4f460f1667a2fe 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll
@@ -95,17 +95,6 @@ define <vscale x 8 x i32> @insert_nxv8i32_v8i32_8(<vscale x 8 x i32> %vec, ptr %
   ret <vscale x 8 x i32> %v
 }
 
-define <vscale x 8 x i32> @insert_nxv8i32_undef_v2i32_0(ptr %svp) {
-; CHECK-LABEL: insert_nxv8i32_undef_v2i32_0:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-NEXT:    vle32.v v8, (a0)
-; CHECK-NEXT:    ret
-  %sv = load <2 x i32>, ptr %svp
-  %v = call <vscale x 8 x i32> @llvm.vector.insert.v2i32.nxv8i32(<vscale x 8 x i32> undef, <2 x i32> %sv, i64 0)
-  ret <vscale x 8 x i32> %v
-}
-
 define void @insert_v4i32_v2i32_0(ptr %vp, ptr %svp) {
 ; CHECK-LABEL: insert_v4i32_v2i32_0:
 ; CHECK:       # %bb.0:
@@ -636,3 +625,21 @@ declare <vscale x 2 x i16> @llvm.vector.insert.v2i16.nxv2i16(<vscale x 2 x i16>,
 declare <vscale x 8 x i32> @llvm.vector.insert.v2i32.nxv8i32(<vscale x 8 x i32>, <2 x i32>, i64)
 declare <vscale x 8 x i32> @llvm.vector.insert.v4i32.nxv8i32(<vscale x 8 x i32>, <4 x i32>, i64)
 declare <vscale x 8 x i32> @llvm.vector.insert.v8i32.nxv8i32(<vscale x 8 x i32>, <8 x i32>, i64)
+
+; We emit insert_subvectors of fixed vectors at index 0 into undefs as a
+; copy_to_regclass or insert_subreg, depending on the register classes of the
+; vector types. Make sure that we use the correct type and not the shrunken
+; LMUL=1 type, otherwise we will end up with an invalid extract_subvector when
+; converting it from scalable->fixed, e.g. we get this for VLEN=128:
+;
+;   t14: nxv2i32 = insert_subvector undef:nxv2i32, t4, Constant:i64<0>
+; t15: v8i32 = extract_subvector t14, Constant:i64<0>
+declare <4 x i32> @llvm.vector.extract.v4i32.v8i32(<8 x i32>, i64)
+define <4 x i32> @insert_extract_v8i32_v2i32_0(<2 x i32> %v) {
+; CHECK-LABEL: insert_extract_v8i32_v2i32_0:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ret
+  %1 = call <8 x i32> @llvm.vector.insert.v2i32.v8i32(<8 x i32> poison, <2 x i32> %v, i64 0)
+  %2 = call <4 x i32> @llvm.vector.extract.v4i32.v8i32(<8 x i32> %1, i64 0)
+  ret <4 x i32> %2
+}

``````````

</details>


https://github.com/llvm/llvm-project/pull/67535