[llvm] [RISCV] Handle fixed length vectors with exact VLEN in lowerINSERT_SUBVECTOR (PR #84107)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 5 19:32:51 PST 2024
llvmbot wrote:
@llvm/pr-subscribers-backend-risc-v
Author: Luke Lau (lukel97)
This is the insert_subvector equivalent of #79949, where we can avoid sliding up by the full LMUL amount if we know the exact subregister the subvector will be inserted into.
This mirrors the lowerEXTRACT_SUBVECTOR changes in that we handle it in two parts:
- We handle fixed-length subvector types by converting the subvector to a scalable vector. Unlike EXTRACT_SUBVECTOR, though, we may also need to convert the vector being inserted into.
- Whenever we don't need a vslideup, because either the subvector aligns to a vector register group *or* the vector being inserted into is undef, we need to emit an insert_subreg ourselves, because RISCVISelDAGToDAG::Select doesn't correctly handle fixed-length subvectors yet: see d7a28f7ad.
I've left RISCVISelDAGToDAG::Select untouched for now (minus relaxing an invariant), so that the insert_subvector and extract_subvector code paths are the same.
We should teach it to properly handle fixed-length subvectors in a follow-up patch, so that the "exact subregister" logic is handled in one place instead of being spread across both RISCVISelDAGToDAG.cpp and RISCVISelLowering.cpp.
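A minimal sketch of the "exact subregister" index arithmetic, under the assumption that VLEN is exactly known. The helper name and the simplified decomposition below are illustrative only, not the actual LLVM helpers (the real code goes through decomposeSubvectorInsertExtractToSubRegs):

```cpp
// Illustrative arithmetic only; not the actual LLVM code.
#include <cstdio>

struct SubRegLoc {
  unsigned RegInGroup; // which register of the LMUL register group
  unsigned ElemOffset; // element index within that register
};

// With an exactly known VLEN, a fixed-length insertion index maps to a
// specific register in the group plus an element offset inside it.
static SubRegLoc locateFixedInsert(unsigned OrigIdx, unsigned ElemBits,
                                   unsigned VLen) {
  unsigned ElemsPerReg = VLen / ElemBits;
  return {OrigIdx / ElemsPerReg, OrigIdx % ElemsPerReg};
}

int main() {
  // Inserting <2 x i32> at index 6 into <vscale x 8 x i32> with VLEN=128:
  // there are 4 i32 elements per register, so the subvector lands in
  // register 1 of the LMUL=4 group at element offset 2. That is why the VLS
  // check lines below show "vslideup.vi v9, v12, 2" on a single m1 register
  // instead of a slide across the whole m4 group.
  SubRegLoc Loc =
      locateFixedInsert(/*OrigIdx=*/6, /*ElemBits=*/32, /*VLen=*/128);
  std::printf("reg %u, offset %u\n", Loc.RegInGroup, Loc.ElemOffset);
  return 0;
}
```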
---
Patch is 36.00 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/84107.diff
4 Files Affected:
- (modified) llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp (+7-1)
- (modified) llvm/lib/Target/RISCV/RISCVISelLowering.cpp (+99-31)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll (+114-49)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-concat.ll (+139-133)
``````````diff
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index 1b8c1434c9f2d9..3fea03ec892dc2 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -2063,8 +2063,14 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
MVT SubVecContainerVT = SubVecVT;
// Establish the correct scalable-vector types for any fixed-length type.
if (SubVecVT.isFixedLengthVector()) {
- assert(Idx == 0 && V.isUndef());
SubVecContainerVT = TLI.getContainerForFixedLengthVector(SubVecVT);
+ bool AlignedToVecReg = false;
+ if (auto VLen = Subtarget->getRealVLen();
+ VLen && SubVecVT.getSizeInBits() ==
+ SubVecContainerVT.getSizeInBits().getKnownMinValue() *
+ (*VLen / RISCV::RVVBitsPerBlock))
+ AlignedToVecReg = true;
+ assert(Idx == 0 && (AlignedToVecReg || V.isUndef()));
}
MVT ContainerVT = VT;
if (VT.isFixedLengthVector())
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 4c3dc63afd878d..8fa7e289924483 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -9596,6 +9596,21 @@ SDValue RISCVTargetLowering::lowerVPREDUCE(SDValue Op,
Vec, Mask, VL, DL, DAG, Subtarget);
}
+/// Returns true if \p LHS is known to be equal to \p RHS, taking into account
+/// if VLEN is exactly known by \p Subtarget and thus vscale when handling
+/// scalable quantities.
+static bool isKnownEQ(ElementCount LHS, ElementCount RHS,
+ const RISCVSubtarget &Subtarget) {
+ if (auto VLen = Subtarget.getRealVLen()) {
+ const unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
+ if (LHS.isScalable())
+ LHS = ElementCount::getFixed(LHS.getKnownMinValue() * Vscale);
+ if (RHS.isScalable())
+ RHS = ElementCount::getFixed(RHS.getKnownMinValue() * Vscale);
+ }
+ return LHS == RHS;
+}
+
SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
SelectionDAG &DAG) const {
SDValue Vec = Op.getOperand(0);
@@ -9645,12 +9660,13 @@ SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
}
}
- // If the subvector vector is a fixed-length type, we cannot use subregister
- // manipulation to simplify the codegen; we don't know which register of a
- // LMUL group contains the specific subvector as we only know the minimum
- // register size. Therefore we must slide the vector group up the full
- // amount.
- if (SubVecVT.isFixedLengthVector()) {
+ // If the subvector vector is a fixed-length type and we don't know VLEN
+ // exactly, we cannot use subregister manipulation to simplify the codegen; we
+ // don't know which register of a LMUL group contains the specific subvector
+ // as we only know the minimum register size. Therefore we must slide the
+ // vector group up the full amount.
+ const auto VLen = Subtarget.getRealVLen();
+ if (SubVecVT.isFixedLengthVector() && !VLen) {
if (OrigIdx == 0 && Vec.isUndef() && !VecVT.isFixedLengthVector())
return Op;
MVT ContainerVT = VecVT;
@@ -9698,41 +9714,92 @@ SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
return DAG.getBitcast(Op.getValueType(), SubVec);
}
- unsigned SubRegIdx, RemIdx;
- std::tie(SubRegIdx, RemIdx) =
- RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
- VecVT, SubVecVT, OrigIdx, TRI);
+ MVT ContainerVecVT = VecVT;
+ if (VecVT.isFixedLengthVector()) {
+ ContainerVecVT = getContainerForFixedLengthVector(VecVT);
+ Vec = convertToScalableVector(ContainerVecVT, Vec, DAG, Subtarget);
+ }
- RISCVII::VLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(SubVecVT);
+ MVT ContainerSubVecVT = SubVecVT;
+ if (SubVecVT.isFixedLengthVector()) {
+ ContainerSubVecVT = getContainerForFixedLengthVector(SubVecVT);
+ SubVec = convertToScalableVector(ContainerSubVecVT, SubVec, DAG, Subtarget);
+ }
+
+ unsigned SubRegIdx;
+ ElementCount RemIdx;
+ // insert_subvector scales the index by vscale if the subvector is scalable,
+ // and decomposeSubvectorInsertExtractToSubRegs takes this into account. So if
+ // we have a fixed length subvector, we need to adjust the index by 1/vscale.
+ if (SubVecVT.isFixedLengthVector()) {
+ assert(VLen);
+ unsigned Vscale = *VLen / RISCV::RVVBitsPerBlock;
+ auto Decompose =
+ RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
+ ContainerVecVT, ContainerSubVecVT, OrigIdx / Vscale, TRI);
+ SubRegIdx = Decompose.first;
+ RemIdx = ElementCount::getFixed((Decompose.second * Vscale) +
+ (OrigIdx % Vscale));
+ } else {
+ auto Decompose =
+ RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
+ ContainerVecVT, ContainerSubVecVT, OrigIdx, TRI);
+ SubRegIdx = Decompose.first;
+ RemIdx = ElementCount::getScalable(Decompose.second);
+ }
+
+ RISCVII::VLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(ContainerSubVecVT);
bool IsSubVecPartReg = SubVecLMUL == RISCVII::VLMUL::LMUL_F2 ||
SubVecLMUL == RISCVII::VLMUL::LMUL_F4 ||
SubVecLMUL == RISCVII::VLMUL::LMUL_F8;
+ bool AlignedToVecReg = !IsSubVecPartReg;
+ if (SubVecVT.isFixedLengthVector())
+ AlignedToVecReg &= SubVecVT.getSizeInBits() ==
+ ContainerSubVecVT.getSizeInBits().getKnownMinValue() *
+ (*VLen / RISCV::RVVBitsPerBlock);
// 1. If the Idx has been completely eliminated and this subvector's size is
// a vector register or a multiple thereof, or the surrounding elements are
// undef, then this is a subvector insert which naturally aligns to a vector
// register. These can easily be handled using subregister manipulation.
- // 2. If the subvector is smaller than a vector register, then the insertion
- // must preserve the undisturbed elements of the register. We do this by
- // lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1 vector type
- // (which resolves to a subregister copy), performing a VSLIDEUP to place the
- // subvector within the vector register, and an INSERT_SUBVECTOR of that
+ // 2. If the subvector isn't exactly aligned to a vector register group, then
+ // the insertion must preserve the undisturbed elements of the register. We do
+ // this by lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1 vector
+ // type (which resolves to a subregister copy), performing a VSLIDEUP to place
+ // the subvector within the vector register, and an INSERT_SUBVECTOR of that
// LMUL=1 type back into the larger vector (resolving to another subregister
// operation). See below for how our VSLIDEUP works. We go via a LMUL=1 type
// to avoid allocating a large register group to hold our subvector.
- if (RemIdx == 0 && (!IsSubVecPartReg || Vec.isUndef()))
+ if (RemIdx.isZero() && (AlignedToVecReg || Vec.isUndef())) {
+ if (SubVecVT.isFixedLengthVector()) {
+ // We may get NoSubRegister if inserting at index 0 and the subvec
+ // container is the same as the vector, e.g. vec=v4i32,subvec=v4i32,idx=0
+ if (SubRegIdx == RISCV::NoSubRegister) {
+ assert(OrigIdx == 0);
+ return Op;
+ }
+
+ SDValue Insert =
+ DAG.getTargetInsertSubreg(SubRegIdx, DL, ContainerVecVT, Vec, SubVec);
+ if (VecVT.isFixedLengthVector())
+ Insert = convertFromScalableVector(VecVT, Insert, DAG, Subtarget);
+ return Insert;
+ }
return Op;
+ }
// VSLIDEUP works by leaving elements 0<i<OFFSET undisturbed, elements
// OFFSET<=i<VL set to the "subvector" and vl<=i<VLMAX set to the tail policy
// (in our case undisturbed). This means we can set up a subvector insertion
// where OFFSET is the insertion offset, and the VL is the OFFSET plus the
// size of the subvector.
- MVT InterSubVT = VecVT;
+ MVT InterSubVT = ContainerVecVT;
SDValue AlignedExtract = Vec;
- unsigned AlignedIdx = OrigIdx - RemIdx;
- if (VecVT.bitsGT(getLMUL1VT(VecVT))) {
- InterSubVT = getLMUL1VT(VecVT);
+ unsigned AlignedIdx = OrigIdx - RemIdx.getKnownMinValue();
+ if (SubVecVT.isFixedLengthVector())
+ AlignedIdx /= *VLen / RISCV::RVVBitsPerBlock;
+ if (ContainerVecVT.bitsGT(getLMUL1VT(ContainerVecVT))) {
+ InterSubVT = getLMUL1VT(ContainerVecVT);
// Extract a subvector equal to the nearest full vector register type. This
// should resolve to a EXTRACT_SUBREG instruction.
AlignedExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec,
@@ -9743,25 +9810,23 @@ SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
DAG.getUNDEF(InterSubVT), SubVec,
DAG.getVectorIdxConstant(0, DL));
- auto [Mask, VL] = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);
+ auto [Mask, VL] = getDefaultVLOps(VecVT, ContainerVecVT, DL, DAG, Subtarget);
- ElementCount EndIndex =
- ElementCount::getScalable(RemIdx) + SubVecVT.getVectorElementCount();
- VL = computeVLMax(SubVecVT, DL, DAG);
+ ElementCount EndIndex = RemIdx + SubVecVT.getVectorElementCount();
+ VL = DAG.getElementCount(DL, XLenVT, SubVecVT.getVectorElementCount());
// Use tail agnostic policy if we're inserting over InterSubVT's tail.
unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED;
- if (EndIndex == InterSubVT.getVectorElementCount())
+ if (isKnownEQ(EndIndex, InterSubVT.getVectorElementCount(), Subtarget))
Policy = RISCVII::TAIL_AGNOSTIC;
// If we're inserting into the lowest elements, use a tail undisturbed
// vmv.v.v.
- if (RemIdx == 0) {
+ if (RemIdx.isZero()) {
SubVec = DAG.getNode(RISCVISD::VMV_V_V_VL, DL, InterSubVT, AlignedExtract,
SubVec, VL);
} else {
- SDValue SlideupAmt =
- DAG.getVScale(DL, XLenVT, APInt(XLenVT.getSizeInBits(), RemIdx));
+ SDValue SlideupAmt = DAG.getElementCount(DL, XLenVT, RemIdx);
// Construct the vector length corresponding to RemIdx + length(SubVecVT).
VL = DAG.getNode(ISD::ADD, DL, XLenVT, SlideupAmt, VL);
@@ -9772,10 +9837,13 @@ SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
// If required, insert this subvector back into the correct vector register.
// This should resolve to an INSERT_SUBREG instruction.
- if (VecVT.bitsGT(InterSubVT))
- SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT, Vec, SubVec,
+ if (ContainerVecVT.bitsGT(InterSubVT))
+ SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVecVT, Vec, SubVec,
DAG.getVectorIdxConstant(AlignedIdx, DL));
+ if (VecVT.isFixedLengthVector())
+ SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
+
// We might have bitcast from a mask type: cast back to the original type if
// required.
return DAG.getBitcast(Op.getSimpleValueType(), SubVec);
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll
index 9f0240c53b219a..9dae07e2928706 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll
@@ -9,39 +9,63 @@
; RUN: llc < %s -mtriple=riscv64 -mattr=+m,v -riscv-v-vector-bits-max=128 -verify-machineinstrs | FileCheck -check-prefixes=CHECK,VLS,RV64VLS %s
define <vscale x 8 x i32> @insert_nxv8i32_v2i32_0(<vscale x 8 x i32> %vec, ptr %svp) {
-; CHECK-LABEL: insert_nxv8i32_v2i32_0:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-NEXT: vle32.v v12, (a0)
-; CHECK-NEXT: vsetivli zero, 2, e32, m4, tu, ma
-; CHECK-NEXT: vmv.v.v v8, v12
-; CHECK-NEXT: ret
+; VLA-LABEL: insert_nxv8i32_v2i32_0:
+; VLA: # %bb.0:
+; VLA-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; VLA-NEXT: vle32.v v12, (a0)
+; VLA-NEXT: vsetivli zero, 2, e32, m4, tu, ma
+; VLA-NEXT: vmv.v.v v8, v12
+; VLA-NEXT: ret
+;
+; VLS-LABEL: insert_nxv8i32_v2i32_0:
+; VLS: # %bb.0:
+; VLS-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; VLS-NEXT: vle32.v v12, (a0)
+; VLS-NEXT: vsetivli zero, 2, e32, m1, tu, ma
+; VLS-NEXT: vmv.v.v v8, v12
+; VLS-NEXT: ret
%sv = load <2 x i32>, ptr %svp
%v = call <vscale x 8 x i32> @llvm.vector.insert.v2i32.nxv8i32(<vscale x 8 x i32> %vec, <2 x i32> %sv, i64 0)
ret <vscale x 8 x i32> %v
}
define <vscale x 8 x i32> @insert_nxv8i32_v2i32_2(<vscale x 8 x i32> %vec, ptr %svp) {
-; CHECK-LABEL: insert_nxv8i32_v2i32_2:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-NEXT: vle32.v v12, (a0)
-; CHECK-NEXT: vsetivli zero, 4, e32, m4, tu, ma
-; CHECK-NEXT: vslideup.vi v8, v12, 2
-; CHECK-NEXT: ret
+; VLA-LABEL: insert_nxv8i32_v2i32_2:
+; VLA: # %bb.0:
+; VLA-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; VLA-NEXT: vle32.v v12, (a0)
+; VLA-NEXT: vsetivli zero, 4, e32, m4, tu, ma
+; VLA-NEXT: vslideup.vi v8, v12, 2
+; VLA-NEXT: ret
+;
+; VLS-LABEL: insert_nxv8i32_v2i32_2:
+; VLS: # %bb.0:
+; VLS-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; VLS-NEXT: vle32.v v12, (a0)
+; VLS-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; VLS-NEXT: vslideup.vi v8, v12, 2
+; VLS-NEXT: ret
%sv = load <2 x i32>, ptr %svp
%v = call <vscale x 8 x i32> @llvm.vector.insert.v2i32.nxv8i32(<vscale x 8 x i32> %vec, <2 x i32> %sv, i64 2)
ret <vscale x 8 x i32> %v
}
define <vscale x 8 x i32> @insert_nxv8i32_v2i32_6(<vscale x 8 x i32> %vec, ptr %svp) {
-; CHECK-LABEL: insert_nxv8i32_v2i32_6:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-NEXT: vle32.v v12, (a0)
-; CHECK-NEXT: vsetivli zero, 8, e32, m4, tu, ma
-; CHECK-NEXT: vslideup.vi v8, v12, 6
-; CHECK-NEXT: ret
+; VLA-LABEL: insert_nxv8i32_v2i32_6:
+; VLA: # %bb.0:
+; VLA-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; VLA-NEXT: vle32.v v12, (a0)
+; VLA-NEXT: vsetivli zero, 8, e32, m4, tu, ma
+; VLA-NEXT: vslideup.vi v8, v12, 6
+; VLA-NEXT: ret
+;
+; VLS-LABEL: insert_nxv8i32_v2i32_6:
+; VLS: # %bb.0:
+; VLS-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; VLS-NEXT: vle32.v v12, (a0)
+; VLS-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; VLS-NEXT: vslideup.vi v9, v12, 2
+; VLS-NEXT: ret
%sv = load <2 x i32>, ptr %svp
%v = call <vscale x 8 x i32> @llvm.vector.insert.v2i32.nxv8i32(<vscale x 8 x i32> %vec, <2 x i32> %sv, i64 6)
ret <vscale x 8 x i32> %v
@@ -58,9 +82,7 @@ define <vscale x 8 x i32> @insert_nxv8i32_v8i32_0(<vscale x 8 x i32> %vec, ptr %
;
; VLS-LABEL: insert_nxv8i32_v8i32_0:
; VLS: # %bb.0:
-; VLS-NEXT: vl2re32.v v12, (a0)
-; VLS-NEXT: vsetivli zero, 8, e32, m4, tu, ma
-; VLS-NEXT: vmv.v.v v8, v12
+; VLS-NEXT: vl2re32.v v8, (a0)
; VLS-NEXT: ret
%sv = load <8 x i32>, ptr %svp
%v = call <vscale x 8 x i32> @llvm.vector.insert.v8i32.nxv8i32(<vscale x 8 x i32> %vec, <8 x i32> %sv, i64 0)
@@ -78,9 +100,7 @@ define <vscale x 8 x i32> @insert_nxv8i32_v8i32_8(<vscale x 8 x i32> %vec, ptr %
;
; VLS-LABEL: insert_nxv8i32_v8i32_8:
; VLS: # %bb.0:
-; VLS-NEXT: vl2re32.v v12, (a0)
-; VLS-NEXT: vsetivli zero, 16, e32, m4, tu, ma
-; VLS-NEXT: vslideup.vi v8, v12, 8
+; VLS-NEXT: vl2re32.v v10, (a0)
; VLS-NEXT: ret
%sv = load <8 x i32>, ptr %svp
%v = call <vscale x 8 x i32> @llvm.vector.insert.v8i32.nxv8i32(<vscale x 8 x i32> %vec, <8 x i32> %sv, i64 8)
@@ -98,6 +118,31 @@ define <vscale x 8 x i32> @insert_nxv8i32_undef_v2i32_0(ptr %svp) {
ret <vscale x 8 x i32> %v
}
+define <vscale x 2 x i32> @insert_nxv8i32_v4i32_0(<vscale x 2 x i32> %vec, <4 x i32> %subvec) {
+; VLA-LABEL: insert_nxv8i32_v4i32_0:
+; VLA: # %bb.0:
+; VLA-NEXT: vsetivli zero, 4, e32, m1, tu, ma
+; VLA-NEXT: vmv.v.v v8, v9
+; VLA-NEXT: ret
+;
+; VLS-LABEL: insert_nxv8i32_v4i32_0:
+; VLS: # %bb.0:
+; VLS-NEXT: vmv1r.v v8, v9
+; VLS-NEXT: ret
+ %v = call <vscale x 2 x i32> @llvm.vector.insert.nxv2i32.v4i32(<vscale x 2 x i32> %vec, <4 x i32> %subvec, i64 0)
+ ret <vscale x 2 x i32> %v
+}
+
+
+define <4 x i32> @insert_v4i32_v4i32_0(<4 x i32> %vec, <4 x i32> %subvec) {
+; CHECK-LABEL: insert_v4i32_v4i32_0:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+ %v = call <4 x i32> @llvm.vector.insert.v4i32.v4i32(<4 x i32> %vec, <4 x i32> %subvec, i64 0)
+ ret <4 x i32> %v
+}
+
define void @insert_v4i32_v2i32_0(ptr %vp, ptr %svp) {
; VLA-LABEL: insert_v4i32_v2i32_0:
; VLA: # %bb.0:
@@ -175,6 +220,31 @@ define void @insert_v4i32_undef_v2i32_0(ptr %vp, ptr %svp) {
ret void
}
+; This tests the code path in RISCVISelDAGToDAG::Select where we select an
+; insert_subvector with a fixed vector and fixed subvector type. The phi here is
+; used to prevent the fixed insert_subvector from being combined away into a
+; scalable insert_subvector.
+define <4 x i32> @insert_v4i32_undef_v2i32_0_phi(<2 x i32> %subvec, i1 %cond) {
+; CHECK-LABEL: insert_v4i32_undef_v2i32_0_phi:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: andi a0, a0, 1
+; CHECK-NEXT: bnez a0, .LBB11_2
+; CHECK-NEXT: # %bb.1:
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vmv.v.i v8, 0
+; CHECK-NEXT: .LBB11_2: # %bar
+; CHECK-NEXT: ret
+entry:
+ br i1 %cond, label %foo, label %bar
+foo:
+ %v = call <4 x i32> @llvm.vector.insert.v2i32.v4i32(<4 x i32> undef, <2 x i32> %subvec, i64 0)
+ br label %bar
+bar:
+ %w = phi <4 x i32> [%v, %foo], [zeroinitializer, %entry]
+ ret <4 x i32> %w
+}
+
+
define void @insert_v8i32_v2i32_0(ptr %vp, ptr %svp) {
; VLA-LABEL: insert_v8i32_v2i32_0:
; VLA: # %bb.0:
@@ -193,7 +263,7 @@ define void @insert_v8i32_v2i32_0(ptr %vp, ptr %svp) {
; VLS-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; VLS-NEXT: vle32.v v8, (a1)
; VLS-NEXT: vl2re32.v v10, (a0)
-; VLS-NEXT: vsetivli zero, 2, e32, m2, tu, ma
+; VLS-NEXT: vsetivli zero, 2, e32, m1, tu, ma
; VLS-NEXT: vmv.v.v v10, v8
; VLS-NEXT: vs2r.v v10, (a0)
; VLS-NEXT: ret
@@ -220,11 +290,11 @@ define void @insert_v8i32_v2i32_2(ptr %vp, ptr %svp) {
; VLS-LABEL: insert_v8i32_v2i32_2:
; VLS: # %bb.0:
; VLS-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; VLS-NEXT: vl2re32.v v8, (a0)
-; VLS-NEXT: vle32.v v10, (a1)
-; VLS-NEXT: vsetivli zero, 4, e32, m2, tu, ma
-; VLS-NEXT: vslideup.vi v8, v10, 2
-; VLS-NEXT: vs2r.v v8, (a0)
+; VLS-NEXT: vle32.v v8, (a1)
+; VLS-NEXT: vl2re32.v v10, (a0)
+; VLS-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; VLS-NEXT: vslideup.vi v10, v8, 2
+; VLS-NEXT: vs2r.v v10, (a0)
; VLS-NEXT: ret
%sv = load <2 x i32>, ptr %svp
%vec = load <8 x i32>, ptr %vp
@@ -247,11 +317,11 @@ define void @insert_v8i32_v2i32_6(ptr %vp, ptr %svp) {
; VLS-LABEL: insert_v8i32_v2i32_6:
; VLS: # %bb.0:
; VLS-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; VLS-NEXT: vl2re32.v v8, (a0)
-; VLS-NEXT: vle32.v v10, (a1)
-; VLS-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; VLS-NEXT: vslideup.vi v8, v10, 6
-; VLS-NEXT: vs2r.v v8, (a0)
+; VLS-NEXT: vle32.v v8, (a1)
+; VLS-NEXT: vl2re32.v v10, (a0)
+; VLS-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; VLS-NEXT: vslideup.vi v11, v8, 2
+; VLS-NEXT: vs2r.v v10, (a0)
; VLS-NEXT: ret
%sv = load <2 x i32>, ptr %svp
%vec = load <8 x i32>, ptr %vp
@@ -274,9 +344,9 @@ define void @insert_v8i32_undef_v2i32_6(ptr %vp, ptr %svp) {
; VLS: # %bb.0:
; VLS-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; VLS-NEXT: vle32.v v8, (a1)
-; VLS-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; VLS-NEXT: vslideup.vi v10, v8, 6
-; VLS-NEXT: vs2r.v v10, (a0)
+; VLS-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; VLS-NEXT: vslideup.vi v9, v8, 2
+; VLS-NEXT: vs2r.v v8, (a0)
; VLS-NEXT: ret
%sv = load <2 x i32>, ptr %svp
%v = call <8 x i32> @llvm.vector.insert.v2i32.v8i32(<8 x i32> undef, <2 x i32> %sv, i64 6)
@@ -542,9 +612,7 @@ define void @insert_v2i64_nxv16i64(ptr %psv0, ptr %psv1, ptr %out) {
; VLS-LABEL: insert_v2i64_nxv16i64:
; VLS: # %bb.0:
; VLS-NEXT: vl1re64.v v8, (a0)
-; VLS-NEXT: vl1re64.v v16, (a1)
-; VLS-NEXT: vsetivli zero, 6, e64, m8, tu, ma
-; VLS-NEXT: vslideup.vi v8, v16, 4
+; VLS-...
[truncated]
``````````
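For anyone skimming the truncated diff: when the subvector doesn't line up exactly with a vector register, the lowering still falls back to the tail-undisturbed vslideup described in the code comments above. Below is a minimal scalar sketch of that semantic; the function and values are purely illustrative, not the real RISC-V intrinsics:

```cpp
// Scalar model of a tail-undisturbed vslideup: elements below Offset and at
// or beyond VL keep their old values; [Offset, VL) is taken from the
// subvector. A sketch of the semantics only.
#include <cstddef>
#include <vector>

static void slideupTailUndisturbed(std::vector<int> &Dest,
                                   const std::vector<int> &Src, size_t Offset,
                                   size_t VL) {
  for (size_t I = Offset; I < VL && I < Dest.size(); ++I)
    Dest[I] = Src[I - Offset];
}

int main() {
  // Insert a 2-element subvector at offset 2 of a 4-element LMUL=1 register.
  // VL = Offset + subvector length = 4, so only elements 2 and 3 change --
  // mirroring how the lowering computes VL = SlideupAmt + length(SubVecVT).
  std::vector<int> Reg = {10, 11, 12, 13};
  std::vector<int> Sub = {20, 21};
  slideupTailUndisturbed(Reg, Sub, /*Offset=*/2, /*VL=*/4);
  // Reg is now {10, 11, 20, 21}.
  return 0;
}
```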
https://github.com/llvm/llvm-project/pull/84107