[llvm] [RISCV] Reduce LMUL when index is known when lowering insert_vector_elt (PR #66087)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 12 06:47:53 PDT 2023
llvmbot wrote:
@llvm/pr-subscribers-backend-risc-v
<details>
<summary>Changes</summary>
Continuing on from #65997, if the index of insert_vector_elt is a constant then we can work out the minimum number of registers needed for the slideup and choose a smaller type to operate on.
This reduces the LMUL not just for the slideup but also for the scalar insert.
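To illustrate the effect, here is a minimal sketch based on the insertelt_c6_v8i64_0 test diff below: inserting a constant into element 0 of an <8 x i64> vector no longer sets up LMUL=4 for the vmv.s.x, since the element is known to land in the first register of the group, so an LMUL=1 vtype is enough.
<pre>
  %b = insertelement <8 x i64> %a, i64 6, i32 0

; before:
  li a0, 6
  vsetivli zero, 8, e64, m4, tu, ma
  vmv.s.x v8, a0

; after:
  li a0, 6
  vsetivli zero, 8, e64, m1, tu, ma
  vmv.s.x v8, a0
</pre>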
--
Patch is 131.09 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/66087.diff
14 Files Affected:
- (modified) llvm/lib/CodeGen/TwoAddressInstructionPass.cpp (+21-2)
- (modified) llvm/lib/Target/RISCV/RISCVISelLowering.cpp (+69-26)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-i1.ll (+1-1)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll (+21-24)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll (+7-7)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll (+146-146)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-combine.ll (+40-40)
- (modified) llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll (+150-180)
- (modified) llvm/test/CodeGen/RISCV/rvv/insertelt-fp.ll (+17-17)
- (modified) llvm/test/CodeGen/RISCV/rvv/insertelt-i1.ll (+3-3)
- (modified) llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv32.ll (+27-27)
- (modified) llvm/test/CodeGen/RISCV/rvv/insertelt-int-rv64.ll (+23-23)
- (added) llvm/test/CodeGen/RISCV/rvv/twoaddressinstruction-subreg-liveness-update.mir (+42)
- (modified) llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll (+3-2)
<pre>
diff --git a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
index 45f61262faf9391..e36bffc91b91d95 100644
--- a/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -1871,11 +1871,30 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) {
SlotIndex Idx = LIS->getInstructionIndex(*mi);
for (auto &S : LI.subranges()) {
if ((S.LaneMask & LaneMask).none()) {
+ // If Idx is 160B, and we have a subrange that isn't in
+ // %reg.subidx like so:
+ //
+ // [152r,160r)[160r,256r)
+ //
+ // Merge the two segments together so the subrange becomes:
+ //
+ // [152r,256r)
LiveRange::iterator UseSeg = S.FindSegmentContaining(Idx);
- LiveRange::iterator DefSeg = std::next(UseSeg);
- S.MergeValueNumberInto(DefSeg->valno, UseSeg->valno);
+ if (UseSeg != S.end()) {
+ LiveRange::iterator DefSeg = std::next(UseSeg);
+ assert(DefSeg != S.end());
+ S.MergeValueNumberInto(DefSeg->valno, UseSeg->valno);
+ }
+ // Otherwise, it should have only one segment that starts at
+ // 160r which we should remove.
+ else {
+ assert(S.containsOneValue());
+ assert(S.begin()->start == Idx.getRegSlot());
+ S.removeSegment(S.begin());
+ }
}
}
+ LI.removeEmptySubRanges();
// The COPY no longer has a use of %reg.
LIS->shrinkToUses(&LI);
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 4ff264635cda248..db743f1c67a6232 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -7345,6 +7345,32 @@ RISCVTargetLowering::lowerVectorFPExtendOrRoundLike(SDValue Op,
return Result;
}
+// Given a scalable vector type and an index into it, returns the type for the
+// smallest subvector that the index fits in. This can be used to reduce LMUL
+// for operations like vslidedown.
+//
+// E.g. With Zvl128b, index 3 in a nxv4i32 fits within the first nxv2i32.
+static std::optional<MVT>
+getSmallestVTForIndex(MVT VecVT, unsigned MaxIdx, SDLoc DL, SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget) {
+ assert(VecVT.isScalableVector());
+ const unsigned EltSize = VecVT.getScalarSizeInBits();
+ const unsigned VectorBitsMin = Subtarget.getRealMinVLen();
+ const unsigned MinVLMAX = VectorBitsMin / EltSize;
+ MVT SmallerVT;
+ if (MaxIdx < MinVLMAX)
+ SmallerVT = getLMUL1VT(VecVT);
+ else if (MaxIdx < MinVLMAX * 2)
+ SmallerVT = getLMUL1VT(VecVT).getDoubleNumVectorElementsVT();
+ else if (MaxIdx < MinVLMAX * 4)
+ SmallerVT = getLMUL1VT(VecVT)
+ .getDoubleNumVectorElementsVT()
+ .getDoubleNumVectorElementsVT();
+ if (!SmallerVT.isValid() || !VecVT.bitsGT(SmallerVT))
+ return std::nullopt;
+ return SmallerVT;
+}
+
// Custom-legalize INSERT_VECTOR_ELT so that the value is inserted into the
// first position of a vector, and that vector is slid up to the insert index.
// By limiting the active vector length to index+1 and merging with the
@@ -7375,6 +7401,19 @@ SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
}
+ MVT OrigContainerVT = ContainerVT;
+ SDValue OrigVec = Vec;
+ // If we know the index we're going to insert at, we can shrink down Vec so
+ // we're performing the vslide1down on a smaller LMUL.
+ if (auto *CIdx = dyn_cast<ConstantSDNode>(Idx)) {
+ if (auto ShrunkVT = getSmallestVTForIndex(ContainerVT, CIdx->getZExtValue(),
+ DL, DAG, Subtarget)) {
+ ContainerVT = *ShrunkVT;
+ Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec,
+ DAG.getVectorIdxConstant(0, DL));
+ }
+ }
+
MVT XLenVT = Subtarget.getXLenVT();
bool IsLegalInsert = Subtarget.is64Bit() || Val.getValueType() != MVT::i64;
@@ -7399,6 +7438,10 @@ SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
VecVT.isFloatingPoint() ? RISCVISD::VFMV_S_F_VL : RISCVISD::VMV_S_X_VL;
if (isNullConstant(Idx)) {
Vec = DAG.getNode(Opc, DL, ContainerVT, Vec, Val, VL);
+
+ if (ContainerVT != OrigContainerVT)
+ Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
+ Vec, DAG.getVectorIdxConstant(0, DL));
if (!VecVT.isFixedLengthVector())
return Vec;
return convertFromScalableVector(VecVT, Vec, DAG, Subtarget);
@@ -7431,6 +7474,10 @@ SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
// Bitcast back to the right container type.
ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
+ if (ContainerVT != OrigContainerVT)
+ ValInVec =
+ DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
+ ValInVec, DAG.getVectorIdxConstant(0, DL));
if (!VecVT.isFixedLengthVector())
return ValInVec;
return convertFromScalableVector(VecVT, ValInVec, DAG, Subtarget);
@@ -7461,37 +7508,15 @@ SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
Policy = RISCVII::TAIL_AGNOSTIC;
SDValue Slideup = getVSlideup(DAG, Subtarget, DL, ContainerVT, Vec, ValInVec,
Idx, Mask, InsertVL, Policy);
+
+ if (ContainerVT != OrigContainerVT)
+ Slideup = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
+ Slideup, DAG.getVectorIdxConstant(0, DL));
if (!VecVT.isFixedLengthVector())
return Slideup;
return convertFromScalableVector(VecVT, Slideup, DAG, Subtarget);
}
-// Given a scalable vector type and an index into it, returns the type for the
-// smallest subvector that the index fits in. This can be used to reduce LMUL
-// for operations like vslidedown.
-//
-// E.g. With Zvl128b, index 3 in a nxv4i32 fits within the first nxv2i32.
-static std::optional<MVT>
-getSmallestVTForIndex(MVT VecVT, unsigned MaxIdx, SDLoc DL, SelectionDAG &DAG,
- const RISCVSubtarget &Subtarget) {
- assert(VecVT.isScalableVector());
- const unsigned EltSize = VecVT.getScalarSizeInBits();
- const unsigned VectorBitsMin = Subtarget.getRealMinVLen();
- const unsigned MinVLMAX = VectorBitsMin / EltSize;
- MVT SmallerVT;
- if (MaxIdx < MinVLMAX)
- SmallerVT = getLMUL1VT(VecVT);
- else if (MaxIdx < MinVLMAX * 2)
- SmallerVT = getLMUL1VT(VecVT).getDoubleNumVectorElementsVT();
- else if (MaxIdx < MinVLMAX * 4)
- SmallerVT = getLMUL1VT(VecVT)
- .getDoubleNumVectorElementsVT()
- .getDoubleNumVectorElementsVT();
- if (!SmallerVT.isValid() || !VecVT.bitsGT(SmallerVT))
- return std::nullopt;
- return SmallerVT;
-}
-
// Custom-lower EXTRACT_VECTOR_ELT operations to slide the vector down, then
// extract the first element: (extractelt (slidedown vec, idx), 0). For integer
// types this is done using VMV_X_S to allow us to glean information about the
@@ -8606,6 +8631,18 @@ SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
ContainerVT = getContainerForFixedLengthVector(VecVT);
Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
}
+
+ // Shrink down Vec so we're performing the slideup on a smaller LMUL.
+ unsigned LastIdx = OrigIdx + SubVecVT.getVectorNumElements() - 1;
+ MVT OrigContainerVT = ContainerVT;
+ SDValue OrigVec = Vec;
+ if (auto ShrunkVT =
+ getSmallestVTForIndex(ContainerVT, LastIdx, DL, DAG, Subtarget)) {
+ ContainerVT = *ShrunkVT;
+ Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ContainerVT, Vec,
+ DAG.getVectorIdxConstant(0, DL));
+ }
+
SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT,
DAG.getUNDEF(ContainerVT), SubVec,
DAG.getConstant(0, DL, XLenVT));
@@ -8636,6 +8673,12 @@ SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
SlideupAmt, Mask, VL, Policy);
}
+ // If we performed the slideup on a smaller LMUL, insert the result back
+ // into the rest of the vector.
+ if (ContainerVT != OrigContainerVT)
+ SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, OrigContainerVT, OrigVec,
+ SubVec, DAG.getVectorIdxConstant(0, DL));
+
if (VecVT.isFixedLengthVector())
SubVec = convertFromScalableVector(VecVT, SubVec, DAG, Subtarget);
return DAG.getBitcast(Op.getValueType(), SubVec);
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-i1.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-i1.ll
index b3cbad3d9e6b1d7..f7737784d4ca57e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-i1.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-i1.ll
@@ -108,7 +108,7 @@ define <64 x i1> @insertelt_v64i1(<64 x i1> %x, i1 %elt) nounwind {
; CHECK-NEXT: vmv.v.i v8, 0
; CHECK-NEXT: vmerge.vim v8, v8, 1, v0
; CHECK-NEXT: vmv.s.x v12, a0
-; CHECK-NEXT: vsetivli zero, 2, e8, m4, tu, ma
+; CHECK-NEXT: vsetivli zero, 2, e8, m1, tu, ma
; CHECK-NEXT: vslideup.vi v8, v12, 1
; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma
; CHECK-NEXT: vand.vi v8, v8, 1
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll
index 1d6a45ed36f335c..6a9212ed309a8ef 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll
@@ -14,7 +14,7 @@ define <vscale x 8 x i32> @insert_nxv8i32_v2i32_0(<vscale x 8 x i32> %vec, ptr %
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT: vle32.v v12, (a0)
-; CHECK-NEXT: vsetivli zero, 2, e32, m4, tu, ma
+; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, ma
; CHECK-NEXT: vmv.v.v v8, v12
; CHECK-NEXT: ret
%sv = load <2 x i32>, ptr %svp
@@ -27,7 +27,7 @@ define <vscale x 8 x i32> @insert_nxv8i32_v2i32_2(<vscale x 8 x i32> %vec, ptr %
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT: vle32.v v12, (a0)
-; CHECK-NEXT: vsetivli zero, 4, e32, m4, tu, ma
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, tu, ma
; CHECK-NEXT: vslideup.vi v8, v12, 2
; CHECK-NEXT: ret
%sv = load <2 x i32>, ptr %svp
@@ -40,7 +40,7 @@ define <vscale x 8 x i32> @insert_nxv8i32_v2i32_6(<vscale x 8 x i32> %vec, ptr %
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT: vle32.v v12, (a0)
-; CHECK-NEXT: vsetivli zero, 8, e32, m4, tu, ma
+; CHECK-NEXT: vsetivli zero, 8, e32, m2, tu, ma
; CHECK-NEXT: vslideup.vi v8, v12, 6
; CHECK-NEXT: ret
%sv = load <2 x i32>, ptr %svp
@@ -51,22 +51,19 @@ define <vscale x 8 x i32> @insert_nxv8i32_v2i32_6(<vscale x 8 x i32> %vec, ptr %
define <vscale x 8 x i32> @insert_nxv8i32_v8i32_0(<vscale x 8 x i32> %vec, ptr %svp) {
; LMULMAX2-LABEL: insert_nxv8i32_v8i32_0:
; LMULMAX2: # %bb.0:
-; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; LMULMAX2-NEXT: vle32.v v12, (a0)
-; LMULMAX2-NEXT: vsetivli zero, 8, e32, m4, tu, ma
-; LMULMAX2-NEXT: vmv.v.v v8, v12
+; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, tu, ma
+; LMULMAX2-NEXT: vle32.v v8, (a0)
; LMULMAX2-NEXT: ret
;
; LMULMAX1-LABEL: insert_nxv8i32_v8i32_0:
; LMULMAX1: # %bb.0:
+; LMULMAX1-NEXT: addi a1, a0, 16
; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-NEXT: vle32.v v12, (a0)
-; LMULMAX1-NEXT: addi a0, a0, 16
-; LMULMAX1-NEXT: vle32.v v16, (a0)
-; LMULMAX1-NEXT: vsetivli zero, 4, e32, m4, tu, ma
-; LMULMAX1-NEXT: vmv.v.v v8, v12
-; LMULMAX1-NEXT: vsetivli zero, 8, e32, m4, tu, ma
-; LMULMAX1-NEXT: vslideup.vi v8, v16, 4
+; LMULMAX1-NEXT: vle32.v v12, (a1)
+; LMULMAX1-NEXT: vsetvli zero, zero, e32, m1, tu, ma
+; LMULMAX1-NEXT: vle32.v v8, (a0)
+; LMULMAX1-NEXT: vsetivli zero, 8, e32, m2, tu, ma
+; LMULMAX1-NEXT: vslideup.vi v8, v12, 4
; LMULMAX1-NEXT: ret
%sv = load <8 x i32>, ptr %svp
%v = call <vscale x 8 x i32> @llvm.vector.insert.v8i32.nxv8i32(<vscale x 8 x i32> %vec, <8 x i32> %sv, i64 0)
@@ -84,14 +81,14 @@ define <vscale x 8 x i32> @insert_nxv8i32_v8i32_8(<vscale x 8 x i32> %vec, ptr %
;
; LMULMAX1-LABEL: insert_nxv8i32_v8i32_8:
; LMULMAX1: # %bb.0:
-; LMULMAX1-NEXT: addi a1, a0, 16
; LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; LMULMAX1-NEXT: vle32.v v12, (a1)
+; LMULMAX1-NEXT: vle32.v v12, (a0)
+; LMULMAX1-NEXT: addi a0, a0, 16
; LMULMAX1-NEXT: vle32.v v16, (a0)
; LMULMAX1-NEXT: vsetivli zero, 12, e32, m4, tu, ma
-; LMULMAX1-NEXT: vslideup.vi v8, v16, 8
+; LMULMAX1-NEXT: vslideup.vi v8, v12, 8
; LMULMAX1-NEXT: vsetivli zero, 16, e32, m4, tu, ma
-; LMULMAX1-NEXT: vslideup.vi v8, v12, 12
+; LMULMAX1-NEXT: vslideup.vi v8, v16, 12
; LMULMAX1-NEXT: ret
%sv = load <8 x i32>, ptr %svp
%v = call <vscale x 8 x i32> @llvm.vector.insert.v8i32.nxv8i32(<vscale x 8 x i32> %vec, <8 x i32> %sv, i64 8)
@@ -166,7 +163,7 @@ define void @insert_v8i32_v2i32_0(ptr %vp, ptr %svp) {
; LMULMAX2-NEXT: vle32.v v8, (a1)
; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; LMULMAX2-NEXT: vle32.v v10, (a0)
-; LMULMAX2-NEXT: vsetivli zero, 2, e32, m2, tu, ma
+; LMULMAX2-NEXT: vsetivli zero, 2, e32, m1, tu, ma
; LMULMAX2-NEXT: vmv.v.v v10, v8
; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; LMULMAX2-NEXT: vse32.v v10, (a0)
@@ -197,7 +194,7 @@ define void @insert_v8i32_v2i32_2(ptr %vp, ptr %svp) {
; LMULMAX2-NEXT: vle32.v v8, (a1)
; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; LMULMAX2-NEXT: vle32.v v10, (a0)
-; LMULMAX2-NEXT: vsetivli zero, 4, e32, m2, tu, ma
+; LMULMAX2-NEXT: vsetivli zero, 4, e32, m1, tu, ma
; LMULMAX2-NEXT: vslideup.vi v10, v8, 2
; LMULMAX2-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; LMULMAX2-NEXT: vse32.v v10, (a0)
@@ -508,9 +505,9 @@ define void @insert_v2i64_nxv16i64(ptr %psv0, ptr %psv1, <vscale x 16 x i64>* %o
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT: vle64.v v8, (a0)
-; CHECK-NEXT: vle64.v v16, (a1)
-; CHECK-NEXT: vsetivli zero, 6, e64, m8, tu, ma
-; CHECK-NEXT: vslideup.vi v8, v16, 4
+; CHECK-NEXT: vle64.v v12, (a1)
+; CHECK-NEXT: vsetivli zero, 6, e64, m4, tu, ma
+; CHECK-NEXT: vslideup.vi v8, v12, 4
; CHECK-NEXT: vs8r.v v8, (a2)
; CHECK-NEXT: ret
%sv0 = load <2 x i64>, ptr %psv0
@@ -539,7 +536,7 @@ define void @insert_v2i64_nxv16i64_lo2(ptr %psv, <vscale x 16 x i64>* %out) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
; CHECK-NEXT: vle64.v v8, (a0)
-; CHECK-NEXT: vsetivli zero, 4, e64, m8, ta, ma
+; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
; CHECK-NEXT: vslideup.vi v16, v8, 2
; CHECK-NEXT: vs8r.v v16, (a1)
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll
index 373a96356a207e2..cbcca9d2696f4ba 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll
@@ -40,7 +40,7 @@ define <32 x i32> @insertelt_v32i32_0(<32 x i32> %a, i32 %y) {
; CHECK-LABEL: insertelt_v32i32_0:
; CHECK: # %bb.0:
; CHECK-NEXT: li a1, 32
-; CHECK-NEXT: vsetvli zero, a1, e32, m8, tu, ma
+; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, ma
; CHECK-NEXT: vmv.s.x v8, a0
; CHECK-NEXT: ret
%b = insertelement <32 x i32> %a, i32 %y, i32 0
@@ -54,7 +54,7 @@ define <32 x i32> @insertelt_v32i32_4(<32 x i32> %a, i32 %y) {
; CHECK-NEXT: li a1, 32
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
; CHECK-NEXT: vmv.s.x v16, a0
-; CHECK-NEXT: vsetivli zero, 5, e32, m8, tu, ma
+; CHECK-NEXT: vsetivli zero, 5, e32, m2, tu, ma
; CHECK-NEXT: vslideup.vi v8, v16, 4
; CHECK-NEXT: ret
%b = insertelement <32 x i32> %a, i32 %y, i32 4
@@ -92,7 +92,7 @@ define <64 x i32> @insertelt_v64i32_0(<64 x i32> %a, i32 %y) {
; CHECK-LABEL: insertelt_v64i32_0:
; CHECK: # %bb.0:
; CHECK-NEXT: li a1, 32
-; CHECK-NEXT: vsetvli zero, a1, e32, m8, tu, ma
+; CHECK-NEXT: vsetvli zero, a1, e32, m1, tu, ma
; CHECK-NEXT: vmv.s.x v8, a0
; CHECK-NEXT: ret
%b = insertelement <64 x i32> %a, i32 %y, i32 0
@@ -390,7 +390,7 @@ define <8 x i64> @insertelt_v8i64_0(<8 x i64> %a, ptr %x) {
; CHECK-LABEL: insertelt_v8i64_0:
; CHECK: # %bb.0:
; CHECK-NEXT: li a0, -1
-; CHECK-NEXT: vsetivli zero, 8, e64, m4, tu, ma
+; CHECK-NEXT: vsetivli zero, 8, e64, m1, tu, ma
; CHECK-NEXT: vmv.s.x v8, a0
; CHECK-NEXT: ret
%b = insertelement <8 x i64> %a, i64 -1, i32 0
@@ -468,7 +468,7 @@ define <8 x i64> @insertelt_c6_v8i64_0(<8 x i64> %a, ptr %x) {
; CHECK-LABEL: insertelt_c6_v8i64_0:
; CHECK: # %bb.0:
; CHECK-NEXT: li a0, 6
-; CHECK-NEXT: vsetivli zero, 8, e64, m4, tu, ma
+; CHECK-NEXT: vsetivli zero, 8, e64, m1, tu, ma
; CHECK-NEXT: vmv.s.x v8, a0
; CHECK-NEXT: ret
%b = insertelement <8 x i64> %a, i64 6, i32 0
@@ -550,9 +550,9 @@ define void @insertelt_c6_v8i64_0_add(ptr %x, ptr %y) {
; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; CHECK-NEXT: vle64.v v8, (a0)
; CHECK-NEXT: li a2, 6
-; CHECK-NEXT: vsetvli zero, zero, e64, m4, tu, ma
+; CHECK-NEXT: vsetivli zero, 8, e64, m1, tu, ma
; CHECK-NEXT: vmv.s.x v8, a2
-; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma
+; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma
; CHECK-NEXT: vle64.v v12, (a1)
; CHECK-NEXT: vadd.vv v8, v8, v12
; CHECK-NEXT: vse64.v v8, (a0)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
index f7352b4659e5a9b..5ae0884068f9bc3 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
@@ -2426,14 +2426,14 @@ define <8 x i32> @mgather_v8i32(<8 x ptr> %ptrs, <8 x i1> %m, <8 x i32> %passthr
; RV64ZVE32F-NEXT: .LBB34_9: # %cond.load
; RV64ZVE32F-NEXT: ld a2, 0(a0)
; RV64ZVE32F-NEXT: lw a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m2, tu, ma
+; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v8, a2
; RV64ZVE32F-NEXT: andi a2, a1, 2
; RV64ZVE32F-NEXT: beqz a2, .LBB34_2
; RV64ZVE32F-NEXT: .LBB34_10: # %cond.load1
; RV64ZVE32F-NEXT: ld a2, 8(a0)
; RV64ZVE32F-NEXT: lw a2, 0(a2)
-; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m2, tu, ma
+; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma
; RV64ZVE32F-NEXT: vmv.s.x v10, a2
; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 1
; RV64ZVE32F-NEXT: andi a2, a1, 4
@@ -2441,7 +2441,7 @@ define <8 x i32> @mgather...
<truncated>
</pre>
</details>
https://github.com/llvm/llvm-project/pull/66087