[llvm] [RISCV] Narrow indices of fixed vector gather/scatter nodes (PR #66405)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 14 10:31:38 PDT 2023
llvmbot wrote:
@llvm/pr-subscribers-backend-risc-v
Changes
Review-wise, I'm trying something different. This PR is what would have been three stacked changes in Phabricator. Splitting it into separate PRs (and thus duplicating commits) seemed like a major headache, so I'm posting all three changes in one PR. I will land them individually after review. Feel free to LGTM individual changes (please be clear about which), and I will rebase as appropriate.
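
For context, the pattern this series narrows is a gather/scatter whose index vector has been zero extended to a wider element type than the indices actually need. A hand-written sketch of that shape (names are illustrative, not taken from the patch; the real coverage is in the fixed-vectors-masked-gather/scatter tests below):

<pre>
; Illustrative only: i8 indices zero extended before feeding the gather.
; With these changes the RISC-V backend can keep the index in a narrower
; legal type (16-bit elements here), which is why the test diffs switch
; from vluxei32.v/vluxei64.v to vluxei16.v.
declare <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x i32>)

define <8 x i32> @gather_of_zext_idx(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x i32> %passthru) {
  %eidxs = zext <8 x i8> %idxs to <8 x i32>
  %ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %eidxs
  %v = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> %m, <8 x i32> %passthru)
  ret <8 x i32> %v
}
</pre>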
--
Patch is 62.45 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/66405.diff
12 Files Affected:
- (modified) llvm/include/llvm/CodeGen/TargetLowering.h (+3-3)
- (modified) llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (+6-9)
- (modified) llvm/lib/Target/AArch64/AArch64ISelLowering.cpp (+2-1)
- (modified) llvm/lib/Target/AArch64/AArch64ISelLowering.h (+1-1)
- (modified) llvm/lib/Target/RISCV/RISCVISelLowering.cpp (+55-34)
- (modified) llvm/lib/Target/RISCV/RISCVISelLowering.h (+1-1)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll (+66-66)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll (+66-66)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll (+82-90)
- (modified) llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll (+60-64)
- (modified) llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll (+2-4)
- (modified) llvm/test/CodeGen/RISCV/rvv/vpscatter-sdnode.ll (+6-8)
<pre>
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 12b280d5b1a0bcd..4879c0c5dcff10b 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -1460,9 +1460,9 @@ class TargetLoweringBase {
/// extending
virtual bool shouldExtendGSIndex(EVT VT, EVT &EltTy) const { return false; }
- // Returns true if VT is a legal index type for masked gathers/scatters
- // on this target
- virtual bool shouldRemoveExtendFromGSIndex(EVT IndexVT, EVT DataVT) const {
+ // Returns true if Extend can be folded into the index of a masked gather/scatter
+ // on this target.
+ virtual bool shouldRemoveExtendFromGSIndex(SDValue Extend, EVT DataVT) const {
return false;
}
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index cd34c0dce0f95a0..5d0d70a40e408b1 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -11680,10 +11680,9 @@ bool refineIndexType(SDValue &Index, ISD::MemIndexType &IndexType, EVT DataVT,
// It's always safe to look through zero extends.
if (Index.getOpcode() == ISD::ZERO_EXTEND) {
- SDValue Op = Index.getOperand(0);
- if (TLI.shouldRemoveExtendFromGSIndex(Op.getValueType(), DataVT)) {
+ if (TLI.shouldRemoveExtendFromGSIndex(Index, DataVT)) {
IndexType = ISD::UNSIGNED_SCALED;
- Index = Op;
+ Index = Index.getOperand(0);
return true;
}
if (ISD::isIndexTypeSigned(IndexType)) {
@@ -11694,12 +11693,10 @@ bool refineIndexType(SDValue &Index, ISD::MemIndexType &IndexType, EVT DataVT,
// It's only safe to look through sign extends when Index is signed.
if (Index.getOpcode() == ISD::SIGN_EXTEND &&
- ISD::isIndexTypeSigned(IndexType)) {
- SDValue Op = Index.getOperand(0);
- if (TLI.shouldRemoveExtendFromGSIndex(Op.getValueType(), DataVT)) {
- Index = Op;
- return true;
- }
+ ISD::isIndexTypeSigned(IndexType) &&
+ TLI.shouldRemoveExtendFromGSIndex(Index, DataVT)) {
+ Index = Index.getOperand(0);
+ return true;
}
return false;
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index c65c52e39201ac6..337fe80d0a9018d 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -5352,8 +5352,9 @@ bool AArch64TargetLowering::shouldExtendGSIndex(EVT VT, EVT &EltTy) const {
return false;
}
-bool AArch64TargetLowering::shouldRemoveExtendFromGSIndex(EVT IndexVT,
+bool AArch64TargetLowering::shouldRemoveExtendFromGSIndex(SDValue Extend,
EVT DataVT) const {
+ const EVT IndexVT = Extend.getOperand(0).getValueType();
// SVE only supports implicit extension of 32-bit indices.
if (!Subtarget->hasSVE() || IndexVT.getVectorElementType() != MVT::i32)
return false;
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 67c344318e0d3ec..32970e9e45dec8f 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -1185,7 +1185,7 @@ class AArch64TargetLowering : public TargetLowering {
SelectionDAG &DAG) const override;
bool shouldExtendGSIndex(EVT VT, EVT &EltTy) const override;
- bool shouldRemoveExtendFromGSIndex(EVT IndexVT, EVT DataVT) const override;
+ bool shouldRemoveExtendFromGSIndex(SDValue Extend, EVT DataVT) const override;
bool isVectorLoadExtDesirable(SDValue ExtVal) const override;
bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
bool mayBeEmittedAsTailCall(const CallInst *CI) const override;
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index a470ceae90ce591..25fe102283995cf 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -11620,21 +11620,24 @@ static SDValue performXORCombine(SDNode *N, SelectionDAG &DAG,
// zero-extended their indices, \p narrowIndex tries to narrow the type of index
// operand if it is matched to pattern (shl (zext x to ty), C) and bits(x) + C <
// bits(ty).
-static SDValue narrowIndex(SDValue N, SelectionDAG &DAG) {
+static bool narrowIndex(SDValue &N, ISD::MemIndexType IndexType, SelectionDAG &DAG) {
+ if (isIndexTypeSigned(IndexType))
+ return false;
+
if (N.getOpcode() != ISD::SHL || !N->hasOneUse())
- return SDValue();
+ return false;
SDValue N0 = N.getOperand(0);
if (N0.getOpcode() != ISD::ZERO_EXTEND &&
N0.getOpcode() != RISCVISD::VZEXT_VL)
- return SDValue();
+ return false;
if (!N0->hasOneUse())
- return SDValue();
+ return false;
APInt ShAmt;
SDValue N1 = N.getOperand(1);
if (!ISD::isConstantSplatVector(N1.getNode(), ShAmt))
- return SDValue();
+ return false;
SDLoc DL(N);
SDValue Src = N0.getOperand(0);
@@ -11646,14 +11649,15 @@ static SDValue narrowIndex(SDValue N, SelectionDAG &DAG) {
// Skip if NewElen is not narrower than the original extended type.
if (NewElen >= N0.getValueType().getScalarSizeInBits())
- return SDValue();
+ return false;
EVT NewEltVT = EVT::getIntegerVT(*DAG.getContext(), NewElen);
EVT NewVT = SrcVT.changeVectorElementType(NewEltVT);
SDValue NewExt = DAG.getNode(N0->getOpcode(), DL, NewVT, N0->ops());
SDValue NewShAmtVec = DAG.getConstant(ShAmtV, DL, NewVT);
- return DAG.getNode(ISD::SHL, DL, NewVT, NewExt, NewShAmtVec);
+ N = DAG.getNode(ISD::SHL, DL, NewVT, NewExt, NewShAmtVec);
+ return true;
}
// Replace (seteq (i64 (and X, 0xffffffff)), C1) with
@@ -13493,19 +13497,20 @@ static bool legalizeScatterGatherIndexType(SDLoc DL, SDValue &Index,
DAG.getMachineFunction().getSubtarget<RISCVSubtarget>().getXLenVT();
const EVT IndexVT = Index.getValueType();
- const bool IsIndexSigned = isIndexTypeSigned(IndexType);
// RISC-V indexed loads only support the "unsigned unscaled" addressing
// mode, so anything else must be manually legalized.
- if (!IsIndexSigned || !IndexVT.getVectorElementType().bitsLT(XLenVT))
+ if (!isIndexTypeSigned(IndexType))
return false;
- // Any index legalization should first promote to XLenVT, so we don't lose
- // bits when scaling. This may create an illegal index type so we let
- // LLVM's legalization take care of the splitting.
- // FIXME: LLVM can't split VP_GATHER or VP_SCATTER yet.
- Index = DAG.getNode(ISD::SIGN_EXTEND, DL,
- IndexVT.changeVectorElementType(XLenVT), Index);
+ if (IndexVT.getVectorElementType().bitsLT(XLenVT)) {
+ // Any index legalization should first promote to XLenVT, so we don't lose
+ // bits when scaling. This may create an illegal index type so we let
+ // LLVM's legalization take care of the splitting.
+ // FIXME: LLVM can't split VP_GATHER or VP_SCATTER yet.
+ Index = DAG.getNode(ISD::SIGN_EXTEND, DL,
+ IndexVT.changeVectorElementType(XLenVT), Index);
+ }
IndexType = ISD::UNSIGNED_SCALED;
return true;
}
@@ -13870,6 +13875,13 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
{MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
MGN->getBasePtr(), Index, ScaleOp},
MGN->getMemOperand(), IndexType, MGN->getExtensionType());
+
+ if (narrowIndex(Index, IndexType, DAG))
+ return DAG.getMaskedGather(
+ N->getVTList(), MGN->getMemoryVT(), DL,
+ {MGN->getChain(), MGN->getPassThru(), MGN->getMask(),
+ MGN->getBasePtr(), Index, ScaleOp},
+ MGN->getMemOperand(), IndexType, MGN->getExtensionType());
break;
}
case ISD::MSCATTER:{
@@ -13887,6 +13899,13 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
{MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
Index, ScaleOp},
MSN->getMemOperand(), IndexType, MSN->isTruncatingStore());
+
+ if (narrowIndex(Index, IndexType, DAG))
+ return DAG.getMaskedScatter(
+ N->getVTList(), MSN->getMemoryVT(), DL,
+ {MSN->getChain(), MSN->getValue(), MSN->getMask(), MSN->getBasePtr(),
+ Index, ScaleOp},
+ MSN->getMemOperand(), IndexType, MSN->isTruncatingStore());
break;
}
case ISD::VP_GATHER: {
@@ -13904,6 +13923,14 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
ScaleOp, VPGN->getMask(),
VPGN->getVectorLength()},
VPGN->getMemOperand(), IndexType);
+
+ if (narrowIndex(Index, IndexType, DAG))
+ return DAG.getGatherVP(N->getVTList(), VPGN->getMemoryVT(), DL,
+ {VPGN->getChain(), VPGN->getBasePtr(), Index,
+ ScaleOp, VPGN->getMask(),
+ VPGN->getVectorLength()},
+ VPGN->getMemOperand(), IndexType);
+
break;
}
case ISD::VP_SCATTER: {
@@ -13921,6 +13948,13 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
VPSN->getBasePtr(), Index, ScaleOp,
VPSN->getMask(), VPSN->getVectorLength()},
VPSN->getMemOperand(), IndexType);
+
+ if (narrowIndex(Index, IndexType, DAG))
+ return DAG.getScatterVP(N->getVTList(), VPSN->getMemoryVT(), DL,
+ {VPSN->getChain(), VPSN->getValue(),
+ VPSN->getBasePtr(), Index, ScaleOp,
+ VPSN->getMask(), VPSN->getVectorLength()},
+ VPSN->getMemOperand(), IndexType);
break;
}
case RISCVISD::SRA_VL:
@@ -14225,23 +14259,6 @@ SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
return DAG.getConstant(-1, DL, VT);
return DAG.getConstant(0, DL, VT);
}
- case Intrinsic::riscv_vloxei:
- case Intrinsic::riscv_vloxei_mask:
- case Intrinsic::riscv_vluxei:
- case Intrinsic::riscv_vluxei_mask:
- case Intrinsic::riscv_vsoxei:
- case Intrinsic::riscv_vsoxei_mask:
- case Intrinsic::riscv_vsuxei:
- case Intrinsic::riscv_vsuxei_mask:
- if (SDValue V = narrowIndex(N->getOperand(4), DAG)) {
- SmallVector<SDValue, 8> Ops(N->ops());
- Ops[4] = V;
- const auto *MemSD = cast<MemIntrinsicSDNode>(N);
- return DAG.getMemIntrinsicNode(N->getOpcode(), SDLoc(N), N->getVTList(),
- Ops, MemSD->getMemoryVT(),
- MemSD->getMemOperand());
- }
- return SDValue();
}
}
case ISD::BITCAST: {
@@ -17677,9 +17694,13 @@ Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
return Result;
}
-bool RISCVTargetLowering::shouldRemoveExtendFromGSIndex(EVT IndexVT,
+bool RISCVTargetLowering::shouldRemoveExtendFromGSIndex(SDValue Extend,
EVT DataVT) const {
- return false;
- // We have indexed loads for all legal index types. Indices are always
+ // zero extended.
+ return Extend.getOpcode() == ISD::ZERO_EXTEND &&
+ isTypeLegal(Extend.getValueType()) &&
+ isTypeLegal(Extend.getOperand(0).getValueType());
}
bool RISCVTargetLowering::shouldConvertFpToSat(unsigned Op, EVT FPVT,
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index 461b929643f2688..695cbaf886f8aeb 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -747,7 +747,7 @@ class RISCVTargetLowering : public TargetLowering {
const RISCVRegisterInfo *TRI);
MVT getContainerForFixedLengthVector(MVT VT) const;
- bool shouldRemoveExtendFromGSIndex(EVT IndexVT, EVT DataVT) const override;
+ bool shouldRemoveExtendFromGSIndex(SDValue Extend, EVT DataVT) const override;
bool isLegalElementTypeForRVV(EVT ScalarTy) const;
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
index f3af177ac0ff27e..6c6ffe656f433b4 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll
@@ -1716,21 +1716,19 @@ define <8 x i16> @mgather_baseidx_sext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8
define <8 x i16> @mgather_baseidx_zext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x i16> %passthru) {
; RV32-LABEL: mgather_baseidx_zext_v8i8_v8i16:
; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; RV32-NEXT: vzext.vf4 v10, v8
-; RV32-NEXT: vadd.vv v10, v10, v10
+; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV32-NEXT: vwaddu.vv v10, v8, v8
; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, mu
-; RV32-NEXT: vluxei32.v v9, (a0), v10, v0.t
+; RV32-NEXT: vluxei16.v v9, (a0), v10, v0.t
; RV32-NEXT: vmv.v.v v8, v9
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_baseidx_zext_v8i8_v8i16:
; RV64V: # %bb.0:
-; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64V-NEXT: vzext.vf8 v12, v8
-; RV64V-NEXT: vadd.vv v12, v12, v12
+; RV64V-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV64V-NEXT: vwaddu.vv v10, v8, v8
; RV64V-NEXT: vsetvli zero, zero, e16, m1, ta, mu
-; RV64V-NEXT: vluxei64.v v9, (a0), v12, v0.t
+; RV64V-NEXT: vluxei16.v v9, (a0), v10, v0.t
; RV64V-NEXT: vmv.v.v v8, v9
; RV64V-NEXT: ret
;
@@ -2793,20 +2791,21 @@ define <8 x i32> @mgather_baseidx_sext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8
define <8 x i32> @mgather_baseidx_zext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x i32> %passthru) {
; RV32-LABEL: mgather_baseidx_zext_v8i8_v8i32:
; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, mu
-; RV32-NEXT: vzext.vf4 v12, v8
-; RV32-NEXT: vsll.vi v8, v12, 2
-; RV32-NEXT: vluxei32.v v10, (a0), v8, v0.t
+; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; RV32-NEXT: vzext.vf2 v9, v8
+; RV32-NEXT: vsll.vi v8, v9, 2
+; RV32-NEXT: vsetvli zero, zero, e32, m2, ta, mu
+; RV32-NEXT: vluxei16.v v10, (a0), v8, v0.t
; RV32-NEXT: vmv.v.v v8, v10
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_baseidx_zext_v8i8_v8i32:
; RV64V: # %bb.0:
-; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64V-NEXT: vzext.vf8 v12, v8
-; RV64V-NEXT: vsll.vi v12, v12, 2
+; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; RV64V-NEXT: vzext.vf2 v9, v8
+; RV64V-NEXT: vsll.vi v8, v9, 2
; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, mu
-; RV64V-NEXT: vluxei64.v v10, (a0), v12, v0.t
+; RV64V-NEXT: vluxei16.v v10, (a0), v8, v0.t
; RV64V-NEXT: vmv.v.v v8, v10
; RV64V-NEXT: ret
;
@@ -3264,11 +3263,10 @@ define <8 x i32> @mgather_baseidx_zext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <
;
; RV64V-LABEL: mgather_baseidx_zext_v8i16_v8i32:
; RV64V: # %bb.0:
-; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, ma
-; RV64V-NEXT: vzext.vf4 v12, v8
-; RV64V-NEXT: vsll.vi v12, v12, 2
-; RV64V-NEXT: vsetvli zero, zero, e32, m2, ta, mu
-; RV64V-NEXT: vluxei64.v v10, (a0), v12, v0.t
+; RV64V-NEXT: vsetivli zero, 8, e32, m2, ta, mu
+; RV64V-NEXT: vzext.vf2 v12, v8
+; RV64V-NEXT: vsll.vi v8, v12, 2
+; RV64V-NEXT: vluxei32.v v10, (a0), v8, v0.t
; RV64V-NEXT: vmv.v.v v8, v10
; RV64V-NEXT: ret
;
@@ -4772,20 +4770,21 @@ define <8 x i64> @mgather_baseidx_sext_v8i8_v8i64(ptr %base, <8 x i8> %idxs, <8
define <8 x i64> @mgather_baseidx_zext_v8i8_v8i64(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x i64> %passthru) {
; RV32V-LABEL: mgather_baseidx_zext_v8i8_v8i64:
; RV32V: # %bb.0:
-; RV32V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; RV32V-NEXT: vzext.vf4 v10, v8
-; RV32V-NEXT: vsll.vi v8, v10, 3
+; RV32V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; RV32V-NEXT: vzext.vf2 v9, v8
+; RV32V-NEXT: vsll.vi v8, v9, 3
; RV32V-NEXT: vsetvli zero, zero, e64, m4, ta, mu
-; RV32V-NEXT: vluxei32.v v12, (a0), v8, v0.t
+; RV32V-NEXT: vluxei16.v v12, (a0), v8, v0.t
; RV32V-NEXT: vmv.v.v v8, v12
; RV32V-NEXT: ret
;
; RV64V-LABEL: mgather_baseidx_zext_v8i8_v8i64:
; RV64V: # %bb.0:
-; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, mu
-; RV64V-NEXT: vzext.vf8 v16, v8
-; RV64V-NEXT: vsll.vi v8, v16, 3
-; RV64V-NEXT: vluxei64.v v12, (a0), v8, v0.t
+; RV64V-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; RV64V-NEXT: vzext.vf2 v9, v8
+; RV64V-NEXT: vsll.vi v8, v9, 3
+; RV64V-NEXT: vsetvli zero, zero, e64, m4, ta, mu
+; RV64V-NEXT: vluxei16.v v12, (a0), v8, v0.t
; RV64V-NEXT: vmv.v.v v8, v12
; RV64V-NEXT: ret
;
@@ -5616,10 +5615,11 @@ define <8 x i64> @mgather_baseidx_zext_v8i16_v8i64(ptr %base, <8 x i16> %idxs, <
;
; RV64V-LABEL: mgather_baseidx_zext_v8i16_v8i64:
; RV64V: # %bb.0:
-; RV64V-NEXT: vsetivli zero, 8, e64, m4, ta, mu
-; RV64V-NEXT: vzext.vf4 v16, v8
-; RV64V-NEXT: vsll.vi v8, v16, 3
-; RV64V-NEXT: vluxei64.v v12, (a0), v8, v0.t
+; RV64V-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; RV64V-NEXT: vzext.vf2 v10, v8
+; RV64V-NEXT: vsll.vi v8, v10, 3
+; RV64V-NEXT: vsetvli zero, zero, e64, m4, ta, mu
+; RV64V-NEXT: vluxei32.v v12, (a0), v8, v0.t
; RV64V-NEXT: vmv.v.v v8, v12
; RV64V-NEXT: ret
;
@@ -7645,21 +7645,19 @@ define <8 x half> @mgather_baseidx_sext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8
define <8 x half> @mgather_baseidx_zext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 x half> %passthru) {
; RV32-LABEL: mgather_baseidx_zext_v8i8_v8f16:
; RV32: # %bb.0:
-; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
-; RV32-NEXT: vzext.vf4 v10, v8
-; RV32-NEXT: vadd.vv v10, v10, v10
+; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
+; RV32-NEXT: vwaddu.vv v10, v8, v8
; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, mu
-; RV32-NEXT: vluxei32.v v9, (a0), v10, v0.t
+; RV32-NEXT: vluxei16.v v9, (a0), v10, v0.t
; RV32-NEXT: vmv.v.v v8, v9
; RV32-NEXT: ret
;
; RV64V-LABEL: mgather_baseidx_zext_v8i8_v8f16:
; RV64V: # %bb.0:
-; RV64V-NEXT: vsetivl...
<truncated>
</pre>
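
As a rough worked example of the narrowing (my reading of the combine above, not text from the patch): for an index of the form (shl (zext v8i8 to v8i64), 3), the shifted value needs at most 8 + 3 = 11 bits, so 16-bit index elements are enough. That is why, for example, the RV64V checks for mgather_baseidx_zext_v8i8_v8i64 go from vzext.vf8 + vluxei64.v to vzext.vf2 + vluxei16.v:

<pre>
;; before: index = (shl (zext v8i8 to v8i64), 3)  -> 64-bit elements, vluxei64.v
;; after:  index = (shl (zext v8i8 to v8i16), 3)  -> 11 bits fit in 16, vluxei16.v
</pre>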
https://github.com/llvm/llvm-project/pull/66405