[llvm] [RISCV] Lower a shuffle which is nearly identity except one replicated element (PR #135292)
Philip Reames via llvm-commits
llvm-commits at lists.llvm.org
Thu Apr 10 18:11:14 PDT 2025
https://github.com/preames created https://github.com/llvm/llvm-project/pull/135292
This can be done with a vrgather.vi/vx and (possibly) a register move. The alternative is to do a vrgather.vv with a full-width index vector.
We'd already caught the two-operand form of this shuffle; this patch specifically handles the single-operand form.
Unfortunately that only exists in the abstract; it would be nice if we canonicalized shuffles in some way, wouldn't it?
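For illustration, here is the identity_splat0 case from the updated test file below: the mask is the identity except that lanes 3 and 4 re-read lane 0.

  %shuf = shufflevector <8 x i8> %v, <8 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 0, i32 0, i32 5, i32 6, i32 7>

With this patch it lowers to a masked vrgather.vi plus register moves (mask 25 = 0b0011001 selects lanes 0, 3, and 4) instead of a constant-pool load feeding a vrgather.vv:

  li a0, 25
  vsetivli zero, 8, e8, mf2, ta, mu
  vmv.s.x v0, a0
  vmv1r.v v9, v8
  vrgather.vi v9, v8, 0, v0.t
  vmv1r.v v8, v9
  ret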
From 63cf771523879c02d664d68423cf6fd85415765f Mon Sep 17 00:00:00 2001
From: Philip Reames <preames at rivosinc.com>
Date: Thu, 10 Apr 2025 13:26:57 -0700
Subject: [PATCH] [RISCV] Lower a shuffle which is nearly identity except one
replicated elem
This can be done with a vrgather.vi/vx and (possibly) a register move.
The alternative is to do a vrgather.vv with a full-width index vector.
We'd already caught the two-operand form of this shuffle; this patch
specifically handles the single-operand form.
Unfortunately that only exists in the abstract; it would be nice if we
canonicalized shuffles in some way, wouldn't it?
---
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 44 +++++++++++++++++++
.../RISCV/rvv/fixed-vectors-shuffle-int.ll | 33 +++++++-------
2 files changed, 60 insertions(+), 17 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index f7d192756fd56..fd0562d141796 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -4726,6 +4726,47 @@ static SDValue getDeinterleaveShiftAndTrunc(const SDLoc &DL, MVT VT,
DAG.getVectorIdxConstant(0, DL));
}
+/// Match a single source shuffle which is an identity except that some
+/// particular element is repeated. This can be lowered as a masked
+/// vrgather.vi/vx. Note that the two source form of this is handled
+/// by the recursive splitting logic and doesn't need special handling.
+static SDValue lowerVECTOR_SHUFFLEAsVRGatherVX(ShuffleVectorSDNode *SVN,
+ const RISCVSubtarget &Subtarget,
+ SelectionDAG &DAG) {
+
+ SDLoc DL(SVN);
+ MVT VT = SVN->getSimpleValueType(0);
+ SDValue V1 = SVN->getOperand(0);
+ assert(SVN->getOperand(1).isUndef());
+ ArrayRef<int> Mask = SVN->getMask();
+ const unsigned NumElts = VT.getVectorNumElements();
+ MVT XLenVT = Subtarget.getXLenVT();
+
+ std::optional<int> SplatIdx;
+ for (auto [I, M] : enumerate(Mask)) {
+ if (M == -1 || I == (unsigned)M)
+ continue;
+ if (SplatIdx && *SplatIdx != M)
+ return SDValue();
+ SplatIdx = M;
+ }
+
+ if (!SplatIdx)
+ return SDValue();
+
+ SmallVector<SDValue> MaskVals;
+ for (int MaskIndex : Mask) {
+ bool SelectMaskVal = MaskIndex == *SplatIdx;
+ MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
+ }
+ assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
+ MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
+ SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
+ SDValue Splat = DAG.getVectorShuffle(VT, DL, V1, DAG.getUNDEF(VT),
+ SmallVector<int>(NumElts, *SplatIdx));
+ return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, Splat, V1);
+}
+
// Lower the following shuffle to vslidedown.
// a)
// t49: v8i8 = extract_subvector t13, Constant:i64<0>
@@ -5852,6 +5893,9 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
return V;
+ if (SDValue V = lowerVECTOR_SHUFFLEAsVRGatherVX(SVN, Subtarget, DAG))
+ return V;
+
// Match a spread(4,8) which can be done via extend and shift. Spread(2)
// is fully covered in interleave(2) above, so it is ignored here.
if (VT.getScalarSizeInBits() < Subtarget.getELen()) {
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-int.ll
index b26fc5653afec..e6375e276d37f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-int.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-int.ll
@@ -1419,11 +1419,11 @@ define <8 x i32> @shuffle_v8i32_locally_repeating_neg(<8 x i32> %a) {
define <8 x i8> @identity_splat0(<8 x i8> %v) {
; CHECK-LABEL: identity_splat0:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, %hi(.LCPI88_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI88_0)
-; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT: vle8.v v10, (a0)
-; CHECK-NEXT: vrgather.vv v9, v8, v10
+; CHECK-NEXT: li a0, 25
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
+; CHECK-NEXT: vmv.s.x v0, a0
+; CHECK-NEXT: vmv1r.v v9, v8
+; CHECK-NEXT: vrgather.vi v9, v8, 0, v0.t
; CHECK-NEXT: vmv1r.v v8, v9
; CHECK-NEXT: ret
%shuf = shufflevector <8 x i8> %v, <8 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 0, i32 0, i32 5, i32 6, i32 7>
@@ -1433,11 +1433,11 @@ define <8 x i8> @identity_splat0(<8 x i8> %v) {
define <8 x i8> @identity_splat2(<8 x i8> %v) {
; CHECK-LABEL: identity_splat2:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a0, %hi(.LCPI89_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI89_0)
-; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT: vle8.v v10, (a0)
-; CHECK-NEXT: vrgather.vv v9, v8, v10
+; CHECK-NEXT: li a0, 28
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
+; CHECK-NEXT: vmv.s.x v0, a0
+; CHECK-NEXT: vmv1r.v v9, v8
+; CHECK-NEXT: vrgather.vi v9, v8, 2, v0.t
; CHECK-NEXT: vmv1r.v v8, v9
; CHECK-NEXT: ret
%shuf = shufflevector <8 x i8> %v, <8 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 2, i32 2, i32 5, i32 6, i32 7>
@@ -1448,14 +1448,13 @@ define <8 x i8> @identity_splat2(<8 x i8> %v) {
define <8 x i8> @vmerge_vxm(<8 x i8> %v, i8 %s) {
; CHECK-LABEL: vmerge_vxm:
; CHECK: # %bb.0:
-; CHECK-NEXT: lui a1, %hi(.LCPI90_0)
-; CHECK-NEXT: addi a1, a1, %lo(.LCPI90_0)
-; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT: vle8.v v10, (a1)
-; CHECK-NEXT: vsetvli zero, zero, e8, mf2, tu, ma
+; CHECK-NEXT: li a1, 25
+; CHECK-NEXT: vsetivli zero, 8, e8, m1, tu, ma
; CHECK-NEXT: vmv.s.x v8, a0
-; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma
-; CHECK-NEXT: vrgather.vv v9, v8, v10
+; CHECK-NEXT: vmv.s.x v0, a1
+; CHECK-NEXT: vmv1r.v v9, v8
+; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
+; CHECK-NEXT: vrgather.vi v9, v8, 0, v0.t
; CHECK-NEXT: vmv1r.v v8, v9
; CHECK-NEXT: ret
%ins = insertelement <8 x i8> %v, i8 %s, i32 0