[llvm] 2baa1df - [RISCV] Add basic support for matching shuffles to vslidedown.vi.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Fri Jan 14 09:09:39 PST 2022
Author: Craig Topper
Date: 2022-01-14T09:04:54-08:00
New Revision: 2baa1dffd1fb03c2a88d7116252aca76a29d0026
URL: https://github.com/llvm/llvm-project/commit/2baa1dffd1fb03c2a88d7116252aca76a29d0026
DIFF: https://github.com/llvm/llvm-project/commit/2baa1dffd1fb03c2a88d7116252aca76a29d0026.diff
LOG: [RISCV] Add basic support for matching shuffles to vslidedown.vi.
Specifically, the unary shuffle case where the elements being
shifted in are undef. This handles the shuffles produced by
expanding llvm.reduce.mul.
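
As a hedged illustration (the names here are invented, though the tests
added below have the same shape), this is the kind of unary shuffle the
new matching recognizes: the second operand is unused and the trailing
mask elements are undef, so the whole shuffle can lower to a single
vslidedown.vi:

define <4 x i16> @slide_by_one(<4 x i16> %x) {
  ; Mask <1, 2, 3, undef> moves every element down one lane and leaves the
  ; shifted-in top lane undef, so it matches vslidedown.vi with immediate 1.
  %s = shufflevector <4 x i16> %x, <4 x i16> poison, <4 x i32> <i32 1, i32 2, i32 3, i32 undef>
  ret <4 x i16> %s
}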
I did not reduce the VL; doing so would increase the number of
vsetvlis, but it might improve execution speed. We'd also want to
narrow the multiplies so we could share vsetvlis between the
vslidedown.vi and the next multiply.
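
For context, here is a rough IR-level sketch of the reduction sequence
this targets (illustrative only: the actual llvm.reduce.mul expansion
happens during SelectionDAG legalization, and the function and value
names are made up). Each halving step is a unary slide-down shuffle
followed by a multiply, and every step runs at the original VL, which is
where reducing the VL or narrowing the multiplies would come in:

define i32 @reduce_mul_sketch(<4 x i32> %v) {
  ; Step 1: slide the upper half down by 2 lanes and multiply with the lower half.
  %s1 = shufflevector <4 x i32> %v, <4 x i32> poison, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  %m1 = mul <4 x i32> %v, %s1
  ; Step 2: slide down by 1 lane and multiply; lane 0 now holds the product of all elements.
  %s2 = shufflevector <4 x i32> %m1, <4 x i32> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  %m2 = mul <4 x i32> %m1, %s2
  %r = extractelement <4 x i32> %m2, i32 0
  ret i32 %r
}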
Reviewed By: frasercrmck
Differential Revision: https://reviews.llvm.org/D117239
Added:
Modified:
llvm/lib/Target/RISCV/RISCVISelLowering.cpp
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 7d9d0774c2255..7ec88e8783d56 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -2297,6 +2297,35 @@ static SDValue lowerScalarSplat(SDValue Scalar, SDValue VL, MVT VT, SDLoc DL,
return splatSplitI64WithVL(DL, VT, Scalar, VL, DAG);
}
+// Is the mask a slidedown that shifts in undefs.
+static int matchShuffleAsSlideDown(ArrayRef<int> Mask) {
+  int Size = Mask.size();
+
+  // Elements shifted in should be undef.
+  auto CheckUndefs = [&](int Shift) {
+    for (int i = Size - Shift; i != Size; ++i)
+      if (Mask[i] >= 0)
+        return false;
+    return true;
+  };
+
+  // Elements should be shifted or undef.
+  auto MatchShift = [&](int Shift) {
+    for (int i = 0; i != Size - Shift; ++i)
+      if (Mask[i] >= 0 && Mask[i] != Shift + i)
+        return false;
+    return true;
+  };
+
+  // Try all possible shifts.
+  for (int Shift = 1; Shift != Size; ++Shift)
+    if (CheckUndefs(Shift) && MatchShift(Shift))
+      return Shift;
+
+  // No match.
+  return -1;
+}
+
static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
SDValue V1 = Op.getOperand(0);
@@ -2382,6 +2411,20 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
}
}
+  // Try to match as a slidedown.
+  int SlideAmt = matchShuffleAsSlideDown(SVN->getMask());
+  if (SlideAmt >= 0) {
+    // TODO: Should we reduce the VL to account for the upper undef elements?
+    // Requires additional vsetvlis, but might be faster to execute.
+    V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
+    SDValue SlideDown =
+        DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, ContainerVT,
+                    DAG.getUNDEF(ContainerVT), V1,
+                    DAG.getConstant(SlideAmt, DL, XLenVT),
+                    TrueMask, VL);
+    return convertFromScalableVector(VT, SlideDown, DAG, Subtarget);
+  }
+
// Detect shuffles which can be re-expressed as vector selects; these are
// shuffles in which each element in the destination is taken from an element
// at the corresponding index in either source vectors.
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll
index 3118a88818ce4..81ebc71f77934 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll
@@ -235,3 +235,23 @@ define <4 x double> @vrgather_shuffle_vx_v4f64(<4 x double> %x) {
%s = shufflevector <4 x double> %x, <4 x double> <double 2.0, double 2.0, double 2.0, double 2.0>, <4 x i32> <i32 0, i32 3, i32 6, i32 5>
ret <4 x double> %s
}
+
+define <4 x half> @slidedown_v4f16(<4 x half> %x) {
+; CHECK-LABEL: slidedown_v4f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
+; CHECK-NEXT: vslidedown.vi v8, v8, 1
+; CHECK-NEXT: ret
+ %s = shufflevector <4 x half> %x, <4 x half> poison, <4 x i32> <i32 1, i32 2, i32 3, i32 undef>
+ ret <4 x half> %s
+}
+
+define <8 x float> @slidedown_v8f32(<8 x float> %x) {
+; CHECK-LABEL: slidedown_v8f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu
+; CHECK-NEXT: vslidedown.vi v8, v8, 3
+; CHECK-NEXT: ret
+ %s = shufflevector <8 x float> %x, <8 x float> poison, <8 x i32> <i32 3, i32 undef, i32 5, i32 6, i32 undef, i32 undef, i32 undef, i32 undef>
+ ret <8 x float> %s
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
index 67e70969c42a1..31b9a346ece12 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
@@ -541,3 +541,23 @@ define <8 x i8> @widen_splat_ve3(<4 x i8> %v) {
%shuf = shufflevector <4 x i8> %v, <4 x i8> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
ret <8 x i8> %shuf
}
+
+define <4 x i16> @slidedown_v4i16(<4 x i16> %x) {
+; CHECK-LABEL: slidedown_v4i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
+; CHECK-NEXT: vslidedown.vi v8, v8, 1
+; CHECK-NEXT: ret
+ %s = shufflevector <4 x i16> %x, <4 x i16> poison, <4 x i32> <i32 1, i32 2, i32 3, i32 undef>
+ ret <4 x i16> %s
+}
+
+define <8 x i32> @slidedown_v8i32(<8 x i32> %x) {
+; CHECK-LABEL: slidedown_v8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu
+; CHECK-NEXT: vslidedown.vi v8, v8, 3
+; CHECK-NEXT: ret
+ %s = shufflevector <8 x i32> %x, <8 x i32> poison, <8 x i32> <i32 3, i32 undef, i32 5, i32 6, i32 undef, i32 undef, i32 undef, i32 undef>
+ ret <8 x i32> %s
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll
index 7d3698cbb0627..8ee206dd8e164 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll
@@ -5584,10 +5584,8 @@ define i8 @vreduce_mul_v4i8(<4 x i8>* %x) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, mu
; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vid.v v9
-; CHECK-NEXT: vadd.vi v9, v9, 2
-; CHECK-NEXT: vrgather.vv v10, v8, v9
-; CHECK-NEXT: vmul.vv v8, v8, v10
+; CHECK-NEXT: vslidedown.vi v9, v8, 2
+; CHECK-NEXT: vmul.vv v8, v8, v9
; CHECK-NEXT: vrgather.vi v9, v8, 1
; CHECK-NEXT: vmul.vv v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
@@ -5604,13 +5602,10 @@ define i8 @vreduce_mul_v8i8(<8 x i8>* %x) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu
; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vid.v v9
-; CHECK-NEXT: vadd.vi v10, v9, 4
-; CHECK-NEXT: vrgather.vv v11, v8, v10
-; CHECK-NEXT: vmul.vv v8, v8, v11
-; CHECK-NEXT: vadd.vi v9, v9, 2
-; CHECK-NEXT: vrgather.vv v10, v8, v9
-; CHECK-NEXT: vmul.vv v8, v8, v10
+; CHECK-NEXT: vslidedown.vi v9, v8, 4
+; CHECK-NEXT: vmul.vv v8, v8, v9
+; CHECK-NEXT: vslidedown.vi v9, v8, 2
+; CHECK-NEXT: vmul.vv v8, v8, v9
; CHECK-NEXT: vrgather.vi v9, v8, 1
; CHECK-NEXT: vmul.vv v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
@@ -5627,16 +5622,12 @@ define i8 @vreduce_mul_v16i8(<16 x i8>* %x) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu
; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: vid.v v9
-; CHECK-NEXT: vadd.vi v10, v9, 8
-; CHECK-NEXT: vrgather.vv v11, v8, v10
-; CHECK-NEXT: vmul.vv v8, v8, v11
-; CHECK-NEXT: vadd.vi v10, v9, 4
-; CHECK-NEXT: vrgather.vv v11, v8, v10
-; CHECK-NEXT: vmul.vv v8, v8, v11
-; CHECK-NEXT: vadd.vi v9, v9, 2
-; CHECK-NEXT: vrgather.vv v10, v8, v9
-; CHECK-NEXT: vmul.vv v8, v8, v10
+; CHECK-NEXT: vslidedown.vi v9, v8, 8
+; CHECK-NEXT: vmul.vv v8, v8, v9
+; CHECK-NEXT: vslidedown.vi v9, v8, 4
+; CHECK-NEXT: vmul.vv v8, v8, v9
+; CHECK-NEXT: vslidedown.vi v9, v8, 2
+; CHECK-NEXT: vmul.vv v8, v8, v9
; CHECK-NEXT: vrgather.vi v9, v8, 1
; CHECK-NEXT: vmul.vv v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
@@ -5654,21 +5645,14 @@ define i8 @vreduce_mul_v32i8(<32 x i8>* %x) {
; CHECK-NEXT: li a1, 32
; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu
; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: lui a0, %hi(.LCPI253_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI253_0)
-; CHECK-NEXT: vle8.v v10, (a0)
-; CHECK-NEXT: vrgather.vv v12, v8, v10
-; CHECK-NEXT: vmul.vv v8, v8, v12
-; CHECK-NEXT: vid.v v10
-; CHECK-NEXT: vadd.vi v12, v10, 8
-; CHECK-NEXT: vrgather.vv v14, v8, v12
-; CHECK-NEXT: vmul.vv v8, v8, v14
-; CHECK-NEXT: vadd.vi v12, v10, 4
-; CHECK-NEXT: vrgather.vv v14, v8, v12
-; CHECK-NEXT: vmul.vv v8, v8, v14
-; CHECK-NEXT: vadd.vi v10, v10, 2
-; CHECK-NEXT: vrgather.vv v12, v8, v10
-; CHECK-NEXT: vmul.vv v8, v8, v12
+; CHECK-NEXT: vslidedown.vi v10, v8, 16
+; CHECK-NEXT: vmul.vv v8, v8, v10
+; CHECK-NEXT: vslidedown.vi v10, v8, 8
+; CHECK-NEXT: vmul.vv v8, v8, v10
+; CHECK-NEXT: vslidedown.vi v10, v8, 4
+; CHECK-NEXT: vmul.vv v8, v8, v10
+; CHECK-NEXT: vslidedown.vi v10, v8, 2
+; CHECK-NEXT: vmul.vv v8, v8, v10
; CHECK-NEXT: vrgather.vi v10, v8, 1
; CHECK-NEXT: vmul.vv v8, v8, v10
; CHECK-NEXT: vmv.x.s a0, v8
@@ -5686,26 +5670,17 @@ define i8 @vreduce_mul_v64i8(<64 x i8>* %x) {
; CHECK-NEXT: li a1, 64
; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, mu
; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: lui a0, %hi(.LCPI254_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI254_0)
-; CHECK-NEXT: vle8.v v12, (a0)
-; CHECK-NEXT: lui a0, %hi(.LCPI254_1)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI254_1)
-; CHECK-NEXT: vle8.v v16, (a0)
-; CHECK-NEXT: vrgather.vv v20, v8, v12
-; CHECK-NEXT: vmul.vv v8, v8, v20
-; CHECK-NEXT: vrgather.vv v12, v8, v16
+; CHECK-NEXT: li a0, 32
+; CHECK-NEXT: vslidedown.vx v12, v8, a0
+; CHECK-NEXT: vmul.vv v8, v8, v12
+; CHECK-NEXT: vslidedown.vi v12, v8, 16
+; CHECK-NEXT: vmul.vv v8, v8, v12
+; CHECK-NEXT: vslidedown.vi v12, v8, 8
+; CHECK-NEXT: vmul.vv v8, v8, v12
+; CHECK-NEXT: vslidedown.vi v12, v8, 4
+; CHECK-NEXT: vmul.vv v8, v8, v12
+; CHECK-NEXT: vslidedown.vi v12, v8, 2
; CHECK-NEXT: vmul.vv v8, v8, v12
-; CHECK-NEXT: vid.v v12
-; CHECK-NEXT: vadd.vi v16, v12, 8
-; CHECK-NEXT: vrgather.vv v20, v8, v16
-; CHECK-NEXT: vmul.vv v8, v8, v20
-; CHECK-NEXT: vadd.vi v16, v12, 4
-; CHECK-NEXT: vrgather.vv v20, v8, v16
-; CHECK-NEXT: vmul.vv v8, v8, v20
-; CHECK-NEXT: vadd.vi v12, v12, 2
-; CHECK-NEXT: vrgather.vv v16, v8, v12
-; CHECK-NEXT: vmul.vv v8, v8, v16
; CHECK-NEXT: vrgather.vi v12, v8, 1
; CHECK-NEXT: vmul.vv v8, v8, v12
; CHECK-NEXT: vmv.x.s a0, v8
@@ -5723,31 +5698,20 @@ define i8 @vreduce_mul_v128i8(<128 x i8>* %x) {
; CHECK-NEXT: li a1, 128
; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, mu
; CHECK-NEXT: vle8.v v8, (a0)
-; CHECK-NEXT: lui a0, %hi(.LCPI255_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI255_0)
-; CHECK-NEXT: vle8.v v24, (a0)
-; CHECK-NEXT: vrgather.vv v16, v8, v24
-; CHECK-NEXT: lui a0, %hi(.LCPI255_1)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI255_1)
-; CHECK-NEXT: vle8.v v24, (a0)
-; CHECK-NEXT: lui a0, %hi(.LCPI255_2)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI255_2)
-; CHECK-NEXT: vle8.v v0, (a0)
+; CHECK-NEXT: li a0, 64
+; CHECK-NEXT: vslidedown.vx v16, v8, a0
+; CHECK-NEXT: vmul.vv v8, v8, v16
+; CHECK-NEXT: li a0, 32
+; CHECK-NEXT: vslidedown.vx v16, v8, a0
+; CHECK-NEXT: vmul.vv v8, v8, v16
+; CHECK-NEXT: vslidedown.vi v16, v8, 16
; CHECK-NEXT: vmul.vv v8, v8, v16
-; CHECK-NEXT: vrgather.vv v16, v8, v24
+; CHECK-NEXT: vslidedown.vi v16, v8, 8
; CHECK-NEXT: vmul.vv v8, v8, v16
-; CHECK-NEXT: vrgather.vv v16, v8, v0
+; CHECK-NEXT: vslidedown.vi v16, v8, 4
+; CHECK-NEXT: vmul.vv v8, v8, v16
+; CHECK-NEXT: vslidedown.vi v16, v8, 2
; CHECK-NEXT: vmul.vv v8, v8, v16
-; CHECK-NEXT: vid.v v16
-; CHECK-NEXT: vadd.vi v24, v16, 8
-; CHECK-NEXT: vrgather.vv v0, v8, v24
-; CHECK-NEXT: vmul.vv v8, v8, v0
-; CHECK-NEXT: vadd.vi v24, v16, 4
-; CHECK-NEXT: vrgather.vv v0, v8, v24
-; CHECK-NEXT: vmul.vv v8, v8, v0
-; CHECK-NEXT: vadd.vi v16, v16, 2
-; CHECK-NEXT: vrgather.vv v24, v8, v16
-; CHECK-NEXT: vmul.vv v8, v8, v24
; CHECK-NEXT: vrgather.vi v16, v8, 1
; CHECK-NEXT: vmul.vv v8, v8, v16
; CHECK-NEXT: vmv.x.s a0, v8
@@ -5767,32 +5731,21 @@ define i8 @vreduce_mul_v256i8(<256 x i8>* %x) {
; CHECK-NEXT: vle8.v v8, (a0)
; CHECK-NEXT: addi a0, a0, 128
; CHECK-NEXT: vle8.v v16, (a0)
-; CHECK-NEXT: lui a0, %hi(.LCPI256_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI256_0)
-; CHECK-NEXT: vle8.v v24, (a0)
; CHECK-NEXT: vmul.vv v8, v8, v16
-; CHECK-NEXT: vrgather.vv v16, v8, v24
-; CHECK-NEXT: lui a0, %hi(.LCPI256_1)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI256_1)
-; CHECK-NEXT: vle8.v v24, (a0)
-; CHECK-NEXT: lui a0, %hi(.LCPI256_2)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI256_2)
-; CHECK-NEXT: vle8.v v0, (a0)
+; CHECK-NEXT: li a0, 64
+; CHECK-NEXT: vslidedown.vx v16, v8, a0
+; CHECK-NEXT: vmul.vv v8, v8, v16
+; CHECK-NEXT: li a0, 32
+; CHECK-NEXT: vslidedown.vx v16, v8, a0
+; CHECK-NEXT: vmul.vv v8, v8, v16
+; CHECK-NEXT: vslidedown.vi v16, v8, 16
; CHECK-NEXT: vmul.vv v8, v8, v16
-; CHECK-NEXT: vrgather.vv v16, v8, v24
+; CHECK-NEXT: vslidedown.vi v16, v8, 8
; CHECK-NEXT: vmul.vv v8, v8, v16
-; CHECK-NEXT: vrgather.vv v16, v8, v0
+; CHECK-NEXT: vslidedown.vi v16, v8, 4
+; CHECK-NEXT: vmul.vv v8, v8, v16
+; CHECK-NEXT: vslidedown.vi v16, v8, 2
; CHECK-NEXT: vmul.vv v8, v8, v16
-; CHECK-NEXT: vid.v v16
-; CHECK-NEXT: vadd.vi v24, v16, 8
-; CHECK-NEXT: vrgather.vv v0, v8, v24
-; CHECK-NEXT: vmul.vv v8, v8, v0
-; CHECK-NEXT: vadd.vi v24, v16, 4
-; CHECK-NEXT: vrgather.vv v0, v8, v24
-; CHECK-NEXT: vmul.vv v8, v8, v0
-; CHECK-NEXT: vadd.vi v16, v16, 2
-; CHECK-NEXT: vrgather.vv v24, v8, v16
-; CHECK-NEXT: vmul.vv v8, v8, v24
; CHECK-NEXT: vrgather.vi v16, v8, 1
; CHECK-NEXT: vmul.vv v8, v8, v16
; CHECK-NEXT: vmv.x.s a0, v8
@@ -5839,10 +5792,8 @@ define i16 @vreduce_mul_v4i16(<4 x i16>* %x) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vid.v v9
-; CHECK-NEXT: vadd.vi v9, v9, 2
-; CHECK-NEXT: vrgather.vv v10, v8, v9
-; CHECK-NEXT: vmul.vv v8, v8, v10
+; CHECK-NEXT: vslidedown.vi v9, v8, 2
+; CHECK-NEXT: vmul.vv v8, v8, v9
; CHECK-NEXT: vrgather.vi v9, v8, 1
; CHECK-NEXT: vmul.vv v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
@@ -5859,13 +5810,10 @@ define i16 @vreduce_mul_v8i16(<8 x i16>* %x) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vid.v v9
-; CHECK-NEXT: vadd.vi v10, v9, 4
-; CHECK-NEXT: vrgather.vv v11, v8, v10
-; CHECK-NEXT: vmul.vv v8, v8, v11
-; CHECK-NEXT: vadd.vi v9, v9, 2
-; CHECK-NEXT: vrgather.vv v10, v8, v9
-; CHECK-NEXT: vmul.vv v8, v8, v10
+; CHECK-NEXT: vslidedown.vi v9, v8, 4
+; CHECK-NEXT: vmul.vv v8, v8, v9
+; CHECK-NEXT: vslidedown.vi v9, v8, 2
+; CHECK-NEXT: vmul.vv v8, v8, v9
; CHECK-NEXT: vrgather.vi v9, v8, 1
; CHECK-NEXT: vmul.vv v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
@@ -5882,16 +5830,12 @@ define i16 @vreduce_mul_v16i16(<16 x i16>* %x) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: vid.v v10
-; CHECK-NEXT: vadd.vi v12, v10, 8
-; CHECK-NEXT: vrgather.vv v14, v8, v12
-; CHECK-NEXT: vmul.vv v8, v8, v14
-; CHECK-NEXT: vadd.vi v12, v10, 4
-; CHECK-NEXT: vrgather.vv v14, v8, v12
-; CHECK-NEXT: vmul.vv v8, v8, v14
-; CHECK-NEXT: vadd.vi v10, v10, 2
-; CHECK-NEXT: vrgather.vv v12, v8, v10
-; CHECK-NEXT: vmul.vv v8, v8, v12
+; CHECK-NEXT: vslidedown.vi v10, v8, 8
+; CHECK-NEXT: vmul.vv v8, v8, v10
+; CHECK-NEXT: vslidedown.vi v10, v8, 4
+; CHECK-NEXT: vmul.vv v8, v8, v10
+; CHECK-NEXT: vslidedown.vi v10, v8, 2
+; CHECK-NEXT: vmul.vv v8, v8, v10
; CHECK-NEXT: vrgather.vi v10, v8, 1
; CHECK-NEXT: vmul.vv v8, v8, v10
; CHECK-NEXT: vmv.x.s a0, v8
@@ -5909,21 +5853,14 @@ define i16 @vreduce_mul_v32i16(<32 x i16>* %x) {
; CHECK-NEXT: li a1, 32
; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: lui a0, %hi(.LCPI262_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI262_0)
-; CHECK-NEXT: vle16.v v12, (a0)
-; CHECK-NEXT: vrgather.vv v16, v8, v12
-; CHECK-NEXT: vmul.vv v8, v8, v16
-; CHECK-NEXT: vid.v v12
-; CHECK-NEXT: vadd.vi v16, v12, 8
-; CHECK-NEXT: vrgather.vv v20, v8, v16
-; CHECK-NEXT: vmul.vv v8, v8, v20
-; CHECK-NEXT: vadd.vi v16, v12, 4
-; CHECK-NEXT: vrgather.vv v20, v8, v16
-; CHECK-NEXT: vmul.vv v8, v8, v20
-; CHECK-NEXT: vadd.vi v12, v12, 2
-; CHECK-NEXT: vrgather.vv v16, v8, v12
-; CHECK-NEXT: vmul.vv v8, v8, v16
+; CHECK-NEXT: vslidedown.vi v12, v8, 16
+; CHECK-NEXT: vmul.vv v8, v8, v12
+; CHECK-NEXT: vslidedown.vi v12, v8, 8
+; CHECK-NEXT: vmul.vv v8, v8, v12
+; CHECK-NEXT: vslidedown.vi v12, v8, 4
+; CHECK-NEXT: vmul.vv v8, v8, v12
+; CHECK-NEXT: vslidedown.vi v12, v8, 2
+; CHECK-NEXT: vmul.vv v8, v8, v12
; CHECK-NEXT: vrgather.vi v12, v8, 1
; CHECK-NEXT: vmul.vv v8, v8, v12
; CHECK-NEXT: vmv.x.s a0, v8
@@ -5941,26 +5878,17 @@ define i16 @vreduce_mul_v64i16(<64 x i16>* %x) {
; CHECK-NEXT: li a1, 64
; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu
; CHECK-NEXT: vle16.v v8, (a0)
-; CHECK-NEXT: lui a0, %hi(.LCPI263_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI263_0)
-; CHECK-NEXT: vle16.v v16, (a0)
-; CHECK-NEXT: lui a0, %hi(.LCPI263_1)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI263_1)
-; CHECK-NEXT: vle16.v v24, (a0)
-; CHECK-NEXT: vrgather.vv v0, v8, v16
-; CHECK-NEXT: vmul.vv v8, v8, v0
-; CHECK-NEXT: vrgather.vv v16, v8, v24
+; CHECK-NEXT: li a0, 32
+; CHECK-NEXT: vslidedown.vx v16, v8, a0
+; CHECK-NEXT: vmul.vv v8, v8, v16
+; CHECK-NEXT: vslidedown.vi v16, v8, 16
+; CHECK-NEXT: vmul.vv v8, v8, v16
+; CHECK-NEXT: vslidedown.vi v16, v8, 8
+; CHECK-NEXT: vmul.vv v8, v8, v16
+; CHECK-NEXT: vslidedown.vi v16, v8, 4
+; CHECK-NEXT: vmul.vv v8, v8, v16
+; CHECK-NEXT: vslidedown.vi v16, v8, 2
; CHECK-NEXT: vmul.vv v8, v8, v16
-; CHECK-NEXT: vid.v v16
-; CHECK-NEXT: vadd.vi v24, v16, 8
-; CHECK-NEXT: vrgather.vv v0, v8, v24
-; CHECK-NEXT: vmul.vv v8, v8, v0
-; CHECK-NEXT: vadd.vi v24, v16, 4
-; CHECK-NEXT: vrgather.vv v0, v8, v24
-; CHECK-NEXT: vmul.vv v8, v8, v0
-; CHECK-NEXT: vadd.vi v16, v16, 2
-; CHECK-NEXT: vrgather.vv v24, v8, v16
-; CHECK-NEXT: vmul.vv v8, v8, v24
; CHECK-NEXT: vrgather.vi v16, v8, 1
; CHECK-NEXT: vmul.vv v8, v8, v16
; CHECK-NEXT: vmv.x.s a0, v8
@@ -5980,27 +5908,18 @@ define i16 @vreduce_mul_v128i16(<128 x i16>* %x) {
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: addi a0, a0, 128
; CHECK-NEXT: vle16.v v16, (a0)
-; CHECK-NEXT: lui a0, %hi(.LCPI264_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI264_0)
-; CHECK-NEXT: vle16.v v24, (a0)
-; CHECK-NEXT: lui a0, %hi(.LCPI264_1)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI264_1)
-; CHECK-NEXT: vle16.v v0, (a0)
; CHECK-NEXT: vmul.vv v8, v8, v16
-; CHECK-NEXT: vrgather.vv v16, v8, v24
+; CHECK-NEXT: li a0, 32
+; CHECK-NEXT: vslidedown.vx v16, v8, a0
+; CHECK-NEXT: vmul.vv v8, v8, v16
+; CHECK-NEXT: vslidedown.vi v16, v8, 16
; CHECK-NEXT: vmul.vv v8, v8, v16
-; CHECK-NEXT: vrgather.vv v16, v8, v0
+; CHECK-NEXT: vslidedown.vi v16, v8, 8
+; CHECK-NEXT: vmul.vv v8, v8, v16
+; CHECK-NEXT: vslidedown.vi v16, v8, 4
+; CHECK-NEXT: vmul.vv v8, v8, v16
+; CHECK-NEXT: vslidedown.vi v16, v8, 2
; CHECK-NEXT: vmul.vv v8, v8, v16
-; CHECK-NEXT: vid.v v16
-; CHECK-NEXT: vadd.vi v24, v16, 8
-; CHECK-NEXT: vrgather.vv v0, v8, v24
-; CHECK-NEXT: vmul.vv v8, v8, v0
-; CHECK-NEXT: vadd.vi v24, v16, 4
-; CHECK-NEXT: vrgather.vv v0, v8, v24
-; CHECK-NEXT: vmul.vv v8, v8, v0
-; CHECK-NEXT: vadd.vi v16, v16, 2
-; CHECK-NEXT: vrgather.vv v24, v8, v16
-; CHECK-NEXT: vmul.vv v8, v8, v24
; CHECK-NEXT: vrgather.vi v16, v8, 1
; CHECK-NEXT: vmul.vv v8, v8, v16
; CHECK-NEXT: vmv.x.s a0, v8
@@ -6047,10 +5966,8 @@ define i32 @vreduce_mul_v4i32(<4 x i32>* %x) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu
; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: vid.v v9
-; CHECK-NEXT: vadd.vi v9, v9, 2
-; CHECK-NEXT: vrgather.vv v10, v8, v9
-; CHECK-NEXT: vmul.vv v8, v8, v10
+; CHECK-NEXT: vslidedown.vi v9, v8, 2
+; CHECK-NEXT: vmul.vv v8, v8, v9
; CHECK-NEXT: vrgather.vi v9, v8, 1
; CHECK-NEXT: vmul.vv v8, v8, v9
; CHECK-NEXT: vmv.x.s a0, v8
@@ -6067,13 +5984,10 @@ define i32 @vreduce_mul_v8i32(<8 x i32>* %x) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu
; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: vid.v v10
-; CHECK-NEXT: vadd.vi v12, v10, 4
-; CHECK-NEXT: vrgather.vv v14, v8, v12
-; CHECK-NEXT: vmul.vv v8, v8, v14
-; CHECK-NEXT: vadd.vi v10, v10, 2
-; CHECK-NEXT: vrgather.vv v12, v8, v10
-; CHECK-NEXT: vmul.vv v8, v8, v12
+; CHECK-NEXT: vslidedown.vi v10, v8, 4
+; CHECK-NEXT: vmul.vv v8, v8, v10
+; CHECK-NEXT: vslidedown.vi v10, v8, 2
+; CHECK-NEXT: vmul.vv v8, v8, v10
; CHECK-NEXT: vrgather.vi v10, v8, 1
; CHECK-NEXT: vmul.vv v8, v8, v10
; CHECK-NEXT: vmv.x.s a0, v8
@@ -6090,16 +6004,12 @@ define i32 @vreduce_mul_v16i32(<16 x i32>* %x) {
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu
; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: vid.v v12
-; CHECK-NEXT: vadd.vi v16, v12, 8
-; CHECK-NEXT: vrgather.vv v20, v8, v16
-; CHECK-NEXT: vmul.vv v8, v8, v20
-; CHECK-NEXT: vadd.vi v16, v12, 4
-; CHECK-NEXT: vrgather.vv v20, v8, v16
-; CHECK-NEXT: vmul.vv v8, v8, v20
-; CHECK-NEXT: vadd.vi v12, v12, 2
-; CHECK-NEXT: vrgather.vv v16, v8, v12
-; CHECK-NEXT: vmul.vv v8, v8, v16
+; CHECK-NEXT: vslidedown.vi v12, v8, 8
+; CHECK-NEXT: vmul.vv v8, v8, v12
+; CHECK-NEXT: vslidedown.vi v12, v8, 4
+; CHECK-NEXT: vmul.vv v8, v8, v12
+; CHECK-NEXT: vslidedown.vi v12, v8, 2
+; CHECK-NEXT: vmul.vv v8, v8, v12
; CHECK-NEXT: vrgather.vi v12, v8, 1
; CHECK-NEXT: vmul.vv v8, v8, v12
; CHECK-NEXT: vmv.x.s a0, v8
@@ -6117,21 +6027,14 @@ define i32 @vreduce_mul_v32i32(<32 x i32>* %x) {
; CHECK-NEXT: li a1, 32
; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu
; CHECK-NEXT: vle32.v v8, (a0)
-; CHECK-NEXT: lui a0, %hi(.LCPI270_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI270_0)
-; CHECK-NEXT: vle32.v v16, (a0)
-; CHECK-NEXT: vrgather.vv v24, v8, v16
-; CHECK-NEXT: vmul.vv v8, v8, v24
-; CHECK-NEXT: vid.v v16
-; CHECK-NEXT: vadd.vi v24, v16, 8
-; CHECK-NEXT: vrgather.vv v0, v8, v24
-; CHECK-NEXT: vmul.vv v8, v8, v0
-; CHECK-NEXT: vadd.vi v24, v16, 4
-; CHECK-NEXT: vrgather.vv v0, v8, v24
-; CHECK-NEXT: vmul.vv v8, v8, v0
-; CHECK-NEXT: vadd.vi v16, v16, 2
-; CHECK-NEXT: vrgather.vv v24, v8, v16
-; CHECK-NEXT: vmul.vv v8, v8, v24
+; CHECK-NEXT: vslidedown.vi v16, v8, 16
+; CHECK-NEXT: vmul.vv v8, v8, v16
+; CHECK-NEXT: vslidedown.vi v16, v8, 8
+; CHECK-NEXT: vmul.vv v8, v8, v16
+; CHECK-NEXT: vslidedown.vi v16, v8, 4
+; CHECK-NEXT: vmul.vv v8, v8, v16
+; CHECK-NEXT: vslidedown.vi v16, v8, 2
+; CHECK-NEXT: vmul.vv v8, v8, v16
; CHECK-NEXT: vrgather.vi v16, v8, 1
; CHECK-NEXT: vmul.vv v8, v8, v16
; CHECK-NEXT: vmv.x.s a0, v8
@@ -6151,22 +6054,15 @@ define i32 @vreduce_mul_v64i32(<64 x i32>* %x) {
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: addi a0, a0, 128
; CHECK-NEXT: vle32.v v16, (a0)
-; CHECK-NEXT: lui a0, %hi(.LCPI271_0)
-; CHECK-NEXT: addi a0, a0, %lo(.LCPI271_0)
-; CHECK-NEXT: vle32.v v24, (a0)
; CHECK-NEXT: vmul.vv v8, v8, v16
-; CHECK-NEXT: vrgather.vv v16, v8, v24
+; CHECK-NEXT: vslidedown.vi v16, v8, 16
+; CHECK-NEXT: vmul.vv v8, v8, v16
+; CHECK-NEXT: vslidedown.vi v16, v8, 8
+; CHECK-NEXT: vmul.vv v8, v8, v16
+; CHECK-NEXT: vslidedown.vi v16, v8, 4
+; CHECK-NEXT: vmul.vv v8, v8, v16
+; CHECK-NEXT: vslidedown.vi v16, v8, 2
; CHECK-NEXT: vmul.vv v8, v8, v16
-; CHECK-NEXT: vid.v v16
-; CHECK-NEXT: vadd.vi v24, v16, 8
-; CHECK-NEXT: vrgather.vv v0, v8, v24
-; CHECK-NEXT: vmul.vv v8, v8, v0
-; CHECK-NEXT: vadd.vi v24, v16, 4
-; CHECK-NEXT: vrgather.vv v0, v8, v24
-; CHECK-NEXT: vmul.vv v8, v8, v0
-; CHECK-NEXT: vadd.vi v16, v16, 2
-; CHECK-NEXT: vrgather.vv v24, v8, v16
-; CHECK-NEXT: vmul.vv v8, v8, v24
; CHECK-NEXT: vrgather.vi v16, v8, 1
; CHECK-NEXT: vmul.vv v8, v8, v16
; CHECK-NEXT: vmv.x.s a0, v8
@@ -6237,12 +6133,8 @@ define i64 @vreduce_mul_v4i64(<4 x i64>* %x) {
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, mu
; RV32-NEXT: vle64.v v8, (a0)
-; RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, mu
-; RV32-NEXT: vid.v v10
-; RV32-NEXT: vadd.vi v10, v10, 2
-; RV32-NEXT: vsetvli zero, zero, e64, m2, ta, mu
-; RV32-NEXT: vrgatherei16.vv v12, v8, v10
-; RV32-NEXT: vmul.vv v8, v8, v12
+; RV32-NEXT: vslidedown.vi v10, v8, 2
+; RV32-NEXT: vmul.vv v8, v8, v10
; RV32-NEXT: vrgather.vi v10, v8, 1
; RV32-NEXT: vmul.vv v8, v8, v10
; RV32-NEXT: vmv.x.s a0, v8
@@ -6256,10 +6148,8 @@ define i64 @vreduce_mul_v4i64(<4 x i64>* %x) {
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, mu
; RV64-NEXT: vle64.v v8, (a0)
-; RV64-NEXT: vid.v v10
-; RV64-NEXT: vadd.vi v10, v10, 2
-; RV64-NEXT: vrgather.vv v12, v8, v10
-; RV64-NEXT: vmul.vv v8, v8, v12
+; RV64-NEXT: vslidedown.vi v10, v8, 2
+; RV64-NEXT: vmul.vv v8, v8, v10
; RV64-NEXT: vrgather.vi v10, v8, 1
; RV64-NEXT: vmul.vv v8, v8, v10
; RV64-NEXT: vmv.x.s a0, v8
@@ -6276,17 +6166,10 @@ define i64 @vreduce_mul_v8i64(<8 x i64>* %x) {
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, mu
; RV32-NEXT: vle64.v v8, (a0)
-; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, mu
-; RV32-NEXT: vid.v v12
-; RV32-NEXT: vadd.vi v13, v12, 4
-; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu
-; RV32-NEXT: vrgatherei16.vv v16, v8, v13
-; RV32-NEXT: vmul.vv v8, v8, v16
-; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, mu
-; RV32-NEXT: vadd.vi v12, v12, 2
-; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu
-; RV32-NEXT: vrgatherei16.vv v16, v8, v12
-; RV32-NEXT: vmul.vv v8, v8, v16
+; RV32-NEXT: vslidedown.vi v12, v8, 4
+; RV32-NEXT: vmul.vv v8, v8, v12
+; RV32-NEXT: vslidedown.vi v12, v8, 2
+; RV32-NEXT: vmul.vv v8, v8, v12
; RV32-NEXT: vrgather.vi v12, v8, 1
; RV32-NEXT: vmul.vv v8, v8, v12
; RV32-NEXT: vmv.x.s a0, v8
@@ -6300,13 +6183,10 @@ define i64 @vreduce_mul_v8i64(<8 x i64>* %x) {
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu
; RV64-NEXT: vle64.v v8, (a0)
-; RV64-NEXT: vid.v v12
-; RV64-NEXT: vadd.vi v16, v12, 4
-; RV64-NEXT: vrgather.vv v20, v8, v16
-; RV64-NEXT: vmul.vv v8, v8, v20
-; RV64-NEXT: vadd.vi v12, v12, 2
-; RV64-NEXT: vrgather.vv v16, v8, v12
-; RV64-NEXT: vmul.vv v8, v8, v16
+; RV64-NEXT: vslidedown.vi v12, v8, 4
+; RV64-NEXT: vmul.vv v8, v8, v12
+; RV64-NEXT: vslidedown.vi v12, v8, 2
+; RV64-NEXT: vmul.vv v8, v8, v12
; RV64-NEXT: vrgather.vi v12, v8, 1
; RV64-NEXT: vmul.vv v8, v8, v12
; RV64-NEXT: vmv.x.s a0, v8
@@ -6323,22 +6203,12 @@ define i64 @vreduce_mul_v16i64(<16 x i64>* %x) {
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, mu
; RV32-NEXT: vle64.v v8, (a0)
-; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, mu
-; RV32-NEXT: vid.v v16
-; RV32-NEXT: vadd.vi v18, v16, 8
-; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; RV32-NEXT: vrgatherei16.vv v24, v8, v18
-; RV32-NEXT: vmul.vv v8, v8, v24
-; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, mu
-; RV32-NEXT: vadd.vi v18, v16, 4
-; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; RV32-NEXT: vrgatherei16.vv v24, v8, v18
-; RV32-NEXT: vmul.vv v8, v8, v24
-; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, mu
-; RV32-NEXT: vadd.vi v16, v16, 2
-; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; RV32-NEXT: vrgatherei16.vv v24, v8, v16
-; RV32-NEXT: vmul.vv v8, v8, v24
+; RV32-NEXT: vslidedown.vi v16, v8, 8
+; RV32-NEXT: vmul.vv v8, v8, v16
+; RV32-NEXT: vslidedown.vi v16, v8, 4
+; RV32-NEXT: vmul.vv v8, v8, v16
+; RV32-NEXT: vslidedown.vi v16, v8, 2
+; RV32-NEXT: vmul.vv v8, v8, v16
; RV32-NEXT: vrgather.vi v16, v8, 1
; RV32-NEXT: vmul.vv v8, v8, v16
; RV32-NEXT: vmv.x.s a0, v8
@@ -6352,16 +6222,12 @@ define i64 @vreduce_mul_v16i64(<16 x i64>* %x) {
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu
; RV64-NEXT: vle64.v v8, (a0)
-; RV64-NEXT: vid.v v16
-; RV64-NEXT: vadd.vi v24, v16, 8
-; RV64-NEXT: vrgather.vv v0, v8, v24
-; RV64-NEXT: vmul.vv v8, v8, v0
-; RV64-NEXT: vadd.vi v24, v16, 4
-; RV64-NEXT: vrgather.vv v0, v8, v24
-; RV64-NEXT: vmul.vv v8, v8, v0
-; RV64-NEXT: vadd.vi v16, v16, 2
-; RV64-NEXT: vrgather.vv v24, v8, v16
-; RV64-NEXT: vmul.vv v8, v8, v24
+; RV64-NEXT: vslidedown.vi v16, v8, 8
+; RV64-NEXT: vmul.vv v8, v8, v16
+; RV64-NEXT: vslidedown.vi v16, v8, 4
+; RV64-NEXT: vmul.vv v8, v8, v16
+; RV64-NEXT: vslidedown.vi v16, v8, 2
+; RV64-NEXT: vmul.vv v8, v8, v16
; RV64-NEXT: vrgather.vi v16, v8, 1
; RV64-NEXT: vmul.vv v8, v8, v16
; RV64-NEXT: vmv.x.s a0, v8
@@ -6381,22 +6247,12 @@ define i64 @vreduce_mul_v32i64(<32 x i64>* %x) {
; RV32-NEXT: addi a0, a0, 128
; RV32-NEXT: vle64.v v16, (a0)
; RV32-NEXT: vmul.vv v8, v8, v16
-; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, mu
-; RV32-NEXT: vid.v v16
-; RV32-NEXT: vadd.vi v18, v16, 8
-; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; RV32-NEXT: vrgatherei16.vv v24, v8, v18
-; RV32-NEXT: vmul.vv v8, v8, v24
-; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, mu
-; RV32-NEXT: vadd.vi v18, v16, 4
-; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; RV32-NEXT: vrgatherei16.vv v24, v8, v18
-; RV32-NEXT: vmul.vv v8, v8, v24
-; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, mu
-; RV32-NEXT: vadd.vi v16, v16, 2
-; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; RV32-NEXT: vrgatherei16.vv v24, v8, v16
-; RV32-NEXT: vmul.vv v8, v8, v24
+; RV32-NEXT: vslidedown.vi v16, v8, 8
+; RV32-NEXT: vmul.vv v8, v8, v16
+; RV32-NEXT: vslidedown.vi v16, v8, 4
+; RV32-NEXT: vmul.vv v8, v8, v16
+; RV32-NEXT: vslidedown.vi v16, v8, 2
+; RV32-NEXT: vmul.vv v8, v8, v16
; RV32-NEXT: vrgather.vi v16, v8, 1
; RV32-NEXT: vmul.vv v8, v8, v16
; RV32-NEXT: vsetivli zero, 0, e32, m8, ta, mu
@@ -6413,16 +6269,12 @@ define i64 @vreduce_mul_v32i64(<32 x i64>* %x) {
; RV64-NEXT: addi a0, a0, 128
; RV64-NEXT: vle64.v v16, (a0)
; RV64-NEXT: vmul.vv v8, v8, v16
-; RV64-NEXT: vid.v v16
-; RV64-NEXT: vadd.vi v24, v16, 8
-; RV64-NEXT: vrgather.vv v0, v8, v24
-; RV64-NEXT: vmul.vv v8, v8, v0
-; RV64-NEXT: vadd.vi v24, v16, 4
-; RV64-NEXT: vrgather.vv v0, v8, v24
-; RV64-NEXT: vmul.vv v8, v8, v0
-; RV64-NEXT: vadd.vi v16, v16, 2
-; RV64-NEXT: vrgather.vv v24, v8, v16
-; RV64-NEXT: vmul.vv v8, v8, v24
+; RV64-NEXT: vslidedown.vi v16, v8, 8
+; RV64-NEXT: vmul.vv v8, v8, v16
+; RV64-NEXT: vslidedown.vi v16, v8, 4
+; RV64-NEXT: vmul.vv v8, v8, v16
+; RV64-NEXT: vslidedown.vi v16, v8, 2
+; RV64-NEXT: vmul.vv v8, v8, v16
; RV64-NEXT: vrgather.vi v16, v8, 1
; RV64-NEXT: vmul.vv v8, v8, v16
; RV64-NEXT: vmv.x.s a0, v8
@@ -6448,22 +6300,12 @@ define i64 @vreduce_mul_v64i64(<64 x i64>* %x) nounwind {
; RV32-NEXT: vmul.vv v16, v24, v16
; RV32-NEXT: vmul.vv v8, v8, v0
; RV32-NEXT: vmul.vv v8, v8, v16
-; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, mu
-; RV32-NEXT: vid.v v16
-; RV32-NEXT: vadd.vi v18, v16, 8
-; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; RV32-NEXT: vrgatherei16.vv v24, v8, v18
-; RV32-NEXT: vmul.vv v8, v8, v24
-; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, mu
-; RV32-NEXT: vadd.vi v18, v16, 4
-; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; RV32-NEXT: vrgatherei16.vv v24, v8, v18
-; RV32-NEXT: vmul.vv v8, v8, v24
-; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, mu
-; RV32-NEXT: vadd.vi v16, v16, 2
-; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, mu
-; RV32-NEXT: vrgatherei16.vv v24, v8, v16
-; RV32-NEXT: vmul.vv v8, v8, v24
+; RV32-NEXT: vslidedown.vi v16, v8, 8
+; RV32-NEXT: vmul.vv v8, v8, v16
+; RV32-NEXT: vslidedown.vi v16, v8, 4
+; RV32-NEXT: vmul.vv v8, v8, v16
+; RV32-NEXT: vslidedown.vi v16, v8, 2
+; RV32-NEXT: vmul.vv v8, v8, v16
; RV32-NEXT: vrgather.vi v16, v8, 1
; RV32-NEXT: vmul.vv v8, v8, v16
; RV32-NEXT: vsetivli zero, 0, e32, m8, ta, mu
@@ -6486,16 +6328,12 @@ define i64 @vreduce_mul_v64i64(<64 x i64>* %x) nounwind {
; RV64-NEXT: vmul.vv v16, v24, v16
; RV64-NEXT: vmul.vv v8, v8, v0
; RV64-NEXT: vmul.vv v8, v8, v16
-; RV64-NEXT: vid.v v16
-; RV64-NEXT: vadd.vi v24, v16, 8
-; RV64-NEXT: vrgather.vv v0, v8, v24
-; RV64-NEXT: vmul.vv v8, v8, v0
-; RV64-NEXT: vadd.vi v24, v16, 4
-; RV64-NEXT: vrgather.vv v0, v8, v24
-; RV64-NEXT: vmul.vv v8, v8, v0
-; RV64-NEXT: vadd.vi v16, v16, 2
-; RV64-NEXT: vrgather.vv v24, v8, v16
-; RV64-NEXT: vmul.vv v8, v8, v24
+; RV64-NEXT: vslidedown.vi v16, v8, 8
+; RV64-NEXT: vmul.vv v8, v8, v16
+; RV64-NEXT: vslidedown.vi v16, v8, 4
+; RV64-NEXT: vmul.vv v8, v8, v16
+; RV64-NEXT: vslidedown.vi v16, v8, 2
+; RV64-NEXT: vmul.vv v8, v8, v16
; RV64-NEXT: vrgather.vi v16, v8, 1
; RV64-NEXT: vmul.vv v8, v8, v16
; RV64-NEXT: vmv.x.s a0, v8