[llvm] e693181 - [RISCV] Use v(f)slide1down for shuffle+insert idiom
Philip Reames via llvm-commits
llvm-commits at lists.llvm.org
Wed May 31 07:54:04 PDT 2023
Author: Philip Reames
Date: 2023-05-31T07:53:51-07:00
New Revision: e69318138e6cc88becbb8d095b1d2dcf76ac45e1
URL: https://github.com/llvm/llvm-project/commit/e69318138e6cc88becbb8d095b1d2dcf76ac45e1
DIFF: https://github.com/llvm/llvm-project/commit/e69318138e6cc88becbb8d095b1d2dcf76ac45e1.diff
LOG: [RISCV] Use v(f)slide1down for shuffle+insert idiom
This is a follow up to D151468 which added the vslide1up case as a sub-case of the vslideup matching. This generalizes that code into generic mask matching - specifically to remove the sub-vector insert restriction in the original patch. Since the matching logic is basically the same, go ahead and support vslide1down at the same time.
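For reference, the idiom being matched looks like the following (lifted from the vslide1down_4xi8 test updated below). The shuffle mask <1, 2, 3, 4> slides every lane of %v down by one and fills the last lane from the splatted scalar %b, so it can now be lowered as a single vslide1down.vx instead of the prior vmv.v.x + vslidedown.vi + vslideup.vi sequence:

  %vb = insertelement <4 x i8> poison, i8 %b, i64 0
  %v1 = shufflevector <4 x i8> %v, <4 x i8> %vb, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
  ; with this patch this becomes:
  ;   vsetivli zero, 4, e8, mf4, ta, ma
  ;   vslide1down.vx v8, v8, a0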
Differential Revision: https://reviews.llvm.org/D151742
Added:
Modified:
llvm/lib/Target/RISCV/RISCVISelLowering.cpp
llvm/test/CodeGen/RISCV/rvv/fixed-vector-shuffle-vslide1down.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vector-shuffle-vslide1up.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 305ad58963651..a28dbef82db3c 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -3734,20 +3734,6 @@ static SDValue lowerVECTOR_SHUFFLEAsVSlideup(const SDLoc &DL, MVT VT,
MVT XLenVT = Subtarget.getXLenVT();
MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
auto TrueMask = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).first;
- if (Index == 1 && NumSubElts + Index == (int)NumElts &&
- isa<BuildVectorSDNode>(InPlace)) {
- if (SDValue Splat = cast<BuildVectorSDNode>(InPlace)->getSplatValue()) {
- auto OpCode =
- VT.isFloatingPoint() ? RISCVISD::VFSLIDE1UP_VL : RISCVISD::VSLIDE1UP_VL;
- auto Vec = DAG.getNode(OpCode, DL, ContainerVT,
- DAG.getUNDEF(ContainerVT),
- convertToScalableVector(ContainerVT, ToInsert, DAG, Subtarget),
- Splat, TrueMask,
- DAG.getConstant(NumSubElts + Index, DL, XLenVT));
- return convertFromScalableVector(VT, Vec, DAG, Subtarget);
- }
- }
-
// We slide up by the index that the subvector is being inserted at, and set
// VL to the index + the number of elements being inserted.
unsigned Policy = RISCVII::TAIL_UNDISTURBED_MASK_UNDISTURBED | RISCVII::MASK_AGNOSTIC;
@@ -3765,6 +3751,58 @@ static SDValue lowerVECTOR_SHUFFLEAsVSlideup(const SDLoc &DL, MVT VT,
return convertFromScalableVector(VT, Slideup, DAG, Subtarget);
}
+/// Match v(f)slide1up/down idioms. These operations involve sliding
+/// N-1 elements to make room for an inserted scalar at one end.
+static SDValue lowerVECTOR_SHUFFLEAsVSlide1(const SDLoc &DL, MVT VT,
+ SDValue V1, SDValue V2,
+ ArrayRef<int> Mask,
+ const RISCVSubtarget &Subtarget,
+ SelectionDAG &DAG) {
+ bool OpsSwapped = false;
+ if (!isa<BuildVectorSDNode>(V1)) {
+ if (!isa<BuildVectorSDNode>(V2))
+ return SDValue();
+ std::swap(V1, V2);
+ OpsSwapped = true;
+ }
+ SDValue Splat = cast<BuildVectorSDNode>(V1)->getSplatValue();
+ if (!Splat)
+ return SDValue();
+
+ // Return true if the mask could describe a slide of Mask.size() - 1
+ // elements from concat_vector(V1, V2)[Base:] to [Offset:].
+ auto isSlideMask = [](ArrayRef<int> Mask, unsigned Base, int Offset) {
+ const unsigned S = (Offset > 0) ? 0 : -Offset;
+ const unsigned E = Mask.size() - ((Offset > 0) ? Offset : 0);
+ for (unsigned i = S; i != E; ++i)
+ if (Mask[i] >= 0 && (unsigned)Mask[i] != Base + i + Offset)
+ return false;
+ return true;
+ };
+
+ const unsigned NumElts = VT.getVectorNumElements();
+ bool IsVSlidedown = isSlideMask(Mask, OpsSwapped ? 0 : NumElts, 1);
+ if (!IsVSlidedown && !isSlideMask(Mask, OpsSwapped ? 0 : NumElts, -1))
+ return SDValue();
+
+ const int InsertIdx = Mask[IsVSlidedown ? (NumElts - 1) : 0];
+ // Inserted lane must come from splat, undef scalar is legal but not profitable.
+ if (InsertIdx < 0 || InsertIdx / NumElts != (unsigned)OpsSwapped)
+ return SDValue();
+
+ MVT XLenVT = Subtarget.getXLenVT();
+ MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
+ auto [TrueMask, VL] = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
+ auto OpCode = IsVSlidedown ?
+ (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL) :
+ (VT.isFloatingPoint() ? RISCVISD::VFSLIDE1UP_VL : RISCVISD::VSLIDE1UP_VL);
+ auto Vec = DAG.getNode(OpCode, DL, ContainerVT,
+ DAG.getUNDEF(ContainerVT),
+ convertToScalableVector(ContainerVT, V2, DAG, Subtarget),
+ Splat, TrueMask, VL);
+ return convertFromScalableVector(VT, Vec, DAG, Subtarget);
+}
+
// Given two input vectors of <[vscale x ]n x ty>, use vwaddu.vv and vwmaccu.vx
// to create an interleaved vector of <[vscale x] n*2 x ty>.
// This requires that the size of ty is less than the subtarget's maximum ELEN.
@@ -3939,6 +3977,10 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
ArrayRef<int> Mask = SVN->getMask();
+ if (SDValue V =
+ lowerVECTOR_SHUFFLEAsVSlide1(DL, VT, V1, V2, Mask, Subtarget, DAG))
+ return V;
+
if (SDValue V =
lowerVECTOR_SHUFFLEAsVSlidedown(DL, VT, V1, V2, Mask, Subtarget, DAG))
return V;
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-shuffle-vslide1down.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-shuffle-vslide1down.ll
index febb7d0afd7ba..10b37dc0b24af 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-shuffle-vslide1down.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-shuffle-vslide1down.ll
@@ -8,9 +8,7 @@ define <2 x i8> @vslide1down_2xi8(<2 x i8> %v, i8 %b) {
; CHECK-LABEL: vslide1down_2xi8:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
-; CHECK-NEXT: vmv.v.x v9, a0
-; CHECK-NEXT: vslidedown.vi v8, v8, 1
-; CHECK-NEXT: vslideup.vi v8, v9, 1
+; CHECK-NEXT: vslide1down.vx v8, v8, a0
; CHECK-NEXT: ret
%vb = insertelement <2 x i8> poison, i8 %b, i64 0
%v1 = shufflevector <2 x i8> %v, <2 x i8> %vb, <2 x i32> <i32 1, i32 2>
@@ -30,9 +28,7 @@ define <4 x i8> @vslide1down_4xi8(<4 x i8> %v, i8 %b) {
; RV64-LABEL: vslide1down_4xi8:
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
-; RV64-NEXT: vmv.v.x v9, a0
-; RV64-NEXT: vslidedown.vi v8, v8, 1
-; RV64-NEXT: vslideup.vi v8, v9, 3
+; RV64-NEXT: vslide1down.vx v8, v8, a0
; RV64-NEXT: ret
%vb = insertelement <4 x i8> poison, i8 %b, i64 0
%v1 = shufflevector <4 x i8> %v, <4 x i8> %vb, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
@@ -52,9 +48,7 @@ define <4 x i8> @vslide1down_4xi8_swapped(<4 x i8> %v, i8 %b) {
; RV64-LABEL: vslide1down_4xi8_swapped:
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
-; RV64-NEXT: vmv.v.x v9, a0
-; RV64-NEXT: vslidedown.vi v8, v8, 1
-; RV64-NEXT: vslideup.vi v8, v9, 3
+; RV64-NEXT: vslide1down.vx v8, v8, a0
; RV64-NEXT: ret
%vb = insertelement <4 x i8> poison, i8 %b, i64 0
%v1 = shufflevector <4 x i8> %vb, <4 x i8> %v, <4 x i32> <i32 5, i32 6, i32 7, i32 0>
@@ -74,9 +68,7 @@ define <2 x i16> @vslide1down_2xi16(<2 x i16> %v, i16 %b) {
; RV64-LABEL: vslide1down_2xi16:
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; RV64-NEXT: vmv.v.x v9, a0
-; RV64-NEXT: vslidedown.vi v8, v8, 1
-; RV64-NEXT: vslideup.vi v8, v9, 1
+; RV64-NEXT: vslide1down.vx v8, v8, a0
; RV64-NEXT: ret
%vb = insertelement <2 x i16> poison, i16 %b, i64 0
%v1 = shufflevector <2 x i16> %v, <2 x i16> %vb, <2 x i32> <i32 1, i32 2>
@@ -87,9 +79,7 @@ define <4 x i16> @vslide1down_4xi16(<4 x i16> %v, i16 %b) {
; RV32-LABEL: vslide1down_4xi16:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; RV32-NEXT: vmv.v.x v9, a0
-; RV32-NEXT: vslidedown.vi v8, v8, 1
-; RV32-NEXT: vslideup.vi v8, v9, 3
+; RV32-NEXT: vslide1down.vx v8, v8, a0
; RV32-NEXT: ret
;
; RV64-LABEL: vslide1down_4xi16:
@@ -109,9 +99,7 @@ define <2 x i32> @vslide1down_2xi32(<2 x i32> %v, i32 %b) {
; RV32-LABEL: vslide1down_2xi32:
; RV32: # %bb.0:
; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; RV32-NEXT: vmv.v.x v9, a0
-; RV32-NEXT: vslidedown.vi v8, v8, 1
-; RV32-NEXT: vslideup.vi v8, v9, 1
+; RV32-NEXT: vslide1down.vx v8, v8, a0
; RV32-NEXT: ret
;
; RV64-LABEL: vslide1down_2xi32:
@@ -131,9 +119,7 @@ define <4 x i32> @vslide1down_4xi32(<4 x i32> %v, i32 %b) {
; CHECK-LABEL: vslide1down_4xi32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT: vmv.v.x v9, a0
-; CHECK-NEXT: vslidedown.vi v8, v8, 1
-; CHECK-NEXT: vslideup.vi v8, v9, 3
+; CHECK-NEXT: vslide1down.vx v8, v8, a0
; CHECK-NEXT: ret
%vb = insertelement <4 x i32> poison, i32 %b, i64 0
%v1 = shufflevector <4 x i32> %v, <4 x i32> %vb, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
@@ -158,9 +144,7 @@ define <2 x i64> @vslide1down_2xi64(<2 x i64> %v, i64 %b) {
; RV64-LABEL: vslide1down_2xi64:
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; RV64-NEXT: vmv.v.x v9, a0
-; RV64-NEXT: vslidedown.vi v8, v8, 1
-; RV64-NEXT: vslideup.vi v8, v9, 1
+; RV64-NEXT: vslide1down.vx v8, v8, a0
; RV64-NEXT: ret
%vb = insertelement <2 x i64> poison, i64 %b, i64 0
%v1 = shufflevector <2 x i64> %v, <2 x i64> %vb, <2 x i32> <i32 1, i32 2>
@@ -185,9 +169,7 @@ define <4 x i64> @vslide1down_4xi64(<4 x i64> %v, i64 %b) {
; RV64-LABEL: vslide1down_4xi64:
; RV64: # %bb.0:
; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; RV64-NEXT: vmv.v.x v10, a0
-; RV64-NEXT: vslidedown.vi v8, v8, 1
-; RV64-NEXT: vslideup.vi v8, v10, 3
+; RV64-NEXT: vslide1down.vx v8, v8, a0
; RV64-NEXT: ret
%vb = insertelement <4 x i64> poison, i64 %b, i64 0
%v1 = shufflevector <4 x i64> %v, <4 x i64> %vb, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
@@ -198,9 +180,7 @@ define <2 x half> @vslide1down_2xf16(<2 x half> %v, half %b) {
; CHECK-LABEL: vslide1down_2xf16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
-; CHECK-NEXT: vfmv.v.f v9, fa0
-; CHECK-NEXT: vslidedown.vi v8, v8, 1
-; CHECK-NEXT: vslideup.vi v8, v9, 1
+; CHECK-NEXT: vfslide1down.vf v8, v8, fa0
; CHECK-NEXT: ret
%vb = insertelement <2 x half> poison, half %b, i64 0
%v1 = shufflevector <2 x half> %v, <2 x half> %vb, <2 x i32> <i32 1, i32 2>
@@ -211,9 +191,7 @@ define <4 x half> @vslide1down_4xf16(<4 x half> %v, half %b) {
; CHECK-LABEL: vslide1down_4xf16:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
-; CHECK-NEXT: vfmv.v.f v9, fa0
-; CHECK-NEXT: vslidedown.vi v8, v8, 1
-; CHECK-NEXT: vslideup.vi v8, v9, 3
+; CHECK-NEXT: vfslide1down.vf v8, v8, fa0
; CHECK-NEXT: ret
%vb = insertelement <4 x half> poison, half %b, i64 0
%v1 = shufflevector <4 x half> %v, <4 x half> %vb, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
@@ -224,9 +202,7 @@ define <2 x float> @vslide1down_2xf32(<2 x float> %v, float %b) {
; CHECK-LABEL: vslide1down_2xf32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
-; CHECK-NEXT: vfmv.v.f v9, fa0
-; CHECK-NEXT: vslidedown.vi v8, v8, 1
-; CHECK-NEXT: vslideup.vi v8, v9, 1
+; CHECK-NEXT: vfslide1down.vf v8, v8, fa0
; CHECK-NEXT: ret
%vb = insertelement <2 x float> poison, float %b, i64 0
%v1 = shufflevector <2 x float> %v, <2 x float> %vb, <2 x i32> <i32 1, i32 2>
@@ -237,9 +213,7 @@ define <4 x float> @vslide1down_4xf32(<4 x float> %v, float %b) {
; CHECK-LABEL: vslide1down_4xf32:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
-; CHECK-NEXT: vfmv.v.f v9, fa0
-; CHECK-NEXT: vslidedown.vi v8, v8, 1
-; CHECK-NEXT: vslideup.vi v8, v9, 3
+; CHECK-NEXT: vfslide1down.vf v8, v8, fa0
; CHECK-NEXT: ret
%vb = insertelement <4 x float> poison, float %b, i64 0
%v1 = shufflevector <4 x float> %v, <4 x float> %vb, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
@@ -250,9 +224,7 @@ define <2 x double> @vslide1down_2xf64(<2 x double> %v, double %b) {
; CHECK-LABEL: vslide1down_2xf64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
-; CHECK-NEXT: vfmv.v.f v9, fa0
-; CHECK-NEXT: vslidedown.vi v8, v8, 1
-; CHECK-NEXT: vslideup.vi v8, v9, 1
+; CHECK-NEXT: vfslide1down.vf v8, v8, fa0
; CHECK-NEXT: ret
%vb = insertelement <2 x double> poison, double %b, i64 0
%v1 = shufflevector <2 x double> %v, <2 x double> %vb, <2 x i32> <i32 1, i32 2>
@@ -263,9 +235,7 @@ define <4 x double> @vslide1down_4xf64(<4 x double> %v, double %b) {
; CHECK-LABEL: vslide1down_4xf64:
; CHECK: # %bb.0:
; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
-; CHECK-NEXT: vfmv.v.f v10, fa0
-; CHECK-NEXT: vslidedown.vi v8, v8, 1
-; CHECK-NEXT: vslideup.vi v8, v10, 3
+; CHECK-NEXT: vfslide1down.vf v8, v8, fa0
; CHECK-NEXT: ret
%vb = insertelement <4 x double> poison, double %b, i64 0
%v1 = shufflevector <4 x double> %v, <4 x double> %vb, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
@@ -275,14 +245,8 @@ define <4 x double> @vslide1down_4xf64(<4 x double> %v, double %b) {
define <4 x i8> @vslide1down_4xi8_with_splat(<4 x i8> %v, i8 %b) {
; CHECK-LABEL: vslide1down_4xi8_with_splat:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 7
-; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, mu
-; CHECK-NEXT: vmv.s.x v0, a1
-; CHECK-NEXT: vid.v v9
-; CHECK-NEXT: vadd.vi v10, v9, 1
-; CHECK-NEXT: vmv.v.x v9, a0
-; CHECK-NEXT: vrgather.vv v9, v8, v10, v0.t
-; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
+; CHECK-NEXT: vslide1down.vx v8, v8, a0
; CHECK-NEXT: ret
%vb = insertelement <4 x i8> poison, i8 %b, i64 0
%v1 = shufflevector <4 x i8> %vb, <4 x i8> poison, <4 x i32> zeroinitializer
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-shuffle-vslide1up.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-shuffle-vslide1up.ll
index 19154c6a7a70d..e4fb535b9ebd8 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-shuffle-vslide1up.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-shuffle-vslide1up.ll
@@ -261,13 +261,8 @@ define <4 x double> @vslide1up_4xf64(<4 x double> %v, double %b) {
define <4 x i8> @vslide1up_4xi8_with_splat(<4 x i8> %v, i8 %b) {
; CHECK-LABEL: vslide1up_4xi8_with_splat:
; CHECK: # %bb.0:
-; CHECK-NEXT: li a1, 14
-; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, mu
-; CHECK-NEXT: vmv.s.x v0, a1
-; CHECK-NEXT: vid.v v9
-; CHECK-NEXT: vadd.vi v10, v9, -1
-; CHECK-NEXT: vmv.v.x v9, a0
-; CHECK-NEXT: vrgather.vv v9, v8, v10, v0.t
+; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma
+; CHECK-NEXT: vslide1up.vx v9, v8, a0
; CHECK-NEXT: vmv1r.v v8, v9
; CHECK-NEXT: ret
%vb = insertelement <4 x i8> poison, i8 %b, i64 0
@@ -414,3 +409,18 @@ define <4 x i8> @vslide1up_4xi8_neg_incorrect_insert3(<4 x i8> %v, i8 %b) {
%v2 = shufflevector <4 x i8> poison, <4 x i8> %v, <4 x i32> <i32 5, i32 4, i32 5, i32 6>
ret <4 x i8> %v2
}
+
+define <2 x i8> @vslide1up_4xi8_neg_length_changing(<4 x i8> %v, i8 %b) {
+; CHECK-LABEL: vslide1up_4xi8_neg_length_changing:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e8, mf4, tu, ma
+; CHECK-NEXT: vmv1r.v v9, v8
+; CHECK-NEXT: vmv.s.x v9, a0
+; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma
+; CHECK-NEXT: vslideup.vi v9, v8, 1
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+ %v1 = insertelement <4 x i8> %v, i8 %b, i64 0
+ %v2 = shufflevector <4 x i8> %v1, <4 x i8> %v, <2 x i32> <i32 0, i32 4>
+ ret <2 x i8> %v2
+}