[llvm] b0e77d5 - [RISCV] Lower the shufflevector equivalent of vector.splice
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Thu Feb 10 09:39:41 PST 2022
Author: Craig Topper
Date: 2022-02-10T09:39:35-08:00
New Revision: b0e77d5e489a91f39724e3e3bca48624de2fbaa2
URL: https://github.com/llvm/llvm-project/commit/b0e77d5e489a91f39724e3e3bca48624de2fbaa2
DIFF: https://github.com/llvm/llvm-project/commit/b0e77d5e489a91f39724e3e3bca48624de2fbaa2.diff
LOG: [RISCV] Lower the shufflevector equivalent of vector.splice
We can lower a vector splice to a vslidedown and a vslideup.
The majority of the matching code here came from X86's code for matching
PALIGNR and VPALIGND/Q.
The slidedown and slideup lowering don't really require it to be concatenation,
but it happened to be an interesting pattern with existing analysis code I
could use.
This helps with cases where the scalar loop optimizer forwarded a load
result from a previous loop iteration. For example, this happens if the
loop uses x[i] and x[i+1] on the same iteration. The scalar optimizer
will forward the x[i+1] load from the previous iteration to satisfy x[i] on this
iteration. When this gets vectorized it results in one element of a vector
being forwarded from the previous loop to be concatenated with elements
loaded on this iteration.
Whether that's more efficient than doing a shifted load or reloading
the single scalar and using vslide1up is an interesting question.
But that's not something the backend can help with.
Reviewed By: khchen
Differential Revision: https://reviews.llvm.org/D119039
Added:
Modified:
llvm/lib/Target/RISCV/RISCVISelLowering.cpp
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 58d8e68b266eb..e9dd14348822e 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -2514,6 +2514,72 @@ static bool isInterleaveShuffle(ArrayRef<int> Mask, MVT VT, bool &SwapSources,
return true;
}
+static int isElementRotate(SDValue &V1, SDValue &V2, ArrayRef<int> Mask) {
+ int Size = Mask.size();
+
+ // We need to detect various ways of spelling a rotation:
+ // [11, 12, 13, 14, 15, 0, 1, 2]
+ // [-1, 12, 13, 14, -1, -1, 1, -1]
+ // [-1, -1, -1, -1, -1, -1, 1, 2]
+ // [ 3, 4, 5, 6, 7, 8, 9, 10]
+ // [-1, 4, 5, 6, -1, -1, 9, -1]
+ // [-1, 4, 5, 6, -1, -1, -1, -1]
+ int Rotation = 0;
+ SDValue Lo, Hi;
+ for (int i = 0; i != Size; ++i) {
+ int M = Mask[i];
+ if (M < 0)
+ continue;
+
+ // Determine where a rotate vector would have started.
+ int StartIdx = i - (M % Size);
+ // The identity rotation isn't interesting, stop.
+ if (StartIdx == 0)
+ return -1;
+
+ // If we found the tail of a vector the rotation must be the missing
+ // front. If we found the head of a vector, it must be how much of the
+ // head.
+ int CandidateRotation = StartIdx < 0 ? -StartIdx : Size - StartIdx;
+
+ if (Rotation == 0)
+ Rotation = CandidateRotation;
+ else if (Rotation != CandidateRotation)
+ // The rotations don't match, so we can't match this mask.
+ return -1;
+
+ // Compute which value this mask is pointing at.
+ SDValue MaskV = M < Size ? V1 : V2;
+
+ // Compute which of the two target values this index should be assigned to.
+ // This reflects whether the high elements are remaining or the low elements
+ // are remaining.
+ SDValue &TargetV = StartIdx < 0 ? Hi : Lo;
+
+ // Either set up this value if we've not encountered it before, or check
+ // that it remains consistent.
+ if (!TargetV)
+ TargetV = MaskV;
+ else if (TargetV != MaskV)
+ // This may be a rotation, but it pulls from the inputs in some
+ // unsupported interleaving.
+ return -1;
+ }
+
+ // Check that we successfully analyzed the mask, and normalize the results.
+ assert(Rotation != 0 && "Failed to locate a viable rotation!");
+ assert((Lo || Hi) && "Failed to find a rotated input vector!");
+
+ // Make sure we've found a value for both halves.
+ if (!Lo || !Hi)
+ return -1;
+
+ V1 = Lo;
+ V2 = Hi;
+
+ return Rotation;
+}
+
static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
SDValue V1 = Op.getOperand(0);
@@ -2619,6 +2685,33 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
return convertFromScalableVector(VT, SlideDown, DAG, Subtarget);
}
+ // Match shuffles that concatenate two vectors, rotate the concatenation,
+ // and then extract the original number of elements from the rotated result.
+ // This is equivalent to vector.splice or X86's PALIGNR instruction. Lower
+ // it to a SLIDEDOWN and a SLIDEUP.
+ // FIXME: We don't really need it to be a concatenation. We just need two
+ // regions with contiguous elements that need to be shifted down and up.
+ int Rotation = isElementRotate(V1, V2, Mask);
+ if (Rotation > 0) {
+ // We found a rotation. We need to slide V1 down by Rotation. Using
+ // (NumElts - Rotation) for VL. Then we need to slide V2 up by
+ // (NumElts - Rotation) using NumElts for VL.
+ V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
+ V2 = convertToScalableVector(ContainerVT, V2, DAG, Subtarget);
+
+ unsigned InvRotate = NumElts - Rotation;
+ SDValue SlideDown =
+ DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, ContainerVT,
+ DAG.getUNDEF(ContainerVT), V2,
+ DAG.getConstant(Rotation, DL, XLenVT),
+ TrueMask, DAG.getConstant(InvRotate, DL, XLenVT));
+ SDValue SlideUp =
+ DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, ContainerVT, SlideDown, V1,
+ DAG.getConstant(InvRotate, DL, XLenVT),
+ TrueMask, VL);
+ return convertFromScalableVector(VT, SlideUp, DAG, Subtarget);
+ }
+
// Detect an interleave shuffle and lower to
// (vmaccu.vx (vwaddu.vx lohalf(V1), lohalf(V2)), lohalf(V2), (2^eltbits - 1))
bool SwapSources;
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll
index 48cbc82faa358..83f5832b33cce 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll
@@ -255,3 +255,54 @@ define <8 x float> @slidedown_v8f32(<8 x float> %x) {
%s = shufflevector <8 x float> %x, <8 x float> poison, <8 x i32> <i32 3, i32 undef, i32 5, i32 6, i32 undef, i32 undef, i32 undef, i32 undef>
ret <8 x float> %s
}
+
+define <8 x float> @splice_unary(<8 x float> %x) {
+; CHECK-LABEL: splice_unary:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 7, e32, m2, ta, mu
+; CHECK-NEXT: vslidedown.vi v10, v8, 1
+; CHECK-NEXT: vsetivli zero, 8, e32, m2, tu, mu
+; CHECK-NEXT: vslideup.vi v10, v8, 7
+; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: ret
+ %s = shufflevector <8 x float> %x, <8 x float> poison, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0>
+ ret <8 x float> %s
+}
+
+define <8 x double> @splice_unary2(<8 x double> %x) {
+; CHECK-LABEL: splice_unary2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e64, m4, ta, mu
+; CHECK-NEXT: vslidedown.vi v12, v8, 6
+; CHECK-NEXT: vsetivli zero, 8, e64, m4, tu, mu
+; CHECK-NEXT: vslideup.vi v12, v8, 2
+; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: ret
+ %s = shufflevector <8 x double> %x, <8 x double> poison, <8 x i32> <i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5>
+ ret <8 x double> %s
+}
+
+define <8 x float> @splice_binary(<8 x float> %x, <8 x float> %y) {
+; CHECK-LABEL: splice_binary:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 6, e32, m2, ta, mu
+; CHECK-NEXT: vslidedown.vi v8, v8, 2
+; CHECK-NEXT: vsetivli zero, 8, e32, m2, tu, mu
+; CHECK-NEXT: vslideup.vi v8, v10, 6
+; CHECK-NEXT: ret
+ %s = shufflevector <8 x float> %x, <8 x float> %y, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 9>
+ ret <8 x float> %s
+}
+
+define <8 x double> @splice_binary2(<8 x double> %x, <8 x double> %y) {
+; CHECK-LABEL: splice_binary2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 3, e64, m4, ta, mu
+; CHECK-NEXT: vslidedown.vi v12, v12, 5
+; CHECK-NEXT: vsetivli zero, 8, e64, m4, tu, mu
+; CHECK-NEXT: vslideup.vi v12, v8, 3
+; CHECK-NEXT: vmv4r.v v8, v12
+; CHECK-NEXT: ret
+ %s = shufflevector <8 x double> %x, <8 x double> %y, <8 x i32> <i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 4>
+ ret <8 x double> %s
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
index 62d897220f90a..0b4e7ac65bbce 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
@@ -554,3 +554,53 @@ define <8 x i32> @slidedown_v8i32(<8 x i32> %x) {
%s = shufflevector <8 x i32> %x, <8 x i32> poison, <8 x i32> <i32 3, i32 undef, i32 5, i32 6, i32 undef, i32 undef, i32 undef, i32 undef>
ret <8 x i32> %s
}
+
+define <8 x i16> @splice_unary(<8 x i16> %x) {
+; CHECK-LABEL: splice_unary:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, mu
+; CHECK-NEXT: vslidedown.vi v9, v8, 2
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, tu, mu
+; CHECK-NEXT: vslideup.vi v9, v8, 6
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+ %s = shufflevector <8 x i16> %x, <8 x i16> poison, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1>
+ ret <8 x i16> %s
+}
+
+define <8 x i32> @splice_unary2(<8 x i32> %x) {
+; CHECK-LABEL: splice_unary2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 3, e32, m2, ta, mu
+; CHECK-NEXT: vslidedown.vi v10, v8, 5
+; CHECK-NEXT: vsetivli zero, 8, e32, m2, tu, mu
+; CHECK-NEXT: vslideup.vi v10, v8, 3
+; CHECK-NEXT: vmv2r.v v8, v10
+; CHECK-NEXT: ret
+ %s = shufflevector <8 x i32> %x, <8 x i32> poison, <8 x i32> <i32 undef, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4>
+ ret <8 x i32> %s
+}
+
+define <8 x i16> @splice_binary(<8 x i16> %x, <8 x i16> %y) {
+; CHECK-LABEL: splice_binary:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, mu
+; CHECK-NEXT: vslidedown.vi v8, v8, 2
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, tu, mu
+; CHECK-NEXT: vslideup.vi v8, v9, 6
+; CHECK-NEXT: ret
+ %s = shufflevector <8 x i16> %x, <8 x i16> %y, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 9>
+ ret <8 x i16> %s
+}
+
+define <8 x i32> @splice_binary2(<8 x i32> %x, <8 x i32> %y) {
+; CHECK-LABEL: splice_binary2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 3, e32, m2, ta, mu
+; CHECK-NEXT: vslidedown.vi v8, v8, 5
+; CHECK-NEXT: vsetivli zero, 8, e32, m2, tu, mu
+; CHECK-NEXT: vslideup.vi v8, v10, 3
+; CHECK-NEXT: ret
+ %s = shufflevector <8 x i32> %x, <8 x i32> %y, <8 x i32> <i32 undef, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12>
+ ret <8 x i32> %s
+}
More information about the llvm-commits
mailing list