[llvm] 66a0a08 - [RISCV] Extract spread(2, 4, 8) shuffle lowering from interleave(2) (#118822)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Dec 5 11:32:31 PST 2024
Author: Philip Reames
Date: 2024-12-05T11:32:27-08:00
New Revision: 66a0a081338d9942997d1620db5c37c9c72ec3f3
URL: https://github.com/llvm/llvm-project/commit/66a0a081338d9942997d1620db5c37c9c72ec3f3
DIFF: https://github.com/llvm/llvm-project/commit/66a0a081338d9942997d1620db5c37c9c72ec3f3.diff
LOG: [RISCV] Extract spread(2,4,8) shuffle lowering from interleave(2) (#118822)
This is a prep patch for improving spread(4,8) shuffles. I also think it
improves the readability of the existing code, but the primary
motivation is simply to stage the work.
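For readers unfamiliar with the terminology: a spread(Factor) shuffle widens a
vector by Factor, placing each source element at every Factor'th lane and
leaving the remaining lanes undef. A minimal LLVM IR sketch of the Factor=2
cases (illustrative only; the function names are invented and are not part of
this patch):

define <8 x i32> @spread2_index0(<4 x i32> %v) {
  ; v0, poison, v1, poison, v2, poison, v3, poison  (Factor=2, Index=0)
  %s = shufflevector <4 x i32> %v, <4 x i32> poison,
         <8 x i32> <i32 0, i32 poison, i32 1, i32 poison, i32 2, i32 poison, i32 3, i32 poison>
  ret <8 x i32> %s
}

define <8 x i32> @spread2_index1(<4 x i32> %v) {
  ; poison, v0, poison, v1, poison, v2, poison, v3  (Factor=2, Index=1)
  %s = shufflevector <4 x i32> %v, <4 x i32> poison,
         <8 x i32> <i32 poison, i32 0, i32 poison, i32 1, i32 poison, i32 2, i32 poison, i32 3>
  ret <8 x i32> %s
}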
Added:
Modified:
llvm/lib/Target/RISCV/RISCVISelLowering.cpp
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll
llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index d5160381caa386..e197e80bfc00c5 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -4824,12 +4824,46 @@ static SDValue lowerVECTOR_SHUFFLEAsVSlide1(const SDLoc &DL, MVT VT,
return convertFromScalableVector(VT, Vec, DAG, Subtarget);
}
+// Given a vector a, b, c, d return a vector Factor times longer
+// with Factor-1 undef's between elements. Ex:
+// a, undef, b, undef, c, undef, d, undef (Factor=2, Index=0)
+// undef, a, undef, b, undef, c, undef, d (Factor=2, Index=1)
+static SDValue getWideningSpread(SDValue V, unsigned Factor, unsigned Index,
+ const SDLoc &DL, SelectionDAG &DAG) {
+
+ MVT VT = V.getSimpleValueType();
+ unsigned EltBits = VT.getScalarSizeInBits();
+ ElementCount EC = VT.getVectorElementCount();
+ V = DAG.getBitcast(VT.changeTypeToInteger(), V);
+
+ MVT WideVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits * Factor), EC);
+
+ SDValue Result = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, V);
+ // TODO: On rv32, the constant becomes a splat_vector_parts which does not
+ // allow the SHL to fold away if Index is 0.
+ if (Index != 0)
+ Result = DAG.getNode(ISD::SHL, DL, WideVT, Result,
+ DAG.getConstant(EltBits * Index, DL, WideVT));
+ // Make sure to use original element type
+ MVT ResultVT = MVT::getVectorVT(VT.getVectorElementType(),
+ EC.multiplyCoefficientBy(Factor));
+ return DAG.getBitcast(ResultVT, Result);
+}
+
// Given two input vectors of <[vscale x ]n x ty>, use vwaddu.vv and vwmaccu.vx
// to create an interleaved vector of <[vscale x] n*2 x ty>.
// This requires that the size of ty is less than the subtarget's maximum ELEN.
static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV,
const SDLoc &DL, SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
+
+ // FIXME: Not only does this optimize the code, it fixes some correctness
+ // issues because MIR does not have freeze.
+ if (EvenV.isUndef())
+ return getWideningSpread(OddV, 2, 1, DL, DAG);
+ if (OddV.isUndef())
+ return getWideningSpread(EvenV, 2, 0, DL, DAG);
+
MVT VecVT = EvenV.getSimpleValueType();
MVT VecContainerVT = VecVT; // <vscale x n x ty>
// Convert fixed vectors to scalable if needed
@@ -4861,29 +4895,14 @@ static SDValue getWideningInterleave(SDValue EvenV, SDValue OddV,
SDValue Passthru = DAG.getUNDEF(WideContainerVT);
SDValue Interleaved;
- if (OddV.isUndef()) {
- // If OddV is undef, this is a zero extend.
- // FIXME: Not only does this optimize the code, it fixes some correctness
- // issues because MIR does not have freeze.
- Interleaved =
- DAG.getNode(RISCVISD::VZEXT_VL, DL, WideContainerVT, EvenV, Mask, VL);
- } else if (Subtarget.hasStdExtZvbb()) {
+ if (Subtarget.hasStdExtZvbb()) {
// Interleaved = (OddV << VecVT.getScalarSizeInBits()) + EvenV.
SDValue OffsetVec =
DAG.getConstant(VecVT.getScalarSizeInBits(), DL, VecContainerVT);
Interleaved = DAG.getNode(RISCVISD::VWSLL_VL, DL, WideContainerVT, OddV,
OffsetVec, Passthru, Mask, VL);
- if (!EvenV.isUndef())
- Interleaved = DAG.getNode(RISCVISD::VWADDU_W_VL, DL, WideContainerVT,
- Interleaved, EvenV, Passthru, Mask, VL);
- } else if (EvenV.isUndef()) {
- Interleaved =
- DAG.getNode(RISCVISD::VZEXT_VL, DL, WideContainerVT, OddV, Mask, VL);
-
- SDValue OffsetVec =
- DAG.getConstant(VecVT.getScalarSizeInBits(), DL, WideContainerVT);
- Interleaved = DAG.getNode(RISCVISD::SHL_VL, DL, WideContainerVT,
- Interleaved, OffsetVec, Passthru, Mask, VL);
+ Interleaved = DAG.getNode(RISCVISD::VWADDU_W_VL, DL, WideContainerVT,
+ Interleaved, EvenV, Passthru, Mask, VL);
} else {
// FIXME: We should freeze the odd vector here. We already handled the case
// of provably undef/poison above.
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll
index 99aaecf4c6843d..4b09b571b94069 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll
@@ -247,12 +247,12 @@ define <64 x float> @interleave_v32f32(<32 x float> %x, <32 x float> %y) {
; V128-NEXT: vsetivli zero, 16, e32, m8, ta, ma
; V128-NEXT: vslidedown.vi v24, v16, 16
; V128-NEXT: li a0, 32
-; V128-NEXT: vslidedown.vi v0, v8, 16
; V128-NEXT: lui a1, 699051
+; V128-NEXT: vslidedown.vi v0, v8, 16
; V128-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; V128-NEXT: vzext.vf2 v8, v24
-; V128-NEXT: vzext.vf2 v24, v0
; V128-NEXT: addi a1, a1, -1366
+; V128-NEXT: vzext.vf2 v24, v0
; V128-NEXT: vmv.s.x v0, a1
; V128-NEXT: vsll.vx v8, v8, a0
; V128-NEXT: vsetvli zero, a0, e32, m8, ta, ma
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll
index 7500198f140022..da7cdf3ba8ec01 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll
@@ -410,12 +410,12 @@ define <64 x i32> @interleave_v32i32(<32 x i32> %x, <32 x i32> %y) {
; V128-NEXT: vsetivli zero, 16, e32, m8, ta, ma
; V128-NEXT: vslidedown.vi v24, v16, 16
; V128-NEXT: li a0, 32
-; V128-NEXT: vslidedown.vi v0, v8, 16
; V128-NEXT: lui a1, 699051
+; V128-NEXT: vslidedown.vi v0, v8, 16
; V128-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; V128-NEXT: vzext.vf2 v8, v24
-; V128-NEXT: vzext.vf2 v24, v0
; V128-NEXT: addi a1, a1, -1366
+; V128-NEXT: vzext.vf2 v24, v0
; V128-NEXT: vmv.s.x v0, a1
; V128-NEXT: vsll.vx v8, v8, a0
; V128-NEXT: vsetvli zero, a0, e32, m8, ta, ma
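For context, the new getWideningSpread helper expresses the spread as
arithmetic on a vector with double-width elements: zero-extend, shift left by
EltBits*Index when Index is non-zero, then bitcast back to the original
element type. A rough LLVM IR sketch of the same idea for Factor=2 on
<4 x i16> (an illustrative assumption that relies on little-endian lane order;
it is not code from the patch):

define <8 x i16> @spread2_index1_via_zext_shl(<4 x i16> %v) {
  ; Zero-extend each i16 lane to i32; the original element now sits in the low
  ; half of each i32, which is the Index=0 spread on a little-endian target.
  %ext = zext <4 x i16> %v to <4 x i32>
  ; Shift by the element width to move each element into the high half (Index=1).
  %shl = shl <4 x i32> %ext, <i32 16, i32 16, i32 16, i32 16>
  ; Reinterpret as twice as many i16 lanes: zero lanes (a valid refinement of
  ; the undef lanes in the spread) interleaved with the elements in odd positions.
  %res = bitcast <4 x i32> %shl to <8 x i16>
  ret <8 x i16> %res
}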