[llvm] [RISCV][CG]Use processShuffleMasks for per-register shuffles (PR #121765)
Alexey Bataev via llvm-commits
llvm-commits at lists.llvm.org
Wed Jan 8 08:15:35 PST 2025
================
@@ -5121,58 +5120,102 @@ static SDValue lowerShuffleViaVRegSplitting(ShuffleVectorSDNode *SVN,
MVT ElemVT = VT.getVectorElementType();
unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
- unsigned VRegsPerSrc = NumElts / ElemsPerVReg;
-
- SmallVector<std::pair<int, SmallVector<int>>>
- OutMasks(VRegsPerSrc, {-1, {}});
-
- // Check if our mask can be done as a 1-to-1 mapping from source
- // to destination registers in the group without needing to
- // write each destination more than once.
- for (unsigned DstIdx = 0; DstIdx < Mask.size(); DstIdx++) {
- int DstVecIdx = DstIdx / ElemsPerVReg;
- int DstSubIdx = DstIdx % ElemsPerVReg;
- int SrcIdx = Mask[DstIdx];
- if (SrcIdx < 0 || (unsigned)SrcIdx >= 2 * NumElts)
- continue;
- int SrcVecIdx = SrcIdx / ElemsPerVReg;
- int SrcSubIdx = SrcIdx % ElemsPerVReg;
- if (OutMasks[DstVecIdx].first == -1)
- OutMasks[DstVecIdx].first = SrcVecIdx;
- if (OutMasks[DstVecIdx].first != SrcVecIdx)
- // Note: This case could easily be handled by keeping track of a chain
- // of source values and generating two element shuffles below. This is
- // less an implementation question, and more a profitability one.
- return SDValue();
-
- OutMasks[DstVecIdx].second.resize(ElemsPerVReg, -1);
- OutMasks[DstVecIdx].second[DstSubIdx] = SrcSubIdx;
- }
EVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
MVT OneRegVT = MVT::getVectorVT(ElemVT, ElemsPerVReg);
MVT M1VT = getContainerForFixedLengthVector(DAG, OneRegVT, Subtarget);
assert(M1VT == getLMUL1VT(M1VT));
unsigned NumOpElts = M1VT.getVectorMinNumElements();
- SDValue Vec = DAG.getUNDEF(ContainerVT);
+ unsigned NormalizedVF = ContainerVT.getVectorMinNumElements();
+ unsigned NumOfSrcRegs = NormalizedVF / NumOpElts;
+ unsigned NumOfDestRegs = NormalizedVF / NumOpElts;
// The following semantically builds up a fixed length concat_vector
// of the component shuffle_vectors. We eagerly lower to scalable here
// to avoid DAG combining it back to a large shuffle_vector again.
V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
V2 = convertToScalableVector(ContainerVT, V2, DAG, Subtarget);
- for (unsigned DstVecIdx = 0 ; DstVecIdx < OutMasks.size(); DstVecIdx++) {
- auto &[SrcVecIdx, SrcSubMask] = OutMasks[DstVecIdx];
- if (SrcVecIdx == -1)
- continue;
- unsigned ExtractIdx = (SrcVecIdx % VRegsPerSrc) * NumOpElts;
- SDValue SrcVec = (unsigned)SrcVecIdx >= VRegsPerSrc ? V2 : V1;
+ SmallVector<SmallVector<std::tuple<unsigned, unsigned, SmallVector<int>>>>
+ Operands;
+ processShuffleMasks(
+ Mask, NumOfSrcRegs, NumOfDestRegs, NumOfDestRegs,
+ [&]() { Operands.emplace_back(); },
+ [&](ArrayRef<int> SrcSubMask, unsigned SrcVecIdx, unsigned DstVecIdx) {
+ Operands.emplace_back().emplace_back(
+ SrcVecIdx, UINT_MAX,
+ SmallVector<int>(SrcSubMask.begin(), SrcSubMask.end()));
+ },
+ [&](ArrayRef<int> SrcSubMask, unsigned Idx1, unsigned Idx2, bool NewReg) {
+ if (NewReg)
+ Operands.emplace_back();
+ Operands.back().emplace_back(
+ Idx1, Idx2, SmallVector<int>(SrcSubMask.begin(), SrcSubMask.end()));
+ });
+ assert(Operands.size() == NumOfDestRegs && "Whole vector must be processed");
+ // Note: limit the number of shuffles emitted here to prevent code size
+ // explosion.
+ // TODO: investigate whether extra analysis of the masks can identify cases
+ // where emitting more shuffles is still profitable.
+ unsigned NumShuffles = std::accumulate(
+ Operands.begin(), Operands.end(), 0u,
+ [&](unsigned N,
+ ArrayRef<std::tuple<unsigned, unsigned, SmallVector<int>>> Data) {
+ if (Data.empty())
+ return N;
+ N += Data.size();
+ for (const auto &P : Data) {
+ unsigned Idx2 = std::get<1>(P);
+ ArrayRef<int> Mask = std::get<2>(P);
+ if (Idx2 != UINT_MAX)
+ ++N;
+ else if (ShuffleVectorInst::isIdentityMask(Mask, Mask.size()))
+ --N;
+ }
+ return N;
+ });
+ if ((NumOfDestRegs > 2 && NumShuffles > NumOfDestRegs) ||
----------------
alexey-bataev wrote:
Simplified
https://github.com/llvm/llvm-project/pull/121765
More information about the llvm-commits mailing list