[llvm] [RISCV][TTI] Use processShuffleMasks for cost estimations/actual per-register shuffles (PR #118103)
Philip Reames via llvm-commits
llvm-commits at lists.llvm.org
Mon Jan 27 09:26:52 PST 2025
================
@@ -389,7 +389,103 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
// First, handle cases where having a fixed length vector enables us to
// give a more accurate cost than falling back to generic scalable codegen.
// TODO: Each of these cases hints at a modeling gap around scalable vectors.
- if (isa<FixedVectorType>(Tp)) {
+ if (ST->hasVInstructions() && isa<FixedVectorType>(Tp)) {
+ MVT LegalVT = LT.second;
+ InstructionCost NumOfDests = InstructionCost::getInvalid();
+ const auto VLen = ST->getRealVLen();
+ if (VLen && LegalVT.isFixedLengthVector() && !Mask.empty()) {
+ MVT ElemVT = LegalVT.getVectorElementType();
+ unsigned ElemsPerVReg = *VLen / ElemVT.getFixedSizeInBits();
+ LegalVT = getTypeLegalizationCost(
+ FixedVectorType::get(Tp->getElementType(), ElemsPerVReg))
+ .second;
+ // Number of destination vectors after legalization:
+ NumOfDests = divideCeil(Mask.size(), LegalVT.getVectorNumElements());
+ }
+ if (NumOfDests.isValid() && NumOfDests > 1 &&
+ LegalVT.isFixedLengthVector() &&
+ LegalVT.getVectorElementType().getSizeInBits() ==
+ Tp->getElementType()->getPrimitiveSizeInBits() &&
+ LegalVT.getVectorNumElements() <
+ Tp->getElementCount().getFixedValue()) {
+ unsigned VecTySize = DL.getTypeStoreSize(Tp);
+ unsigned LegalVTSize = LegalVT.getStoreSize();
+ // Number of source vectors after legalization:
+ unsigned NumOfSrcs = divideCeil(VecTySize, LegalVTSize);
+
+ auto *SingleOpTy = FixedVectorType::get(Tp->getElementType(),
+ LegalVT.getVectorNumElements());
+
+ // Try to perform better estimation of the permutation.
+ // 1. Split the source/destination vectors into real registers.
+ // 2. Do the mask analysis to identify which real registers are
+ // permuted. If more than 1 source registers are used for the
+ // destination register building, the cost for this destination register
+ // is (Number_of_source_register - 1) * Cost_PermuteTwoSrc. If only one
+ // source register is used, build mask and calculate the cost as a cost
+ // of PermuteSingleSrc.
+ // Also, for the single register permute we try to identify if the
+ // destination register is just a copy of the source register or the
+ // copy of the previous destination register (the cost is
+ // TTI::TCC_Basic). If the source register is just reused, the cost for
+ // this operation is 0.
+ unsigned E = *NumOfDests.getValue();
+ unsigned NormalizedVF =
+ LegalVT.getVectorNumElements() * std::max(NumOfSrcs, E);
+ unsigned NumOfSrcRegs = NormalizedVF / LegalVT.getVectorNumElements();
+ unsigned NumOfDestRegs = NormalizedVF / LegalVT.getVectorNumElements();
+ SmallVector<int> NormalizedMask(NormalizedVF, PoisonMaskElem);
+ copy(Mask, NormalizedMask.begin());
+ InstructionCost Cost = 0;
+ SmallBitVector ExtractedRegs(2 * NumOfSrcRegs);
+ int NumShuffles = 0;
+ processShuffleMasks(
+ NormalizedMask, NumOfSrcRegs, NumOfDestRegs, NumOfDestRegs, []() {},
+ [&](ArrayRef<int> RegMask, unsigned SrcReg, unsigned DestReg) {
+ if (ExtractedRegs.test(SrcReg)) {
+ Cost += getShuffleCost(TTI::SK_ExtractSubvector, Tp, {}, CostKind,
+ (SrcReg % NumOfSrcRegs) *
+ SingleOpTy->getNumElements(),
+ SingleOpTy);
+ ExtractedRegs.set(SrcReg);
+ }
+ ++NumShuffles;
----------------
preames wrote:
Please sink the increment into the if-block and remove the decrement on the untaken path.
https://github.com/llvm/llvm-project/pull/118103
More information about the llvm-commits mailing list