[llvm] [TTI][RISCV]Improve costs for fixed vector whole reg extract/insert. (PR #80164)
Luke Lau via llvm-commits
llvm-commits at lists.llvm.org
Sun Feb 11 18:47:23 PST 2024
================
@@ -326,6 +326,48 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
switch (Kind) {
default:
break;
+ case TTI::SK_ExtractSubvector:
+ if (isa<FixedVectorType>(SubTp) &&
+ LT.second.getVectorElementType() != MVT::i1) {
+ unsigned SubTpRegs = getRegUsageForType(SubTp);
+ unsigned SubNumElems = NextPowerOf2(
+ divideCeil(SubTp->getElementCount().getFixedValue(), SubTpRegs));
+ // Whole vector extract - just the vector itself + (possible) vsetvli.
+ // TODO: consider adding the cost for vsetvli.
+ if (Index == 0 || (ST->getRealMaxVLen() == ST->getRealMinVLen() &&
+ SubNumElems * LT.second.getScalarSizeInBits() ==
+ ST->getRealMinVLen() &&
+ Index % SubNumElems == 0))
+ return TTI::TCC_Free;
+ }
+ break;
+ case TTI::SK_InsertSubvector:
+ if (auto *FSubTy = dyn_cast<FixedVectorType>(SubTp)) {
+ unsigned TpRegs = getRegUsageForType(Tp);
+ unsigned SubTpRegs = getRegUsageForType(SubTp);
+ unsigned NextSubTpRegs = getRegUsageForType(FixedVectorType::get(
+ Tp->getElementType(), FSubTy->getNumElements() + 1));
+ unsigned NumElems = NextPowerOf2(
+ divideCeil(Tp->getElementCount().getFixedValue(), TpRegs));
+ // Whole vector insert - just the vector itself + (possible) vsetvli.
+ // TODO: consider adding the cost for vsetvli.
+ if ((Index == 0 || (ST->getRealMaxVLen() == ST->getRealMinVLen() &&
+ NumElems * LT.second.getScalarSizeInBits() ==
+ ST->getRealMinVLen() &&
+ Index % NumElems == 0)) &&
+ (any_of(Args, UndefValue::classof) ||
+ (SubTpRegs != 0 && SubTpRegs != NextSubTpRegs &&
----------------
lukel97 wrote:
When would `SubTpRegs` be 0? Is it possible for `getRegUsageForType` to return 0?
https://github.com/llvm/llvm-project/pull/80164
More information about the llvm-commits mailing list