[llvm] [RISCV][TTI] Reduce cost of a build_vector pattern (PR #108419)

Fri Sep 20 07:53:06 PDT 2024

================
@@ -616,6 +616,39 @@ InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
   return BaseT::getShuffleCost(Kind, Tp, Mask, CostKind, Index, SubTp);
 }
 
+/// Returns true when the LMUL computed for \p VT by
+/// RISCVTargetLowering::getLMUL is LMUL_1 or one of the fractional values
+/// (LMUL_F2, LMUL_F4, LMUL_F8); returns false for any other LMUL.
+///
+/// This is a pure predicate, so it returns bool rather than an integer type.
+static bool isM1OrSmaller(MVT VT) {
+  RISCVII::VLMUL LMUL = RISCVTargetLowering::getLMUL(VT);
+  return (LMUL == RISCVII::VLMUL::LMUL_F8 || LMUL == RISCVII::VLMUL::LMUL_F4 ||
+          LMUL == RISCVII::VLMUL::LMUL_F2 || LMUL == RISCVII::VLMUL::LMUL_1);
+}
+
+InstructionCost RISCVTTIImpl::getScalarizationOverhead(
+    VectorType *Ty, const APInt &DemandedElts, bool Insert, bool Extract,
+    TTI::TargetCostKind CostKind) {
+  if (isa<ScalableVectorType>(Ty))
+    return InstructionCost::getInvalid();
+
+  // A build_vector (which is m1 sized or smaller) can be done in no
+  // worse than one vslide1down.vx per element in the type.  We could
+  // in theory do an explode_vector in the inverse manner, but our
+  // lowering today does not have a first class node for this pattern.
+  InstructionCost Cost = BaseT::getScalarizationOverhead(
+      Ty, DemandedElts, Insert, Extract, CostKind);
+  std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty);
+  if (Insert && !Extract && LT.first.isValid() && LT.second.isVector() &&
+      Ty->getScalarSizeInBits() != 1) {
+    MVT ContainerVT = LT.second;
+    if (ContainerVT.isFixedLengthVector())
+      ContainerVT = TLI->getContainerForFixedLengthVector(ContainerVT);
+    if (isM1OrSmaller(ContainerVT)) {
+      InstructionCost BV = cast<FixedVectorType>(Ty)->getNumElements();
----------------
alexey-bataev wrote:

```suggestion
      InstructionCost BV = cast<FixedVectorType>(Ty)->getNumElements() * getRISCVInstructionCost(RISCV::VSLIDE1DOWN_VX, LT.second, CostKind);
```
?

https://github.com/llvm/llvm-project/pull/108419