[PATCH] D132566: [SLP] Fix cost model w.r.t. operand properties

Wed Aug 24 08:12:40 PDT 2022

reames created this revision.
reames added reviewers: ABataev, RKSimon, craig.topper.
Herald added subscribers: vporpo, StephenFan, bollu, hiraditya, mcrosier.
Herald added a project: All.
reames requested review of this revision.
Herald added a project: LLVM.

We allow the target to report different costs depending on properties of the operands; given this, we have to make sure we pass the right set of operands and account for the fact that different scalar instructions can have operands with different properties.

As a motivating example, consider a set of multiplies which each multiply by a constant (but not all the same constant).  Most of the constants are powers of two (but not all).

If the target doesn't have support for non-uniform constant immediates, this will likely require constant materialization and a non-uniform multiply.  However, depending on the balance of target costs for constant scalar multiplies vs a single vector multiply, this might or might not be a profitable vectorization.

This ends up basically being a rewrite of the existing code.  Normally, I'd scope the change more narrowly, but I kept noticing things which seemed highly suspicious, and none of the existing code appears to have any test coverage at all.  I think this is a case where simply throwing out the existing code and starting from scratch is reasonable.

This is a follow-on to Alexey's D126885 <https://reviews.llvm.org/D126885>, but also handles the arithmetic instruction case, since the existing code appears to have the same problem.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D132566

Files:
  llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp


Index: llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
===================================================================

--- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -6410,32 +6410,26 @@
     case Instruction::And:
     case Instruction::Or:
     case Instruction::Xor: {
-      TTI::OperandValueInfo Op1Info = {TTI::OK_AnyValue, TTI::OP_None};
-
-      // Certain instructions can be cheaper to vectorize if they have a
-      // constant second vector operand.
       const unsigned OpIdx = isa<BinaryOperator>(VL0) ? 1 : 0;
-      auto Op2Info = getOperandInfo(VL, OpIdx);
 
-      SmallVector<const Value *, 4> Operands(VL0->operand_values());
-      InstructionCost ScalarEltCost =
+      InstructionCost ScalarCost = 0;
+      for (auto *V : VL) {
+        auto *VI = cast<Instruction>(V);
+        TTI::OperandValueInfo Op1Info = TTI::getOperandInfo(VI->getOperand(0));
+        TTI::OperandValueInfo Op2Info = TTI::getOperandInfo(VI->getOperand(OpIdx));
+        SmallVector<const Value *, 4> Operands(VI->operand_values());
+        ScalarCost +=
           TTI->getArithmeticInstrCost(E->getOpcode(), ScalarTy, CostKind,
-                                      Op1Info, Op2Info,
-                                      Operands, VL0);
-      if (NeedToShuffleReuses) {
-        CommonCost -= (EntryVF - VL.size()) * ScalarEltCost;
+                                      Op1Info, Op2Info, Operands, VI);
       }
-      InstructionCost ScalarCost = VecTy->getNumElements() * ScalarEltCost;
-      for (unsigned I = 0, Num = VL0->getNumOperands(); I < Num; ++I) {
-        if (all_of(VL, [I](Value *V) {
-              return isConstant(cast<Instruction>(V)->getOperand(I));
-            }))
-          Operands[I] = ConstantVector::getNullValue(VecTy);
+      if (NeedToShuffleReuses) {
+        CommonCost -= (EntryVF - VL.size()) * ScalarCost/VL.size();
       }
+      auto Op1Info = getOperandInfo(VL, 0);
+      auto Op2Info = getOperandInfo(VL, OpIdx);
       InstructionCost VecCost =
           TTI->getArithmeticInstrCost(E->getOpcode(), VecTy, CostKind,
-                                      Op1Info, Op2Info,
-                                      Operands, VL0);
+                                      Op1Info, Op2Info);
       LLVM_DEBUG(dumpTreeCosts(E, CommonCost, VecCost, ScalarCost));
       return CommonCost + VecCost - ScalarCost;
     }
@@ -6500,23 +6494,18 @@
       auto *SI =
           cast<StoreInst>(IsReorder ? VL[E->ReorderIndices.front()] : VL0);
       Align Alignment = SI->getAlign();
-      TTI::OperandValueInfo OpInfo = TTI::getOperandInfo(SI->getOperand(0));
-      InstructionCost ScalarEltCost = TTI->getMemoryOpCost(
-          Instruction::Store, ScalarTy, Alignment, 0, CostKind, OpInfo, VL0);
-      InstructionCost ScalarStCost = VecTy->getNumElements() * ScalarEltCost;
-      TTI::OperandValueKind OpVK = TTI::OK_AnyValue;
-      if (all_of(E->Scalars,
-                 [](Value *V) {
-                   return isConstant(cast<Instruction>(V)->getOperand(0));
-                 }) &&
-          any_of(E->Scalars, [](Value *V) {
-            Value *Op = cast<Instruction>(V)->getOperand(0);
-            return !isa<UndefValue>(Op) && !cast<Constant>(Op)->isZeroValue();
-          }))
-        OpVK = TTI::OK_NonUniformConstantValue;
-      InstructionCost VecStCost = TTI->getMemoryOpCost(
-          Instruction::Store, VecTy, Alignment, 0, CostKind,
-          {OpVK, TTI::OP_None}, VL0);
+      InstructionCost ScalarStCost = 0;
+      for (auto *V : VL) {
+        auto *VI = cast<Instruction>(V);
+        TTI::OperandValueInfo OpInfo = TTI::getOperandInfo(VI->getOperand(0));
+        ScalarStCost +=
+          TTI->getMemoryOpCost(Instruction::Store, ScalarTy, Alignment, 0,
+                               CostKind, OpInfo, VI);
+      }
+      TTI::OperandValueInfo StoredOpInfo = getOperandInfo(VL, 0);
+      InstructionCost VecStCost =
+        TTI->getMemoryOpCost(Instruction::Store, VecTy, Alignment, 0, CostKind,
+                             StoredOpInfo);
       LLVM_DEBUG(dumpTreeCosts(E, CommonCost, VecStCost, ScalarStCost));
       return CommonCost + VecStCost - ScalarStCost;
     }


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D132566.455222.patch
Type: text/x-patch
Size: 4232 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20220824/9c09f958/attachment.bin>