[llvm] [LV] Vectorize histogram operations (PR #99851)

Wed Sep 25 13:09:05 PDT 2024

================
@@ -1047,6 +1048,96 @@ void VPWidenCallRecipe::print(raw_ostream &O, const Twine &Indent,
     O << ")";
   }
 }
+#endif
+
+/// Emit one Intrinsic::experimental_vector_histogram_add call for each
+/// unrolled part (0 .. State.UF - 1).
+///
+/// Operand 0 provides the vector of addresses and operand 1 the increment,
+/// which is requested as a scalar. A Sub opcode is lowered by negating the
+/// increment before it is passed to the add intrinsic.
+void VPHistogramRecipe::execute(VPTransformState &State) {
+  State.setDebugLocFrom(getDebugLoc());
+  IRBuilderBase &Builder = State.Builder;
+
+  for (unsigned Part = 0; Part != State.UF; ++Part) {
+    Value *Addrs = State.get(getOperand(0), Part);
+    Value *Delta = State.get(getOperand(1), Part, /*IsScalar=*/true);
+    auto *AddrVecTy = cast<VectorType>(Addrs->getType());
+
+    // The intrinsic always takes a mask operand, whereas the recipe's mask
+    // is optional. With no recipe mask every lane is active, so splat an
+    // all-true i1 across the address vector's element count.
+    VPValue *VPMask = getMask();
+    Value *LaneMask =
+        VPMask ? State.get(VPMask, Part)
+               : Builder.CreateVectorSplat(
+                     AddrVecTy->getElementCount(),
+                     ConstantInt::getTrue(Builder.getInt1Ty()));
+
+    // Only an "add" intrinsic is used here, so a subtract is expressed by
+    // negating the increment. A dedicated intrinsic may be added later.
+    if (Opcode == Instruction::Sub)
+      Delta = Builder.CreateNeg(Delta);
+    else
+      assert(Opcode == Instruction::Add && "only add or sub supported for now");
+
+    // Overloaded on the address-vector type and the increment type.
+    Builder.CreateIntrinsic(Intrinsic::experimental_vector_histogram_add,
+                            {AddrVecTy, Delta->getType()},
+                            {Addrs, Delta, LaneMask});
+  }
+}
+
+InstructionCost VPHistogramRecipe::computeCost(ElementCount VF,
+                                               VPCostContext &Ctx) const {
+  // FIXME: Take the gather and scatter into account as well. For now we're
+  //        generating the same cost as the fallback path, but we'll likely
+  //        need to create a new TTI method for determining the cost, including
+  //        whether we can use base + vec-of-smaller-indices or just
+  //        vec-of-pointers.
+  assert(VF.isVector() && "Invalid VF for histogram cost");
+  Type *AddressTy = Ctx.Types.inferScalarType(getOperand(0));
+  VPValue *IncAmt = getOperand(1);
+  Type *IncTy = Ctx.Types.inferScalarType(IncAmt);
+  VectorType *VTy = VectorType::get(IncTy, VF);
+
+  // Assume that a non-constant update value (or a constant != 1) requires
+  // a multiply, and add that into the cost.
+  Value *RHS = IncAmt->getUnderlyingValue();
+  // The underlying value may be null, check for a live-in if so.
+  if (!RHS && IncAmt->isLiveIn())
----------------
fhahn wrote:

Is `RHS` only used to check whether it is a constant? It can only be a constant if it is a live-in, so the initial `getUnderlyingValue()` call can be skipped?

https://github.com/llvm/llvm-project/pull/99851