[llvm] [LV] Vectorize histogram operations (PR #99851)

Graham Hunter via llvm-commits llvm-commits at lists.llvm.org
Wed Sep 18 07:23:54 PDT 2024


================
@@ -953,6 +954,92 @@ void VPWidenCallRecipe::print(raw_ostream &O, const Twine &Indent,
     O << ")";
   }
 }
+#endif
+
+void VPHistogramRecipe::execute(VPTransformState &State) { // Emits one histogram intrinsic call per unrolled part.
+  State.setDebugLocFrom(getDebugLoc());
+  IRBuilderBase &Builder = State.Builder;
+
+  for (unsigned Part = 0; Part < State.UF; ++Part) {
+    Value *Address = State.get(getOperand(0), Part); // Vector-typed (see the cast below).
+    Value *IncAmt = State.get(getOperand(1), Part, /*IsScalar=*/true); // Requested as a scalar.
+    VectorType *VTy = cast<VectorType>(Address->getType());
+
+    // The histogram intrinsic always takes a mask operand, even when this
+    // recipe has none. Without a recipe mask every lane is active, so
+    // synthesize an all-true splat mask with VTy's element count.
+    Value *Mask = nullptr;
+    if (VPValue *VPMask = getMask())
+      Mask = State.get(VPMask, Part);
+    else
+      Mask = Builder.CreateVectorSplat(
+          VTy->getElementCount(), ConstantInt::getTrue(Builder.getInt1Ty()));
+
+    // A subtract is lowered through the same add intrinsic by negating the
+    // increment amount; a dedicated intrinsic may be added in future.
+    if (Opcode == Instruction::Sub)
+      IncAmt = Builder.CreateNeg(IncAmt);
+    else
+      assert(Opcode == Instruction::Add && "only add or sub supported for now");
+
+    State.Builder.CreateIntrinsic(Intrinsic::experimental_vector_histogram_add,
+                                  {VTy, IncAmt->getType()},
+                                  {Address, IncAmt, Mask}); // Returned call is not recorded in State.
+  }
+}
+
+InstructionCost VPHistogramRecipe::computeCost(ElementCount VF,
+                                               VPCostContext &Ctx) const {
+  // FIXME: Take the gather and scatter into account as well. For now we're
+  //        generating the same cost as the fallback path, but we'll likely
+  //        need to create a new TTI method for determining the cost, including
+  //        whether we can use base + vec-of-smaller-indices or just
+  //        vec-of-pointers.
+  assert(VF.isVector() && "Invalid VF for histogram cost");
+  Value *Address = getOperand(0)->getUnderlyingValue();
+  Value *IncAmt = getOperand(1)->getUnderlyingValue();
+  Type *IncTy = IncAmt->getType();
+  VectorType *VTy = VectorType::get(IncTy, VF);
+
+  // Assume that a non-constant update value (or a constant != 1) requires
+  // a multiply, and add that into the cost.
+  InstructionCost MulCost = TTI::TCC_Free;
+  ConstantInt *RHS = dyn_cast<ConstantInt>(IncAmt);
----------------
huntergr-arm wrote:

done

https://github.com/llvm/llvm-project/pull/99851


More information about the llvm-commits mailing list