[llvm] [LV] Vectorize histogram operations (PR #99851)
Graham Hunter via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 18 07:23:54 PDT 2024
================
@@ -953,6 +954,92 @@ void VPWidenCallRecipe::print(raw_ostream &O, const Twine &Indent,
O << ")";
}
}
+#endif
+
+/// Emit one llvm.experimental.vector.histogram.add intrinsic call per unroll
+/// part (State.UF calls in total). Operand 0 supplies the vector-typed
+/// address value, operand 1 the scalar update amount, and the optional mask
+/// operand selects the active lanes. A Sub opcode is handled by negating the
+/// update amount before it is passed to the add intrinsic.
+void VPHistogramRecipe::execute(VPTransformState &State) {
+ State.setDebugLocFrom(getDebugLoc());
+ IRBuilderBase &Builder = State.Builder;
+
+ for (unsigned Part = 0; Part < State.UF; ++Part) {
+ // Fetch this part's operands: the address as a vector value, the
+ // increment as a scalar (IsScalar=true).
+ Value *Address = State.get(getOperand(0), Part);
+ Value *IncAmt = State.get(getOperand(1), Part, /*IsScalar=*/true);
+ // cast<> asserts that the address value really is a vector; its type is
+ // reused for the mask's element count and for the intrinsic overload.
+ VectorType *VTy = cast<VectorType>(Address->getType());
+
+ // The histogram intrinsic requires a mask even if the recipe doesn't;
+ // if the mask operand was omitted then all lanes should be executed and
+ // we just need to synthesize an all-true mask.
+ Value *Mask = nullptr;
+ if (VPValue *VPMask = getMask())
+ Mask = State.get(VPMask, Part);
+ else
+ Mask = Builder.CreateVectorSplat(
+ VTy->getElementCount(), ConstantInt::getTrue(Builder.getInt1Ty()));
+
+ // If this is a subtract, we want to invert the increment amount. We may
+ // add a separate intrinsic in future, but for now we'll try this.
+ if (Opcode == Instruction::Sub)
+ IncAmt = Builder.CreateNeg(IncAmt);
+ else
+ assert(Opcode == Instruction::Add && "only add or sub supported for now");
+
+ // The intrinsic is overloaded on the address vector type and the
+ // increment type; its result is not used here.
+ State.Builder.CreateIntrinsic(Intrinsic::experimental_vector_histogram_add,
+ {VTy, IncAmt->getType()},
+ {Address, IncAmt, Mask});
+ }
+}
+
+InstructionCost VPHistogramRecipe::computeCost(ElementCount VF,
+ VPCostContext &Ctx) const {
+ // FIXME: Take the gather and scatter into account as well. For now we're
+ // generating the same cost as the fallback path, but we'll likely
+ // need to create a new TTI method for determining the cost, including
+ // whether we can use base + vec-of-smaller-indices or just
+ // vec-of-pointers.
+ assert(VF.isVector() && "Invalid VF for histogram cost");
+ Value *Address = getOperand(0)->getUnderlyingValue();
+ Value *IncAmt = getOperand(1)->getUnderlyingValue();
+ Type *IncTy = IncAmt->getType();
+ VectorType *VTy = VectorType::get(IncTy, VF);
+
+ // Assume that a non-constant update value (or a constant != 1) requires
+ // a multiply, and add that into the cost.
+ InstructionCost MulCost = TTI::TCC_Free;
+ ConstantInt *RHS = dyn_cast<ConstantInt>(IncAmt);
----------------
huntergr-arm wrote:
done
https://github.com/llvm/llvm-project/pull/99851
More information about the llvm-commits
mailing list