[llvm] [CostModel][AArch64] Make extractelement, with fmul user, free whenev… (PR #111479)
David Green via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 12 10:28:55 PST 2024
================
@@ -3226,6 +3227,130 @@ InstructionCost AArch64TTIImpl::getVectorInstrCostHelper(const Instruction *I,
// compile-time considerations.
}
+ // In case of Neon, if there exists extractelement from lane != 0 such that
+ // 1. extractelement does not necessitate a move from vector_reg -> GPR.
+ // 2. extractelement result feeds into fmul.
+ // 3. Other operand of fmul is an extractelement from lane 0 or lane
+ // equivalent to 0.
+ // then the extractelement can be merged with fmul in the backend and it
+ // incurs no cost.
+ // e.g.
+ // define double @foo(<2 x double> %a) {
+ // %1 = extractelement <2 x double> %a, i32 0
+ // %2 = extractelement <2 x double> %a, i32 1
+ // %res = fmul double %1, %2
+ // ret double %res
+ // }
+ // %2 and %res can be merged in the backend to generate fmul d0, d0, v1.d[1]
+ auto ExtractCanFuseWithFmul = [&]() {
+ // We bail out if the extract is from lane 0.
+ if (Index == 0)
+ return false;
+
+ // Check if the scalar element type of the vector operand of ExtractElement
+ // instruction is one of the allowed types.
+ auto IsAllowedScalarTy = [&](const Type *T) {
+ return T->isFloatTy() || T->isDoubleTy() ||
+ (T->isHalfTy() && ST->hasFullFP16());
+ };
+
+ // Check if the extractelement user is scalar fmul.
+ auto IsUserFMulScalarTy = [](const Value *EEUser) {
+ // Check if the user is scalar fmul.
+ const auto *BO = dyn_cast_if_present<BinaryOperator>(EEUser);
+ return BO && BO->getOpcode() == BinaryOperator::FMul &&
+ !BO->getType()->isVectorTy();
+ };
+
+ // Check if the type constraints on input vector type and result scalar type
+ // of extractelement instruction are satisfied.
+ auto TypeConstraintsOnEESatisfied =
+ [&IsAllowedScalarTy](const Type *VectorTy, const Type *ScalarTy) {
+ return isa<FixedVectorType>(VectorTy) && IsAllowedScalarTy(ScalarTy);
+ };
+
+ // Check if the extract index is from lane 0 or lane equivalent to 0 for a
+ // certain scalar type and a certain vector register width.
+ auto IsExtractLaneEquivalentToZero = [&](const unsigned &Idx,
+ const unsigned &EltSz) {
+ auto RegWidth =
+ getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector)
+ .getFixedValue();
+ return (Idx == 0 || (Idx * EltSz) % RegWidth == 0);
+ };
+
+ if (Opcode.has_value()) {
+ if (!TypeConstraintsOnEESatisfied(Val, Val->getScalarType()))
+ return false;
----------------
davemgreen wrote:
Can you explain what you mean?
In this case I didn't mean to remove the type checks, just to move them up above the `if (Scalar) {`. Then remove the version that uses `EE->getVectorOperand()->getType()` below, as that should always be equal to `Val` if it exists.
https://github.com/llvm/llvm-project/pull/111479
More information about the llvm-commits
mailing list