[llvm] [LV] Vectorize conditional scalar assignments (PR #158088)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 11 07:36:35 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-vectorizers
Author: Graham Hunter (huntergr-arm)
<details>
<summary>Changes</summary>
Based on Michael Maitland's previous work:
https://github.com/llvm/llvm-project/pull/121222
This PR uses the existing recurrences code instead of introducing a
new pass just for conditional scalar assignment (CSA)
auto-vectorization. I've also made the recipes more generic.
I've enabled it by default to see the impact on tests; if there are
regressions, we can put it behind a CLI option. I haven't corrected
all the comments in the tests yet; I'll wait until we decide whether
to keep it enabled by default first.
I will be doing some performance runs on AArch64 to figure out
the cost model, as we mostly regard vector selects as per-lane
operations rather than as selecting the whole vector at once.
---
Patch is 204.91 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/158088.diff
19 Files Affected:
- (modified) llvm/include/llvm/Analysis/IVDescriptors.h (+15)
- (modified) llvm/lib/Analysis/IVDescriptors.cpp (+44-1)
- (modified) llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp (+8)
- (modified) llvm/lib/Transforms/Vectorize/LoopVectorize.cpp (+32-1)
- (modified) llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp (+3)
- (modified) llvm/lib/Transforms/Vectorize/VPlan.h (+44)
- (modified) llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp (+7-2)
- (modified) llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp (+69-3)
- (modified) llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp (+67)
- (modified) llvm/lib/Transforms/Vectorize/VPlanTransforms.h (+7)
- (modified) llvm/lib/Transforms/Vectorize/VPlanValue.h (+1)
- (added) llvm/test/Transforms/LoopVectorize/AArch64/conditional-scalar-assignment.ll (+155)
- (added) llvm/test/Transforms/LoopVectorize/conditional-scalar-assignment-vplan.ll (+138)
- (modified) llvm/test/Transforms/LoopVectorize/iv-select-cmp-decreasing.ll (+294-38)
- (modified) llvm/test/Transforms/LoopVectorize/iv-select-cmp-no-wrap.ll (+80-8)
- (modified) llvm/test/Transforms/LoopVectorize/iv-select-cmp-non-const-iv-start.ll (+321-52)
- (modified) llvm/test/Transforms/LoopVectorize/iv-select-cmp-trunc.ll (+450-100)
- (modified) llvm/test/Transforms/LoopVectorize/iv-select-cmp.ll (+171-19)
- (modified) llvm/test/Transforms/LoopVectorize/select-cmp.ll (+121-20)
``````````diff
diff --git a/llvm/include/llvm/Analysis/IVDescriptors.h b/llvm/include/llvm/Analysis/IVDescriptors.h
index f9e6da6d0846a..afa175704a7b1 100644
--- a/llvm/include/llvm/Analysis/IVDescriptors.h
+++ b/llvm/include/llvm/Analysis/IVDescriptors.h
@@ -70,6 +70,9 @@ enum class RecurKind {
FindLastIVUMax, ///< FindLast reduction with select(cmp(),x,y) where one of
///< (x,y) is increasing loop induction, and both x and y
///< are integer type, producing a UMax reduction.
+ FindLast, ///< FindLast reduction with select(cmp(),x,y) where x and y
+ ///< can be any scalar type, one is the current recurrence
+ ///< value, and the other is an arbitrary value.
// clang-format on
// TODO: Any_of and FindLast reduction need not be restricted to integer type
// only.
@@ -183,6 +186,12 @@ class RecurrenceDescriptor {
PHINode *OrigPhi, Instruction *I,
ScalarEvolution &SE);
+ /// Returns a struct describing whether the instruction is of the form
+ /// Select(Cmp(A, B), X, Y)
+ /// where one of (X, Y) is the Phi value and the other is an arbitrary value.
+ LLVM_ABI static InstDesc isFindLastPattern(Instruction *I, PHINode *Phi,
+ Loop *TheLoop);
+
/// Returns a struct describing if the instruction is a
/// Select(FCmp(X, Y), (Z = X op PHINode), PHINode) instruction pattern.
LLVM_ABI static InstDesc isConditionalRdxPattern(Instruction *I);
@@ -299,6 +308,12 @@ class RecurrenceDescriptor {
isFindLastIVRecurrenceKind(Kind);
}
+ /// Returns true if the recurrence kind is of the form
+ /// select(cmp(),x,y) where one of (x,y) is an arbitrary value.
+ static bool isFindLastRecurrenceKind(RecurKind Kind) {
+ return Kind == RecurKind::FindLast;
+ }
+
/// Returns the type of the recurrence. This type can be narrower than the
/// actual type of the Phi if the recurrence has been type-promoted.
Type *getRecurrenceType() const { return RecurrenceType; }
diff --git a/llvm/lib/Analysis/IVDescriptors.cpp b/llvm/lib/Analysis/IVDescriptors.cpp
index b8c540ce4b99d..bd87e9de46bd5 100644
--- a/llvm/lib/Analysis/IVDescriptors.cpp
+++ b/llvm/lib/Analysis/IVDescriptors.cpp
@@ -56,6 +56,8 @@ bool RecurrenceDescriptor::isIntegerRecurrenceKind(RecurKind Kind) {
case RecurKind::FindFirstIVUMin:
case RecurKind::FindLastIVSMax:
case RecurKind::FindLastIVUMax:
+ // TODO: Make type-agnostic.
+ case RecurKind::FindLast:
return true;
}
return false;
@@ -426,6 +428,8 @@ bool RecurrenceDescriptor::AddReductionVar(
++NumCmpSelectPatternInst;
if (isAnyOfRecurrenceKind(Kind) && IsASelect)
++NumCmpSelectPatternInst;
+ if (isFindLastRecurrenceKind(Kind) && IsASelect)
+ ++NumCmpSelectPatternInst;
// Check whether we found a reduction operator.
FoundReduxOp |= !IsAPhi && Cur != Start;
@@ -789,6 +793,38 @@ RecurrenceDescriptor::isFindIVPattern(RecurKind Kind, Loop *TheLoop,
return InstDesc(false, I);
}
+RecurrenceDescriptor::InstDesc
+RecurrenceDescriptor::isFindLastPattern(Instruction *I, PHINode *Phi,
+ Loop *TheLoop) {
+ // Must be a scalar.
+ Type *Type = Phi->getType();
+ if (!Type->isIntegerTy() && !Type->isFloatingPointTy() &&
+ !Type->isPointerTy())
+ return InstDesc(false, I);
+
+ SelectInst *Select = dyn_cast<SelectInst>(I);
+ if (!Select)
+ return InstDesc(false, I);
+
+ // FIXME: Support more complex patterns, including multiple selects.
+ // Phi or Select must be used only outside the loop,
+ // except for each other.
+ auto IsOnlyUsedOutsideLoop = [&](Value *V, Value *Ignore) {
+ return all_of(V->users(), [Ignore, TheLoop](User *U) {
+ if (U == Ignore)
+ return true;
+ if (auto *I = dyn_cast<Instruction>(U))
+ return !TheLoop->contains(I);
+ return false;
+ });
+ };
+ if (!IsOnlyUsedOutsideLoop(Phi, Select) ||
+ !IsOnlyUsedOutsideLoop(Select, Phi))
+ return InstDesc(false, I);
+
+ return InstDesc(I, RecurKind::FindLast);
+}
+
RecurrenceDescriptor::InstDesc
RecurrenceDescriptor::isMinMaxPattern(Instruction *I, RecurKind Kind,
const InstDesc &Prev) {
@@ -927,6 +963,8 @@ RecurrenceDescriptor::InstDesc RecurrenceDescriptor::isRecurrenceInstr(
return isConditionalRdxPattern(I);
if (isFindIVRecurrenceKind(Kind) && SE)
return isFindIVPattern(Kind, L, OrigPhi, I, *SE);
+ if (isFindLastRecurrenceKind(Kind))
+ return isFindLastPattern(I, OrigPhi, L);
[[fallthrough]];
case Instruction::FCmp:
case Instruction::ICmp:
@@ -1123,7 +1161,11 @@ bool RecurrenceDescriptor::isReductionPHI(PHINode *Phi, Loop *TheLoop,
<< "\n");
return true;
}
-
+ if (AddReductionVar(Phi, RecurKind::FindLast, TheLoop, FMF, RedDes, DB, AC, DT,
+ SE)) {
+ LLVM_DEBUG(dbgs() << "Found a FindLast reduction PHI." << *Phi << "\n");
+ return true;
+ }
// Not a reduction of known type.
return false;
}
@@ -1245,6 +1287,7 @@ unsigned RecurrenceDescriptor::getOpcode(RecurKind Kind) {
case RecurKind::SMin:
case RecurKind::UMax:
case RecurKind::UMin:
+ case RecurKind::FindLast:
return Instruction::ICmp;
case RecurKind::FMax:
case RecurKind::FMin:
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 92321a76dbd80..6595c6e770be0 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -1004,6 +1004,13 @@ AArch64TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
}
break;
}
+ case Intrinsic::experimental_vector_extract_last_active:
+ if (ST->isSVEAvailable()) {
+ auto [LegalCost, _] = getTypeLegalizationCost(ICA.getArgTypes()[0]);
+ // This should turn into chained clastb instructions.
+ return LegalCost;
+ }
+ break;
default:
break;
}
@@ -5325,6 +5332,7 @@ bool AArch64TTIImpl::isLegalToVectorizeReduction(
case RecurKind::FMax:
case RecurKind::FMulAdd:
case RecurKind::AnyOf:
+ case RecurKind::FindLast:
return true;
default:
return false;
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index b4acda80cfb93..ea85685cdf7b8 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -4047,6 +4047,7 @@ static bool willGenerateVectors(VPlan &Plan, ElementCount VF,
case VPDef::VPWidenIntrinsicSC:
case VPDef::VPWidenSC:
case VPDef::VPWidenSelectSC:
+ case VPDef::VPWidenSelectVectorSC:
case VPDef::VPBlendSC:
case VPDef::VPFirstOrderRecurrencePHISC:
case VPDef::VPHistogramSC:
@@ -4546,6 +4547,11 @@ LoopVectorizationPlanner::selectInterleaveCount(VPlan &Plan, ElementCount VF,
any_of(Plan.getVectorLoopRegion()->getEntryBasicBlock()->phis(),
IsaPred<VPReductionPHIRecipe>);
+ // FIXME: implement interleaving for FindLast transform correctly.
+ for (auto &[_, RdxDesc] : Legal->getReductionVars())
+ if (RecurrenceDescriptor::isFindLastRecurrenceKind(RdxDesc.getRecurrenceKind()))
+ return 1;
+
// If we did not calculate the cost for VF (because the user selected the VF)
// then we calculate the cost of VF here.
if (LoopCost == 0) {
@@ -8687,6 +8693,10 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
*Plan, Builder))
return nullptr;
+ // Create whole-vector selects for find-last recurrences.
+ VPlanTransforms::runPass(VPlanTransforms::convertFindLastRecurrences,
+ *Plan, RecipeBuilder, Legal);
+
if (useActiveLaneMask(Style)) {
// TODO: Move checks to VPlanTransforms::addActiveLaneMask once
// TailFoldingStyle is visible there.
@@ -8779,6 +8789,7 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
RecurKind Kind = PhiR->getRecurrenceKind();
assert(
+ !RecurrenceDescriptor::isFindLastRecurrenceKind(Kind) &&
!RecurrenceDescriptor::isAnyOfRecurrenceKind(Kind) &&
!RecurrenceDescriptor::isFindIVRecurrenceKind(Kind) &&
"AnyOf and FindIV reductions are not allowed for in-loop reductions");
@@ -8987,6 +8998,10 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
FinalReductionResult =
Builder.createNaryOp(VPInstruction::ComputeAnyOfResult,
{PhiR, Start, NewExitingVPV}, ExitDL);
+ } else if (RecurrenceDescriptor::isFindLastRecurrenceKind(
+ RdxDesc.getRecurrenceKind())) {
+ FinalReductionResult = Builder.createNaryOp(
+ VPInstruction::ExtractLastActive, {NewExitingVPV}, ExitDL);
} else {
VPIRFlags Flags =
RecurrenceDescriptor::isFloatingPointRecurrenceKind(RecurrenceKind)
@@ -9076,7 +9091,8 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
RecurKind RK = RdxDesc.getRecurrenceKind();
if ((!RecurrenceDescriptor::isAnyOfRecurrenceKind(RK) &&
!RecurrenceDescriptor::isFindIVRecurrenceKind(RK) &&
- !RecurrenceDescriptor::isMinMaxRecurrenceKind(RK))) {
+ !RecurrenceDescriptor::isMinMaxRecurrenceKind(RK) &&
+ !RecurrenceDescriptor::isFindLastRecurrenceKind(RK))) {
VPBuilder PHBuilder(Plan->getVectorPreheader());
VPValue *Iden = Plan->getOrAddLiveIn(
getRecurrenceIdentity(RK, PhiTy, RdxDesc.getFastMathFlags()));
@@ -10069,6 +10085,21 @@ bool LoopVectorizePass::processLoop(Loop *L) {
// Override IC if user provided an interleave count.
IC = UserIC > 0 ? UserIC : IC;
+ // FIXME: Enable interleaving for last_active reductions.
+ if (any_of(LVL.getReductionVars(), [&](auto &Reduction) -> bool {
+ const RecurrenceDescriptor &RdxDesc = Reduction.second;
+ return RecurrenceDescriptor::isFindLastRecurrenceKind(RdxDesc.getRecurrenceKind());
+ })) {
+ LLVM_DEBUG(dbgs() << "LV: Not interleaving without vectorization due "
+ << "to conditional scalar assignments.\n");
+ IntDiagMsg = {
+ "ConditionalAssignmentPreventsScalarInterleaving",
+ "Unable to interleave without vectorization due to conditional "
+ "assignments"};
+ InterleaveLoop = false;
+ IC = 1;
+ }
+
// Emit diagnostic messages, if any.
const char *VAPassName = Hints.vectorizeAnalysisPassName();
if (!VectorizeLoop && !InterleaveLoop) {
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 75cace77ec534..7b25731af19d8 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -24868,6 +24868,7 @@ class HorizontalReduction {
case RecurKind::FindFirstIVUMin:
case RecurKind::FindLastIVSMax:
case RecurKind::FindLastIVUMax:
+ case RecurKind::FindLast:
case RecurKind::FMaxNum:
case RecurKind::FMinNum:
case RecurKind::FMaximumNum:
@@ -25009,6 +25010,7 @@ class HorizontalReduction {
case RecurKind::FindFirstIVUMin:
case RecurKind::FindLastIVSMax:
case RecurKind::FindLastIVUMax:
+ case RecurKind::FindLast:
case RecurKind::FMaxNum:
case RecurKind::FMinNum:
case RecurKind::FMaximumNum:
@@ -25115,6 +25117,7 @@ class HorizontalReduction {
case RecurKind::FindFirstIVUMin:
case RecurKind::FindLastIVSMax:
case RecurKind::FindLastIVUMax:
+ case RecurKind::FindLast:
case RecurKind::FMaxNum:
case RecurKind::FMinNum:
case RecurKind::FMaximumNum:
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 53291a931530f..2ffe68fedee05 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -548,6 +548,7 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
case VPRecipeBase::VPWidenIntrinsicSC:
case VPRecipeBase::VPWidenSC:
case VPRecipeBase::VPWidenSelectSC:
+ case VPRecipeBase::VPWidenSelectVectorSC:
case VPRecipeBase::VPBlendSC:
case VPRecipeBase::VPPredInstPHISC:
case VPRecipeBase::VPCanonicalIVPHISC:
@@ -1059,6 +1060,8 @@ class LLVM_ABI_FOR_TEST VPInstruction : public VPRecipeWithIRFlags,
ResumeForEpilogue,
/// Returns the value for vscale.
VScale,
+ // Extracts the last active lane based on a predicate vector operand.
+ ExtractLastActive,
};
private:
@@ -1749,6 +1752,47 @@ struct LLVM_ABI_FOR_TEST VPWidenSelectRecipe : public VPRecipeWithIRFlags,
unsigned getOpcode() const { return Instruction::Select; }
+ VPValue *getCond() const { return getOperand(0); }
+
+ bool isInvariantCond() const {
+ return getCond()->isDefinedOutsideLoopRegions();
+ }
+
+ /// Returns true if the recipe only uses the first lane of operand \p Op.
+ bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ assert(is_contained(operands(), Op) &&
+ "Op must be an operand of the recipe");
+ return Op == getCond() && isInvariantCond();
+ }
+};
+
+/// A recipe for selecting whole vector values.
+struct VPWidenSelectVectorRecipe : public VPRecipeWithIRFlags {
+ VPWidenSelectVectorRecipe(ArrayRef<VPValue *> Operands)
+ : VPRecipeWithIRFlags(VPDef::VPWidenSelectVectorSC, Operands) {}
+
+ ~VPWidenSelectVectorRecipe() override = default;
+
+ VPWidenSelectVectorRecipe *clone() override {
+ SmallVector<VPValue *, 3> Operands(operands());
+ return new VPWidenSelectVectorRecipe(Operands);
+ }
+
+ VP_CLASSOF_IMPL(VPDef::VPWidenSelectVectorSC)
+
+ /// Produce a widened version of the select instruction.
+ void execute(VPTransformState &State) override;
+
+ /// Return the cost of this VPWidenSelectVectorRecipe.
+ InstructionCost computeCost(ElementCount VF,
+ VPCostContext &Ctx) const override;
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ /// Print the recipe.
+ void print(raw_ostream &O, const Twine &Indent,
+ VPSlotTracker &SlotTracker) const override;
+#endif
+
VPValue *getCond() const {
return getOperand(0);
}
diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
index d400ceff7797c..a299ab8593a2f 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp
@@ -115,7 +115,8 @@ Type *VPTypeAnalysis::inferScalarTypeForRecipe(const VPInstruction *R) {
case VPInstruction::FirstActiveLane:
return Type::getIntNTy(Ctx, 64);
case VPInstruction::ExtractLastElement:
- case VPInstruction::ExtractPenultimateElement: {
+ case VPInstruction::ExtractPenultimateElement:
+ case VPInstruction::ExtractLastActive: {
Type *BaseTy = inferScalarType(R->getOperand(0));
if (auto *VecTy = dyn_cast<VectorType>(BaseTy))
return VecTy->getElementType();
@@ -308,7 +309,11 @@ Type *VPTypeAnalysis::inferScalarType(const VPValue *V) {
})
.Case<VPExpressionRecipe>([this](const auto *R) {
return inferScalarType(R->getOperandOfResultType());
- });
+ })
+ .Case<VPWidenSelectVectorRecipe>(
+ [this](const VPWidenSelectVectorRecipe *R) {
+ return inferScalarType(R->getOperand(1));
+ });
assert(ResultTy && "could not infer type for the given VPValue");
CachedTypes[V] = ResultTy;
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index bf51489543098..598fa4888fe8a 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -86,7 +86,8 @@ bool VPRecipeBase::mayWriteToMemory() const {
case VPWidenLoadSC:
case VPWidenPHISC:
case VPWidenSC:
- case VPWidenSelectSC: {
+ case VPWidenSelectSC:
+ case VPWidenSelectVectorSC: {
const Instruction *I =
dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
(void)I;
@@ -134,7 +135,8 @@ bool VPRecipeBase::mayReadFromMemory() const {
case VPWidenIntOrFpInductionSC:
case VPWidenPHISC:
case VPWidenSC:
- case VPWidenSelectSC: {
+ case VPWidenSelectSC:
+ case VPWidenSelectVectorSC: {
const Instruction *I =
dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
(void)I;
@@ -177,7 +179,8 @@ bool VPRecipeBase::mayHaveSideEffects() const {
case VPWidenPHISC:
case VPWidenPointerInductionSC:
case VPWidenSC:
- case VPWidenSelectSC: {
+ case VPWidenSelectSC:
+ case VPWidenSelectVectorSC: {
const Instruction *I =
dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
(void)I;
@@ -522,6 +525,7 @@ unsigned VPInstruction::getNumOperandsForOpcode(unsigned Opcode) {
case VPInstruction::ActiveLaneMask:
case VPInstruction::ComputeAnyOfResult:
case VPInstruction::ReductionStartVector:
+ case VPInstruction::ExtractLastActive:
return 3;
case VPInstruction::ComputeFindIVResult:
return 4;
@@ -983,6 +987,17 @@ Value *VPInstruction::generate(VPTransformState &State) {
}
case VPInstruction::ResumeForEpilogue:
return State.get(getOperand(0), true);
+ case VPInstruction::ExtractLastActive: {
+ Value *Data = State.get(getOperand(0));
+ Value *Mask = State.get(getOperand(1));
+ Value *Default = State.get(getOperand(2), /*IsScalar=*/true);
+ Type *VTy = Data->getType();
+
+ Module *M = State.Builder.GetInsertBlock()->getModule();
+ Function *ExtractLast = Intrinsic::getOrInsertDeclaration(
+ M, Intrinsic::experimental_vector_extract_last_active, {VTy});
+ return Builder.CreateCall(ExtractLast, {Data, Mask, Default});
+ }
default:
llvm_unreachable("Unsupported opcode for instruction");
}
@@ -1119,6 +1134,14 @@ InstructionCost VPInstruction::computeCost(ElementCount VF,
{PredTy, Type::getInt1Ty(Ctx.LLVMCtx)});
return Ctx.TTI.getIntrinsicInstrCost(Attrs, Ctx.CostKind);
}
+ case VPInstruction::ExtractLastActive: {
+ Type *ScalarTy = Ctx.Types.inferScalarType(this);
+ Type *VecTy = toVectorTy(ScalarTy, VF);
+ Type *MaskTy = toVectorTy(Type::getInt1Ty(Ctx.LLVMCtx), VF);
+ IntrinsicCostAttributes ICA(Intrinsic::experimental_vector_extract_last_active,
+ ScalarTy, {VecTy, MaskTy, ScalarTy});
+ return Ctx.TTI.getIntrinsicInstrCost(ICA, Ctx.CostKind);
+ }
case VPInstruction::FirstOrderRecurrenceSplice: {
assert(VF.isVector() && "Scalar FirstOrderRecurrenceSplice?");
SmallVector<int> Mask(VF.getKnownMinValue());
@@ -1174,6 +1197,7 @@ bool VPInstruction::isVectorToScalar() const {
getOpcode() == VPInstruction::FirstActiveLane ||
getOpcode() == VPInstruction::ComputeAnyOfResult ||
getOpcode() == VPInstruction::ComputeFindIVResult ||
+ getOpcode() == VPInstruction::ExtractLastActive ||
getOpcode() == VPInstruction::ComputeReductionResult ||
getOpcode() == VPInstruction::AnyOf;
}
@@ -1243,6 +1267,7 @@ bool VPInstruction::opcodeMayReadOrWriteFromMemory() const {
case VPInstruction::ExtractLastElement:
case VPInstruction::ExtractPenultimateElement:
case VPInstruction::FirstActiveLane:
+ case VPInstruction::ExtractLastActive:
case VPInstruction::FirstOrderRecurrenceSplice:
case VPInstruction::LogicalAnd:
case VPInstruction::Not:
@@ -1414,6 +1439,9 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent,
case VPInstruction::ResumeForEpilogue:
O << "resume-for-epilogue";
break;
+ case VPInstruction::ExtractLastActive:
+ O << "extract-last-active";
+ break;
default:
O << Instruction::getOpcodeName(getOpcode());
}
@@ -1927,7 +1955,9 @@ void VPHistogramRecipe::print(raw_ostream &O, const Twine &Indent,
Mask->printAsOperand(O, SlotTracker);
}
}
+#endif
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenSelectRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
O << Indent << "WIDEN-SELECT ";
@@ -2002,6 +2032,42 @@ InstructionCost VPWidenS...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/158088
More information about the llvm-commits mailing list