[llvm] [LV][VPlan] Extract the transformation of a Recipe to an EVLRecipe into a small function. NFC (PR #119510)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Dec 15 18:16:34 PST 2024
https://github.com/LiqinWeng updated https://github.com/llvm/llvm-project/pull/119510
From 7df93970812868ccee89c72ae4cf2af28251269f Mon Sep 17 00:00:00 2001
From: "Liqin.Weng" <liqin.weng at spacemit.com>
Date: Wed, 11 Dec 2024 13:58:41 +0800
Subject: [PATCH 1/5] [LV][VPlan] Extract the transformation of a Recipe to an
 EVLRecipe into a small function
---
.../Transforms/Vectorize/VPlanTransforms.cpp | 195 +++++++++---------
1 file changed, 96 insertions(+), 99 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 922cba7831f4e9..48e3914821191e 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1438,13 +1438,100 @@ void VPlanTransforms::addActiveLaneMask(
HeaderMask->replaceAllUsesWith(LaneMask);
}
+static VPRecipeBase *createEVLRecipe(VPValue &EVL, VPValue *HeaderMask,
+ VPValue *AllOneMask,
+ VPRecipeBase *CurRecipe,
+ VPTypeAnalysis TypeInfo) {
+ using namespace llvm::VPlanPatternMatch;
+ auto GetNewMask = [&](VPValue *OrigMask) -> VPValue * {
+ assert(OrigMask && "Unmasked recipe when folding tail");
+ return HeaderMask == OrigMask ? nullptr : OrigMask;
+ };
+
+ return TypeSwitch<VPRecipeBase *, VPRecipeBase *>(CurRecipe)
+ .Case<VPWidenLoadRecipe>([&](VPWidenLoadRecipe *L) {
+ VPValue *NewMask = GetNewMask(L->getMask());
+ return new VPWidenLoadEVLRecipe(*L, EVL, NewMask);
+ })
+ .Case<VPWidenStoreRecipe>([&](VPWidenStoreRecipe *S) {
+ VPValue *NewMask = GetNewMask(S->getMask());
+ return new VPWidenStoreEVLRecipe(*S, EVL, NewMask);
+ })
+ .Case<VPWidenRecipe>([&](VPWidenRecipe *W) -> VPRecipeBase * {
+ unsigned Opcode = W->getOpcode();
+ if (!Instruction::isBinaryOp(Opcode) && !Instruction::isUnaryOp(Opcode))
+ return nullptr;
+ return new VPWidenEVLRecipe(*W, EVL);
+ })
+ .Case<VPReductionRecipe>([&](VPReductionRecipe *Red) {
+ VPValue *NewMask = GetNewMask(Red->getCondOp());
+ return new VPReductionEVLRecipe(*Red, EVL, NewMask);
+ })
+ .Case<VPWidenIntrinsicRecipe>(
+ [&](VPWidenIntrinsicRecipe *CInst) -> VPRecipeBase * {
+ auto *CI = cast<CallInst>(CInst->getUnderlyingInstr());
+ Intrinsic::ID VPID = VPIntrinsic::getForIntrinsic(
+ CI->getCalledFunction()->getIntrinsicID());
+ assert(VPID != Intrinsic::not_intrinsic &&
+ "Expected VP Instrinsic");
+
+ SmallVector<VPValue *> Ops(CInst->operands());
+ assert(VPIntrinsic::getMaskParamPos(VPID) &&
+ VPIntrinsic::getVectorLengthParamPos(VPID) &&
+ "Expected VP intrinsic");
+
+ Ops.push_back(AllOneMask);
+ Ops.push_back(&EVL);
+ return new VPWidenIntrinsicRecipe(*CI, VPID, Ops,
+ TypeInfo.inferScalarType(CInst),
+ CInst->getDebugLoc());
+ })
+ .Case<VPWidenCastRecipe>([&](VPWidenCastRecipe *CInst) -> VPRecipeBase * {
+ auto *CI = dyn_cast<CastInst>(CInst->getUnderlyingInstr());
+ Intrinsic::ID VPID = VPIntrinsic::getForOpcode(CI->getOpcode());
+ assert(VPID != Intrinsic::not_intrinsic &&
+ "Expected vp.casts Instrinsic");
+
+ SmallVector<VPValue *> Ops(CInst->operands());
+ assert(VPIntrinsic::getMaskParamPos(VPID) &&
+ VPIntrinsic::getVectorLengthParamPos(VPID) &&
+ "Expected VP intrinsic");
+ Ops.push_back(AllOneMask);
+ Ops.push_back(&EVL);
+ return new VPWidenIntrinsicRecipe(
+ VPID, Ops, TypeInfo.inferScalarType(CInst), CInst->getDebugLoc());
+ })
+ .Case<VPWidenSelectRecipe>([&](VPWidenSelectRecipe *Sel) {
+ SmallVector<VPValue *> Ops(Sel->operands());
+ Ops.push_back(&EVL);
+ return new VPWidenIntrinsicRecipe(Intrinsic::vp_select, Ops,
+ TypeInfo.inferScalarType(Sel),
+ Sel->getDebugLoc());
+ })
+ .Case<VPInstruction>([&](VPInstruction *VPI) -> VPRecipeBase * {
+ VPValue *LHS, *RHS;
+ // Transform select with a header mask condition
+ // select(header_mask, LHS, RHS)
+ // into vector predication merge.
+ // vp.merge(all-true, LHS, RHS, EVL)
+ if (!match(VPI, m_Select(m_Specific(HeaderMask), m_VPValue(LHS),
+ m_VPValue(RHS))))
+ return nullptr;
+ // Use all true as the condition because this transformation is
+ // limited to selects whose condition is a header mask.
+ return new VPWidenIntrinsicRecipe(
+ Intrinsic::vp_merge, {AllOneMask, LHS, RHS, &EVL},
+ TypeInfo.inferScalarType(LHS), VPI->getDebugLoc());
+ })
+ .Default([&](VPRecipeBase *R) { return nullptr; });
+}
+
/// Replace recipes with their EVL variants.
static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
- using namespace llvm::VPlanPatternMatch;
Type *CanonicalIVType = Plan.getCanonicalIV()->getScalarType();
VPTypeAnalysis TypeInfo(CanonicalIVType);
LLVMContext &Ctx = CanonicalIVType->getContext();
- SmallVector<VPValue *> HeaderMasks = collectAllHeaderMasks(Plan);
+ VPValue *AllOneMask = Plan.getOrAddLiveIn(ConstantInt::getTrue(Ctx));
for (VPUser *U : Plan.getVF().users()) {
if (auto *R = dyn_cast<VPReverseVectorPointerRecipe>(U))
@@ -1454,112 +1541,22 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
for (VPValue *HeaderMask : collectAllHeaderMasks(Plan)) {
for (VPUser *U : collectUsersRecursively(HeaderMask)) {
auto *CurRecipe = cast<VPRecipeBase>(U);
- auto GetNewMask = [&](VPValue *OrigMask) -> VPValue * {
- assert(OrigMask && "Unmasked recipe when folding tail");
- return HeaderMask == OrigMask ? nullptr : OrigMask;
- };
-
- VPRecipeBase *NewRecipe =
- TypeSwitch<VPRecipeBase *, VPRecipeBase *>(CurRecipe)
- .Case<VPWidenLoadRecipe>([&](VPWidenLoadRecipe *L) {
- VPValue *NewMask = GetNewMask(L->getMask());
- return new VPWidenLoadEVLRecipe(*L, EVL, NewMask);
- })
- .Case<VPWidenStoreRecipe>([&](VPWidenStoreRecipe *S) {
- VPValue *NewMask = GetNewMask(S->getMask());
- return new VPWidenStoreEVLRecipe(*S, EVL, NewMask);
- })
- .Case<VPWidenRecipe>([&](VPWidenRecipe *W) -> VPRecipeBase * {
- unsigned Opcode = W->getOpcode();
- if (!Instruction::isBinaryOp(Opcode) &&
- !Instruction::isUnaryOp(Opcode))
- return nullptr;
- return new VPWidenEVLRecipe(*W, EVL);
- })
- .Case<VPReductionRecipe>([&](VPReductionRecipe *Red) {
- VPValue *NewMask = GetNewMask(Red->getCondOp());
- return new VPReductionEVLRecipe(*Red, EVL, NewMask);
- })
- .Case<VPWidenIntrinsicRecipe>(
- [&](VPWidenIntrinsicRecipe *CInst) -> VPRecipeBase * {
- auto *CI = cast<CallInst>(CInst->getUnderlyingInstr());
- Intrinsic::ID VPID = VPIntrinsic::getForIntrinsic(
- CI->getCalledFunction()->getIntrinsicID());
- if (VPID == Intrinsic::not_intrinsic)
- return nullptr;
-
- SmallVector<VPValue *> Ops(CInst->operands());
- assert(VPIntrinsic::getMaskParamPos(VPID) &&
- VPIntrinsic::getVectorLengthParamPos(VPID) &&
- "Expected VP intrinsic");
- VPValue *Mask = Plan.getOrAddLiveIn(ConstantInt::getTrue(
- IntegerType::getInt1Ty(CI->getContext())));
- Ops.push_back(Mask);
- Ops.push_back(&EVL);
- return new VPWidenIntrinsicRecipe(
- *CI, VPID, Ops, TypeInfo.inferScalarType(CInst),
- CInst->getDebugLoc());
- })
- .Case<VPWidenCastRecipe>(
- [&](VPWidenCastRecipe *CInst) -> VPRecipeBase * {
- auto *CI = dyn_cast<CastInst>(CInst->getUnderlyingInstr());
- Intrinsic::ID VPID =
- VPIntrinsic::getForOpcode(CI->getOpcode());
- assert(VPID != Intrinsic::not_intrinsic &&
- "Expected vp.casts Instrinsic");
-
- SmallVector<VPValue *> Ops(CInst->operands());
- assert(VPIntrinsic::getMaskParamPos(VPID) &&
- VPIntrinsic::getVectorLengthParamPos(VPID) &&
- "Expected VP intrinsic");
- VPValue *Mask = Plan.getOrAddLiveIn(ConstantInt::getTrue(
- IntegerType::getInt1Ty(CI->getContext())));
- Ops.push_back(Mask);
- Ops.push_back(&EVL);
- return new VPWidenIntrinsicRecipe(
- VPID, Ops, TypeInfo.inferScalarType(CInst),
- CInst->getDebugLoc());
- })
- .Case<VPWidenSelectRecipe>([&](VPWidenSelectRecipe *Sel) {
- SmallVector<VPValue *> Ops(Sel->operands());
- Ops.push_back(&EVL);
- return new VPWidenIntrinsicRecipe(Intrinsic::vp_select, Ops,
- TypeInfo.inferScalarType(Sel),
- Sel->getDebugLoc());
- })
- .Case<VPInstruction>([&](VPInstruction *VPI) -> VPRecipeBase * {
- VPValue *LHS, *RHS;
- // Transform select with a header mask condition
- // select(header_mask, LHS, RHS)
- // into vector predication merge.
- // vp.merge(all-true, LHS, RHS, EVL)
- if (!match(VPI, m_Select(m_Specific(HeaderMask), m_VPValue(LHS),
- m_VPValue(RHS))))
- return nullptr;
- // Use all true as the condition because this transformation is
- // limited to selects whose condition is a header mask.
- VPValue *AllTrue =
- Plan.getOrAddLiveIn(ConstantInt::getTrue(Ctx));
- return new VPWidenIntrinsicRecipe(
- Intrinsic::vp_merge, {AllTrue, LHS, RHS, &EVL},
- TypeInfo.inferScalarType(LHS), VPI->getDebugLoc());
- })
- .Default([&](VPRecipeBase *R) { return nullptr; });
-
- if (!NewRecipe)
+ VPRecipeBase *EVLRecipe =
+ createEVLRecipe(EVL, HeaderMask, AllOneMask, CurRecipe, TypeInfo);
+ if (!EVLRecipe)
continue;
- [[maybe_unused]] unsigned NumDefVal = NewRecipe->getNumDefinedValues();
+ [[maybe_unused]] unsigned NumDefVal = EVLRecipe->getNumDefinedValues();
assert(NumDefVal == CurRecipe->getNumDefinedValues() &&
"New recipe must define the same number of values as the "
"original.");
assert(
NumDefVal <= 1 &&
"Only supports recipes with a single definition or without users.");
- NewRecipe->insertBefore(CurRecipe);
- if (isa<VPSingleDefRecipe, VPWidenLoadEVLRecipe>(NewRecipe)) {
+ EVLRecipe->insertBefore(CurRecipe);
+ if (isa<VPSingleDefRecipe, VPWidenLoadEVLRecipe>(EVLRecipe)) {
VPValue *CurVPV = CurRecipe->getVPSingleValue();
- CurVPV->replaceAllUsesWith(NewRecipe->getVPSingleValue());
+ CurVPV->replaceAllUsesWith(EVLRecipe->getVPSingleValue());
}
CurRecipe->eraseFromParent();
}
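
The createEVLRecipe helper added above is built around llvm::TypeSwitch. For readers unfamiliar with that utility, here is a minimal, self-contained C++ sketch of the idiom, using a hypothetical Shape/Circle/Square hierarchy (not from the patch) with LLVM-style RTTI via classof. Each Case returns a result for one dynamic type and Default covers everything else, mirroring how createEVLRecipe returns nullptr for recipes it cannot convert.

// Sketch only: hypothetical types, assumes an LLVM build environment.
#include "llvm/ADT/TypeSwitch.h"
#include "llvm/Support/Casting.h"
#include <cstdio>

struct Shape {
  enum Kind { CircleKind, SquareKind } TheKind;
  Shape(Kind K) : TheKind(K) {}
};
struct Circle : Shape {
  double Radius;
  Circle(double R) : Shape(CircleKind), Radius(R) {}
  static bool classof(const Shape *S) { return S->TheKind == CircleKind; }
};
struct Square : Shape {
  double Side;
  Square(double S) : Shape(SquareKind), Side(S) {}
  static bool classof(const Shape *S) { return S->TheKind == SquareKind; }
};

// Dispatch on the dynamic type of a base pointer; return -1.0 for shapes the
// switch does not handle, just as createEVLRecipe returns nullptr for
// recipes it cannot convert.
static double area(Shape *S) {
  return llvm::TypeSwitch<Shape *, double>(S)
      .Case<Circle>([](Circle *C) { return 3.14159 * C->Radius * C->Radius; })
      .Case<Square>([](Square *Sq) { return Sq->Side * Sq->Side; })
      .Default([](Shape *) { return -1.0; });
}

int main() {
  Circle C(2.0);
  std::printf("%f\n", area(&C)); // prints the circle's area
}
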
From b8e788140c04039b20ed2ad76cd71897fa1acaad Mon Sep 17 00:00:00 2001
From: LiqinWeng <liqin.weng at spacemit.com>
Date: Thu, 12 Dec 2024 15:02:15 +0800
Subject: [PATCH 2/5] address the comments
---
.../Transforms/Vectorize/VPlanTransforms.cpp | 34 +++++++++----------
1 file changed, 16 insertions(+), 18 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 48e3914821191e..6d8b16a45c5a61 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1438,17 +1438,18 @@ void VPlanTransforms::addActiveLaneMask(
HeaderMask->replaceAllUsesWith(LaneMask);
}
-static VPRecipeBase *createEVLRecipe(VPValue &EVL, VPValue *HeaderMask,
- VPValue *AllOneMask,
- VPRecipeBase *CurRecipe,
- VPTypeAnalysis TypeInfo) {
+/// Create EVLRecipe with Recipe
+static VPRecipeBase *createEVLRecipe(VPValue *HeaderMask,
+ VPRecipeBase &CurRecipe,
+ VPTypeAnalysis &TypeInfo,
+ VPValue &AllOneMask, VPValue &EVL) {
using namespace llvm::VPlanPatternMatch;
auto GetNewMask = [&](VPValue *OrigMask) -> VPValue * {
assert(OrigMask && "Unmasked recipe when folding tail");
return HeaderMask == OrigMask ? nullptr : OrigMask;
};
- return TypeSwitch<VPRecipeBase *, VPRecipeBase *>(CurRecipe)
+ return TypeSwitch<VPRecipeBase *, VPRecipeBase *>(&CurRecipe)
.Case<VPWidenLoadRecipe>([&](VPWidenLoadRecipe *L) {
VPValue *NewMask = GetNewMask(L->getMask());
return new VPWidenLoadEVLRecipe(*L, EVL, NewMask);
@@ -1469,18 +1470,16 @@ static VPRecipeBase *createEVLRecipe(VPValue &EVL, VPValue *HeaderMask,
})
.Case<VPWidenIntrinsicRecipe>(
[&](VPWidenIntrinsicRecipe *CInst) -> VPRecipeBase * {
- auto *CI = cast<CallInst>(CInst->getUnderlyingInstr());
+ auto *CI = dyn_cast<CallInst>(CInst->getUnderlyingInstr());
Intrinsic::ID VPID = VPIntrinsic::getForIntrinsic(
CI->getCalledFunction()->getIntrinsicID());
assert(VPID != Intrinsic::not_intrinsic &&
- "Expected VP Instrinsic");
-
- SmallVector<VPValue *> Ops(CInst->operands());
- assert(VPIntrinsic::getMaskParamPos(VPID) &&
+ VPIntrinsic::getMaskParamPos(VPID) &&
VPIntrinsic::getVectorLengthParamPos(VPID) &&
"Expected VP intrinsic");
- Ops.push_back(AllOneMask);
+ SmallVector<VPValue *> Ops(CInst->operands());
+ Ops.push_back(&AllOneMask);
Ops.push_back(&EVL);
return new VPWidenIntrinsicRecipe(*CI, VPID, Ops,
TypeInfo.inferScalarType(CInst),
@@ -1490,13 +1489,12 @@ static VPRecipeBase *createEVLRecipe(VPValue &EVL, VPValue *HeaderMask,
auto *CI = dyn_cast<CastInst>(CInst->getUnderlyingInstr());
Intrinsic::ID VPID = VPIntrinsic::getForOpcode(CI->getOpcode());
assert(VPID != Intrinsic::not_intrinsic &&
- "Expected vp.casts Instrinsic");
+ VPIntrinsic::getMaskParamPos(VPID) &&
+ VPIntrinsic::getVectorLengthParamPos(VPID) &&
+ "Expected vp.cast intrinsic");
SmallVector<VPValue *> Ops(CInst->operands());
- assert(VPIntrinsic::getMaskParamPos(VPID) &&
- VPIntrinsic::getVectorLengthParamPos(VPID) &&
- "Expected VP intrinsic");
- Ops.push_back(AllOneMask);
+ Ops.push_back(&AllOneMask);
Ops.push_back(&EVL);
return new VPWidenIntrinsicRecipe(
VPID, Ops, TypeInfo.inferScalarType(CInst), CInst->getDebugLoc());
@@ -1520,7 +1518,7 @@ static VPRecipeBase *createEVLRecipe(VPValue &EVL, VPValue *HeaderMask,
// Use all true as the condition because this transformation is
// limited to selects whose condition is a header mask.
return new VPWidenIntrinsicRecipe(
- Intrinsic::vp_merge, {AllOneMask, LHS, RHS, &EVL},
+ Intrinsic::vp_merge, {&AllOneMask, LHS, RHS, &EVL},
TypeInfo.inferScalarType(LHS), VPI->getDebugLoc());
})
.Default([&](VPRecipeBase *R) { return nullptr; });
@@ -1542,7 +1540,7 @@ static void transformRecipestoEVLRecipes(VPlan &Plan, VPValue &EVL) {
for (VPUser *U : collectUsersRecursively(HeaderMask)) {
auto *CurRecipe = cast<VPRecipeBase>(U);
VPRecipeBase *EVLRecipe =
- createEVLRecipe(EVL, HeaderMask, AllOneMask, CurRecipe, TypeInfo);
+ createEVLRecipe(HeaderMask, *CurRecipe, TypeInfo, *AllOneMask, EVL);
if (!EVLRecipe)
continue;
From f4bc49a14bdc6b02e1d2a7e5de325e0042dded0e Mon Sep 17 00:00:00 2001
From: LiqinWeng <liqin.weng at spacemit.com>
Date: Fri, 13 Dec 2024 10:48:22 +0800
Subject: [PATCH 3/5] add comment for createEVLRecipe implementation
---
llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 8 +++++++-
1 file changed, 7 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 6d8b16a45c5a61..6e1deb49820437 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1438,7 +1438,13 @@ void VPlanTransforms::addActiveLaneMask(
HeaderMask->replaceAllUsesWith(LaneMask);
}
-/// Create EVLRecipe with Recipe
+// Convert each widen Recipe to a widen EVLRecipe in VectorLoopRegion.
+// \p HeaderMask Header Mask.
+// \p CurRecipe Recipe to be transform.
+// \p TypeInfo VPlan-based type analysis.
+// \p AllOneMask The vector mask parameter of vector-predication intrinsics.
+// \p EVL The explicit vector length parameter of vector-predication
+// intrinsics.
static VPRecipeBase *createEVLRecipe(VPValue *HeaderMask,
VPRecipeBase &CurRecipe,
VPTypeAnalysis &TypeInfo,
From a0c28d049170e12b6cc3e33a027ad2978acf9702 Mon Sep 17 00:00:00 2001
From: LiqinWeng <liqin.weng at spacemit.com>
Date: Fri, 13 Dec 2024 21:44:58 +0800
Subject: [PATCH 4/5] address the comments
---
llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 9 +++++----
1 file changed, 5 insertions(+), 4 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 6e1deb49820437..200658054fc197 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1479,8 +1479,8 @@ static VPRecipeBase *createEVLRecipe(VPValue *HeaderMask,
auto *CI = dyn_cast<CallInst>(CInst->getUnderlyingInstr());
Intrinsic::ID VPID = VPIntrinsic::getForIntrinsic(
CI->getCalledFunction()->getIntrinsicID());
- assert(VPID != Intrinsic::not_intrinsic &&
- VPIntrinsic::getMaskParamPos(VPID) &&
+ assert(VPID != Intrinsic::not_intrinsic && "Expected VP intrinsic");
+ assert(VPIntrinsic::getMaskParamPos(VPID) &&
VPIntrinsic::getVectorLengthParamPos(VPID) &&
"Expected VP intrinsic");
@@ -1495,9 +1495,10 @@ static VPRecipeBase *createEVLRecipe(VPValue *HeaderMask,
auto *CI = dyn_cast<CastInst>(CInst->getUnderlyingInstr());
Intrinsic::ID VPID = VPIntrinsic::getForOpcode(CI->getOpcode());
assert(VPID != Intrinsic::not_intrinsic &&
- VPIntrinsic::getMaskParamPos(VPID) &&
+ "Expected vp.casts Instrinsic");
+ assert(VPIntrinsic::getMaskParamPos(VPID) &&
VPIntrinsic::getVectorLengthParamPos(VPID) &&
- "Expected vp.cast intrinsic");
+ "Expected VP intrinsic");
SmallVector<VPValue *> Ops(CInst->operands());
Ops.push_back(&AllOneMask);
From 7460da9d946c15931b9ae01378809f47951c65dc Mon Sep 17 00:00:00 2001
From: LiqinWeng <liqin.weng at spacemit.com>
Date: Mon, 16 Dec 2024 10:15:32 +0800
Subject: [PATCH 5/5] address the comments
---
llvm/lib/Transforms/Vectorize/VPlan.h | 3 ++
.../Transforms/Vectorize/VPlanTransforms.cpp | 33 ++++++-----------
...vectorize-force-tail-with-evl-reduction.ll | 2 +-
.../RISCV/vplan-vp-call-intrinsics.ll | 36 +++++++++----------
4 files changed, 32 insertions(+), 42 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 8794517b777f3b..dc0e743e2def88 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -1697,6 +1697,9 @@ class VPWidenIntrinsicRecipe : public VPRecipeWithIRFlags {
InstructionCost computeCost(ElementCount VF,
VPCostContext &Ctx) const override;
+ /// Return the ID of the intrinsic.
+ Intrinsic::ID getVectorIntrinsicID() const { return VectorIntrinsicID; }
+
/// Return the scalar return type of the intrinsic.
Type *getResultType() const { return ResultTy; }
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 200658054fc197..2ff55966193521 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1438,7 +1438,8 @@ void VPlanTransforms::addActiveLaneMask(
HeaderMask->replaceAllUsesWith(LaneMask);
}
-// Convert each widen Recipe to a widen EVLRecipe in VectorLoopRegion.
+// Try to convert \p CurRecipe to a corresponding EVL-based recipe. Returns
+// nullptr if no EVL-based recipe could be created.
// \p HeaderMask Header Mask.
// \p CurRecipe Recipe to be transform.
// \p TypeInfo VPlan-based type analysis.
@@ -1474,28 +1475,14 @@ static VPRecipeBase *createEVLRecipe(VPValue *HeaderMask,
VPValue *NewMask = GetNewMask(Red->getCondOp());
return new VPReductionEVLRecipe(*Red, EVL, NewMask);
})
- .Case<VPWidenIntrinsicRecipe>(
- [&](VPWidenIntrinsicRecipe *CInst) -> VPRecipeBase * {
- auto *CI = dyn_cast<CallInst>(CInst->getUnderlyingInstr());
- Intrinsic::ID VPID = VPIntrinsic::getForIntrinsic(
- CI->getCalledFunction()->getIntrinsicID());
- assert(VPID != Intrinsic::not_intrinsic && "Expected VP intrinsic");
- assert(VPIntrinsic::getMaskParamPos(VPID) &&
- VPIntrinsic::getVectorLengthParamPos(VPID) &&
- "Expected VP intrinsic");
-
- SmallVector<VPValue *> Ops(CInst->operands());
- Ops.push_back(&AllOneMask);
- Ops.push_back(&EVL);
- return new VPWidenIntrinsicRecipe(*CI, VPID, Ops,
- TypeInfo.inferScalarType(CInst),
- CInst->getDebugLoc());
- })
- .Case<VPWidenCastRecipe>([&](VPWidenCastRecipe *CInst) -> VPRecipeBase * {
- auto *CI = dyn_cast<CastInst>(CInst->getUnderlyingInstr());
- Intrinsic::ID VPID = VPIntrinsic::getForOpcode(CI->getOpcode());
- assert(VPID != Intrinsic::not_intrinsic &&
- "Expected vp.casts Instrinsic");
+ .Case<VPWidenIntrinsicRecipe, VPWidenCastRecipe>([&](auto *CInst)
+ -> VPRecipeBase * {
+ Intrinsic::ID VPID;
+ if (auto *CallInst = dyn_cast<VPWidenIntrinsicRecipe>(CInst))
+ VPID = VPIntrinsic::getForIntrinsic(CallInst->getVectorIntrinsicID());
+ else if (auto *CastInst = dyn_cast<VPWidenCastRecipe>(CInst))
+ VPID = VPIntrinsic::getForOpcode(CastInst->getOpcode());
+ assert(VPID != Intrinsic::not_intrinsic && "Expected VP intrinsic");
assert(VPIntrinsic::getMaskParamPos(VPID) &&
VPIntrinsic::getVectorLengthParamPos(VPID) &&
"Expected VP intrinsic");
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reduction.ll
index af5a62e5f480d6..7557c10892d6d4 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reduction.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reduction.ll
@@ -1727,7 +1727,7 @@ define float @fmuladd(ptr %a, ptr %b, i64 %n, float %start) {
; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i64 [[TMP11]]
; IF-EVL-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i32 0
; IF-EVL-NEXT: [[VP_OP_LOAD1:%.*]] = call <vscale x 4 x float> @llvm.vp.load.nxv4f32.p0(ptr align 4 [[TMP15]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]])
-; IF-EVL-NEXT: [[TMP16:%.*]] = call reassoc <vscale x 4 x float> @llvm.vp.fmuladd.nxv4f32(<vscale x 4 x float> [[VP_OP_LOAD]], <vscale x 4 x float> [[VP_OP_LOAD1]], <vscale x 4 x float> [[VEC_PHI]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]])
+; IF-EVL-NEXT: [[TMP16:%.*]] = call <vscale x 4 x float> @llvm.vp.fmuladd.nxv4f32(<vscale x 4 x float> [[VP_OP_LOAD]], <vscale x 4 x float> [[VP_OP_LOAD1]], <vscale x 4 x float> [[VEC_PHI]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]])
; IF-EVL-NEXT: [[TMP17]] = call <vscale x 4 x float> @llvm.vp.merge.nxv4f32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x float> [[TMP16]], <vscale x 4 x float> [[VEC_PHI]], i32 [[TMP10]])
; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP10]] to i64
; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP18]], [[EVL_BASED_IV]]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-call-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-call-intrinsics.ll
index 5c47f07f4436c3..05c687a2053533 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-call-intrinsics.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-call-intrinsics.ll
@@ -27,10 +27,10 @@ define void @vp_smax(ptr %a, ptr %b, ptr %c, i64 %N) {
; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%c>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
; IF-EVL-NEXT: WIDEN ir<[[LD2:%.+]]> = vp.load vp<[[PTR2]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: WIDEN-INTRINSIC ir<[[SMAX:%.+]]> = call llvm.vp.smax(ir<[[LD1]]>, ir<[[LD2]]>, ir<true>, vp<[[EVL]]>)
+; IF-EVL-NEXT: WIDEN-INTRINSIC vp<[[SMAX:%.+]]> = call llvm.vp.smax(ir<[[LD1]]>, ir<[[LD2]]>, ir<true>, vp<[[EVL]]>)
; IF-EVL-NEXT: CLONE ir<[[GEP3:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR3:%[0-9]+]]> = vector-pointer ir<[[GEP3]]>
-; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR3]]>, ir<[[SMAX]]>, vp<[[EVL]]>
+; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR3]]>, vp<[[SMAX]]>, vp<[[EVL]]>
; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
@@ -80,10 +80,10 @@ define void @vp_smin(ptr %a, ptr %b, ptr %c, i64 %N) {
; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%c>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
; IF-EVL-NEXT: WIDEN ir<[[LD2:%.+]]> = vp.load vp<[[PTR2]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: WIDEN-INTRINSIC ir<[[SMIN:%.+]]> = call llvm.vp.smin(ir<[[LD1]]>, ir<[[LD2]]>, ir<true>, vp<[[EVL]]>)
+; IF-EVL-NEXT: WIDEN-INTRINSIC vp<[[SMIN:%.+]]> = call llvm.vp.smin(ir<[[LD1]]>, ir<[[LD2]]>, ir<true>, vp<[[EVL]]>)
; IF-EVL-NEXT: CLONE ir<[[GEP3:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR3:%[0-9]+]]> = vector-pointer ir<[[GEP3]]>
-; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR3]]>, ir<[[SMIN]]>, vp<[[EVL]]>
+; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR3]]>, vp<[[SMIN]]>, vp<[[EVL]]>
; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
@@ -133,10 +133,10 @@ define void @vp_umax(ptr %a, ptr %b, ptr %c, i64 %N) {
; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%c>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
; IF-EVL-NEXT: WIDEN ir<[[LD2:%.+]]> = vp.load vp<[[PTR2]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: WIDEN-INTRINSIC ir<[[UMAX:%.+]]> = call llvm.vp.umax(ir<[[LD1]]>, ir<[[LD2]]>, ir<true>, vp<[[EVL]]>)
+; IF-EVL-NEXT: WIDEN-INTRINSIC vp<[[UMAX:%.+]]> = call llvm.vp.umax(ir<[[LD1]]>, ir<[[LD2]]>, ir<true>, vp<[[EVL]]>)
; IF-EVL-NEXT: CLONE ir<[[GEP3:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR3:%[0-9]+]]> = vector-pointer ir<[[GEP3]]>
-; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR3]]>, ir<[[UMAX]]>, vp<[[EVL]]>
+; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR3]]>, vp<[[UMAX]]>, vp<[[EVL]]>
; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
@@ -186,10 +186,10 @@ define void @vp_umin(ptr %a, ptr %b, ptr %c, i64 %N) {
; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%c>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
; IF-EVL-NEXT: WIDEN ir<[[LD2:%.+]]> = vp.load vp<[[PTR2]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: WIDEN-INTRINSIC ir<[[UMIN:%.+]]> = call llvm.vp.umin(ir<[[LD1]]>, ir<[[LD2]]>, ir<true>, vp<[[EVL]]>)
+; IF-EVL-NEXT: WIDEN-INTRINSIC vp<[[UMIN:%.+]]> = call llvm.vp.umin(ir<[[LD1]]>, ir<[[LD2]]>, ir<true>, vp<[[EVL]]>)
; IF-EVL-NEXT: CLONE ir<[[GEP3:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR3:%[0-9]+]]> = vector-pointer ir<[[GEP3]]>
-; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR3]]>, ir<[[UMIN]]>, vp<[[EVL]]>
+; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR3]]>, vp<[[UMIN]]>, vp<[[EVL]]>
; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
@@ -236,10 +236,10 @@ define void @vp_ctlz(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>
; IF-EVL-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: WIDEN-INTRINSIC ir<[[CTLZ:%.+]]> = call llvm.vp.ctlz(ir<[[LD1]]>, ir<true>, ir<true>, vp<[[EVL]]>)
+; IF-EVL-NEXT: WIDEN-INTRINSIC vp<[[CTLZ:%.+]]> = call llvm.vp.ctlz(ir<[[LD1]]>, ir<true>, ir<true>, vp<[[EVL]]>)
; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
-; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[CTLZ]]>, vp<[[EVL]]>
+; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, vp<[[CTLZ]]>, vp<[[EVL]]>
; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
@@ -284,10 +284,10 @@ define void @vp_cttz(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>
; IF-EVL-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: WIDEN-INTRINSIC ir<[[CTTZ:%.+]]> = call llvm.vp.cttz(ir<[[LD1]]>, ir<true>, ir<true>, vp<[[EVL]]>)
+; IF-EVL-NEXT: WIDEN-INTRINSIC vp<[[CTTZ:%.+]]> = call llvm.vp.cttz(ir<[[LD1]]>, ir<true>, ir<true>, vp<[[EVL]]>)
; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
-; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[CTTZ]]>, vp<[[EVL]]>
+; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, vp<[[CTTZ]]>, vp<[[EVL]]>
; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
@@ -333,8 +333,8 @@ define void @vp_lrint(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>
; IF-EVL-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]>
; IF-EVL-NEXT: WIDEN-INTRINSIC vp<[[FPEXT:%.+]]> = call llvm.vp.fpext(ir<[[LD1]]>, ir<true>, vp<[[EVL]]>)
-; IF-EVL-NEXT: WIDEN-INTRINSIC ir<[[LRINT:%.+]]> = call llvm.vp.lrint(vp<[[FPEXT]]>, ir<true>, vp<[[EVL]]>)
-; IF-EVL-NEXT: WIDEN-INTRINSIC vp<[[TRUNC:%.+]]> = call llvm.vp.trunc(ir<[[LRINT]]>, ir<true>, vp<[[EVL]]>)
+; IF-EVL-NEXT: WIDEN-INTRINSIC vp<[[LRINT:%.+]]> = call llvm.vp.lrint(vp<[[FPEXT]]>, ir<true>, vp<[[EVL]]>)
+; IF-EVL-NEXT: WIDEN-INTRINSIC vp<[[TRUNC:%.+]]> = call llvm.vp.trunc(vp<[[LRINT]]>, ir<true>, vp<[[EVL]]>)
; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, vp<[[TRUNC]]>, vp<[[EVL]]>
@@ -385,8 +385,8 @@ define void @vp_llrint(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>
; IF-EVL-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]>
; IF-EVL-NEXT: WIDEN-INTRINSIC vp<[[FPEXT:%.+]]> = call llvm.vp.fpext(ir<[[LD1]]>, ir<true>, vp<[[EVL]]>)
-; IF-EVL-NEXT: WIDEN-INTRINSIC ir<[[LLRINT:%.+]]> = call llvm.vp.llrint(vp<[[FPEXT]]>, ir<true>, vp<[[EVL]]>)
-; IF-EVL-NEXT: WIDEN-INTRINSIC vp<[[TRUNC:%.+]]> = call llvm.vp.trunc(ir<[[LLRINT]]>, ir<true>, vp<[[EVL]]>)
+; IF-EVL-NEXT: WIDEN-INTRINSIC vp<[[LLRINT:%.+]]> = call llvm.vp.llrint(vp<[[FPEXT]]>, ir<true>, vp<[[EVL]]>)
+; IF-EVL-NEXT: WIDEN-INTRINSIC vp<[[TRUNC:%.+]]> = call llvm.vp.trunc(vp<[[LLRINT]]>, ir<true>, vp<[[EVL]]>)
; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, vp<[[TRUNC]]>, vp<[[EVL]]>
@@ -436,10 +436,10 @@ define void @vp_abs(ptr %a, ptr %b, i64 %N) {
; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]>
; IF-EVL-NEXT: WIDEN ir<[[LD1:%.+]]> = vp.load vp<[[PTR1]]>, vp<[[EVL]]>
-; IF-EVL-NEXT: WIDEN-INTRINSIC ir<[[ABS:%.+]]> = call llvm.vp.abs(ir<[[LD1]]>, ir<true>, ir<true>, vp<[[EVL]]>)
+; IF-EVL-NEXT: WIDEN-INTRINSIC vp<[[ABS:%.+]]> = call llvm.vp.abs(ir<[[LD1]]>, ir<true>, ir<true>, vp<[[EVL]]>)
; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]>
; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]>
-; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, ir<[[ABS]]>, vp<[[EVL]]>
+; IF-EVL-NEXT: WIDEN vp.store vp<[[PTR2]]>, vp<[[ABS]]>, vp<[[EVL]]>
; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = add vp<[[CAST]]>, vp<[[EVL_PHI]]>
; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%.+]]> = add vp<[[IV]]>, vp<[[VFUF]]>
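
The last patch collapses the intrinsic and cast handling into a single .Case<VPWidenIntrinsicRecipe, VPWidenCastRecipe> with one generic lambda. Below is a minimal sketch of that TypeSwitch feature in isolation, using hypothetical Expr/Add/Mul types (not from the patch): listing several types in one Case instantiates the lambda for whichever type matched, and isa/dyn_cast inside the lambda re-disambiguates, just as the merged recipe case distinguishes the widen-intrinsic recipe from the widen-cast recipe.

// Sketch only: hypothetical types, assumes an LLVM build environment.
#include "llvm/ADT/TypeSwitch.h"
#include "llvm/Support/Casting.h"
#include <cstdio>

struct Expr {
  enum Kind { AddKind, MulKind } TheKind;
  Expr(Kind K) : TheKind(K) {}
};
struct Add : Expr {
  Add() : Expr(AddKind) {}
  static bool classof(const Expr *E) { return E->TheKind == AddKind; }
};
struct Mul : Expr {
  Mul() : Expr(MulKind) {}
  static bool classof(const Expr *E) { return E->TheKind == MulKind; }
};

// One Case covering two types; the generic lambda sees Add* or Mul* and
// re-disambiguates with isa, as the merged recipe case does with dyn_cast.
static const char *name(Expr *E) {
  return llvm::TypeSwitch<Expr *, const char *>(E)
      .Case<Add, Mul>([](auto *Matched) -> const char * {
        if (llvm::isa<Add>(Matched))
          return "add";
        return "mul";
      })
      .Default([](Expr *) { return "unknown"; });
}

int main() {
  Mul M;
  std::printf("%s\n", name(&M)); // prints "mul"
}
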