[llvm] [LV] Support binary and unary operations with EVL-vectorization (PR #93854)
via llvm-commits
llvm-commits at lists.llvm.org
Thu May 30 11:02:32 PDT 2024
llvmbot wrote:
@llvm/pr-subscribers-llvm-transforms
Author: Kolya Panchenko (nikolaypanchenko)
Changes:
The patch adds `VPWidenEVLRecipe`, which represents a `VPWidenRecipe` plus an EVL argument. The new recipe replaces `VPWidenRecipe` in `tryAddExplicitVectorLength` for all binary and unary operations. Follow-up patches will extend support to the remaining cases, such as `FCmp` and `ICmp`.
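To illustrate the intended lowering, here is a minimal sketch distilled from the new test file below; the all-true mask and trailing EVL operand are exactly what `VPWidenEVLRecipe::execute` builds, while `%x`, `%y`, `%alltrue`, and `%evl` are illustrative names:

```llvm
; Plain widening emits the vector instruction directly:
%r = and <vscale x 16 x i8> %x, %y

; With EVL tail folding, the recipe emits the VP intrinsic instead,
; taking an all-true mask and the explicit vector length as operands:
%r = call <vscale x 16 x i8> @llvm.vp.and.nxv16i8(
    <vscale x 16 x i8> %x, <vscale x 16 x i8> %y,
    <vscale x 16 x i1> %alltrue, i32 %evl)
```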
---
Patch is 138.14 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/93854.diff
8 Files Affected:
- (modified) llvm/lib/Transforms/Vectorize/VPlan.h (+52-2)
- (modified) llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp (+71)
- (modified) llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp (+55-26)
- (modified) llvm/lib/Transforms/Vectorize/VPlanValue.h (+1)
- (added) llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-bin-unary-ops-args.ll (+1799)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-masked-loadstore.ll (+10-10)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/vectorize-vp-intrinsics.ll (+11-11)
- (modified) llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics.ll (+1-1)
``````````diff
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index e75a1de548f7d..f1381b99ed68f 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -855,6 +855,7 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
case VPRecipeBase::VPWidenCastSC:
case VPRecipeBase::VPWidenGEPSC:
case VPRecipeBase::VPWidenSC:
+ case VPRecipeBase::VPWidenEVLSC:
case VPRecipeBase::VPWidenSelectSC:
case VPRecipeBase::VPBlendSC:
case VPRecipeBase::VPPredInstPHISC:
@@ -1039,6 +1040,7 @@ class VPRecipeWithIRFlags : public VPSingleDefRecipe {
static inline bool classof(const VPRecipeBase *R) {
return R->getVPDefID() == VPRecipeBase::VPInstructionSC ||
R->getVPDefID() == VPRecipeBase::VPWidenSC ||
+ R->getVPDefID() == VPRecipeBase::VPWidenEVLSC ||
R->getVPDefID() == VPRecipeBase::VPWidenGEPSC ||
R->getVPDefID() == VPRecipeBase::VPWidenCastSC ||
R->getVPDefID() == VPRecipeBase::VPReplicateSC ||
@@ -1333,13 +1335,18 @@ class VPInstruction : public VPRecipeWithIRFlags {
/// ingredient. This recipe covers most of the traditional vectorization cases
/// where each ingredient transforms into a vectorized version of itself.
class VPWidenRecipe : public VPRecipeWithIRFlags {
+protected:
unsigned Opcode;
+ template <typename IterT>
+ VPWidenRecipe(unsigned VPDefOpcode, Instruction &I,
+ iterator_range<IterT> Operands)
+ : VPRecipeWithIRFlags(VPDefOpcode, Operands, I), Opcode(I.getOpcode()) {}
+
public:
template <typename IterT>
VPWidenRecipe(Instruction &I, iterator_range<IterT> Operands)
- : VPRecipeWithIRFlags(VPDef::VPWidenSC, Operands, I),
- Opcode(I.getOpcode()) {}
+ : VPWidenRecipe(VPDef::VPWidenSC, I, Operands) {}
~VPWidenRecipe() override = default;
@@ -1363,6 +1370,49 @@ class VPWidenRecipe : public VPRecipeWithIRFlags {
#endif
};
+class VPWidenEVLRecipe : public VPWidenRecipe {
+private:
+ using VPRecipeWithIRFlags::transferFlags;
+
+public:
+ template <typename IterT>
+ VPWidenEVLRecipe(Instruction &I, iterator_range<IterT> Operands, VPValue &EVL)
+ : VPWidenRecipe(VPDef::VPWidenEVLSC, I, Operands) {
+ addOperand(&EVL);
+ }
+
+ ~VPWidenEVLRecipe() override = default;
+
+ VPWidenRecipe *clone() override final {
+ SmallVector<VPValue *> Ops(operands());
+ VPValue *EVL = Ops.pop_back_val();
+ auto *R = new VPWidenEVLRecipe(*getUnderlyingInstr(),
+ make_range(Ops.begin(), Ops.end()), *EVL);
+ R->transferFlags(*this);
+ return R;
+ }
+
+ VP_CLASSOF_IMPL(VPDef::VPWidenEVLSC);
+
+ VPValue *getEVL() { return getOperand(getNumOperands() - 1); }
+ const VPValue *getEVL() const { return getOperand(getNumOperands() - 1); }
+
+  /// A helper function to create a widened EVL recipe from a widen recipe.
+ static VPWidenEVLRecipe *create(VPWidenRecipe *W, VPValue &EVL);
+
+  /// Produce a widened instruction using a VP intrinsic and the EVL.
+ void execute(VPTransformState &State) override final;
+
+ /// Returns true if the recipe only uses the first lane of operand \p Op.
+ bool onlyFirstLaneUsed(const VPValue *Op) const override;
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ /// Print the recipe.
+ void print(raw_ostream &O, const Twine &Indent,
+ VPSlotTracker &SlotTracker) const override final;
+#endif
+};
+
/// VPWidenCastRecipe is a recipe to create vector cast instructions.
class VPWidenCastRecipe : public VPRecipeWithIRFlags {
/// Cast instruction opcode.
diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
index 5eb99ffd1e10e..a6fd26b666501 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp
@@ -23,6 +23,7 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
+#include "llvm/IR/VectorBuilder.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -71,6 +72,7 @@ bool VPRecipeBase::mayWriteToMemory() const {
case VPWidenLoadSC:
case VPWidenPHISC:
case VPWidenSC:
+ case VPWidenEVLSC:
case VPWidenSelectSC: {
const Instruction *I =
dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
@@ -110,6 +112,7 @@ bool VPRecipeBase::mayReadFromMemory() const {
case VPWidenIntOrFpInductionSC:
case VPWidenPHISC:
case VPWidenSC:
+ case VPWidenEVLSC:
case VPWidenSelectSC: {
const Instruction *I =
dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
@@ -157,6 +160,7 @@ bool VPRecipeBase::mayHaveSideEffects() const {
case VPWidenPHISC:
case VPWidenPointerInductionSC:
case VPWidenSC:
+ case VPWidenEVLSC:
case VPWidenSelectSC: {
const Instruction *I =
dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue());
@@ -993,6 +997,64 @@ void VPWidenRecipe::execute(VPTransformState &State) {
#endif
}
+VPWidenEVLRecipe *VPWidenEVLRecipe::create(VPWidenRecipe *W, VPValue &EVL) {
+ auto *R = new VPWidenEVLRecipe(*W->getUnderlyingInstr(), W->operands(), EVL);
+ R->transferFlags(*W);
+ return R;
+}
+
+void VPWidenEVLRecipe::execute(VPTransformState &State) {
+ assert(State.UF == 1 && "Expected only UF == 1 when vectorizing with "
+ "explicit vector length.");
+ VPValue *Op0 = getOperand(0);
+
+  // If this is a scalar operation, hand translation over to VPWidenRecipe.
+ if (!State.get(Op0, 0)->getType()->isVectorTy())
+ return VPWidenRecipe::execute(State);
+
+ VPValue *EVL = getEVL();
+ Value *EVLArg = State.get(EVL, 0, /*NeedsScalar=*/true);
+ unsigned Opcode = getOpcode();
+ Instruction *I = getUnderlyingInstr();
+ IRBuilderBase &BuilderIR = State.Builder;
+ VectorBuilder Builder(BuilderIR);
+ Value *Mask = BuilderIR.CreateVectorSplat(State.VF, BuilderIR.getTrue());
+ Value *VPInst = nullptr;
+
+ //===------------------- Binary and Unary Ops ---------------------===//
+ if (Instruction::isBinaryOp(Opcode) || Instruction::isUnaryOp(Opcode)) {
+ // Just widen unops and binops.
+
+ SmallVector<Value *, 4> Ops;
+    for (unsigned Idx = 0, E = getNumOperands() - 1; Idx < E; ++Idx) {
+      VPValue *VPOp = getOperand(Idx);
+      Ops.push_back(State.get(VPOp, 0));
+ }
+
+ Builder.setMask(Mask).setEVL(EVLArg);
+ VPInst = Builder.createVectorInstruction(Opcode, Ops[0]->getType(), Ops,
+ "vp.op");
+
+ if (I)
+ if (auto *VecOp = dyn_cast<Instruction>(VPInst))
+ VecOp->copyIRFlags(I);
+ } else {
+ llvm_unreachable("Unsupported opcode in VPWidenEVLRecipe::execute");
+ }
+ State.set(this, VPInst, 0);
+ State.addMetadata(VPInst, I);
+}
+
+bool VPWidenEVLRecipe::onlyFirstLaneUsed(const VPValue *Op) const {
+ assert(is_contained(operands(), Op) && "Op must be an operand of the recipe");
+  // EVL is always the last operand of this recipe; if \p Op is used as any
+  // other operand, the full vector of that VPValue is needed, not only its
+  // first lane.
+ for (unsigned I = 0, E = getNumOperands() - 1; I != E; ++I)
+ if (getOperand(I) == Op)
+ return false;
+ return true;
+}
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
@@ -1002,6 +1064,15 @@ void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent,
printFlags(O);
printOperands(O, SlotTracker);
}
+
+void VPWidenEVLRecipe::print(raw_ostream &O, const Twine &Indent,
+ VPSlotTracker &SlotTracker) const {
+ O << Indent << "WIDEN vp ";
+ printAsOperand(O, SlotTracker);
+ O << " = " << Instruction::getOpcodeName(Opcode);
+ printFlags(O);
+ printOperands(O, SlotTracker);
+}
#endif
void VPWidenCastRecipe::execute(VPTransformState &State) {
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 422579ea8b84f..39ebd44909ea6 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -20,6 +20,7 @@
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/TypeSwitch.h"
#include "llvm/Analysis/IVDescriptors.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Intrinsics.h"
@@ -1219,7 +1220,7 @@ static VPActiveLaneMaskPHIRecipe *addVPLaneMaskPhiAndUpdateExitBranch(
/// WideCanonicalIV, backedge-taken-count) pattern.
/// TODO: Introduce explicit recipe for header-mask instead of searching
/// for the header-mask pattern manually.
-static SmallVector<VPValue *> collectAllHeaderMasks(VPlan &Plan) {
+static DenseSet<VPValue *> collectAllHeaderMasks(VPlan &Plan) {
SmallVector<VPValue *> WideCanonicalIVs;
auto *FoundWidenCanonicalIVUser =
find_if(Plan.getCanonicalIV()->users(),
@@ -1245,7 +1246,7 @@ static SmallVector<VPValue *> collectAllHeaderMasks(VPlan &Plan) {
// Walk users of wide canonical IVs and collect to all compares of the form
// (ICMP_ULE, WideCanonicalIV, backedge-taken-count).
- SmallVector<VPValue *> HeaderMasks;
+ DenseSet<VPValue *> HeaderMasks;
VPValue *BTC = Plan.getOrCreateBackedgeTakenCount();
for (auto *Wide : WideCanonicalIVs) {
for (VPUser *U : SmallVector<VPUser *>(Wide->users())) {
@@ -1257,7 +1258,7 @@ static SmallVector<VPValue *> collectAllHeaderMasks(VPlan &Plan) {
assert(HeaderMask->getOperand(0) == Wide &&
"WidenCanonicalIV must be the first operand of the compare");
- HeaderMasks.push_back(HeaderMask);
+ HeaderMasks.insert(HeaderMask);
}
}
return HeaderMasks;
@@ -1296,6 +1297,55 @@ void VPlanTransforms::addActiveLaneMask(
HeaderMask->replaceAllUsesWith(LaneMask);
}
+/// Replace recipes with their EVL variants.
+static void transformRecipesToEVLRecipes(VPlan &Plan, VPValue &EVL) {
+ DenseSet<VPRecipeBase *> ToRemove;
+
+ ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<VPBlockBase *>> RPOT(
+ Plan.getEntry());
+ DenseSet<VPValue *> HeaderMasks = collectAllHeaderMasks(Plan);
+  for (VPBasicBlock *VPBB :
+       reverse(VPBlockUtils::blocksOnly<VPBasicBlock>(RPOT))) {
+ // The recipes in the block are processed in reverse order, to catch chains
+ // of dead recipes.
+ for (VPRecipeBase &R : make_early_inc_range(reverse(*VPBB))) {
+ TypeSwitch<VPRecipeBase *>(&R)
+ .Case<VPWidenLoadRecipe>([&](VPWidenLoadRecipe *L) {
+ VPValue *NewMask =
+ HeaderMasks.contains(L->getMask()) ? nullptr : L->getMask();
+ auto *N = new VPWidenLoadEVLRecipe(L, &EVL, NewMask);
+ N->insertBefore(L);
+ L->replaceAllUsesWith(N);
+ ToRemove.insert(L);
+ })
+ .Case<VPWidenStoreRecipe>([&](VPWidenStoreRecipe *S) {
+ VPValue *NewMask =
+ HeaderMasks.contains(S->getMask()) ? nullptr : S->getMask();
+ auto *N = new VPWidenStoreEVLRecipe(S, &EVL, NewMask);
+ N->insertBefore(S);
+ ToRemove.insert(S);
+ })
+ .Case<VPWidenRecipe>([&](VPWidenRecipe *W) {
+ unsigned Opcode = W->getOpcode();
+ if (!Instruction::isBinaryOp(Opcode) &&
+ !Instruction::isUnaryOp(Opcode))
+ return;
+ auto *N = VPWidenEVLRecipe::create(W, EVL);
+ N->insertBefore(W);
+ W->replaceAllUsesWith(N);
+ ToRemove.insert(W);
+ });
+ }
+ }
+
+ for (VPRecipeBase *R : ToRemove)
+ R->eraseFromParent();
+
+ for (VPValue *HeaderMask : HeaderMasks)
+ recursivelyDeleteDeadRecipes(HeaderMask);
+}
+
/// Add a VPEVLBasedIVPHIRecipe and related recipes to \p Plan and
/// replaces all uses except the canonical IV increment of
/// VPCanonicalIVPHIRecipe with a VPEVLBasedIVPHIRecipe. VPCanonicalIVPHIRecipe
@@ -1356,29 +1406,8 @@ bool VPlanTransforms::tryAddExplicitVectorLength(VPlan &Plan) {
NextEVLIV->insertBefore(CanonicalIVIncrement);
EVLPhi->addOperand(NextEVLIV);
- for (VPValue *HeaderMask : collectAllHeaderMasks(Plan)) {
- for (VPUser *U : collectUsersRecursively(HeaderMask)) {
- auto *MemR = dyn_cast<VPWidenMemoryRecipe>(U);
- if (!MemR)
- continue;
- VPValue *OrigMask = MemR->getMask();
- assert(OrigMask && "Unmasked widen memory recipe when folding tail");
- VPValue *NewMask = HeaderMask == OrigMask ? nullptr : OrigMask;
- if (auto *L = dyn_cast<VPWidenLoadRecipe>(MemR)) {
- auto *N = new VPWidenLoadEVLRecipe(L, VPEVL, NewMask);
- N->insertBefore(L);
- L->replaceAllUsesWith(N);
- L->eraseFromParent();
- } else if (auto *S = dyn_cast<VPWidenStoreRecipe>(MemR)) {
- auto *N = new VPWidenStoreEVLRecipe(S, VPEVL, NewMask);
- N->insertBefore(S);
- S->eraseFromParent();
- } else {
- llvm_unreachable("unsupported recipe");
- }
- }
- recursivelyDeleteDeadRecipes(HeaderMask);
- }
+  transformRecipesToEVLRecipes(Plan, *VPEVL);
+
// Replace all uses of VPCanonicalIVPHIRecipe by
// VPEVLBasedIVPHIRecipe except for the canonical IV increment.
CanonicalIVPHI->replaceAllUsesWith(EVLPhi);
diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h
index 8d945f6f2b8ea..a90d42289e5f1 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanValue.h
+++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h
@@ -356,6 +356,7 @@ class VPDef {
VPWidenStoreEVLSC,
VPWidenStoreSC,
VPWidenSC,
+ VPWidenEVLSC,
VPWidenSelectSC,
VPBlendSC,
// START: Phi-like recipes. Need to be kept together.
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-bin-unary-ops-args.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-bin-unary-ops-args.ll
new file mode 100644
index 0000000000000..77689fc8a76ee
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-bin-unary-ops-args.ll
@@ -0,0 +1,1799 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -passes=loop-vectorize \
+; RUN: -force-tail-folding-style=data-with-evl \
+; RUN: -prefer-predicate-over-epilogue=predicate-dont-vectorize \
+; RUN: -mtriple=riscv64 -mattr=+v -S < %s | FileCheck %s --check-prefix=IF-EVL
+
+; RUN: opt -passes=loop-vectorize \
+; RUN: -force-tail-folding-style=none \
+; RUN: -prefer-predicate-over-epilogue=predicate-dont-vectorize \
+; RUN: -mtriple=riscv64 -mattr=+v -S < %s | FileCheck %s --check-prefix=NO-VP
+
+
+define void @test_and(ptr nocapture %a, ptr nocapture readonly %b) {
+; IF-EVL-LABEL: define void @test_and(
+; IF-EVL-SAME: ptr nocapture [[A:%.*]], ptr nocapture readonly [[B:%.*]]) #[[ATTR0:[0-9]+]] {
+; IF-EVL-NEXT: [[LOOP_PREHEADER:.*]]:
+; IF-EVL-NEXT: [[A2:%.*]] = ptrtoint ptr [[A]] to i64
+; IF-EVL-NEXT: [[B1:%.*]] = ptrtoint ptr [[B]] to i64
+; IF-EVL-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_MEMCHECK:.*]]
+; IF-EVL: [[VECTOR_MEMCHECK]]:
+; IF-EVL-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; IF-EVL-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 16
+; IF-EVL-NEXT: [[TMP2:%.*]] = sub i64 [[B1]], [[A2]]
+; IF-EVL-NEXT: [[DIFF_CHECK:%.*]] = icmp ult i64 [[TMP2]], [[TMP1]]
+; IF-EVL-NEXT: br i1 [[DIFF_CHECK]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
+; IF-EVL: [[VECTOR_PH]]:
+; IF-EVL-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
+; IF-EVL-NEXT: [[TMP4:%.*]] = mul i64 [[TMP3]], 16
+; IF-EVL-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
+; IF-EVL-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 16
+; IF-EVL-NEXT: [[TMP7:%.*]] = sub i64 [[TMP6]], 1
+; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 100, [[TMP7]]
+; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP4]]
+; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
+; IF-EVL-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
+; IF-EVL-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 16
+; IF-EVL-NEXT: br label %[[VECTOR_BODY:.*]]
+; IF-EVL: [[VECTOR_BODY]]:
+; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; IF-EVL-NEXT: [[TMP10:%.*]] = sub i64 100, [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[TMP11:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP10]], i32 16, i1 true)
+; IF-EVL-NEXT: [[TMP12:%.*]] = add i64 [[EVL_BASED_IV]], 0
+; IF-EVL-NEXT: [[TMP13:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[TMP12]]
+; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds i8, ptr [[TMP13]], i32 0
+; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.vp.load.nxv16i8.p0(ptr align 1 [[TMP14]], <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP11]])
+; IF-EVL-NEXT: [[TMP15:%.*]] = call <vscale x 16 x i8> @llvm.vp.and.nxv16i8(<vscale x 16 x i8> [[VP_OP_LOAD]], <vscale x 16 x i8> shufflevector (<vscale x 16 x i8> insertelement (<vscale x 16 x i8> poison, i8 1, i64 0), <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer), <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP11]])
+; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[TMP12]]
+; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i8, ptr [[TMP16]], i32 0
+; IF-EVL-NEXT: call void @llvm.vp.store.nxv16i8.p0(<vscale x 16 x i8> [[TMP15]], ptr align 1 [[TMP17]], <vscale x 16 x i1> shufflevector (<vscale x 16 x i1> insertelement (<vscale x 16 x i1> poison, i1 true, i64 0), <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer), i32 [[TMP11]])
+; IF-EVL-NEXT: [[TMP18:%.*]] = zext i32 [[TMP11]] to i64
+; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP18]], [[EVL_BASED_IV]]
+; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP9]]
+; IF-EVL-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; IF-EVL-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; IF-EVL: [[MIDDLE_BLOCK]]:
+; IF-EVL-NEXT: br i1 true, label %[[FINISH_LOOPEXIT:.*]], label %[[SCALAR_PH]]
+; IF-EVL: [[SCALAR_PH]]:
+; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[LOOP_PREHEADER]] ], [ 0, %[[VECTOR_MEMCHECK]] ]
+; IF-EVL-NEXT: br label %[[LOOP:.*]]
+; IF-EVL: [[LOOP]]:
+; IF-EVL-NEXT: [[LEN:%.*]] = phi i64 [ [[DEC:%.*]], %[[LOOP]] ], [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ]
+; IF-EVL-NEXT: [[DEC]] = add nsw i64 [[LEN]], 1
+; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[LEN]]
+; IF-EVL-NEXT: [[TMP20:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; IF-EVL-NEXT: [[TMP:%.*]] = and i8 [[TMP20]], 1
+; IF-EVL-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[LEN]]
+; IF-EVL-NEXT: store i8 [[TMP]], ptr [[ARRAYIDX1]], align 1
+; IF-EVL-NEXT: [[DOTNOT:%.*]] = icmp eq i64 [[DEC]], 100
+; IF-EVL-NEXT: br i1 [[DOTNOT]], label %[[FINISH_LOOPEXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
+; IF-EVL: [[FINISH_LOOPEXIT]]:
+; IF-EVL-NEXT: ret void
+;
+; NO-VP-LABEL: define void @test_and(
+; NO-VP-SAME: ptr nocapture [[A:%.*]], ptr nocapture readonly [[B:%.*]]) #[[ATTR0:[0-9]+]] {
+; NO-VP-NEXT: [[LOOP_PREHEADER:.*]]:
+; NO-VP-NEXT: br label %[[LOOP:.*]]
+; NO-VP: [[LOOP]]:
+; NO-VP-NEXT: [[LEN:%.*]] = phi i64 [ [[DEC:%.*]], %[[LOOP]] ], [ 0, %[[LOOP_PREHEADER]] ]
+; NO-VP-NEXT: [[DEC]] = add nsw i64 [[LEN]], 1
+; NO-VP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[LEN]]
+; NO-VP-NEXT: [[TMP0:%.*]] = load i8, ptr [[ARRAYIDX]], align 1
+; NO-VP-NEXT: [[TMP:%.*]] = and i8 [[TMP0]], 1
+; NO-VP-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[B]], i64 [[LEN]]
+; NO-VP-NEXT: store i8 [[TMP]], ptr [[ARRAYIDX1]], align 1...
[truncated]
``````````
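With this change, the recipe shows up in VPlan dumps with a `vp` marker and the EVL as the trailing operand. A sketch of the expected form, based on `VPWidenEVLRecipe::print` above (value names are illustrative):

```
WIDEN vp ir<%add> = add nsw ir<%x>, ir<%y>, vp<%evl>
```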
https://github.com/llvm/llvm-project/pull/93854