[llvm] [AArch64] Add MATCH loops to LoopIdiomVectorizePass (PR #101976)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Aug 5 06:28:26 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-transforms
Author: Ricardo Jesus (rj-jesus)
<details>
<summary>Changes</summary>
This patch adds a new loop idiom to LoopIdiomVectorizePass, enabling it to
recognise and vectorise loops such as:
```cpp
template<class InputIt, class ForwardIt>
InputIt find_first_of(InputIt first, InputIt last,
ForwardIt s_first, ForwardIt s_last)
{
for (; first != last; ++first)
for (ForwardIt it = s_first; it != s_last; ++it)
if (*first == *it)
return first;
return last;
}
```
These loops match the C++ standard library function std::find_first_of.
The loops are vectorised using the `@experimental.vector.match` intrinsic introduced in #101974.
---
Patch is 41.43 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/101976.diff
12 Files Affected:
- (modified) llvm/docs/LangRef.rst (+45)
- (modified) llvm/include/llvm/Analysis/TargetTransformInfo.h (+9)
- (modified) llvm/include/llvm/Analysis/TargetTransformInfoImpl.h (+2)
- (modified) llvm/include/llvm/IR/Intrinsics.td (+10)
- (modified) llvm/lib/Analysis/TargetTransformInfo.cpp (+5)
- (modified) llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp (+9)
- (modified) llvm/lib/Target/AArch64/AArch64ISelLowering.cpp (+46)
- (modified) llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp (+12)
- (modified) llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h (+2)
- (modified) llvm/lib/Transforms/Vectorize/LoopIdiomVectorize.cpp (+441-1)
- (added) llvm/test/CodeGen/AArch64/find-first-byte.ll (+120)
- (added) llvm/test/CodeGen/AArch64/intrinsic-vector-match-sve2.ll (+57)
``````````diff
diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index b17e3c828ed3d..dd9851d1af078 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -19637,6 +19637,51 @@ are undefined.
}
+'``llvm.experimental.vector.match.*``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+This is an overloaded intrinsic. Support for specific vector types is target
+dependent.
+
+::
+
+ declare <<n> x i1> @llvm.experimental.vector.match(<<n> x <ty>> %op1, <<n> x <ty>> %op2, <<n> x i1> %mask, i32 <segsize>)
+ declare <vscale x <n> x i1> @llvm.experimental.vector.match(<vscale x <n> x <ty>> %op1, <vscale x <n> x <ty>> %op2, <vscale x <n> x i1> %mask, i32 <segsize>)
+
+Overview:
+"""""""""
+
+Find elements of the first argument matching any elements of the second.
+
+Arguments:
+""""""""""
+
+The first argument is the search vector, the second argument is the vector of
+elements we are searching for (i.e. for which we consider a match successful),
+and the third argument is a mask that controls which elements of the first
+argument are active. The fourth argument is an immediate that sets the segment
+size for the search window.
+
+Semantics:
+""""""""""
+
+The '``llvm.experimental.vector.match``' intrinsic compares each element in the
+first argument against potentially several elements of the second, placing
+``1`` in the corresponding element of the output vector if any comparison is
+successful, and ``0`` otherwise. Inactive elements in the mask are set to ``0``
+in the output. The segment size controls the number of elements of the second
+argument that are compared against.
+
+For example, for vectors with 16 elements, if ``segsize = 16`` then each
+element of the first argument is compared against all 16 elements of the second
+argument; but if ``segsize = 4``, then each of the first four elements of the
+first argument is compared against the first four elements of the second
+argument, each of the second four elements of the first argument is compared
+against the second four elements of the second argument, and so forth.
+
Matrix Intrinsics
-----------------
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h
index 38e8b9da21397..786c13a177ccf 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfo.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h
@@ -1746,6 +1746,10 @@ class TargetTransformInfo {
bool hasActiveVectorLength(unsigned Opcode, Type *DataType,
Align Alignment) const;
+ /// \returns Returns true if the target supports vector match operations for
+ /// the vector type `VT` using a segment size of `SegSize`.
+ bool hasVectorMatch(VectorType *VT, unsigned SegSize) const;
+
struct VPLegalization {
enum VPTransform {
// keep the predicating parameter
@@ -2184,6 +2188,7 @@ class TargetTransformInfo::Concept {
virtual bool supportsScalableVectors() const = 0;
virtual bool hasActiveVectorLength(unsigned Opcode, Type *DataType,
Align Alignment) const = 0;
+ virtual bool hasVectorMatch(VectorType *VT, unsigned SegSize) const = 0;
virtual VPLegalization
getVPLegalizationStrategy(const VPIntrinsic &PI) const = 0;
virtual bool hasArmWideBranch(bool Thumb) const = 0;
@@ -2952,6 +2957,10 @@ class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
return Impl.hasActiveVectorLength(Opcode, DataType, Alignment);
}
+ bool hasVectorMatch(VectorType *VT, unsigned SegSize) const override {
+ return Impl.hasVectorMatch(VT, SegSize);
+ }
+
VPLegalization
getVPLegalizationStrategy(const VPIntrinsic &PI) const override {
return Impl.getVPLegalizationStrategy(PI);
diff --git a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
index d208a710bb27f..36621861ab8c8 100644
--- a/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -958,6 +958,8 @@ class TargetTransformInfoImplBase {
return false;
}
+ bool hasVectorMatch(VectorType *VT, unsigned SegSize) const { return false; }
+
TargetTransformInfo::VPLegalization
getVPLegalizationStrategy(const VPIntrinsic &PI) const {
return TargetTransformInfo::VPLegalization(
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index b4e758136b39f..f6d77aa596f60 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -1892,6 +1892,16 @@ def int_experimental_vector_histogram_add : DefaultAttrsIntrinsic<[],
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>], // Mask
[ IntrArgMemOnly ]>;
+// Experimental match
+def int_experimental_vector_match : DefaultAttrsIntrinsic<
+ [ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty> ],
+ [ llvm_anyvector_ty,
+ LLVMMatchType<0>,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, // Mask
+ llvm_i32_ty ], // Segment size
+ [ IntrNoMem, IntrNoSync, IntrWillReturn,
+ ImmArg<ArgIndex<3>> ]>;
+
// Operators
let IntrProperties = [IntrNoMem, IntrNoSync, IntrWillReturn] in {
// Integer arithmetic
diff --git a/llvm/lib/Analysis/TargetTransformInfo.cpp b/llvm/lib/Analysis/TargetTransformInfo.cpp
index dcde78925bfa9..d8314af0537fe 100644
--- a/llvm/lib/Analysis/TargetTransformInfo.cpp
+++ b/llvm/lib/Analysis/TargetTransformInfo.cpp
@@ -1352,6 +1352,11 @@ bool TargetTransformInfo::hasActiveVectorLength(unsigned Opcode, Type *DataType,
return TTIImpl->hasActiveVectorLength(Opcode, DataType, Alignment);
}
+bool TargetTransformInfo::hasVectorMatch(VectorType *VT,
+ unsigned SegSize) const {
+ return TTIImpl->hasVectorMatch(VT, SegSize);
+}
+
TargetTransformInfo::Concept::~Concept() = default;
TargetIRAnalysis::TargetIRAnalysis() : TTICallback(&getDefaultTTI) {}
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 9d617c7acd13c..9cb7d65975b9f 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -8096,6 +8096,15 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
DAG.getNode(ISD::EXTRACT_SUBVECTOR, sdl, ResultVT, Vec, Index));
return;
}
+ case Intrinsic::experimental_vector_match: {
+ auto *VT = dyn_cast<VectorType>(I.getOperand(0)->getType());
+ auto SegmentSize = cast<ConstantInt>(I.getOperand(3))->getLimitedValue();
+ const auto &TTI =
+ TLI.getTargetMachine().getTargetTransformInfo(*I.getFunction());
+ assert(VT && TTI.hasVectorMatch(VT, SegmentSize) && "Unsupported type!");
+ visitTargetIntrinsic(I, Intrinsic);
+ return;
+ }
case Intrinsic::vector_reverse:
visitVectorReverse(I);
return;
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 7704321a0fc3a..050807142fc0a 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -6106,6 +6106,51 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
DAG.getNode(AArch64ISD::CTTZ_ELTS, dl, MVT::i64, CttzOp);
return DAG.getZExtOrTrunc(NewCttzElts, dl, Op.getValueType());
}
+ case Intrinsic::experimental_vector_match: {
+ SDValue ID =
+ DAG.getTargetConstant(Intrinsic::aarch64_sve_match, dl, MVT::i64);
+
+ auto Op1 = Op.getOperand(1);
+ auto Op2 = Op.getOperand(2);
+ auto Mask = Op.getOperand(3);
+ auto SegmentSize =
+ cast<ConstantSDNode>(Op.getOperand(4))->getLimitedValue();
+
+ EVT VT = Op.getValueType();
+ auto MinNumElts = VT.getVectorMinNumElements();
+
+ assert(Op1.getValueType() == Op2.getValueType() && "Type mismatch.");
+ assert(Op1.getValueSizeInBits().getKnownMinValue() == 128 &&
+ "Custom lower only works on 128-bit segments.");
+ assert((Op1.getValueType().getVectorElementType() == MVT::i8 ||
+ Op1.getValueType().getVectorElementType() == MVT::i16) &&
+ "Custom lower only supports 8-bit or 16-bit characters.");
+ assert(SegmentSize == MinNumElts && "Custom lower needs segment size to "
+ "match minimum number of elements.");
+
+ if (VT.isScalableVector())
+ return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, ID, Mask, Op1, Op2);
+
+ // We can use the SVE2 match instruction to lower this intrinsic by
+ // converting the operands to scalable vectors, doing a match, and then
+ // extracting a fixed-width subvector from the scalable vector.
+
+ EVT OpVT = Op1.getValueType();
+ EVT OpContainerVT = getContainerForFixedLengthVector(DAG, OpVT);
+ EVT MatchVT = OpContainerVT.changeElementType(MVT::i1);
+
+ auto ScalableOp1 = convertToScalableVector(DAG, OpContainerVT, Op1);
+ auto ScalableOp2 = convertToScalableVector(DAG, OpContainerVT, Op2);
+ auto ScalableMask = DAG.getNode(ISD::SIGN_EXTEND, dl, OpVT, Mask);
+ ScalableMask = convertFixedMaskToScalableVector(ScalableMask, DAG);
+
+ SDValue Match = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MatchVT, ID,
+ ScalableMask, ScalableOp1, ScalableOp2);
+
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT,
+ DAG.getNode(ISD::SIGN_EXTEND, dl, OpContainerVT, Match),
+ DAG.getVectorIdxConstant(0, dl));
+ }
}
}
@@ -26544,6 +26589,7 @@ void AArch64TargetLowering::ReplaceNodeResults(
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, V));
return;
}
+ case Intrinsic::experimental_vector_match:
case Intrinsic::get_active_lane_mask: {
if (!VT.isFixedLengthVector() || VT.getVectorElementType() != MVT::i1)
return;
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index b8f19fa87e2ab..806dc856c5862 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -3835,6 +3835,18 @@ bool AArch64TTIImpl::isLegalToVectorizeReduction(
}
}
+bool AArch64TTIImpl::hasVectorMatch(VectorType *VT, unsigned SegSize) const {
+ // Check that the target has SVE2 (and SVE is available), that `VT' is a
+ // legal type for MATCH, and that the segment size is 128-bit.
+ if (ST->hasSVE2() && ST->isSVEAvailable() &&
+ VT->getPrimitiveSizeInBits().getKnownMinValue() == 128 &&
+ VT->getElementCount().getKnownMinValue() == SegSize &&
+ (VT->getElementCount().getKnownMinValue() == 8 ||
+ VT->getElementCount().getKnownMinValue() == 16))
+ return true;
+ return false;
+}
+
InstructionCost
AArch64TTIImpl::getMinMaxReductionCost(Intrinsic::ID IID, VectorType *Ty,
FastMathFlags FMF,
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
index a9189fd53f40b..6ad21a9e0a77a 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -391,6 +391,8 @@ class AArch64TTIImpl : public BasicTTIImplBase<AArch64TTIImpl> {
return ST->hasSVE();
}
+ bool hasVectorMatch(VectorType *VT, unsigned SegSize) const;
+
InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
std::optional<FastMathFlags> FMF,
TTI::TargetCostKind CostKind);
diff --git a/llvm/lib/Transforms/Vectorize/LoopIdiomVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopIdiomVectorize.cpp
index cb31e2a2ecaec..a9683f08c5ab9 100644
--- a/llvm/lib/Transforms/Vectorize/LoopIdiomVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopIdiomVectorize.cpp
@@ -79,6 +79,12 @@ static cl::opt<unsigned>
cl::desc("The vectorization factor for byte-compare patterns."),
cl::init(16));
+static cl::opt<bool>
+ DisableFindFirstByte("disable-loop-idiom-vectorize-find-first-byte",
+ cl::Hidden, cl::init(false),
+ cl::desc("Proceed with Loop Idiom Vectorize Pass, but "
+ "do not convert find-first-byte loop(s)."));
+
static cl::opt<bool>
VerifyLoops("loop-idiom-vectorize-verify", cl::Hidden, cl::init(false),
cl::desc("Verify loops generated Loop Idiom Vectorize Pass."));
@@ -136,6 +142,21 @@ class LoopIdiomVectorize {
PHINode *IndPhi, Value *MaxLen, Instruction *Index,
Value *Start, bool IncIdx, BasicBlock *FoundBB,
BasicBlock *EndBB);
+
+ bool recognizeFindFirstByte();
+
+ Value *expandFindFirstByte(IRBuilder<> &Builder, DomTreeUpdater &DTU,
+ unsigned VF, unsigned CharWidth,
+ BasicBlock *ExitSucc, BasicBlock *ExitFail,
+ GetElementPtrInst *GEPA, GetElementPtrInst *GEPB,
+ Value *StartA, Value *EndA,
+ Value *StartB, Value *EndB);
+
+ void transformFindFirstByte(PHINode *IndPhi, unsigned VF, unsigned CharWidth,
+ BasicBlock *ExitSucc, BasicBlock *ExitFail,
+ GetElementPtrInst *GEPA, GetElementPtrInst *GEPB,
+ Value *StartA, Value *EndA,
+ Value *StartB, Value *EndB);
/// @}
};
} // anonymous namespace
@@ -190,7 +211,13 @@ bool LoopIdiomVectorize::run(Loop *L) {
LLVM_DEBUG(dbgs() << DEBUG_TYPE " Scanning: F[" << F.getName() << "] Loop %"
<< CurLoop->getHeader()->getName() << "\n");
- return recognizeByteCompare();
+ if (recognizeByteCompare())
+ return true;
+
+ if (recognizeFindFirstByte())
+ return true;
+
+ return false;
}
bool LoopIdiomVectorize::recognizeByteCompare() {
@@ -941,3 +968,416 @@ void LoopIdiomVectorize::transformByteCompare(GetElementPtrInst *GEPA,
report_fatal_error("Loops must remain in LCSSA form!");
}
}
+
+bool LoopIdiomVectorize::recognizeFindFirstByte() {
+ // Currently the transformation only works on scalable vector types, although
+ // there is no fundamental reason why it cannot be made to work for fixed
+ // width too.
+ if (!TTI->supportsScalableVectors() || DisableFindFirstByte)
+ return false;
+
+ // Define some constants we need throughout.
+ // TODO: Some of these could be made configurable parameters. For example, we
+ // could allow CharWidth = 16 (and VF = 8).
+ unsigned VF = 16;
+ unsigned CharWidth = 8;
+ BasicBlock *Header = CurLoop->getHeader();
+ LLVMContext &Ctx = Header->getContext();
+ auto *CharTy = Type::getIntNTy(Ctx, CharWidth);
+ auto *CharVTy = ScalableVectorType::get(CharTy, VF);
+
+ // Check if the target supports efficient vector matches for vectors of
+ // bytes.
+ if (!TTI->hasVectorMatch(CharVTy, VF))
+ return false;
+
+ // In LoopIdiomVectorize::run we have already checked that the loop has a
+ // preheader so we can assume it's in a canonical form.
+ if (CurLoop->getNumBackEdges() != 1 || CurLoop->getNumBlocks() != 4)
+ return false;
+
+ // We expect this loop to have one nested loop.
+ if (CurLoop->getSubLoops().size() != 1)
+ return false;
+
+ auto *InnerLoop = CurLoop->getSubLoops().front();
+ PHINode *IndPhi = dyn_cast<PHINode>(&Header->front());
+
+ if (!IndPhi || IndPhi->getNumIncomingValues() != 2)
+ return false;
+
+ auto LoopBlocks = CurLoop->getBlocks();
+ // We are expecting the following blocks below. For now, we will bail out for
+ // anything deviating from this.
+ //
+ // .preheader: ; preds = %.preheader.preheader, %23
+ // %14 = phi ptr [ %24, %23 ], [ %3, %.preheader.preheader ]
+ // %15 = load i8, ptr %14, align 1, !tbaa !14
+ // br label %19
+ //
+ // 19: ; preds = %16, %.preheader
+ // %20 = phi ptr [ %7, %.preheader ], [ %17, %16 ]
+ // %21 = load i8, ptr %20, align 1, !tbaa !14
+ // %22 = icmp eq i8 %15, %21
+ // br i1 %22, label %.loopexit.loopexit, label %16
+ //
+ // 16: ; preds = %19
+ // %17 = getelementptr inbounds i8, ptr %20, i64 1
+ // %18 = icmp eq ptr %17, %10
+ // br i1 %18, label %23, label %19, !llvm.loop !15
+ //
+ // 23: ; preds = %16
+ // %24 = getelementptr inbounds i8, ptr %14, i64 1
+ // %25 = icmp eq ptr %24, %6
+ // br i1 %25, label %.loopexit.loopexit5, label %.preheader, !llvm.loop !17
+ //
+ if (LoopBlocks[0]->sizeWithoutDebug() > 3 ||
+ LoopBlocks[1]->sizeWithoutDebug() > 4 ||
+ LoopBlocks[2]->sizeWithoutDebug() > 3 ||
+ LoopBlocks[3]->sizeWithoutDebug() > 3)
+ return false;
+
+ // If we match the pattern, IndPhi is going to be replaced. We cannot replace
+ // the loop if any other of its instructions are used outside of it.
+ for (BasicBlock *BB : LoopBlocks)
+ for (Instruction &I : *BB)
+ if (&I != IndPhi)
+ for (User *U : I.users())
+ if (!CurLoop->contains(cast<Instruction>(U)))
+ return false;
+
+ // Match the branch instruction for the header. We are expecting an
+ // unconditional branch to the inner loop.
+ BasicBlock *MatchBB;
+ if (!match(Header->getTerminator(), m_UnconditionalBr(MatchBB)) ||
+ !InnerLoop->contains(MatchBB))
+ return false;
+
+ // MatchBB should be the entrypoint into the inner loop containing the
+ // comparison between a search item and a valid/successful match.
+ ICmpInst::Predicate MatchPred;
+ BasicBlock *ExitSucc;
+ BasicBlock *InnerBB;
+ Value *LoadA, *LoadB;
+ if (!match(MatchBB->getTerminator(),
+ m_Br(m_ICmp(MatchPred, m_Value(LoadA), m_Value(LoadB)),
+ m_BasicBlock(ExitSucc), m_BasicBlock(InnerBB))) ||
+ MatchPred != ICmpInst::Predicate::ICMP_EQ ||
+ !InnerLoop->contains(InnerBB))
+ return false;
+
+ // We expect a single use of IndPhi outside of CurLoop. The outside use
+ // should be a PHINode in ExitSucc coming from MatchBB.
+ // Note: Strictly speaking we are not checking for a *single* use of IndPhi
+ // outside of CurLoop here, but below we check that we only exit CurLoop to
+ // ExitSucc in one place, so by construction this should be true. Besides, in
+ // the event it is not, as long as the use is a PHINode in ExitSucc and comes
+ // from MatchBB, the transformation should still be valid in any case.
+ for (Use &U : IndPhi->uses())
+ if (CurLoop->contains(cast<Instruction>(U.getUser())))
+ continue;
+ else if (auto *PN = dyn_cast<PHINode>(U.getUser());
+ !PN || PN->getParent() != ExitSucc ||
+ PN->getIncomingBlock(U) != MatchBB)
+ return false;
+
+ // Match the loads.
+ Value *A, *B;
+ if (!match(LoadA, m_Load(m_Value(A))) || !match(LoadB, m_Load(m_Value(B))))
+ return false;
+
+ // Make sure they are simple.
+ LoadInst *LoadAI = cast<LoadInst>(LoadA);
+ LoadInst *LoadBI = cast<LoadInst>(LoadB);
+ if (!LoadAI->isSimple() || !LoadBI->isSimple())
+ return false;
+
+ // The values loaded come from two PHIs that can only have two incoming
+ // values.
+ PHINode *PNA = dyn_cast<PHINode>(A);
+ PHINode *PNB = dyn_cast<PHINode>(B);
+ if (!PNA || PNA->getNumIncomingValues() != 2 ||
+ !PNB || PNB->getNumIncomingValues() != 2)
+ return false;
+
+ // One PHI comes from the outer loop, the other one from the inner loop.
+ // CurLoop contains PNA, InnerLoop PNB.
+ if (InnerLoop->contains(PNA))
+ std::swap(PNA, PNB);
+ if (PNA != &Header->front() || PNB != &MatchBB->front())
+ return false;
+
+ // The incoming values of both PHI nodes should be a gep of 1.
+ Value *StartA = PNA->getIncomingValue(0);
+ Value *IndexA = PNA->getIncomingValue(1);
+ if (CurLoop->contains(PNA->getIncomingBlock(0)))
+ std::swap(StartA, IndexA);...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/101976
More information about the llvm-commits
mailing list