[llvm] [AArch64] Test all needles when vectorising find_first_of loops. (PR #179298)
Ricardo Jesus via llvm-commits
llvm-commits at lists.llvm.org
Mon Feb 2 09:46:29 PST 2026
https://github.com/rj-jesus created https://github.com/llvm/llvm-project/pull/179298
Fixes #179187. As described in the issue, the current FindFirstByte transformation in LoopIdiomVectorizePass incorrectly exits early as soon as any needle in the current needle segment matches a search element, even though an earlier element of the search vector could still match a needle in a subsequent segment.
This patch ensures all needles are tested before we return a matching search element.
From 0cb99bd8ee69f604b836f34236194720f9b5a936 Mon Sep 17 00:00:00 2001
From: Ricardo Jesus <rjj at nvidia.com>
Date: Mon, 2 Feb 2026 05:02:11 -0800
Subject: [PATCH] [AArch64] Test all needles when vectorising find_first_of loops.
As described in #179187, the current FindFirstByte transformation in
LoopIdiomVectorizePass will incorrectly early-exit as soon as a needle
matching a search element is found, even if a prior element of the
search vector may match a subsequent needle.
This patch implements the strategy described in the issue to ensure all
needles are tested before we return a matching search element.
---
.../Vectorize/LoopIdiomVectorize.cpp | 71 ++++++-----
.../LoopIdiom/AArch64/find-first-byte.ll | 115 ++++++++++--------
2 files changed, 104 insertions(+), 82 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopIdiomVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopIdiomVectorize.cpp
index 53129e2e5fbba..d052eb4201dfe 100644
--- a/llvm/lib/Transforms/Vectorize/LoopIdiomVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopIdiomVectorize.cpp
@@ -1199,23 +1199,23 @@ Value *LoopIdiomVectorize::expandFindFirstByte(
// (1) Load the search array. Go to (2).
// (2) (a) Load the needle array.
// (b) Splat the first element to the inactive lanes.
- // (c) Check if any elements match. If so go to (3), otherwise go to (4).
- // (3) Compute the index of the first match and exit.
- // (4) Check if we've reached the end of the needle array. If not loop back to
- // (2), otherwise go to (5).
+ // (c) Accumulate any matches found. If we haven't reached the end of the
+ // needle array loop back to (2), otherwise go to (3).
+ // (3) Test if we found any match. If so go to (4), otherwise go to (5).
+ // (4) Compute the index of the first match and exit.
// (5) Check if we've reached the end of the search array. If not loop back to
// (1), otherwise exit.
- // Blocks (0,3) are not part of any loop. Blocks (1,5) and (2,4) belong to
- // the outer and inner loops, respectively.
+ // Blocks (0,4) are not part of any loop. Blocks (1,3,5) and (2) belong to the
+ // outer and inner loops, respectively.
BasicBlock *BB0 = BasicBlock::Create(Ctx, "mem_check", SPH->getParent(), SPH);
BasicBlock *BB1 =
BasicBlock::Create(Ctx, "find_first_vec_header", SPH->getParent(), SPH);
BasicBlock *BB2 =
- BasicBlock::Create(Ctx, "match_check_vec", SPH->getParent(), SPH);
+ BasicBlock::Create(Ctx, "needle_check_vec", SPH->getParent(), SPH);
BasicBlock *BB3 =
- BasicBlock::Create(Ctx, "calculate_match", SPH->getParent(), SPH);
+ BasicBlock::Create(Ctx, "match_check_vec", SPH->getParent(), SPH);
BasicBlock *BB4 =
- BasicBlock::Create(Ctx, "needle_check_vec", SPH->getParent(), SPH);
+ BasicBlock::Create(Ctx, "calculate_match", SPH->getParent(), SPH);
BasicBlock *BB5 =
BasicBlock::Create(Ctx, "search_check_vec", SPH->getParent(), SPH);
@@ -1226,7 +1226,7 @@ Value *LoopIdiomVectorize::expandFindFirstByte(
if (auto ParentLoop = CurLoop->getParentLoop()) {
ParentLoop->addBasicBlockToLoop(BB0, *LI);
ParentLoop->addChildLoop(OuterLoop);
- ParentLoop->addBasicBlockToLoop(BB3, *LI);
+ ParentLoop->addBasicBlockToLoop(BB4, *LI);
} else {
LI->addTopLevelLoop(OuterLoop);
}
@@ -1236,9 +1236,9 @@ Value *LoopIdiomVectorize::expandFindFirstByte(
// Add the new basic blocks to the corresponding loops.
OuterLoop->addBasicBlockToLoop(BB1, *LI);
+ OuterLoop->addBasicBlockToLoop(BB3, *LI);
OuterLoop->addBasicBlockToLoop(BB5, *LI);
InnerLoop->addBasicBlockToLoop(BB2, *LI);
- InnerLoop->addBasicBlockToLoop(BB4, *LI);
// Update the terminator added by SplitBlock to branch to the first block.
Preheader->getTerminator()->setSuccessor(0, BB0);
@@ -1294,12 +1294,14 @@ Value *LoopIdiomVectorize::expandFindFirstByte(
PredSearch = Builder.CreateAnd(PredVF, PredSearch, "search_masked");
Value *LoadSearch = Builder.CreateMaskedLoad(
CharVTy, Search, Align(1), PredSearch, Passthru, "search_load_vec");
+ Value *MatchInit = Constant::getNullValue(PredVTy);
Builder.CreateBr(BB2);
DTU.applyUpdates({{DominatorTree::Insert, BB1, BB2}});
// (2) Inner loop.
Builder.SetInsertPoint(BB2);
PHINode *Needle = Builder.CreatePHI(PtrTy, 2, "pneedle");
+ PHINode *Match = Builder.CreatePHI(PredVTy, 2, "pmatch");
// (2.a) Load the needle array.
Value *PredNeedle = Builder.CreateIntrinsic(
@@ -1320,17 +1322,27 @@ Value *LoopIdiomVectorize::expandFindFirstByte(
LoadNeedle = Builder.CreateExtractVector(
FixedVectorType::get(CharTy, VF), LoadNeedle, uint64_t(0), "needle_vec");
- // (2.c) Test if there's a match.
- Value *MatchPred = Builder.CreateIntrinsic(
+ // (2.c) Accumulate matches.
+ Value *MatchSeg = Builder.CreateIntrinsic(
Intrinsic::experimental_vector_match, {CharVTy, LoadNeedle->getType()},
- {LoadSearch, LoadNeedle, PredSearch}, nullptr, "match_pred");
- Value *IfAnyMatch = Builder.CreateOrReduce(MatchPred);
- Builder.CreateCondBr(IfAnyMatch, BB3, BB4);
+ {LoadSearch, LoadNeedle, PredSearch}, nullptr, "match_segment");
+ Value *MatchAcc = Builder.CreateOr(Match, MatchSeg, "match_accumulator");
+ Value *NextNeedle =
+ Builder.CreateGEP(CharTy, Needle, ConstVF, "needle_next_vec");
+ Builder.CreateCondBr(Builder.CreateICmpULT(NextNeedle, NeedleEnd), BB2, BB3);
DTU.applyUpdates(
- {{DominatorTree::Insert, BB2, BB3}, {DominatorTree::Insert, BB2, BB4}});
+ {{DominatorTree::Insert, BB2, BB2}, {DominatorTree::Insert, BB2, BB3}});
- // (3) We found a match. Compute the index of its location and exit.
+ // (3) Check if we found a match.
Builder.SetInsertPoint(BB3);
+ PHINode *MatchPred = Builder.CreatePHI(PredVTy, 1, "match_pred");
+ Value *IfAnyMatch = Builder.CreateOrReduce(MatchPred);
+ Builder.CreateCondBr(IfAnyMatch, BB4, BB5);
+ DTU.applyUpdates(
+ {{DominatorTree::Insert, BB3, BB4}, {DominatorTree::Insert, BB3, BB5}});
+
+ // (4) We found a match. Compute the index of its location and exit.
+ Builder.SetInsertPoint(BB4);
PHINode *MatchLCSSA = Builder.CreatePHI(PtrTy, 1, "match_start");
PHINode *MatchPredLCSSA =
Builder.CreatePHI(MatchPred->getType(), 1, "match_vec");
@@ -1341,15 +1353,7 @@ Value *LoopIdiomVectorize::expandFindFirstByte(
Value *MatchVal =
Builder.CreateGEP(CharTy, MatchLCSSA, MatchCnt, "match_res");
Builder.CreateBr(ExitSucc);
- DTU.applyUpdates({{DominatorTree::Insert, BB3, ExitSucc}});
-
- // (4) Check if we've reached the end of the needle array.
- Builder.SetInsertPoint(BB4);
- Value *NextNeedle =
- Builder.CreateGEP(CharTy, Needle, ConstVF, "needle_next_vec");
- Builder.CreateCondBr(Builder.CreateICmpULT(NextNeedle, NeedleEnd), BB2, BB5);
- DTU.applyUpdates(
- {{DominatorTree::Insert, BB4, BB2}, {DominatorTree::Insert, BB4, BB5}});
+ DTU.applyUpdates({{DominatorTree::Insert, BB4, ExitSucc}});
// (5) Check if we've reached the end of the search array.
Builder.SetInsertPoint(BB5);
@@ -1364,14 +1368,17 @@ Value *LoopIdiomVectorize::expandFindFirstByte(
Search->addIncoming(SearchStart, BB0);
Search->addIncoming(NextSearch, BB5);
Needle->addIncoming(NeedleStart, BB1);
- Needle->addIncoming(NextNeedle, BB4);
+ Needle->addIncoming(NextNeedle, BB2);
+ Match->addIncoming(MatchInit, BB1);
+ Match->addIncoming(MatchAcc, BB2);
// These are needed to retain LCSSA form.
- MatchLCSSA->addIncoming(Search, BB2);
- MatchPredLCSSA->addIncoming(MatchPred, BB2);
+ MatchPred->addIncoming(MatchAcc, BB2);
+ MatchLCSSA->addIncoming(Search, BB3);
+ MatchPredLCSSA->addIncoming(MatchPred, BB3);
- // Ensure all Phis in the successors of BB3/BB5 have an incoming value from
+ // Ensure all Phis in the successors of BB4/BB5 have an incoming value from
// them.
- fixSuccessorPhis(CurLoop, IndPhi, MatchVal, ExitSucc, BB3);
+ fixSuccessorPhis(CurLoop, IndPhi, MatchVal, ExitSucc, BB4);
if (ExitSucc != ExitFail)
fixSuccessorPhis(CurLoop, IndPhi, MatchVal, ExitFail, BB5);
diff --git a/llvm/test/Transforms/LoopIdiom/AArch64/find-first-byte.ll b/llvm/test/Transforms/LoopIdiom/AArch64/find-first-byte.ll
index 62d15b591c256..0ad9f1dc4c859 100644
--- a/llvm/test/Transforms/LoopIdiom/AArch64/find-first-byte.ll
+++ b/llvm/test/Transforms/LoopIdiom/AArch64/find-first-byte.ll
@@ -40,9 +40,10 @@ define ptr @find_first_of_i8(ptr %search_start, ptr %search_end, ptr %needle_sta
; CHECK-NEXT: [[SEARCH_PRED:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[TMP1]], i64 [[SEARCH_END_INT]])
; CHECK-NEXT: [[SEARCH_MASKED:%.*]] = and <vscale x 16 x i1> [[TMP0]], [[SEARCH_PRED]]
; CHECK-NEXT: [[SEARCH_LOAD_VEC:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr align 1 [[PSEARCH]], <vscale x 16 x i1> [[SEARCH_MASKED]], <vscale x 16 x i8> zeroinitializer)
-; CHECK-NEXT: br label %[[MATCH_CHECK_VEC:.*]]
-; CHECK: [[MATCH_CHECK_VEC]]:
-; CHECK-NEXT: [[PNEEDLE:%.*]] = phi ptr [ [[NEEDLE_START]], %[[FIND_FIRST_VEC_HEADER]] ], [ [[NEEDLE_NEXT_VEC:%.*]], %[[NEEDLE_CHECK_VEC:.*]] ]
+; CHECK-NEXT: br label %[[NEEDLE_CHECK_VEC:.*]]
+; CHECK: [[NEEDLE_CHECK_VEC]]:
+; CHECK-NEXT: [[PNEEDLE:%.*]] = phi ptr [ [[NEEDLE_START]], %[[FIND_FIRST_VEC_HEADER]] ], [ [[NEEDLE_NEXT_VEC:%.*]], %[[NEEDLE_CHECK_VEC]] ]
+; CHECK-NEXT: [[PMATCH:%.*]] = phi <vscale x 16 x i1> [ zeroinitializer, %[[FIND_FIRST_VEC_HEADER]] ], [ [[MATCH_ACCUMULATOR:%.*]], %[[NEEDLE_CHECK_VEC]] ]
; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[PNEEDLE]] to i64
; CHECK-NEXT: [[NEEDLE_PRED:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[TMP2]], i64 [[NEEDLE_END_INT]])
; CHECK-NEXT: [[NEEDLE_MASKED:%.*]] = and <vscale x 16 x i1> [[TMP0]], [[NEEDLE_PRED]]
@@ -52,19 +53,21 @@ define ptr @find_first_of_i8(ptr %search_start, ptr %search_end, ptr %needle_sta
; CHECK-NEXT: [[NEEDLE0_SPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[NEEDLE0_SPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
; CHECK-NEXT: [[NEEDLE_SPLAT:%.*]] = select <vscale x 16 x i1> [[NEEDLE_MASKED]], <vscale x 16 x i8> [[NEEDLE_LOAD_VEC]], <vscale x 16 x i8> [[NEEDLE0_SPLAT]]
; CHECK-NEXT: [[NEEDLE_VEC:%.*]] = call <16 x i8> @llvm.vector.extract.v16i8.nxv16i8(<vscale x 16 x i8> [[NEEDLE_SPLAT]], i64 0)
-; CHECK-NEXT: [[MATCH_PRED:%.*]] = call <vscale x 16 x i1> @llvm.experimental.vector.match.nxv16i8.v16i8(<vscale x 16 x i8> [[SEARCH_LOAD_VEC]], <16 x i8> [[NEEDLE_VEC]], <vscale x 16 x i1> [[SEARCH_MASKED]])
-; CHECK-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.or.nxv16i1(<vscale x 16 x i1> [[MATCH_PRED]])
-; CHECK-NEXT: br i1 [[TMP3]], label %[[CALCULATE_MATCH:.*]], label %[[NEEDLE_CHECK_VEC]]
+; CHECK-NEXT: [[MATCH_SEGMENT:%.*]] = call <vscale x 16 x i1> @llvm.experimental.vector.match.nxv16i8.v16i8(<vscale x 16 x i8> [[SEARCH_LOAD_VEC]], <16 x i8> [[NEEDLE_VEC]], <vscale x 16 x i1> [[SEARCH_MASKED]])
+; CHECK-NEXT: [[MATCH_ACCUMULATOR]] = or <vscale x 16 x i1> [[PMATCH]], [[MATCH_SEGMENT]]
+; CHECK-NEXT: [[NEEDLE_NEXT_VEC]] = getelementptr i8, ptr [[PNEEDLE]], i64 16
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ult ptr [[NEEDLE_NEXT_VEC]], [[NEEDLE_END]]
+; CHECK-NEXT: br i1 [[TMP3]], label %[[NEEDLE_CHECK_VEC]], label %[[MATCH_CHECK_VEC:.*]]
+; CHECK: [[MATCH_CHECK_VEC]]:
+; CHECK-NEXT: [[MATCH_PRED:%.*]] = phi <vscale x 16 x i1> [ [[MATCH_ACCUMULATOR]], %[[NEEDLE_CHECK_VEC]] ]
+; CHECK-NEXT: [[TMP4:%.*]] = call i1 @llvm.vector.reduce.or.nxv16i1(<vscale x 16 x i1> [[MATCH_PRED]])
+; CHECK-NEXT: br i1 [[TMP4]], label %[[CALCULATE_MATCH:.*]], label %[[SEARCH_CHECK_VEC]]
; CHECK: [[CALCULATE_MATCH]]:
; CHECK-NEXT: [[MATCH_START:%.*]] = phi ptr [ [[PSEARCH]], %[[MATCH_CHECK_VEC]] ]
; CHECK-NEXT: [[MATCH_VEC:%.*]] = phi <vscale x 16 x i1> [ [[MATCH_PRED]], %[[MATCH_CHECK_VEC]] ]
; CHECK-NEXT: [[MATCH_IDX:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1(<vscale x 16 x i1> [[MATCH_VEC]], i1 true)
; CHECK-NEXT: [[MATCH_RES:%.*]] = getelementptr i8, ptr [[MATCH_START]], i64 [[MATCH_IDX]]
; CHECK-NEXT: br label %[[EXIT_LOOPEXIT:.*]]
-; CHECK: [[NEEDLE_CHECK_VEC]]:
-; CHECK-NEXT: [[NEEDLE_NEXT_VEC]] = getelementptr i8, ptr [[PNEEDLE]], i64 16
-; CHECK-NEXT: [[TMP4:%.*]] = icmp ult ptr [[NEEDLE_NEXT_VEC]], [[NEEDLE_END]]
-; CHECK-NEXT: br i1 [[TMP4]], label %[[MATCH_CHECK_VEC]], label %[[SEARCH_CHECK_VEC]]
; CHECK: [[SEARCH_CHECK_VEC]]:
; CHECK-NEXT: [[SEARCH_NEXT_VEC]] = getelementptr i8, ptr [[PSEARCH]], i64 16
; CHECK-NEXT: [[TMP5:%.*]] = icmp ult ptr [[SEARCH_NEXT_VEC]], [[SEARCH_END]]
@@ -196,9 +199,10 @@ define ptr @find_first_of_i16(ptr %search_start, ptr %search_end, ptr %needle_st
; CHECK-NEXT: [[SEARCH_PRED:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP1]], i64 [[SEARCH_END_INT]])
; CHECK-NEXT: [[SEARCH_MASKED:%.*]] = and <vscale x 8 x i1> [[TMP0]], [[SEARCH_PRED]]
; CHECK-NEXT: [[SEARCH_LOAD_VEC:%.*]] = call <vscale x 8 x i16> @llvm.masked.load.nxv8i16.p0(ptr align 1 [[PSEARCH]], <vscale x 8 x i1> [[SEARCH_MASKED]], <vscale x 8 x i16> zeroinitializer)
-; CHECK-NEXT: br label %[[MATCH_CHECK_VEC:.*]]
-; CHECK: [[MATCH_CHECK_VEC]]:
-; CHECK-NEXT: [[PNEEDLE:%.*]] = phi ptr [ [[NEEDLE_START]], %[[FIND_FIRST_VEC_HEADER]] ], [ [[NEEDLE_NEXT_VEC:%.*]], %[[NEEDLE_CHECK_VEC:.*]] ]
+; CHECK-NEXT: br label %[[NEEDLE_CHECK_VEC:.*]]
+; CHECK: [[NEEDLE_CHECK_VEC]]:
+; CHECK-NEXT: [[PNEEDLE:%.*]] = phi ptr [ [[NEEDLE_START]], %[[FIND_FIRST_VEC_HEADER]] ], [ [[NEEDLE_NEXT_VEC:%.*]], %[[NEEDLE_CHECK_VEC]] ]
+; CHECK-NEXT: [[PMATCH:%.*]] = phi <vscale x 8 x i1> [ zeroinitializer, %[[FIND_FIRST_VEC_HEADER]] ], [ [[MATCH_ACCUMULATOR:%.*]], %[[NEEDLE_CHECK_VEC]] ]
; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[PNEEDLE]] to i64
; CHECK-NEXT: [[NEEDLE_PRED:%.*]] = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[TMP2]], i64 [[NEEDLE_END_INT]])
; CHECK-NEXT: [[NEEDLE_MASKED:%.*]] = and <vscale x 8 x i1> [[TMP0]], [[NEEDLE_PRED]]
@@ -208,19 +212,21 @@ define ptr @find_first_of_i16(ptr %search_start, ptr %search_end, ptr %needle_st
; CHECK-NEXT: [[NEEDLE0_SPLAT:%.*]] = shufflevector <vscale x 8 x i16> [[NEEDLE0_SPLATINSERT]], <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer
; CHECK-NEXT: [[NEEDLE_SPLAT:%.*]] = select <vscale x 8 x i1> [[NEEDLE_MASKED]], <vscale x 8 x i16> [[NEEDLE_LOAD_VEC]], <vscale x 8 x i16> [[NEEDLE0_SPLAT]]
; CHECK-NEXT: [[NEEDLE_VEC:%.*]] = call <8 x i16> @llvm.vector.extract.v8i16.nxv8i16(<vscale x 8 x i16> [[NEEDLE_SPLAT]], i64 0)
-; CHECK-NEXT: [[MATCH_PRED:%.*]] = call <vscale x 8 x i1> @llvm.experimental.vector.match.nxv8i16.v8i16(<vscale x 8 x i16> [[SEARCH_LOAD_VEC]], <8 x i16> [[NEEDLE_VEC]], <vscale x 8 x i1> [[SEARCH_MASKED]])
-; CHECK-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.or.nxv8i1(<vscale x 8 x i1> [[MATCH_PRED]])
-; CHECK-NEXT: br i1 [[TMP3]], label %[[CALCULATE_MATCH:.*]], label %[[NEEDLE_CHECK_VEC]]
+; CHECK-NEXT: [[MATCH_SEGMENT:%.*]] = call <vscale x 8 x i1> @llvm.experimental.vector.match.nxv8i16.v8i16(<vscale x 8 x i16> [[SEARCH_LOAD_VEC]], <8 x i16> [[NEEDLE_VEC]], <vscale x 8 x i1> [[SEARCH_MASKED]])
+; CHECK-NEXT: [[MATCH_ACCUMULATOR]] = or <vscale x 8 x i1> [[PMATCH]], [[MATCH_SEGMENT]]
+; CHECK-NEXT: [[NEEDLE_NEXT_VEC]] = getelementptr i16, ptr [[PNEEDLE]], i64 8
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ult ptr [[NEEDLE_NEXT_VEC]], [[NEEDLE_END]]
+; CHECK-NEXT: br i1 [[TMP3]], label %[[NEEDLE_CHECK_VEC]], label %[[MATCH_CHECK_VEC:.*]]
+; CHECK: [[MATCH_CHECK_VEC]]:
+; CHECK-NEXT: [[MATCH_PRED:%.*]] = phi <vscale x 8 x i1> [ [[MATCH_ACCUMULATOR]], %[[NEEDLE_CHECK_VEC]] ]
+; CHECK-NEXT: [[TMP4:%.*]] = call i1 @llvm.vector.reduce.or.nxv8i1(<vscale x 8 x i1> [[MATCH_PRED]])
+; CHECK-NEXT: br i1 [[TMP4]], label %[[CALCULATE_MATCH:.*]], label %[[SEARCH_CHECK_VEC]]
; CHECK: [[CALCULATE_MATCH]]:
; CHECK-NEXT: [[MATCH_START:%.*]] = phi ptr [ [[PSEARCH]], %[[MATCH_CHECK_VEC]] ]
; CHECK-NEXT: [[MATCH_VEC:%.*]] = phi <vscale x 8 x i1> [ [[MATCH_PRED]], %[[MATCH_CHECK_VEC]] ]
; CHECK-NEXT: [[MATCH_IDX:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.nxv8i1(<vscale x 8 x i1> [[MATCH_VEC]], i1 true)
; CHECK-NEXT: [[MATCH_RES:%.*]] = getelementptr i16, ptr [[MATCH_START]], i64 [[MATCH_IDX]]
; CHECK-NEXT: br label %[[EXIT_LOOPEXIT:.*]]
-; CHECK: [[NEEDLE_CHECK_VEC]]:
-; CHECK-NEXT: [[NEEDLE_NEXT_VEC]] = getelementptr i16, ptr [[PNEEDLE]], i64 8
-; CHECK-NEXT: [[TMP4:%.*]] = icmp ult ptr [[NEEDLE_NEXT_VEC]], [[NEEDLE_END]]
-; CHECK-NEXT: br i1 [[TMP4]], label %[[MATCH_CHECK_VEC]], label %[[SEARCH_CHECK_VEC]]
; CHECK: [[SEARCH_CHECK_VEC]]:
; CHECK-NEXT: [[SEARCH_NEXT_VEC]] = getelementptr i16, ptr [[PSEARCH]], i64 8
; CHECK-NEXT: [[TMP5:%.*]] = icmp ult ptr [[SEARCH_NEXT_VEC]], [[SEARCH_END]]
@@ -352,9 +358,10 @@ define ptr @find_first_of_i8_multi_exit(ptr %search_start, ptr %search_end, ptr
; CHECK-NEXT: [[SEARCH_PRED:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[TMP1]], i64 [[SEARCH_END_INT]])
; CHECK-NEXT: [[SEARCH_MASKED:%.*]] = and <vscale x 16 x i1> [[TMP0]], [[SEARCH_PRED]]
; CHECK-NEXT: [[SEARCH_LOAD_VEC:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr align 1 [[PSEARCH]], <vscale x 16 x i1> [[SEARCH_MASKED]], <vscale x 16 x i8> zeroinitializer)
-; CHECK-NEXT: br label %[[MATCH_CHECK_VEC:.*]]
-; CHECK: [[MATCH_CHECK_VEC]]:
-; CHECK-NEXT: [[PNEEDLE:%.*]] = phi ptr [ [[NEEDLE_START]], %[[FIND_FIRST_VEC_HEADER]] ], [ [[NEEDLE_NEXT_VEC:%.*]], %[[NEEDLE_CHECK_VEC:.*]] ]
+; CHECK-NEXT: br label %[[NEEDLE_CHECK_VEC:.*]]
+; CHECK: [[NEEDLE_CHECK_VEC]]:
+; CHECK-NEXT: [[PNEEDLE:%.*]] = phi ptr [ [[NEEDLE_START]], %[[FIND_FIRST_VEC_HEADER]] ], [ [[NEEDLE_NEXT_VEC:%.*]], %[[NEEDLE_CHECK_VEC]] ]
+; CHECK-NEXT: [[PMATCH:%.*]] = phi <vscale x 16 x i1> [ zeroinitializer, %[[FIND_FIRST_VEC_HEADER]] ], [ [[MATCH_ACCUMULATOR:%.*]], %[[NEEDLE_CHECK_VEC]] ]
; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[PNEEDLE]] to i64
; CHECK-NEXT: [[NEEDLE_PRED:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[TMP2]], i64 [[NEEDLE_END_INT]])
; CHECK-NEXT: [[NEEDLE_MASKED:%.*]] = and <vscale x 16 x i1> [[TMP0]], [[NEEDLE_PRED]]
@@ -364,19 +371,21 @@ define ptr @find_first_of_i8_multi_exit(ptr %search_start, ptr %search_end, ptr
; CHECK-NEXT: [[NEEDLE0_SPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[NEEDLE0_SPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
; CHECK-NEXT: [[NEEDLE_SPLAT:%.*]] = select <vscale x 16 x i1> [[NEEDLE_MASKED]], <vscale x 16 x i8> [[NEEDLE_LOAD_VEC]], <vscale x 16 x i8> [[NEEDLE0_SPLAT]]
; CHECK-NEXT: [[NEEDLE_VEC:%.*]] = call <16 x i8> @llvm.vector.extract.v16i8.nxv16i8(<vscale x 16 x i8> [[NEEDLE_SPLAT]], i64 0)
-; CHECK-NEXT: [[MATCH_PRED:%.*]] = call <vscale x 16 x i1> @llvm.experimental.vector.match.nxv16i8.v16i8(<vscale x 16 x i8> [[SEARCH_LOAD_VEC]], <16 x i8> [[NEEDLE_VEC]], <vscale x 16 x i1> [[SEARCH_MASKED]])
-; CHECK-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.or.nxv16i1(<vscale x 16 x i1> [[MATCH_PRED]])
-; CHECK-NEXT: br i1 [[TMP3]], label %[[CALCULATE_MATCH:.*]], label %[[NEEDLE_CHECK_VEC]]
+; CHECK-NEXT: [[MATCH_SEGMENT:%.*]] = call <vscale x 16 x i1> @llvm.experimental.vector.match.nxv16i8.v16i8(<vscale x 16 x i8> [[SEARCH_LOAD_VEC]], <16 x i8> [[NEEDLE_VEC]], <vscale x 16 x i1> [[SEARCH_MASKED]])
+; CHECK-NEXT: [[MATCH_ACCUMULATOR]] = or <vscale x 16 x i1> [[PMATCH]], [[MATCH_SEGMENT]]
+; CHECK-NEXT: [[NEEDLE_NEXT_VEC]] = getelementptr i8, ptr [[PNEEDLE]], i64 16
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ult ptr [[NEEDLE_NEXT_VEC]], [[NEEDLE_END]]
+; CHECK-NEXT: br i1 [[TMP3]], label %[[NEEDLE_CHECK_VEC]], label %[[MATCH_CHECK_VEC:.*]]
+; CHECK: [[MATCH_CHECK_VEC]]:
+; CHECK-NEXT: [[MATCH_PRED:%.*]] = phi <vscale x 16 x i1> [ [[MATCH_ACCUMULATOR]], %[[NEEDLE_CHECK_VEC]] ]
+; CHECK-NEXT: [[TMP4:%.*]] = call i1 @llvm.vector.reduce.or.nxv16i1(<vscale x 16 x i1> [[MATCH_PRED]])
+; CHECK-NEXT: br i1 [[TMP4]], label %[[CALCULATE_MATCH:.*]], label %[[SEARCH_CHECK_VEC]]
; CHECK: [[CALCULATE_MATCH]]:
; CHECK-NEXT: [[MATCH_START:%.*]] = phi ptr [ [[PSEARCH]], %[[MATCH_CHECK_VEC]] ]
; CHECK-NEXT: [[MATCH_VEC:%.*]] = phi <vscale x 16 x i1> [ [[MATCH_PRED]], %[[MATCH_CHECK_VEC]] ]
; CHECK-NEXT: [[MATCH_IDX:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1(<vscale x 16 x i1> [[MATCH_VEC]], i1 true)
; CHECK-NEXT: [[MATCH_RES:%.*]] = getelementptr i8, ptr [[MATCH_START]], i64 [[MATCH_IDX]]
; CHECK-NEXT: br label %[[EXIT_SUCC:.*]]
-; CHECK: [[NEEDLE_CHECK_VEC]]:
-; CHECK-NEXT: [[NEEDLE_NEXT_VEC]] = getelementptr i8, ptr [[PNEEDLE]], i64 16
-; CHECK-NEXT: [[TMP4:%.*]] = icmp ult ptr [[NEEDLE_NEXT_VEC]], [[NEEDLE_END]]
-; CHECK-NEXT: br i1 [[TMP4]], label %[[MATCH_CHECK_VEC]], label %[[SEARCH_CHECK_VEC]]
; CHECK: [[SEARCH_CHECK_VEC]]:
; CHECK-NEXT: [[SEARCH_NEXT_VEC]] = getelementptr i8, ptr [[PSEARCH]], i64 16
; CHECK-NEXT: [[TMP5:%.*]] = icmp ult ptr [[SEARCH_NEXT_VEC]], [[SEARCH_END]]
@@ -515,9 +524,10 @@ define ptr @ensure_not_found_successors_fixed(ptr %search_start, ptr %search_end
; CHECK-NEXT: [[SEARCH_PRED:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[TMP1]], i64 [[SEARCH_END_INT]])
; CHECK-NEXT: [[SEARCH_MASKED:%.*]] = and <vscale x 16 x i1> [[TMP0]], [[SEARCH_PRED]]
; CHECK-NEXT: [[SEARCH_LOAD_VEC:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr align 1 [[PSEARCH]], <vscale x 16 x i1> [[SEARCH_MASKED]], <vscale x 16 x i8> zeroinitializer)
-; CHECK-NEXT: br label %[[MATCH_CHECK_VEC:.*]]
-; CHECK: [[MATCH_CHECK_VEC]]:
-; CHECK-NEXT: [[PNEEDLE:%.*]] = phi ptr [ [[NEEDLE_START]], %[[FIND_FIRST_VEC_HEADER]] ], [ [[NEEDLE_NEXT_VEC:%.*]], %[[NEEDLE_CHECK_VEC:.*]] ]
+; CHECK-NEXT: br label %[[NEEDLE_CHECK_VEC:.*]]
+; CHECK: [[NEEDLE_CHECK_VEC]]:
+; CHECK-NEXT: [[PNEEDLE:%.*]] = phi ptr [ [[NEEDLE_START]], %[[FIND_FIRST_VEC_HEADER]] ], [ [[NEEDLE_NEXT_VEC:%.*]], %[[NEEDLE_CHECK_VEC]] ]
+; CHECK-NEXT: [[PMATCH:%.*]] = phi <vscale x 16 x i1> [ zeroinitializer, %[[FIND_FIRST_VEC_HEADER]] ], [ [[MATCH_ACCUMULATOR:%.*]], %[[NEEDLE_CHECK_VEC]] ]
; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[PNEEDLE]] to i64
; CHECK-NEXT: [[NEEDLE_PRED:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[TMP2]], i64 [[NEEDLE_END_INT]])
; CHECK-NEXT: [[NEEDLE_MASKED:%.*]] = and <vscale x 16 x i1> [[TMP0]], [[NEEDLE_PRED]]
@@ -527,19 +537,21 @@ define ptr @ensure_not_found_successors_fixed(ptr %search_start, ptr %search_end
; CHECK-NEXT: [[NEEDLE0_SPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[NEEDLE0_SPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
; CHECK-NEXT: [[NEEDLE_SPLAT:%.*]] = select <vscale x 16 x i1> [[NEEDLE_MASKED]], <vscale x 16 x i8> [[NEEDLE_LOAD_VEC]], <vscale x 16 x i8> [[NEEDLE0_SPLAT]]
; CHECK-NEXT: [[NEEDLE_VEC:%.*]] = call <16 x i8> @llvm.vector.extract.v16i8.nxv16i8(<vscale x 16 x i8> [[NEEDLE_SPLAT]], i64 0)
-; CHECK-NEXT: [[MATCH_PRED:%.*]] = call <vscale x 16 x i1> @llvm.experimental.vector.match.nxv16i8.v16i8(<vscale x 16 x i8> [[SEARCH_LOAD_VEC]], <16 x i8> [[NEEDLE_VEC]], <vscale x 16 x i1> [[SEARCH_MASKED]])
-; CHECK-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.or.nxv16i1(<vscale x 16 x i1> [[MATCH_PRED]])
-; CHECK-NEXT: br i1 [[TMP3]], label %[[CALCULATE_MATCH:.*]], label %[[NEEDLE_CHECK_VEC]]
+; CHECK-NEXT: [[MATCH_SEGMENT:%.*]] = call <vscale x 16 x i1> @llvm.experimental.vector.match.nxv16i8.v16i8(<vscale x 16 x i8> [[SEARCH_LOAD_VEC]], <16 x i8> [[NEEDLE_VEC]], <vscale x 16 x i1> [[SEARCH_MASKED]])
+; CHECK-NEXT: [[MATCH_ACCUMULATOR]] = or <vscale x 16 x i1> [[PMATCH]], [[MATCH_SEGMENT]]
+; CHECK-NEXT: [[NEEDLE_NEXT_VEC]] = getelementptr i8, ptr [[PNEEDLE]], i64 16
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ult ptr [[NEEDLE_NEXT_VEC]], [[NEEDLE_END]]
+; CHECK-NEXT: br i1 [[TMP3]], label %[[NEEDLE_CHECK_VEC]], label %[[MATCH_CHECK_VEC:.*]]
+; CHECK: [[MATCH_CHECK_VEC]]:
+; CHECK-NEXT: [[MATCH_PRED:%.*]] = phi <vscale x 16 x i1> [ [[MATCH_ACCUMULATOR]], %[[NEEDLE_CHECK_VEC]] ]
+; CHECK-NEXT: [[TMP4:%.*]] = call i1 @llvm.vector.reduce.or.nxv16i1(<vscale x 16 x i1> [[MATCH_PRED]])
+; CHECK-NEXT: br i1 [[TMP4]], label %[[CALCULATE_MATCH:.*]], label %[[SEARCH_CHECK_VEC]]
; CHECK: [[CALCULATE_MATCH]]:
; CHECK-NEXT: [[MATCH_START:%.*]] = phi ptr [ [[PSEARCH]], %[[MATCH_CHECK_VEC]] ]
; CHECK-NEXT: [[MATCH_VEC:%.*]] = phi <vscale x 16 x i1> [ [[MATCH_PRED]], %[[MATCH_CHECK_VEC]] ]
; CHECK-NEXT: [[MATCH_IDX:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1(<vscale x 16 x i1> [[MATCH_VEC]], i1 true)
; CHECK-NEXT: [[MATCH_RES:%.*]] = getelementptr i8, ptr [[MATCH_START]], i64 [[MATCH_IDX]]
; CHECK-NEXT: br label %[[FOUND_MATCH:.*]]
-; CHECK: [[NEEDLE_CHECK_VEC]]:
-; CHECK-NEXT: [[NEEDLE_NEXT_VEC]] = getelementptr i8, ptr [[PNEEDLE]], i64 16
-; CHECK-NEXT: [[TMP4:%.*]] = icmp ult ptr [[NEEDLE_NEXT_VEC]], [[NEEDLE_END]]
-; CHECK-NEXT: br i1 [[TMP4]], label %[[MATCH_CHECK_VEC]], label %[[SEARCH_CHECK_VEC]]
; CHECK: [[SEARCH_CHECK_VEC]]:
; CHECK-NEXT: [[SEARCH_NEXT_VEC]] = getelementptr i8, ptr [[PSEARCH]], i64 16
; CHECK-NEXT: [[TMP5:%.*]] = icmp ult ptr [[SEARCH_NEXT_VEC]], [[SEARCH_END]]
@@ -666,9 +678,10 @@ define ptr @ensure_not_found_successors_fixed2(ptr %search_start, ptr %search_en
; CHECK-NEXT: [[SEARCH_PRED:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[TMP1]], i64 [[SEARCH_END_INT]])
; CHECK-NEXT: [[SEARCH_MASKED:%.*]] = and <vscale x 16 x i1> [[TMP0]], [[SEARCH_PRED]]
; CHECK-NEXT: [[SEARCH_LOAD_VEC:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr align 1 [[PSEARCH]], <vscale x 16 x i1> [[SEARCH_MASKED]], <vscale x 16 x i8> zeroinitializer)
-; CHECK-NEXT: br label %[[MATCH_CHECK_VEC:.*]]
-; CHECK: [[MATCH_CHECK_VEC]]:
-; CHECK-NEXT: [[PNEEDLE:%.*]] = phi ptr [ [[NEEDLE_START]], %[[FIND_FIRST_VEC_HEADER]] ], [ [[NEEDLE_NEXT_VEC:%.*]], %[[NEEDLE_CHECK_VEC:.*]] ]
+; CHECK-NEXT: br label %[[NEEDLE_CHECK_VEC:.*]]
+; CHECK: [[NEEDLE_CHECK_VEC]]:
+; CHECK-NEXT: [[PNEEDLE:%.*]] = phi ptr [ [[NEEDLE_START]], %[[FIND_FIRST_VEC_HEADER]] ], [ [[NEEDLE_NEXT_VEC:%.*]], %[[NEEDLE_CHECK_VEC]] ]
+; CHECK-NEXT: [[PMATCH:%.*]] = phi <vscale x 16 x i1> [ zeroinitializer, %[[FIND_FIRST_VEC_HEADER]] ], [ [[MATCH_ACCUMULATOR:%.*]], %[[NEEDLE_CHECK_VEC]] ]
; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[PNEEDLE]] to i64
; CHECK-NEXT: [[NEEDLE_PRED:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[TMP2]], i64 [[NEEDLE_END_INT]])
; CHECK-NEXT: [[NEEDLE_MASKED:%.*]] = and <vscale x 16 x i1> [[TMP0]], [[NEEDLE_PRED]]
@@ -678,19 +691,21 @@ define ptr @ensure_not_found_successors_fixed2(ptr %search_start, ptr %search_en
; CHECK-NEXT: [[NEEDLE0_SPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[NEEDLE0_SPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
; CHECK-NEXT: [[NEEDLE_SPLAT:%.*]] = select <vscale x 16 x i1> [[NEEDLE_MASKED]], <vscale x 16 x i8> [[NEEDLE_LOAD_VEC]], <vscale x 16 x i8> [[NEEDLE0_SPLAT]]
; CHECK-NEXT: [[NEEDLE_VEC:%.*]] = call <16 x i8> @llvm.vector.extract.v16i8.nxv16i8(<vscale x 16 x i8> [[NEEDLE_SPLAT]], i64 0)
-; CHECK-NEXT: [[MATCH_PRED:%.*]] = call <vscale x 16 x i1> @llvm.experimental.vector.match.nxv16i8.v16i8(<vscale x 16 x i8> [[SEARCH_LOAD_VEC]], <16 x i8> [[NEEDLE_VEC]], <vscale x 16 x i1> [[SEARCH_MASKED]])
-; CHECK-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.or.nxv16i1(<vscale x 16 x i1> [[MATCH_PRED]])
-; CHECK-NEXT: br i1 [[TMP3]], label %[[CALCULATE_MATCH:.*]], label %[[NEEDLE_CHECK_VEC]]
+; CHECK-NEXT: [[MATCH_SEGMENT:%.*]] = call <vscale x 16 x i1> @llvm.experimental.vector.match.nxv16i8.v16i8(<vscale x 16 x i8> [[SEARCH_LOAD_VEC]], <16 x i8> [[NEEDLE_VEC]], <vscale x 16 x i1> [[SEARCH_MASKED]])
+; CHECK-NEXT: [[MATCH_ACCUMULATOR]] = or <vscale x 16 x i1> [[PMATCH]], [[MATCH_SEGMENT]]
+; CHECK-NEXT: [[NEEDLE_NEXT_VEC]] = getelementptr i8, ptr [[PNEEDLE]], i64 16
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ult ptr [[NEEDLE_NEXT_VEC]], [[NEEDLE_END]]
+; CHECK-NEXT: br i1 [[TMP3]], label %[[NEEDLE_CHECK_VEC]], label %[[MATCH_CHECK_VEC:.*]]
+; CHECK: [[MATCH_CHECK_VEC]]:
+; CHECK-NEXT: [[MATCH_PRED:%.*]] = phi <vscale x 16 x i1> [ [[MATCH_ACCUMULATOR]], %[[NEEDLE_CHECK_VEC]] ]
+; CHECK-NEXT: [[TMP4:%.*]] = call i1 @llvm.vector.reduce.or.nxv16i1(<vscale x 16 x i1> [[MATCH_PRED]])
+; CHECK-NEXT: br i1 [[TMP4]], label %[[CALCULATE_MATCH:.*]], label %[[SEARCH_CHECK_VEC]]
; CHECK: [[CALCULATE_MATCH]]:
; CHECK-NEXT: [[MATCH_START:%.*]] = phi ptr [ [[PSEARCH]], %[[MATCH_CHECK_VEC]] ]
; CHECK-NEXT: [[MATCH_VEC:%.*]] = phi <vscale x 16 x i1> [ [[MATCH_PRED]], %[[MATCH_CHECK_VEC]] ]
; CHECK-NEXT: [[MATCH_IDX:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1(<vscale x 16 x i1> [[MATCH_VEC]], i1 true)
; CHECK-NEXT: [[MATCH_RES:%.*]] = getelementptr i8, ptr [[MATCH_START]], i64 [[MATCH_IDX]]
; CHECK-NEXT: br label %[[FOUND_MATCH:.*]]
-; CHECK: [[NEEDLE_CHECK_VEC]]:
-; CHECK-NEXT: [[NEEDLE_NEXT_VEC]] = getelementptr i8, ptr [[PNEEDLE]], i64 16
-; CHECK-NEXT: [[TMP4:%.*]] = icmp ult ptr [[NEEDLE_NEXT_VEC]], [[NEEDLE_END]]
-; CHECK-NEXT: br i1 [[TMP4]], label %[[MATCH_CHECK_VEC]], label %[[SEARCH_CHECK_VEC]]
; CHECK: [[SEARCH_CHECK_VEC]]:
; CHECK-NEXT: [[SEARCH_NEXT_VEC]] = getelementptr i8, ptr [[PSEARCH]], i64 16
; CHECK-NEXT: [[TMP5:%.*]] = icmp ult ptr [[SEARCH_NEXT_VEC]], [[SEARCH_END]]
More information about the llvm-commits
mailing list