[llvm] [LoopIdiomVectorize] Preserve address space in FindFirstByte (PR #185226)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 10 18:47:14 PDT 2026
https://github.com/sweiglbosker updated https://github.com/llvm/llvm-project/pull/185226
>From c1f40d47d9346b5185caf3c691f8ced0795da3e5 Mon Sep 17 00:00:00 2001
From: Stefan Weigl-Bosker <stefan at s00.xyz>
Date: Sat, 7 Mar 2026 15:05:56 -0500
Subject: [PATCH 1/2] [LoopIdiomVectorize] Preserve address space in
FindFirstByte
---
.../Vectorize/LoopIdiomVectorize.cpp | 2 +-
.../LoopIdiom/AArch64/find-first-byte.ll | 39 +++++++++++++++++++
2 files changed, 40 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopIdiomVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopIdiomVectorize.cpp
index cca7a3f5ef007..02e092beecb4d 100644
--- a/llvm/lib/Transforms/Vectorize/LoopIdiomVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopIdiomVectorize.cpp
@@ -1206,7 +1206,7 @@ Value *LoopIdiomVectorize::expandFindFirstByte(
Value *SearchStart, Value *SearchEnd, Value *NeedleStart,
Value *NeedleEnd) {
// Set up some types and constants that we intend to reuse.
- auto *PtrTy = Builder.getPtrTy();
+ auto *PtrTy = SearchStart->getType();
auto *I64Ty = Builder.getInt64Ty();
auto *PredVTy = ScalableVectorType::get(Builder.getInt1Ty(), VF);
auto *CharVTy = ScalableVectorType::get(CharTy, VF);
diff --git a/llvm/test/Transforms/LoopIdiom/AArch64/find-first-byte.ll b/llvm/test/Transforms/LoopIdiom/AArch64/find-first-byte.ll
index 0ad9f1dc4c859..e91d24dc9556d 100644
--- a/llvm/test/Transforms/LoopIdiom/AArch64/find-first-byte.ll
+++ b/llvm/test/Transforms/LoopIdiom/AArch64/find-first-byte.ll
@@ -974,6 +974,45 @@ exit:
ret ptr %res
}
+; Same as @find_first_of_i8 but with non-zero address space pointers.
+; This verifies that the vectorized loop correctly preserves address spaces.
+define ptr addrspace(1) @find_first_of_i8_addrspace(ptr addrspace(1) %search_start, ptr addrspace(1) %search_end, ptr addrspace(1) %needle_start, ptr addrspace(1) %needle_end) #0 {
+; CHECK-LABEL: define ptr addrspace(1) @find_first_of_i8_addrspace(
+; CHECK: @llvm.experimental.vector.match
+; CHECK: ret ptr addrspace(1)
+;
+entry:
+ %search_test = icmp eq ptr addrspace(1) %search_start, %search_end
+ %needle_test = icmp eq ptr addrspace(1) %needle_start, %needle_end
+ %combined_test = or i1 %search_test, %needle_test
+ br i1 %combined_test, label %exit, label %header
+
+header:
+ %search_ptr = phi ptr addrspace(1) [ %search_next, %search_check ], [ %search_start, %entry ]
+ %search_load = load i8, ptr addrspace(1) %search_ptr, align 1
+ br label %match_check
+
+needle_check:
+ %needle_next = getelementptr inbounds i8, ptr addrspace(1) %needle_ptr, i64 1
+ %needle_cmp = icmp eq ptr addrspace(1) %needle_next, %needle_end
+ br i1 %needle_cmp, label %search_check, label %match_check
+
+match_check:
+ %needle_ptr = phi ptr addrspace(1) [ %needle_start, %header ], [ %needle_next, %needle_check ]
+ %needle_load = load i8, ptr addrspace(1) %needle_ptr, align 1
+ %match_cmp = icmp eq i8 %search_load, %needle_load
+ br i1 %match_cmp, label %exit, label %needle_check
+
+search_check:
+ %search_next = getelementptr inbounds i8, ptr addrspace(1) %search_ptr, i64 1
+ %search_cmp = icmp eq ptr addrspace(1) %search_next, %search_end
+ br i1 %search_cmp, label %exit, label %header
+
+exit:
+ %res = phi ptr addrspace(1) [ %search_end, %entry ], [ %search_ptr, %match_check ], [ %search_end, %search_check ]
+ ret ptr addrspace(1) %res
+}
+
attributes #0 = { "target-features"="+sve2" }
;.
; CHECK: [[PROF0]] = !{!"branch_weights", i32 10, i32 90}
>From 25ecfaae49c06aee596f815a5f483929c0fd148e Mon Sep 17 00:00:00 2001
From: Stefan Weigl-Bosker <stefan at s00.xyz>
Date: Tue, 10 Mar 2026 21:47:01 -0400
Subject: [PATCH 2/2] handle mixing address spaces, move test
---
.../Vectorize/LoopIdiomVectorize.cpp | 4 +-
.../LoopIdiom/AArch64/find-first-byte.ll | 198 ++++++++++++++----
2 files changed, 162 insertions(+), 40 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/LoopIdiomVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopIdiomVectorize.cpp
index 02e092beecb4d..0666380c6cebb 100644
--- a/llvm/lib/Transforms/Vectorize/LoopIdiomVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopIdiomVectorize.cpp
@@ -1207,6 +1207,8 @@ Value *LoopIdiomVectorize::expandFindFirstByte(
Value *NeedleEnd) {
// Set up some types and constants that we intend to reuse.
auto *PtrTy = SearchStart->getType();
+ auto *NeedlePtrTy = NeedleStart->getType();
+
auto *I64Ty = Builder.getInt64Ty();
auto *PredVTy = ScalableVectorType::get(Builder.getInt1Ty(), VF);
auto *CharVTy = ScalableVectorType::get(CharTy, VF);
@@ -1335,7 +1337,7 @@ Value *LoopIdiomVectorize::expandFindFirstByte(
// (2) Inner loop.
Builder.SetInsertPoint(BB2);
- PHINode *Needle = Builder.CreatePHI(PtrTy, 2, "pneedle");
+ PHINode *Needle = Builder.CreatePHI(NeedlePtrTy, 2, "pneedle");
PHINode *Match = Builder.CreatePHI(PredVTy, 2, "pmatch");
// (2.a) Load the needle array.
diff --git a/llvm/test/Transforms/LoopIdiom/AArch64/find-first-byte.ll b/llvm/test/Transforms/LoopIdiom/AArch64/find-first-byte.ll
index e91d24dc9556d..f9956ebc9eba6 100644
--- a/llvm/test/Transforms/LoopIdiom/AArch64/find-first-byte.ll
+++ b/llvm/test/Transforms/LoopIdiom/AArch64/find-first-byte.ll
@@ -798,6 +798,165 @@ not_found:
ret ptr null
}
+; Same as @find_first_of_i8, but with the search and needle pointers in different address spaces.
+; This verifies that the vectorized loop preserves the address space of each pointer.
+define ptr addrspace(1) @find_first_of_i8_addrspace(ptr addrspace(1) %search_start, ptr addrspace(1) %search_end, ptr addrspace(2) %needle_start, ptr addrspace(2) %needle_end) #0 {
+; CHECK-LABEL: define ptr addrspace(1) @find_first_of_i8_addrspace(
+; CHECK-SAME: ptr addrspace(1) [[SEARCH_START:%.*]], ptr addrspace(1) [[SEARCH_END:%.*]], ptr addrspace(2) [[NEEDLE_START:%.*]], ptr addrspace(2) [[NEEDLE_END:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[SEARCH_TEST:%.*]] = icmp eq ptr addrspace(1) [[SEARCH_START]], [[SEARCH_END]]
+; CHECK-NEXT: [[NEEDLE_TEST:%.*]] = icmp eq ptr addrspace(2) [[NEEDLE_START]], [[NEEDLE_END]]
+; CHECK-NEXT: [[COMBINED_TEST:%.*]] = or i1 [[SEARCH_TEST]], [[NEEDLE_TEST]]
+; CHECK-NEXT: br i1 [[COMBINED_TEST]], label %[[EXIT:.*]], label %[[HEADER_PREHEADER:.*]]
+; CHECK: [[HEADER_PREHEADER]]:
+; CHECK-NEXT: br label %[[MEM_CHECK:.*]]
+; CHECK: [[MEM_CHECK]]:
+; CHECK-NEXT: [[SEARCH_START_INT:%.*]] = ptrtoint ptr addrspace(1) [[SEARCH_START]] to i64
+; CHECK-NEXT: [[SEARCH_END_INT:%.*]] = ptrtoint ptr addrspace(1) [[SEARCH_END]] to i64
+; CHECK-NEXT: [[NEEDLE_START_INT:%.*]] = ptrtoint ptr addrspace(2) [[NEEDLE_START]] to i64
+; CHECK-NEXT: [[NEEDLE_END_INT:%.*]] = ptrtoint ptr addrspace(2) [[NEEDLE_END]] to i64
+; CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 0, i64 16)
+; CHECK-NEXT: [[SEARCH_START_PAGE:%.*]] = lshr i64 [[SEARCH_START_INT]], 12
+; CHECK-NEXT: [[SEARCH_END_PAGE:%.*]] = lshr i64 [[SEARCH_END_INT]], 12
+; CHECK-NEXT: [[NEEDLE_START_PAGE:%.*]] = lshr i64 [[NEEDLE_START_INT]], 12
+; CHECK-NEXT: [[NEEDLE_END_PAGE:%.*]] = lshr i64 [[NEEDLE_END_INT]], 12
+; CHECK-NEXT: [[SEARCH_PAGE_CMP:%.*]] = icmp ne i64 [[SEARCH_START_PAGE]], [[SEARCH_END_PAGE]]
+; CHECK-NEXT: [[NEEDLE_PAGE_CMP:%.*]] = icmp ne i64 [[NEEDLE_START_PAGE]], [[NEEDLE_END_PAGE]]
+; CHECK-NEXT: [[COMBINED_PAGE_CMP:%.*]] = or i1 [[SEARCH_PAGE_CMP]], [[NEEDLE_PAGE_CMP]]
+; CHECK-NEXT: br i1 [[COMBINED_PAGE_CMP]], label %[[SCALAR_PREHEADER:.*]], label %[[FIND_FIRST_VEC_HEADER:.*]], !prof [[PROF0]]
+; CHECK: [[FIND_FIRST_VEC_HEADER]]:
+; CHECK-NEXT: [[PSEARCH:%.*]] = phi ptr addrspace(1) [ [[SEARCH_START]], %[[MEM_CHECK]] ], [ [[SEARCH_NEXT_VEC:%.*]], %[[SEARCH_CHECK_VEC:.*]] ]
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr addrspace(1) [[PSEARCH]] to i64
+; CHECK-NEXT: [[SEARCH_PRED:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[TMP1]], i64 [[SEARCH_END_INT]])
+; CHECK-NEXT: [[SEARCH_MASKED:%.*]] = and <vscale x 16 x i1> [[TMP0]], [[SEARCH_PRED]]
+; CHECK-NEXT: [[SEARCH_LOAD_VEC:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p1(ptr addrspace(1) align 1 [[PSEARCH]], <vscale x 16 x i1> [[SEARCH_MASKED]], <vscale x 16 x i8> zeroinitializer)
+; CHECK-NEXT: br label %[[NEEDLE_CHECK_VEC:.*]]
+; CHECK: [[NEEDLE_CHECK_VEC]]:
+; CHECK-NEXT: [[PNEEDLE:%.*]] = phi ptr addrspace(2) [ [[NEEDLE_START]], %[[FIND_FIRST_VEC_HEADER]] ], [ [[NEEDLE_NEXT_VEC:%.*]], %[[NEEDLE_CHECK_VEC]] ]
+; CHECK-NEXT: [[PMATCH:%.*]] = phi <vscale x 16 x i1> [ zeroinitializer, %[[FIND_FIRST_VEC_HEADER]] ], [ [[MATCH_ACCUMULATOR:%.*]], %[[NEEDLE_CHECK_VEC]] ]
+; CHECK-NEXT: [[TMP2:%.*]] = ptrtoint ptr addrspace(2) [[PNEEDLE]] to i64
+; CHECK-NEXT: [[NEEDLE_PRED:%.*]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[TMP2]], i64 [[NEEDLE_END_INT]])
+; CHECK-NEXT: [[NEEDLE_MASKED:%.*]] = and <vscale x 16 x i1> [[TMP0]], [[NEEDLE_PRED]]
+; CHECK-NEXT: [[NEEDLE_LOAD_VEC:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p2(ptr addrspace(2) align 1 [[PNEEDLE]], <vscale x 16 x i1> [[NEEDLE_MASKED]], <vscale x 16 x i8> zeroinitializer)
+; CHECK-NEXT: [[NEEDLE0:%.*]] = extractelement <vscale x 16 x i8> [[NEEDLE_LOAD_VEC]], i64 0
+; CHECK-NEXT: [[NEEDLE0_SPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[NEEDLE0]], i64 0
+; CHECK-NEXT: [[NEEDLE0_SPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[NEEDLE0_SPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
+; CHECK-NEXT: [[NEEDLE_SPLAT:%.*]] = select <vscale x 16 x i1> [[NEEDLE_MASKED]], <vscale x 16 x i8> [[NEEDLE_LOAD_VEC]], <vscale x 16 x i8> [[NEEDLE0_SPLAT]]
+; CHECK-NEXT: [[NEEDLE_VEC:%.*]] = call <16 x i8> @llvm.vector.extract.v16i8.nxv16i8(<vscale x 16 x i8> [[NEEDLE_SPLAT]], i64 0)
+; CHECK-NEXT: [[MATCH_SEGMENT:%.*]] = call <vscale x 16 x i1> @llvm.experimental.vector.match.nxv16i8.v16i8(<vscale x 16 x i8> [[SEARCH_LOAD_VEC]], <16 x i8> [[NEEDLE_VEC]], <vscale x 16 x i1> [[SEARCH_MASKED]])
+; CHECK-NEXT: [[MATCH_ACCUMULATOR]] = or <vscale x 16 x i1> [[PMATCH]], [[MATCH_SEGMENT]]
+; CHECK-NEXT: [[NEEDLE_NEXT_VEC]] = getelementptr i8, ptr addrspace(2) [[PNEEDLE]], i64 16
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ult ptr addrspace(2) [[NEEDLE_NEXT_VEC]], [[NEEDLE_END]]
+; CHECK-NEXT: br i1 [[TMP3]], label %[[NEEDLE_CHECK_VEC]], label %[[MATCH_CHECK_VEC:.*]]
+; CHECK: [[MATCH_CHECK_VEC]]:
+; CHECK-NEXT: [[MATCH_PRED:%.*]] = phi <vscale x 16 x i1> [ [[MATCH_ACCUMULATOR]], %[[NEEDLE_CHECK_VEC]] ]
+; CHECK-NEXT: [[TMP4:%.*]] = call i1 @llvm.vector.reduce.or.nxv16i1(<vscale x 16 x i1> [[MATCH_PRED]])
+; CHECK-NEXT: br i1 [[TMP4]], label %[[CALCULATE_MATCH:.*]], label %[[SEARCH_CHECK_VEC]]
+; CHECK: [[CALCULATE_MATCH]]:
+; CHECK-NEXT: [[MATCH_START:%.*]] = phi ptr addrspace(1) [ [[PSEARCH]], %[[MATCH_CHECK_VEC]] ]
+; CHECK-NEXT: [[MATCH_VEC:%.*]] = phi <vscale x 16 x i1> [ [[MATCH_PRED]], %[[MATCH_CHECK_VEC]] ]
+; CHECK-NEXT: [[MATCH_IDX:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1(<vscale x 16 x i1> [[MATCH_VEC]], i1 true)
+; CHECK-NEXT: [[MATCH_RES:%.*]] = getelementptr i8, ptr addrspace(1) [[MATCH_START]], i64 [[MATCH_IDX]]
+; CHECK-NEXT: br label %[[EXIT_LOOPEXIT:.*]]
+; CHECK: [[SEARCH_CHECK_VEC]]:
+; CHECK-NEXT: [[SEARCH_NEXT_VEC]] = getelementptr i8, ptr addrspace(1) [[PSEARCH]], i64 16
+; CHECK-NEXT: [[TMP5:%.*]] = icmp ult ptr addrspace(1) [[SEARCH_NEXT_VEC]], [[SEARCH_END]]
+; CHECK-NEXT: br i1 [[TMP5]], label %[[FIND_FIRST_VEC_HEADER]], label %[[EXIT_LOOPEXIT1:.*]]
+; CHECK: [[SCALAR_PREHEADER]]:
+; CHECK-NEXT: br label %[[HEADER:.*]]
+; CHECK: [[HEADER]]:
+; CHECK-NEXT: [[SEARCH_PTR:%.*]] = phi ptr addrspace(1) [ [[SEARCH_NEXT:%.*]], %[[SEARCH_CHECK:.*]] ], [ [[SEARCH_START]], %[[SCALAR_PREHEADER]] ]
+; CHECK-NEXT: [[SEARCH_LOAD:%.*]] = load i8, ptr addrspace(1) [[SEARCH_PTR]], align 1
+; CHECK-NEXT: br label %[[MATCH_CHECK:.*]]
+; CHECK: [[NEEDLE_CHECK:.*]]:
+; CHECK-NEXT: [[NEEDLE_NEXT:%.*]] = getelementptr inbounds i8, ptr addrspace(2) [[NEEDLE_PTR:%.*]], i64 1
+; CHECK-NEXT: [[NEEDLE_CMP:%.*]] = icmp eq ptr addrspace(2) [[NEEDLE_NEXT]], [[NEEDLE_END]]
+; CHECK-NEXT: br i1 [[NEEDLE_CMP]], label %[[SEARCH_CHECK]], label %[[MATCH_CHECK]]
+; CHECK: [[MATCH_CHECK]]:
+; CHECK-NEXT: [[NEEDLE_PTR]] = phi ptr addrspace(2) [ [[NEEDLE_START]], %[[HEADER]] ], [ [[NEEDLE_NEXT]], %[[NEEDLE_CHECK]] ]
+; CHECK-NEXT: [[NEEDLE_LOAD:%.*]] = load i8, ptr addrspace(2) [[NEEDLE_PTR]], align 1
+; CHECK-NEXT: [[MATCH_CMP:%.*]] = icmp eq i8 [[SEARCH_LOAD]], [[NEEDLE_LOAD]]
+; CHECK-NEXT: br i1 [[MATCH_CMP]], label %[[EXIT_LOOPEXIT]], label %[[NEEDLE_CHECK]]
+; CHECK: [[SEARCH_CHECK]]:
+; CHECK-NEXT: [[SEARCH_NEXT]] = getelementptr inbounds i8, ptr addrspace(1) [[SEARCH_PTR]], i64 1
+; CHECK-NEXT: [[SEARCH_CMP:%.*]] = icmp eq ptr addrspace(1) [[SEARCH_NEXT]], [[SEARCH_END]]
+; CHECK-NEXT: br i1 [[SEARCH_CMP]], label %[[EXIT_LOOPEXIT1]], label %[[HEADER]]
+; CHECK: [[EXIT_LOOPEXIT]]:
+; CHECK-NEXT: [[SEARCH_PTR_LCSSA:%.*]] = phi ptr addrspace(1) [ [[SEARCH_PTR]], %[[MATCH_CHECK]] ], [ [[MATCH_RES]], %[[CALCULATE_MATCH]] ]
+; CHECK-NEXT: br label %[[EXIT]]
+; CHECK: [[EXIT_LOOPEXIT1]]:
+; CHECK-NEXT: br label %[[EXIT]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: [[RES:%.*]] = phi ptr addrspace(1) [ [[SEARCH_END]], %[[ENTRY]] ], [ [[SEARCH_PTR_LCSSA]], %[[EXIT_LOOPEXIT]] ], [ [[SEARCH_END]], %[[EXIT_LOOPEXIT1]] ]
+; CHECK-NEXT: ret ptr addrspace(1) [[RES]]
+;
+; DISABLE-LABEL: define ptr addrspace(1) @find_first_of_i8_addrspace(
+; DISABLE-SAME: ptr addrspace(1) [[SEARCH_START:%.*]], ptr addrspace(1) [[SEARCH_END:%.*]], ptr addrspace(2) [[NEEDLE_START:%.*]], ptr addrspace(2) [[NEEDLE_END:%.*]]) #[[ATTR0]] {
+; DISABLE-NEXT: [[ENTRY:.*]]:
+; DISABLE-NEXT: [[SEARCH_TEST:%.*]] = icmp eq ptr addrspace(1) [[SEARCH_START]], [[SEARCH_END]]
+; DISABLE-NEXT: [[NEEDLE_TEST:%.*]] = icmp eq ptr addrspace(2) [[NEEDLE_START]], [[NEEDLE_END]]
+; DISABLE-NEXT: [[COMBINED_TEST:%.*]] = or i1 [[SEARCH_TEST]], [[NEEDLE_TEST]]
+; DISABLE-NEXT: br i1 [[COMBINED_TEST]], label %[[EXIT:.*]], label %[[HEADER_PREHEADER:.*]]
+; DISABLE: [[HEADER_PREHEADER]]:
+; DISABLE-NEXT: br label %[[HEADER:.*]]
+; DISABLE: [[HEADER]]:
+; DISABLE-NEXT: [[SEARCH_PTR:%.*]] = phi ptr addrspace(1) [ [[SEARCH_NEXT:%.*]], %[[SEARCH_CHECK:.*]] ], [ [[SEARCH_START]], %[[HEADER_PREHEADER]] ]
+; DISABLE-NEXT: [[SEARCH_LOAD:%.*]] = load i8, ptr addrspace(1) [[SEARCH_PTR]], align 1
+; DISABLE-NEXT: br label %[[MATCH_CHECK:.*]]
+; DISABLE: [[NEEDLE_CHECK:.*]]:
+; DISABLE-NEXT: [[NEEDLE_NEXT:%.*]] = getelementptr inbounds i8, ptr addrspace(2) [[NEEDLE_PTR:%.*]], i64 1
+; DISABLE-NEXT: [[NEEDLE_CMP:%.*]] = icmp eq ptr addrspace(2) [[NEEDLE_NEXT]], [[NEEDLE_END]]
+; DISABLE-NEXT: br i1 [[NEEDLE_CMP]], label %[[SEARCH_CHECK]], label %[[MATCH_CHECK]]
+; DISABLE: [[MATCH_CHECK]]:
+; DISABLE-NEXT: [[NEEDLE_PTR]] = phi ptr addrspace(2) [ [[NEEDLE_START]], %[[HEADER]] ], [ [[NEEDLE_NEXT]], %[[NEEDLE_CHECK]] ]
+; DISABLE-NEXT: [[NEEDLE_LOAD:%.*]] = load i8, ptr addrspace(2) [[NEEDLE_PTR]], align 1
+; DISABLE-NEXT: [[MATCH_CMP:%.*]] = icmp eq i8 [[SEARCH_LOAD]], [[NEEDLE_LOAD]]
+; DISABLE-NEXT: br i1 [[MATCH_CMP]], label %[[EXIT_LOOPEXIT:.*]], label %[[NEEDLE_CHECK]]
+; DISABLE: [[SEARCH_CHECK]]:
+; DISABLE-NEXT: [[SEARCH_NEXT]] = getelementptr inbounds i8, ptr addrspace(1) [[SEARCH_PTR]], i64 1
+; DISABLE-NEXT: [[SEARCH_CMP:%.*]] = icmp eq ptr addrspace(1) [[SEARCH_NEXT]], [[SEARCH_END]]
+; DISABLE-NEXT: br i1 [[SEARCH_CMP]], label %[[EXIT_LOOPEXIT1:.*]], label %[[HEADER]]
+; DISABLE: [[EXIT_LOOPEXIT]]:
+; DISABLE-NEXT: [[SEARCH_PTR_LCSSA:%.*]] = phi ptr addrspace(1) [ [[SEARCH_PTR]], %[[MATCH_CHECK]] ]
+; DISABLE-NEXT: br label %[[EXIT]]
+; DISABLE: [[EXIT_LOOPEXIT1]]:
+; DISABLE-NEXT: br label %[[EXIT]]
+; DISABLE: [[EXIT]]:
+; DISABLE-NEXT: [[RES:%.*]] = phi ptr addrspace(1) [ [[SEARCH_END]], %[[ENTRY]] ], [ [[SEARCH_PTR_LCSSA]], %[[EXIT_LOOPEXIT]] ], [ [[SEARCH_END]], %[[EXIT_LOOPEXIT1]] ]
+; DISABLE-NEXT: ret ptr addrspace(1) [[RES]]
+;
+entry:
+ %search_test = icmp eq ptr addrspace(1) %search_start, %search_end
+ %needle_test = icmp eq ptr addrspace(2) %needle_start, %needle_end
+ %combined_test = or i1 %search_test, %needle_test
+ br i1 %combined_test, label %exit, label %header
+
+header:
+ %search_ptr = phi ptr addrspace(1) [ %search_next, %search_check ], [ %search_start, %entry ]
+ %search_load = load i8, ptr addrspace(1) %search_ptr, align 1
+ br label %match_check
+
+needle_check:
+ %needle_next = getelementptr inbounds i8, ptr addrspace(2) %needle_ptr, i64 1
+ %needle_cmp = icmp eq ptr addrspace(2) %needle_next, %needle_end
+ br i1 %needle_cmp, label %search_check, label %match_check
+
+match_check:
+ %needle_ptr = phi ptr addrspace(2) [ %needle_start, %header ], [ %needle_next, %needle_check ]
+ %needle_load = load i8, ptr addrspace(2) %needle_ptr, align 1
+ %match_cmp = icmp eq i8 %search_load, %needle_load
+ br i1 %match_cmp, label %exit, label %needle_check
+
+search_check:
+ %search_next = getelementptr inbounds i8, ptr addrspace(1) %search_ptr, i64 1
+ %search_cmp = icmp eq ptr addrspace(1) %search_next, %search_end
+ br i1 %search_cmp, label %exit, label %header
+
+exit:
+ %res = phi ptr addrspace(1) [ %search_end, %entry ], [ %search_ptr, %match_check ], [ %search_end, %search_check ]
+ ret ptr addrspace(1) %res
+}
+
; From here on we only test for the presence/absence of the intrinsic.
; UTC_ARGS: --disable
@@ -974,45 +1133,6 @@ exit:
ret ptr %res
}
-; Same as @find_first_of_i8 but with non-zero address space pointers.
-; This verifies that the vectorized loop correctly preserves address spaces.
-define ptr addrspace(1) @find_first_of_i8_addrspace(ptr addrspace(1) %search_start, ptr addrspace(1) %search_end, ptr addrspace(1) %needle_start, ptr addrspace(1) %needle_end) #0 {
-; CHECK-LABEL: define ptr addrspace(1) @find_first_of_i8_addrspace(
-; CHECK: @llvm.experimental.vector.match
-; CHECK: ret ptr addrspace(1)
-;
-entry:
- %search_test = icmp eq ptr addrspace(1) %search_start, %search_end
- %needle_test = icmp eq ptr addrspace(1) %needle_start, %needle_end
- %combined_test = or i1 %search_test, %needle_test
- br i1 %combined_test, label %exit, label %header
-
-header:
- %search_ptr = phi ptr addrspace(1) [ %search_next, %search_check ], [ %search_start, %entry ]
- %search_load = load i8, ptr addrspace(1) %search_ptr, align 1
- br label %match_check
-
-needle_check:
- %needle_next = getelementptr inbounds i8, ptr addrspace(1) %needle_ptr, i64 1
- %needle_cmp = icmp eq ptr addrspace(1) %needle_next, %needle_end
- br i1 %needle_cmp, label %search_check, label %match_check
-
-match_check:
- %needle_ptr = phi ptr addrspace(1) [ %needle_start, %header ], [ %needle_next, %needle_check ]
- %needle_load = load i8, ptr addrspace(1) %needle_ptr, align 1
- %match_cmp = icmp eq i8 %search_load, %needle_load
- br i1 %match_cmp, label %exit, label %needle_check
-
-search_check:
- %search_next = getelementptr inbounds i8, ptr addrspace(1) %search_ptr, i64 1
- %search_cmp = icmp eq ptr addrspace(1) %search_next, %search_end
- br i1 %search_cmp, label %exit, label %header
-
-exit:
- %res = phi ptr addrspace(1) [ %search_end, %entry ], [ %search_ptr, %match_check ], [ %search_end, %search_check ]
- ret ptr addrspace(1) %res
-}
-
attributes #0 = { "target-features"="+sve2" }
;.
; CHECK: [[PROF0]] = !{!"branch_weights", i32 10, i32 90}
More information about the llvm-commits mailing list