[llvm] 40b7034 - [LV] Add tests for vector backedge elimination with early-exit loops.
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Sun Mar 16 12:44:41 PDT 2025
Author: Florian Hahn
Date: 2025-03-16T19:42:30Z
New Revision: 40b703421377fbc0f72a7f028d1d56a057d8448e
URL: https://github.com/llvm/llvm-project/commit/40b703421377fbc0f72a7f028d1d56a057d8448e
DIFF: https://github.com/llvm/llvm-project/commit/40b703421377fbc0f72a7f028d1d56a057d8448e.diff
LOG: [LV] Add tests for vector backedge elimination with early-exit loops.
Added:
llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-early-exit.ll
Modified:
Removed:
################################################################################
diff --git a/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-early-exit.ll b/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-early-exit.ll
new file mode 100644
index 0000000000000..51458a7bb80b6
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-early-exit.ll
@@ -0,0 +1,317 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 5
+; RUN: opt -passes='loop-vectorize,verify<loops>' -force-vector-width=8 -force-vector-interleave=1 -enable-early-exit-vectorization -S %s | FileCheck --check-prefixes=VF8UF1 %s
+; RUN: opt -passes='loop-vectorize,verify<loops>' -force-vector-width=8 -force-vector-interleave=2 -enable-early-exit-vectorization -S %s | FileCheck --check-prefixes=VF8UF2 %s
+; RUN: opt -passes='loop-vectorize,verify<loops>' -force-vector-width=16 -force-vector-interleave=1 -enable-early-exit-vectorization -S %s | FileCheck --check-prefixes=VF16UF1 %s
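+
+; The three RUN configurations cover VF x IC = 8 (two vector iterations for a
+; trip count of 16) as well as VF x IC = 16 (a single vector iteration),
+; reaching 16 both via a wider VF and via a higher interleave count.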
+
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+
+; Check whether the vector loop's exit condition can be simplified to true
+; for a given VF/IC combination, which would allow removing the vector
+; backedge.
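+; The loop below executes at most 16 iterations, and the assume makes the
+; first 32 bytes of %A dereferenceable, so the wide loads are safe to
+; speculate past the early exit. With VF x IC = 16, the vector loop covers
+; the whole trip count in a single iteration, so its latch condition
+; (%index.next == 16) is always true and is a candidate for simplification.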
+define i8 @test_early_exit_max_tc_less_than_16(ptr %A, i64 %N) nosync nofree {
+; VF8UF1-LABEL: define i8 @test_early_exit_max_tc_less_than_16(
+; VF8UF1-SAME: ptr [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
+; VF8UF1-NEXT: [[ENTRY:.*]]:
+; VF8UF1-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(ptr [[A]], i64 32) ]
+; VF8UF1-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; VF8UF1: [[VECTOR_PH]]:
+; VF8UF1-NEXT: br label %[[VECTOR_BODY:.*]]
+; VF8UF1: [[VECTOR_BODY]]:
+; VF8UF1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; VF8UF1-NEXT: [[IV:%.*]] = add i64 [[INDEX]], 0
+; VF8UF1-NEXT: [[P_SRC:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV]]
+; VF8UF1-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[P_SRC]], i32 0
+; VF8UF1-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[TMP2]], align 1
+; VF8UF1-NEXT: [[TMP3:%.*]] = icmp eq <8 x i8> [[WIDE_LOAD]], zeroinitializer
+; VF8UF1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
+; VF8UF1-NEXT: [[TMP4:%.*]] = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> [[TMP3]])
+; VF8UF1-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
+; VF8UF1-NEXT: [[TMP6:%.*]] = or i1 [[TMP4]], [[TMP5]]
+; VF8UF1-NEXT: br i1 [[TMP6]], label %[[MIDDLE_SPLIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; VF8UF1: [[MIDDLE_SPLIT]]:
+; VF8UF1-NEXT: br i1 [[TMP4]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]]
+; VF8UF1: [[MIDDLE_BLOCK]]:
+; VF8UF1-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; VF8UF1: [[VECTOR_EARLY_EXIT]]:
+; VF8UF1-NEXT: br label %[[EXIT]]
+; VF8UF1: [[SCALAR_PH]]:
+; VF8UF1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 16, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; VF8UF1-NEXT: br label %[[LOOP_HEADER:.*]]
+; VF8UF1: [[LOOP_HEADER]]:
+; VF8UF1-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
+; VF8UF1-NEXT: [[P_SRC1:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV1]]
+; VF8UF1-NEXT: [[L:%.*]] = load i8, ptr [[P_SRC1]], align 1
+; VF8UF1-NEXT: [[C:%.*]] = icmp eq i8 [[L]], 0
+; VF8UF1-NEXT: br i1 [[C]], label %[[EXIT]], label %[[LOOP_LATCH]]
+; VF8UF1: [[LOOP_LATCH]]:
+; VF8UF1-NEXT: [[IV_NEXT]] = add nsw i64 [[IV1]], 1
+; VF8UF1-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 16
+; VF8UF1-NEXT: br i1 [[CMP]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP3:![0-9]+]]
+; VF8UF1: [[EXIT]]:
+; VF8UF1-NEXT: [[RES:%.*]] = phi i8 [ 0, %[[LOOP_HEADER]] ], [ 1, %[[LOOP_LATCH]] ], [ 1, %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_EARLY_EXIT]] ]
+; VF8UF1-NEXT: ret i8 [[RES]]
+;
+; VF8UF2-LABEL: define i8 @test_early_exit_max_tc_less_than_16(
+; VF8UF2-SAME: ptr [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
+; VF8UF2-NEXT: [[ENTRY:.*]]:
+; VF8UF2-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(ptr [[A]], i64 32) ]
+; VF8UF2-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; VF8UF2: [[VECTOR_PH]]:
+; VF8UF2-NEXT: br label %[[VECTOR_BODY:.*]]
+; VF8UF2: [[VECTOR_BODY]]:
+; VF8UF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; VF8UF2-NEXT: [[IV:%.*]] = add i64 [[INDEX]], 0
+; VF8UF2-NEXT: [[P_SRC:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV]]
+; VF8UF2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[P_SRC]], i32 0
+; VF8UF2-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[TMP2]], align 1
+; VF8UF2-NEXT: [[TMP3:%.*]] = icmp eq <8 x i8> [[WIDE_LOAD]], zeroinitializer
+; VF8UF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
+; VF8UF2-NEXT: [[TMP4:%.*]] = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> [[TMP3]])
+; VF8UF2-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
+; VF8UF2-NEXT: [[TMP6:%.*]] = or i1 [[TMP4]], [[TMP5]]
+; VF8UF2-NEXT: br i1 [[TMP6]], label %[[MIDDLE_SPLIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; VF8UF2: [[MIDDLE_SPLIT]]:
+; VF8UF2-NEXT: br i1 [[TMP4]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]]
+; VF8UF2: [[MIDDLE_BLOCK]]:
+; VF8UF2-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; VF8UF2: [[VECTOR_EARLY_EXIT]]:
+; VF8UF2-NEXT: br label %[[EXIT]]
+; VF8UF2: [[SCALAR_PH]]:
+; VF8UF2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 16, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; VF8UF2-NEXT: br label %[[LOOP_HEADER:.*]]
+; VF8UF2: [[LOOP_HEADER]]:
+; VF8UF2-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
+; VF8UF2-NEXT: [[P_SRC1:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV1]]
+; VF8UF2-NEXT: [[L:%.*]] = load i8, ptr [[P_SRC1]], align 1
+; VF8UF2-NEXT: [[C:%.*]] = icmp eq i8 [[L]], 0
+; VF8UF2-NEXT: br i1 [[C]], label %[[EXIT]], label %[[LOOP_LATCH]]
+; VF8UF2: [[LOOP_LATCH]]:
+; VF8UF2-NEXT: [[IV_NEXT]] = add nsw i64 [[IV1]], 1
+; VF8UF2-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 16
+; VF8UF2-NEXT: br i1 [[CMP]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP3:![0-9]+]]
+; VF8UF2: [[EXIT]]:
+; VF8UF2-NEXT: [[RES:%.*]] = phi i8 [ 0, %[[LOOP_HEADER]] ], [ 1, %[[LOOP_LATCH]] ], [ 1, %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_EARLY_EXIT]] ]
+; VF8UF2-NEXT: ret i8 [[RES]]
+;
+; VF16UF1-LABEL: define i8 @test_early_exit_max_tc_less_than_16(
+; VF16UF1-SAME: ptr [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
+; VF16UF1-NEXT: [[ENTRY:.*]]:
+; VF16UF1-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(ptr [[A]], i64 32) ]
+; VF16UF1-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; VF16UF1: [[VECTOR_PH]]:
+; VF16UF1-NEXT: br label %[[VECTOR_BODY:.*]]
+; VF16UF1: [[VECTOR_BODY]]:
+; VF16UF1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; VF16UF1-NEXT: [[IV:%.*]] = add i64 [[INDEX]], 0
+; VF16UF1-NEXT: [[P_SRC:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV]]
+; VF16UF1-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[P_SRC]], i32 0
+; VF16UF1-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1
+; VF16UF1-NEXT: [[TMP3:%.*]] = icmp eq <16 x i8> [[WIDE_LOAD]], zeroinitializer
+; VF16UF1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
+; VF16UF1-NEXT: [[TMP4:%.*]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> [[TMP3]])
+; VF16UF1-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
+; VF16UF1-NEXT: [[TMP6:%.*]] = or i1 [[TMP4]], [[TMP5]]
+; VF16UF1-NEXT: br i1 [[TMP6]], label %[[MIDDLE_SPLIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; VF16UF1: [[MIDDLE_SPLIT]]:
+; VF16UF1-NEXT: br i1 [[TMP4]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]]
+; VF16UF1: [[MIDDLE_BLOCK]]:
+; VF16UF1-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; VF16UF1: [[VECTOR_EARLY_EXIT]]:
+; VF16UF1-NEXT: br label %[[EXIT]]
+; VF16UF1: [[SCALAR_PH]]:
+; VF16UF1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 16, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; VF16UF1-NEXT: br label %[[LOOP_HEADER:.*]]
+; VF16UF1: [[LOOP_HEADER]]:
+; VF16UF1-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
+; VF16UF1-NEXT: [[P_SRC1:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV1]]
+; VF16UF1-NEXT: [[L:%.*]] = load i8, ptr [[P_SRC1]], align 1
+; VF16UF1-NEXT: [[C:%.*]] = icmp eq i8 [[L]], 0
+; VF16UF1-NEXT: br i1 [[C]], label %[[EXIT]], label %[[LOOP_LATCH]]
+; VF16UF1: [[LOOP_LATCH]]:
+; VF16UF1-NEXT: [[IV_NEXT]] = add nsw i64 [[IV1]], 1
+; VF16UF1-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 16
+; VF16UF1-NEXT: br i1 [[CMP]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP3:![0-9]+]]
+; VF16UF1: [[EXIT]]:
+; VF16UF1-NEXT: [[RES:%.*]] = phi i8 [ 0, %[[LOOP_HEADER]] ], [ 1, %[[LOOP_LATCH]] ], [ 1, %[[MIDDLE_BLOCK]] ], [ 0, %[[VECTOR_EARLY_EXIT]] ]
+; VF16UF1-NEXT: ret i8 [[RES]]
+;
+entry:
+ call void @llvm.assume(i1 true) [ "dereferenceable"(ptr %A, i64 32) ]
+ br label %loop.header
+
+loop.header:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
+ %p.src = getelementptr inbounds i8, ptr %A, i64 %iv
+ %l = load i8, ptr %p.src, align 1
+ %c = icmp eq i8 %l, 0
+ br i1 %c, label %exit, label %loop.latch
+
+loop.latch:
+ %iv.next = add nsw i64 %iv, 1
+ %cmp = icmp eq i64 %iv.next, 16
+ br i1 %cmp, label %exit, label %loop.header
+
+exit:
+ %res = phi i8 [ 0, %loop.header ], [ 1, %loop.latch ]
+ ret i8 %res
+}
+
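+; Same loop as the test above, except the scalar IV is used outside the
+; loop: on the early exit, the returned value is the IV of the first
+; matching element. The vectorized early-exit block therefore has to compute
+; the first active lane (via llvm.experimental.cttz.elts) and extract the
+; corresponding element of the widened IV.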
+define i64 @test_early_exit_max_tc_less_than_16_with_iv_used_outside(ptr %A, i64 %N) nosync nofree {
+; VF8UF1-LABEL: define i64 @test_early_exit_max_tc_less_than_16_with_iv_used_outside(
+; VF8UF1-SAME: ptr [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
+; VF8UF1-NEXT: [[ENTRY:.*]]:
+; VF8UF1-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(ptr [[A]], i64 32) ]
+; VF8UF1-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; VF8UF1: [[VECTOR_PH]]:
+; VF8UF1-NEXT: br label %[[VECTOR_BODY:.*]]
+; VF8UF1: [[VECTOR_BODY]]:
+; VF8UF1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; VF8UF1-NEXT: [[VEC_IND:%.*]] = phi <8 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; VF8UF1-NEXT: [[IV:%.*]] = add i64 [[INDEX]], 0
+; VF8UF1-NEXT: [[P_SRC:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV]]
+; VF8UF1-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[P_SRC]], i32 0
+; VF8UF1-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[TMP2]], align 1
+; VF8UF1-NEXT: [[TMP3:%.*]] = icmp eq <8 x i8> [[WIDE_LOAD]], zeroinitializer
+; VF8UF1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
+; VF8UF1-NEXT: [[TMP4:%.*]] = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> [[TMP3]])
+; VF8UF1-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
+; VF8UF1-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8)
+; VF8UF1-NEXT: [[TMP6:%.*]] = or i1 [[TMP4]], [[TMP5]]
+; VF8UF1-NEXT: br i1 [[TMP6]], label %[[MIDDLE_SPLIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; VF8UF1: [[MIDDLE_SPLIT]]:
+; VF8UF1-NEXT: br i1 [[TMP4]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]]
+; VF8UF1: [[MIDDLE_BLOCK]]:
+; VF8UF1-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; VF8UF1: [[VECTOR_EARLY_EXIT]]:
+; VF8UF1-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v8i1(<8 x i1> [[TMP3]], i1 true)
+; VF8UF1-NEXT: [[EARLY_EXIT_VALUE:%.*]] = extractelement <8 x i64> [[VEC_IND]], i64 [[FIRST_ACTIVE_LANE]]
+; VF8UF1-NEXT: br label %[[EXIT]]
+; VF8UF1: [[SCALAR_PH]]:
+; VF8UF1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 16, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; VF8UF1-NEXT: br label %[[LOOP_HEADER:.*]]
+; VF8UF1: [[LOOP_HEADER]]:
+; VF8UF1-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
+; VF8UF1-NEXT: [[P_SRC1:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV1]]
+; VF8UF1-NEXT: [[L:%.*]] = load i8, ptr [[P_SRC1]], align 1
+; VF8UF1-NEXT: [[C:%.*]] = icmp eq i8 [[L]], 0
+; VF8UF1-NEXT: br i1 [[C]], label %[[EXIT]], label %[[LOOP_LATCH]]
+; VF8UF1: [[LOOP_LATCH]]:
+; VF8UF1-NEXT: [[IV_NEXT]] = add nsw i64 [[IV1]], 1
+; VF8UF1-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 16
+; VF8UF1-NEXT: br i1 [[CMP]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP5:![0-9]+]]
+; VF8UF1: [[EXIT]]:
+; VF8UF1-NEXT: [[RES:%.*]] = phi i64 [ [[IV1]], %[[LOOP_HEADER]] ], [ 1, %[[LOOP_LATCH]] ], [ 1, %[[MIDDLE_BLOCK]] ], [ [[EARLY_EXIT_VALUE]], %[[VECTOR_EARLY_EXIT]] ]
+; VF8UF1-NEXT: ret i64 [[RES]]
+;
+; VF8UF2-LABEL: define i64 @test_early_exit_max_tc_less_than_16_with_iv_used_outside(
+; VF8UF2-SAME: ptr [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
+; VF8UF2-NEXT: [[ENTRY:.*]]:
+; VF8UF2-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(ptr [[A]], i64 32) ]
+; VF8UF2-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; VF8UF2: [[VECTOR_PH]]:
+; VF8UF2-NEXT: br label %[[VECTOR_BODY:.*]]
+; VF8UF2: [[VECTOR_BODY]]:
+; VF8UF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; VF8UF2-NEXT: [[VEC_IND:%.*]] = phi <8 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; VF8UF2-NEXT: [[STEP_ADD:%.*]] = add <8 x i64> [[VEC_IND]], splat (i64 8)
+; VF8UF2-NEXT: [[IV:%.*]] = add i64 [[INDEX]], 0
+; VF8UF2-NEXT: [[P_SRC:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV]]
+; VF8UF2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[P_SRC]], i32 0
+; VF8UF2-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[TMP2]], align 1
+; VF8UF2-NEXT: [[TMP3:%.*]] = icmp eq <8 x i8> [[WIDE_LOAD]], zeroinitializer
+; VF8UF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
+; VF8UF2-NEXT: [[TMP4:%.*]] = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> [[TMP3]])
+; VF8UF2-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
+; VF8UF2-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[STEP_ADD]], splat (i64 8)
+; VF8UF2-NEXT: [[TMP6:%.*]] = or i1 [[TMP4]], [[TMP5]]
+; VF8UF2-NEXT: br i1 [[TMP6]], label %[[MIDDLE_SPLIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; VF8UF2: [[MIDDLE_SPLIT]]:
+; VF8UF2-NEXT: br i1 [[TMP4]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]]
+; VF8UF2: [[MIDDLE_BLOCK]]:
+; VF8UF2-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; VF8UF2: [[VECTOR_EARLY_EXIT]]:
+; VF8UF2-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v8i1(<8 x i1> [[TMP3]], i1 true)
+; VF8UF2-NEXT: [[EARLY_EXIT_VALUE:%.*]] = extractelement <8 x i64> [[VEC_IND]], i64 [[FIRST_ACTIVE_LANE]]
+; VF8UF2-NEXT: br label %[[EXIT]]
+; VF8UF2: [[SCALAR_PH]]:
+; VF8UF2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 16, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; VF8UF2-NEXT: br label %[[LOOP_HEADER:.*]]
+; VF8UF2: [[LOOP_HEADER]]:
+; VF8UF2-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
+; VF8UF2-NEXT: [[P_SRC1:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV1]]
+; VF8UF2-NEXT: [[L:%.*]] = load i8, ptr [[P_SRC1]], align 1
+; VF8UF2-NEXT: [[C:%.*]] = icmp eq i8 [[L]], 0
+; VF8UF2-NEXT: br i1 [[C]], label %[[EXIT]], label %[[LOOP_LATCH]]
+; VF8UF2: [[LOOP_LATCH]]:
+; VF8UF2-NEXT: [[IV_NEXT]] = add nsw i64 [[IV1]], 1
+; VF8UF2-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 16
+; VF8UF2-NEXT: br i1 [[CMP]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP5:![0-9]+]]
+; VF8UF2: [[EXIT]]:
+; VF8UF2-NEXT: [[RES:%.*]] = phi i64 [ [[IV1]], %[[LOOP_HEADER]] ], [ 1, %[[LOOP_LATCH]] ], [ 1, %[[MIDDLE_BLOCK]] ], [ [[EARLY_EXIT_VALUE]], %[[VECTOR_EARLY_EXIT]] ]
+; VF8UF2-NEXT: ret i64 [[RES]]
+;
+; VF16UF1-LABEL: define i64 @test_early_exit_max_tc_less_than_16_with_iv_used_outside(
+; VF16UF1-SAME: ptr [[A:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
+; VF16UF1-NEXT: [[ENTRY:.*]]:
+; VF16UF1-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(ptr [[A]], i64 32) ]
+; VF16UF1-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; VF16UF1: [[VECTOR_PH]]:
+; VF16UF1-NEXT: br label %[[VECTOR_BODY:.*]]
+; VF16UF1: [[VECTOR_BODY]]:
+; VF16UF1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; VF16UF1-NEXT: [[VEC_IND:%.*]] = phi <16 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; VF16UF1-NEXT: [[IV:%.*]] = add i64 [[INDEX]], 0
+; VF16UF1-NEXT: [[P_SRC:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV]]
+; VF16UF1-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[P_SRC]], i32 0
+; VF16UF1-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1
+; VF16UF1-NEXT: [[TMP3:%.*]] = icmp eq <16 x i8> [[WIDE_LOAD]], zeroinitializer
+; VF16UF1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
+; VF16UF1-NEXT: [[TMP4:%.*]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> [[TMP3]])
+; VF16UF1-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
+; VF16UF1-NEXT: [[VEC_IND_NEXT]] = add <16 x i64> [[VEC_IND]], splat (i64 16)
+; VF16UF1-NEXT: [[TMP6:%.*]] = or i1 [[TMP4]], [[TMP5]]
+; VF16UF1-NEXT: br i1 [[TMP6]], label %[[MIDDLE_SPLIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; VF16UF1: [[MIDDLE_SPLIT]]:
+; VF16UF1-NEXT: br i1 [[TMP4]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]]
+; VF16UF1: [[MIDDLE_BLOCK]]:
+; VF16UF1-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
+; VF16UF1: [[VECTOR_EARLY_EXIT]]:
+; VF16UF1-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v16i1(<16 x i1> [[TMP3]], i1 true)
+; VF16UF1-NEXT: [[EARLY_EXIT_VALUE:%.*]] = extractelement <16 x i64> [[VEC_IND]], i64 [[FIRST_ACTIVE_LANE]]
+; VF16UF1-NEXT: br label %[[EXIT]]
+; VF16UF1: [[SCALAR_PH]]:
+; VF16UF1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 16, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; VF16UF1-NEXT: br label %[[LOOP_HEADER:.*]]
+; VF16UF1: [[LOOP_HEADER]]:
+; VF16UF1-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
+; VF16UF1-NEXT: [[P_SRC1:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV1]]
+; VF16UF1-NEXT: [[L:%.*]] = load i8, ptr [[P_SRC1]], align 1
+; VF16UF1-NEXT: [[C:%.*]] = icmp eq i8 [[L]], 0
+; VF16UF1-NEXT: br i1 [[C]], label %[[EXIT]], label %[[LOOP_LATCH]]
+; VF16UF1: [[LOOP_LATCH]]:
+; VF16UF1-NEXT: [[IV_NEXT]] = add nsw i64 [[IV1]], 1
+; VF16UF1-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 16
+; VF16UF1-NEXT: br i1 [[CMP]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP5:![0-9]+]]
+; VF16UF1: [[EXIT]]:
+; VF16UF1-NEXT: [[RES:%.*]] = phi i64 [ [[IV1]], %[[LOOP_HEADER]] ], [ 1, %[[LOOP_LATCH]] ], [ 1, %[[MIDDLE_BLOCK]] ], [ [[EARLY_EXIT_VALUE]], %[[VECTOR_EARLY_EXIT]] ]
+; VF16UF1-NEXT: ret i64 [[RES]]
+;
+entry:
+ call void @llvm.assume(i1 true) [ "dereferenceable"(ptr %A, i64 32) ]
+ br label %loop.header
+
+loop.header:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
+ %p.src = getelementptr inbounds i8, ptr %A, i64 %iv
+ %l = load i8, ptr %p.src, align 1
+ %c = icmp eq i8 %l, 0
+ br i1 %c, label %exit, label %loop.latch
+
+loop.latch:
+ %iv.next = add nsw i64 %iv, 1
+ %cmp = icmp eq i64 %iv.next, 16
+ br i1 %cmp, label %exit, label %loop.header
+
+exit:
+ %res = phi i64 [ %iv, %loop.header ], [ 1, %loop.latch ]
+ ret i64 %res
+}