[llvm] [LV] Vectorize early exit loops with multiple exits. (PR #174864)
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Fri Jan 16 07:33:43 PST 2026
================
@@ -276,31 +382,50 @@ exit:
define i64 @two_early_exits_with_live_out_values() {
; CHECK-LABEL: define i64 @two_early_exits_with_live_out_values() {
-; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[ENTRY:.*:]]
; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1
; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1
; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024)
; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024)
-; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
-; CHECK: [[LOOP_HEADER]]:
-; CHECK-NEXT: [[OFFSET_IDX:%.*]] = phi i64 [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ], [ 3, %[[ENTRY]] ]
+; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[COND_0:.*]] ]
+; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX]]
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[OFFSET_IDX]]
-; CHECK-NEXT: [[LD_A:%.*]] = load i8, ptr [[TMP0]], align 1
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP0]], align 1
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[OFFSET_IDX]]
-; CHECK-NEXT: [[LD_B:%.*]] = load i8, ptr [[TMP1]], align 1
-; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i8 [[LD_A]], [[LD_B]]
-; CHECK-NEXT: br i1 [[CMP1]], label %[[EXIT:.*]], label %[[EARLY_EXIT_0:.*]]
-; CHECK: [[EARLY_EXIT_0]]:
-; CHECK-NEXT: [[SUM:%.*]] = add i8 [[LD_A]], [[LD_B]]
-; CHECK-NEXT: [[CMP2:%.*]] = icmp ult i8 [[SUM]], 34
-; CHECK-NEXT: br i1 [[CMP2]], label %[[EXIT]], label %[[LOOP_LATCH]]
-; CHECK: [[LOOP_LATCH]]:
-; CHECK-NEXT: [[IV_NEXT]] = add i64 [[OFFSET_IDX]], 1
-; CHECK-NEXT: [[EC:%.*]] = icmp ne i64 [[IV_NEXT]], 67
-; CHECK-NEXT: br i1 [[EC]], label %[[LOOP_HEADER]], label %[[EXIT]]
+; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP1]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD1]]
+; CHECK-NEXT: [[TMP3:%.*]] = add <4 x i8> [[WIDE_LOAD]], [[WIDE_LOAD1]]
+; CHECK-NEXT: [[TMP4:%.*]] = icmp ult <4 x i8> [[TMP3]], splat (i8 34)
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT: [[TMP5:%.*]] = or <4 x i1> [[TMP2]], [[TMP4]]
+; CHECK-NEXT: [[TMP6:%.*]] = freeze <4 x i1> [[TMP5]]
+; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP6]])
+; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 64
+; CHECK-NEXT: br i1 [[TMP7]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[COND_0]]
+; CHECK: [[COND_0]]:
+; CHECK-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: br label %[[EXIT:.*]]
+; CHECK: [[VECTOR_EARLY_EXIT]]:
+; CHECK-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v4i1(<4 x i1> [[TMP5]], i1 false)
----------------
fhahn wrote:
Yes, this may be something we want to tune in the future. We should be able to convert to the other form if profitable. The reason I went for the current one was that it seemed slightly simpler.
For NEON, the extracts currently go through memory, but I think that's similar to what we generate for other extracts for single early exits.
https://github.com/llvm/llvm-project/pull/174864
More information about the llvm-commits
mailing list