[llvm] [VPlan] Sink single-scalar replicates in licm (PR #187047)
Ramkumar Ramachandra via llvm-commits
llvm-commits at lists.llvm.org
Fri Mar 20 04:17:15 PDT 2026
https://github.com/artagnon updated https://github.com/llvm/llvm-project/pull/187047
>From cb4fb01f461b6656a4fed0cbf4dccd9e4813c370 Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <artagnon at tenstorrent.com>
Date: Tue, 17 Mar 2026 15:48:21 +0000
Subject: [PATCH 1/2] [VPlan] Sink single-scalar replicates in licm
Refine the replicate bail-out in licm to permit single-scalar
unpredicated replicates.
---
.../Transforms/Vectorize/VPlanTransforms.cpp | 11 ++-
...first-order-recurrence-with-uniform-ops.ll | 18 ++---
.../LoopVectorize/first-order-recurrence.ll | 24 +++---
.../LoopVectorize/iv_outside_user.ll | 76 +++++++------------
.../Transforms/LoopVectorize/lcssa-crashes.ll | 2 +-
llvm/test/Transforms/LoopVectorize/pr66616.ll | 2 +-
6 files changed, 56 insertions(+), 77 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 63bd885f20917..91879d16ef0da 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -2730,10 +2730,13 @@ static void licm(VPlan &Plan) {
if (cannotHoistOrSinkRecipe(R))
continue;
- // TODO: Support sinking VPReplicateRecipe after ensuring replicateByVF
- // handles sunk recipes correctly.
- if (isa<VPReplicateRecipe>(&R))
- continue;
+ // narrowToSingleScalarRecipes should have already maximally narrowed
+ // replicates to single-scalar replicates. TODO: When unrolling
+ // replicatebyVF doesn't handle non-single-scalar replicates that are
+ // sunk yet.
+ if (auto *RepR = dyn_cast<VPReplicateRecipe>(&R))
+ if (RepR->isPredicated() || !RepR->isSingleScalar())
+ continue;
// TODO: Use R.definedValues() instead of casting to VPSingleDefRecipe to
// support recipes with multiple defined values (e.g., interleaved loads).
diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-with-uniform-ops.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-with-uniform-ops.ll
index 372876c5faac6..408573273b962 100644
--- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-with-uniform-ops.ll
+++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-with-uniform-ops.ll
@@ -134,18 +134,18 @@ define i16 @for_phi_removed(ptr %src) {
; UNROLL-NO-IC: [[VECTOR_BODY]]:
; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = load i32, ptr [[SRC]], align 4
-; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 0
-; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i16 1, i16 0
; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 104
; UNROLL-NO-IC-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; UNROLL-NO-IC: [[MIDDLE_BLOCK]]:
+; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP0]], 0
+; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = select i1 [[TMP2]], i16 1, i16 0
; UNROLL-NO-IC-NEXT: br label %[[SCALAR_PH:.*]]
; UNROLL-NO-IC: [[SCALAR_PH]]:
; UNROLL-NO-IC-NEXT: br label %[[LOOP:.*]]
; UNROLL-NO-IC: [[LOOP]]:
; UNROLL-NO-IC-NEXT: [[IV:%.*]] = phi i16 [ 104, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
-; UNROLL-NO-IC-NEXT: [[P:%.*]] = phi i16 [ [[TMP2]], %[[SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ]
+; UNROLL-NO-IC-NEXT: [[P:%.*]] = phi i16 [ [[TMP4]], %[[SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ]
; UNROLL-NO-IC-NEXT: [[L:%.*]] = load i32, ptr [[SRC]], align 4
; UNROLL-NO-IC-NEXT: [[C:%.*]] = icmp eq i32 [[L]], 0
; UNROLL-NO-IC-NEXT: [[SEL]] = select i1 [[C]], i16 1, i16 0
@@ -165,18 +165,18 @@ define i16 @for_phi_removed(ptr %src) {
; UNROLL-NO-VF: [[VECTOR_BODY]]:
; UNROLL-NO-VF-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; UNROLL-NO-VF-NEXT: [[TMP0:%.*]] = load i32, ptr [[SRC]], align 4
-; UNROLL-NO-VF-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 0
-; UNROLL-NO-VF-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i16 1, i16 0
; UNROLL-NO-VF-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; UNROLL-NO-VF-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 110
; UNROLL-NO-VF-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; UNROLL-NO-VF: [[MIDDLE_BLOCK]]:
+; UNROLL-NO-VF-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP0]], 0
+; UNROLL-NO-VF-NEXT: [[TMP4:%.*]] = select i1 [[TMP2]], i16 1, i16 0
; UNROLL-NO-VF-NEXT: br label %[[SCALAR_PH:.*]]
; UNROLL-NO-VF: [[SCALAR_PH]]:
; UNROLL-NO-VF-NEXT: br label %[[LOOP:.*]]
; UNROLL-NO-VF: [[LOOP]]:
; UNROLL-NO-VF-NEXT: [[IV:%.*]] = phi i16 [ 110, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
-; UNROLL-NO-VF-NEXT: [[P:%.*]] = phi i16 [ [[TMP2]], %[[SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ]
+; UNROLL-NO-VF-NEXT: [[P:%.*]] = phi i16 [ [[TMP4]], %[[SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ]
; UNROLL-NO-VF-NEXT: [[L:%.*]] = load i32, ptr [[SRC]], align 4
; UNROLL-NO-VF-NEXT: [[C:%.*]] = icmp eq i32 [[L]], 0
; UNROLL-NO-VF-NEXT: [[SEL]] = select i1 [[C]], i16 1, i16 0
@@ -196,18 +196,18 @@ define i16 @for_phi_removed(ptr %src) {
; SINK-AFTER: [[VECTOR_BODY]]:
; SINK-AFTER-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; SINK-AFTER-NEXT: [[TMP0:%.*]] = load i32, ptr [[SRC]], align 4
-; SINK-AFTER-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 0
-; SINK-AFTER-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i16 1, i16 0
; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
; SINK-AFTER-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 108
; SINK-AFTER-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; SINK-AFTER: [[MIDDLE_BLOCK]]:
+; SINK-AFTER-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP0]], 0
+; SINK-AFTER-NEXT: [[TMP4:%.*]] = select i1 [[TMP2]], i16 1, i16 0
; SINK-AFTER-NEXT: br label %[[SCALAR_PH:.*]]
; SINK-AFTER: [[SCALAR_PH]]:
; SINK-AFTER-NEXT: br label %[[LOOP:.*]]
; SINK-AFTER: [[LOOP]]:
; SINK-AFTER-NEXT: [[IV:%.*]] = phi i16 [ 108, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
-; SINK-AFTER-NEXT: [[P:%.*]] = phi i16 [ [[TMP2]], %[[SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ]
+; SINK-AFTER-NEXT: [[P:%.*]] = phi i16 [ [[TMP4]], %[[SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ]
; SINK-AFTER-NEXT: [[L:%.*]] = load i32, ptr [[SRC]], align 4
; SINK-AFTER-NEXT: [[C:%.*]] = icmp eq i32 [[L]], 0
; SINK-AFTER-NEXT: [[SEL]] = select i1 [[C]], i16 1, i16 0
diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll
index 6b4727a1379f8..194b006956f03 100644
--- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll
+++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence.ll
@@ -1297,13 +1297,13 @@ define i32 @extract_second_last_iteration(ptr %cval, i32 %x, i32 %n) {
; UNROLL-NO-VF-NEXT: br label [[VECTOR_BODY:%.*]]
; UNROLL-NO-VF: vector.body:
; UNROLL-NO-VF-NEXT: [[VECTOR_RECUR:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ]
-; UNROLL-NO-VF-NEXT: [[TMP6:%.*]] = add i32 [[VECTOR_RECUR]], 1
-; UNROLL-NO-VF-NEXT: [[TMP2:%.*]] = add i32 [[VECTOR_RECUR]], [[X:%.*]]
-; UNROLL-NO-VF-NEXT: [[TMP4:%.*]] = add i32 [[TMP6]], [[X]]
; UNROLL-NO-VF-NEXT: [[TMP3]] = add nuw i32 [[VECTOR_RECUR]], 2
; UNROLL-NO-VF-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP3]], [[N_VEC]]
; UNROLL-NO-VF-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
; UNROLL-NO-VF: middle.block:
+; UNROLL-NO-VF-NEXT: [[TMP6:%.*]] = add i32 [[VECTOR_RECUR]], 1
+; UNROLL-NO-VF-NEXT: [[TMP2:%.*]] = add i32 [[VECTOR_RECUR]], [[X:%.*]]
+; UNROLL-NO-VF-NEXT: [[TMP4:%.*]] = add i32 [[TMP6]], [[X]]
; UNROLL-NO-VF-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC]]
; UNROLL-NO-VF-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; UNROLL-NO-VF: scalar.ph:
@@ -2482,7 +2482,6 @@ define void @sink_dead_inst(ptr %a) {
; UNROLL-NO-VF-NEXT: [[TMP1:%.*]] = add i16 [[OFFSET_IDX]], 1
; UNROLL-NO-VF-NEXT: [[TMP2:%.*]] = add i16 [[OFFSET_IDX]], 1
; UNROLL-NO-VF-NEXT: [[TMP3:%.*]] = add i16 [[TMP1]], 1
-; UNROLL-NO-VF-NEXT: [[TMP4:%.*]] = zext i16 [[TMP3]] to i32
; UNROLL-NO-VF-NEXT: [[TMP5:%.*]] = add i16 [[TMP2]], 5
; UNROLL-NO-VF-NEXT: [[TMP6]] = add i16 [[TMP3]], 5
; UNROLL-NO-VF-NEXT: [[TMP7:%.*]] = sub i16 [[VECTOR_RECUR]], 10
@@ -2495,6 +2494,7 @@ define void @sink_dead_inst(ptr %a) {
; UNROLL-NO-VF-NEXT: [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], 42
; UNROLL-NO-VF-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]]
; UNROLL-NO-VF: middle.block:
+; UNROLL-NO-VF-NEXT: [[TMP4:%.*]] = zext i16 [[TMP3]] to i32
; UNROLL-NO-VF-NEXT: br label [[SCALAR_PH:%.*]]
; UNROLL-NO-VF: scalar.ph:
; UNROLL-NO-VF-NEXT: br label [[FOR_COND:%.*]]
@@ -3240,18 +3240,18 @@ define i32 @sink_after_dead_inst(ptr %A.ptr, i32 %n) {
; UNROLL-NO-VF: vector.body:
; UNROLL-NO-VF-NEXT: [[VECTOR_RECUR:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-VF-NEXT: [[OFFSET_IDX:%.*]] = trunc i32 [[VECTOR_RECUR]] to i16
-; UNROLL-NO-VF-NEXT: [[TMP3:%.*]] = add i16 [[OFFSET_IDX]], 1
; UNROLL-NO-VF-NEXT: [[TMP1:%.*]] = add i16 [[OFFSET_IDX]], 1
-; UNROLL-NO-VF-NEXT: [[TMP2:%.*]] = or i16 [[TMP1]], [[TMP1]]
-; UNROLL-NO-VF-NEXT: [[TMP10:%.*]] = zext i16 [[TMP2]] to i32
; UNROLL-NO-VF-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[A_PTR:%.*]], i16 [[OFFSET_IDX]]
-; UNROLL-NO-VF-NEXT: [[TMP9:%.*]] = getelementptr i32, ptr [[A_PTR]], i16 [[TMP3]]
+; UNROLL-NO-VF-NEXT: [[TMP9:%.*]] = getelementptr i32, ptr [[A_PTR]], i16 [[TMP1]]
; UNROLL-NO-VF-NEXT: store i32 0, ptr [[TMP8]], align 4
; UNROLL-NO-VF-NEXT: store i32 0, ptr [[TMP9]], align 4
; UNROLL-NO-VF-NEXT: [[TMP7]] = add nuw i32 [[VECTOR_RECUR]], 2
; UNROLL-NO-VF-NEXT: [[TMP11:%.*]] = icmp eq i32 [[TMP7]], 16
; UNROLL-NO-VF-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP29:![0-9]+]]
; UNROLL-NO-VF: middle.block:
+; UNROLL-NO-VF-NEXT: [[TMP4:%.*]] = add i16 [[OFFSET_IDX]], 1
+; UNROLL-NO-VF-NEXT: [[TMP5:%.*]] = or i16 [[TMP4]], [[TMP4]]
+; UNROLL-NO-VF-NEXT: [[TMP10:%.*]] = zext i16 [[TMP5]] to i32
; UNROLL-NO-VF-NEXT: br label [[LOOP:%.*]]
; UNROLL-NO-VF: for.end:
; UNROLL-NO-VF-NEXT: ret i32 [[TMP10]]
@@ -3363,15 +3363,15 @@ define void @unused_recurrence(ptr %a, i16 %n) {
; UNROLL-NO-VF-NEXT: br label [[VECTOR_BODY:%.*]]
; UNROLL-NO-VF: vector.body:
; UNROLL-NO-VF-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; UNROLL-NO-VF-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
+; UNROLL-NO-VF-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
+; UNROLL-NO-VF-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]]
+; UNROLL-NO-VF: middle.block:
; UNROLL-NO-VF-NEXT: [[DOTCAST2:%.*]] = trunc i32 [[INDEX]] to i16
; UNROLL-NO-VF-NEXT: [[OFFSET_IDX:%.*]] = add i16 -27, [[DOTCAST2]]
; UNROLL-NO-VF-NEXT: [[TMP9:%.*]] = add i16 [[OFFSET_IDX]], 1
; UNROLL-NO-VF-NEXT: [[TMP8:%.*]] = add i16 [[TMP9]], 1
; UNROLL-NO-VF-NEXT: [[TMP2:%.*]] = add i16 [[TMP8]], 5
-; UNROLL-NO-VF-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
-; UNROLL-NO-VF-NEXT: [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
-; UNROLL-NO-VF-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]]
-; UNROLL-NO-VF: middle.block:
; UNROLL-NO-VF-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP6]], [[N_VEC]]
; UNROLL-NO-VF-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; UNROLL-NO-VF: scalar.ph:
diff --git a/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll b/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll
index 03ffb6fbc5847..553350bc9a066 100644
--- a/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll
+++ b/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll
@@ -676,12 +676,12 @@ define i32 @postinc_not_iv_backedge_value(i32 %k) {
; INTERLEAVE-NEXT: br label %[[VECTOR_BODY:.*]]
; INTERLEAVE: [[VECTOR_BODY]]:
; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; INTERLEAVE-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 1
-; INTERLEAVE-NEXT: [[TMP1:%.*]] = add i32 [[TMP0]], 2
; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
; INTERLEAVE-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; INTERLEAVE-NEXT: br i1 [[TMP2]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], {{!llvm.loop ![0-9]+}}
; INTERLEAVE: [[MIDDLE_BLOCK]]:
+; INTERLEAVE-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 1
+; INTERLEAVE-NEXT: [[TMP3:%.*]] = add i32 [[TMP1]], 2
; INTERLEAVE-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[K]], [[N_VEC]]
; INTERLEAVE-NEXT: br i1 [[CMP_N]], label %[[FOR_END:.*]], label %[[SCALAR_PH]]
; INTERLEAVE: [[SCALAR_PH]]:
@@ -694,7 +694,7 @@ define i32 @postinc_not_iv_backedge_value(i32 %k) {
; INTERLEAVE-NEXT: [[CMP:%.*]] = icmp eq i32 [[INC]], [[K]]
; INTERLEAVE-NEXT: br i1 [[CMP]], label %[[FOR_END]], label %[[FOR_BODY]], {{!llvm.loop ![0-9]+}}
; INTERLEAVE: [[FOR_END]]:
-; INTERLEAVE-NEXT: [[INC_2_LCSSA:%.*]] = phi i32 [ [[INC_2]], %[[FOR_BODY]] ], [ [[TMP1]], %[[MIDDLE_BLOCK]] ]
+; INTERLEAVE-NEXT: [[INC_2_LCSSA:%.*]] = phi i32 [ [[INC_2]], %[[FOR_BODY]] ], [ [[TMP3]], %[[MIDDLE_BLOCK]] ]
; INTERLEAVE-NEXT: ret i32 [[INC_2_LCSSA]]
;
entry:
@@ -1120,53 +1120,29 @@ e.exit:
}
define i32 @test_iv_uniform_with_outside_use_scev_simplification_2(ptr %dst) {
-; VEC-LABEL: define i32 @test_iv_uniform_with_outside_use_scev_simplification_2(
-; VEC-SAME: ptr [[DST:%.*]]) {
-; VEC-NEXT: [[ENTRY:.*:]]
-; VEC-NEXT: [[STEP_1:%.*]] = sext i8 0 to i32
-; VEC-NEXT: [[STEP_2:%.*]] = add nsw i32 [[STEP_1]], 1
-; VEC-NEXT: br label %[[VECTOR_PH:.*]]
-; VEC: [[VECTOR_PH]]:
-; VEC-NEXT: br label %[[VECTOR_BODY:.*]]
-; VEC: [[VECTOR_BODY]]:
-; VEC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; VEC-NEXT: [[OFFSET_IDX:%.*]] = mul i32 [[INDEX]], 2
-; VEC-NEXT: [[TMP1:%.*]] = add i32 [[OFFSET_IDX]], 2
-; VEC-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, ptr [[DST]], i32 [[OFFSET_IDX]]
-; VEC-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[DST]], i32 [[TMP1]]
-; VEC-NEXT: store i16 0, ptr [[TMP2]], align 2
-; VEC-NEXT: store i16 0, ptr [[TMP3]], align 2
-; VEC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
-; VEC-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 4
-; VEC-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], {{!llvm.loop ![0-9]+}}
-; VEC: [[MIDDLE_BLOCK]]:
-; VEC-NEXT: br label %[[E_EXIT:.*]]
-; VEC: [[E_EXIT]]:
-; VEC-NEXT: ret i32 8
-;
-; INTERLEAVE-LABEL: define i32 @test_iv_uniform_with_outside_use_scev_simplification_2(
-; INTERLEAVE-SAME: ptr [[DST:%.*]]) {
-; INTERLEAVE-NEXT: [[ENTRY:.*:]]
-; INTERLEAVE-NEXT: [[STEP_1:%.*]] = sext i8 0 to i32
-; INTERLEAVE-NEXT: [[STEP_2:%.*]] = add nsw i32 [[STEP_1]], 1
-; INTERLEAVE-NEXT: br label %[[VECTOR_PH:.*]]
-; INTERLEAVE: [[VECTOR_PH]]:
-; INTERLEAVE-NEXT: br label %[[VECTOR_BODY:.*]]
-; INTERLEAVE: [[VECTOR_BODY]]:
-; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; INTERLEAVE-NEXT: [[OFFSET_IDX:%.*]] = mul i32 [[INDEX]], 2
-; INTERLEAVE-NEXT: [[TMP1:%.*]] = add i32 [[OFFSET_IDX]], 2
-; INTERLEAVE-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, ptr [[DST]], i32 [[OFFSET_IDX]]
-; INTERLEAVE-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[DST]], i32 [[TMP1]]
-; INTERLEAVE-NEXT: store i16 0, ptr [[TMP2]], align 2
-; INTERLEAVE-NEXT: store i16 0, ptr [[TMP3]], align 2
-; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
-; INTERLEAVE-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 4
-; INTERLEAVE-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], {{!llvm.loop ![0-9]+}}
-; INTERLEAVE: [[MIDDLE_BLOCK]]:
-; INTERLEAVE-NEXT: br label %[[E_EXIT:.*]]
-; INTERLEAVE: [[E_EXIT]]:
-; INTERLEAVE-NEXT: ret i32 8
+; CHECK-LABEL: define i32 @test_iv_uniform_with_outside_use_scev_simplification_2(
+; CHECK-SAME: ptr [[DST:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[STEP_1:%.*]] = sext i8 0 to i32
+; CHECK-NEXT: [[STEP_2:%.*]] = add nsw i32 [[STEP_1]], 1
+; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i32 [[INDEX]], 2
+; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[OFFSET_IDX]], 2
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[DST]], i32 [[OFFSET_IDX]]
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, ptr [[DST]], i32 [[TMP0]]
+; CHECK-NEXT: store i16 0, ptr [[TMP1]], align 2
+; CHECK-NEXT: store i16 0, ptr [[TMP2]], align 2
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
+; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 4
+; CHECK-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], {{!llvm.loop ![0-9]+}}
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: br label %[[E_EXIT:.*]]
+; CHECK: [[E_EXIT]]:
+; CHECK-NEXT: ret i32 8
;
entry:
%step.1 = sext i8 0 to i32
diff --git a/llvm/test/Transforms/LoopVectorize/lcssa-crashes.ll b/llvm/test/Transforms/LoopVectorize/lcssa-crashes.ll
index fd9d1f67b8b07..5475beb0b10f2 100644
--- a/llvm/test/Transforms/LoopVectorize/lcssa-crashes.ll
+++ b/llvm/test/Transforms/LoopVectorize/lcssa-crashes.ll
@@ -200,7 +200,6 @@ define i32 @exit_phi_sunk_def(ptr noalias %src, ptr noalias %dst) {
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[SRC:%.*]], align 4
-; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 0
; CHECK-NEXT: br i1 true, label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
; CHECK: pred.store.if:
; CHECK-NEXT: store i32 1, ptr [[DST:%.*]], align 4
@@ -223,6 +222,7 @@ define i32 @exit_phi_sunk_def(ptr noalias %src, ptr noalias %dst) {
; CHECK: pred.store.continue6:
; CHECK-NEXT: br label [[MIDDLE_BLOCK:%.*]]
; CHECK: middle.block:
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 0
; CHECK-NEXT: [[SEL:%.*]] = select i1 [[TMP1]], <4 x i32> zeroinitializer, <4 x i32> splat (i32 2)
; CHECK-NEXT: [[EXT:%.*]] = extractelement <4 x i32> [[SEL]], i32 0
; CHECK-NEXT: br label [[EXIT:%.*]]
diff --git a/llvm/test/Transforms/LoopVectorize/pr66616.ll b/llvm/test/Transforms/LoopVectorize/pr66616.ll
index 1e093407620d5..be9e53c59aaf4 100644
--- a/llvm/test/Transforms/LoopVectorize/pr66616.ll
+++ b/llvm/test/Transforms/LoopVectorize/pr66616.ll
@@ -13,11 +13,11 @@ define void @pr66616(ptr %ptr) {
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[PTR]], align 4
-; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[TMP0]], 1
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256
; CHECK-NEXT: br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
+; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[TMP0]], 1
; CHECK-NEXT: br label [[LOOP_1:%.*]]
; CHECK: preheader:
; CHECK-NEXT: [[TMP4:%.*]] = sub i32 -1, [[TMP0]]
>From 5116ff1112065607bbc68daf48293ee8279bfdfe Mon Sep 17 00:00:00 2001
From: Ramkumar Ramachandra <artagnon at tenstorrent.com>
Date: Fri, 20 Mar 2026 11:15:25 +0000
Subject: [PATCH 2/2] [VPlan] Strengthen with assert, fix UTC updates by hand
---
.../Transforms/Vectorize/VPlanTransforms.cpp | 15 +++++++++------
.../first-order-recurrence-with-uniform-ops.ll | 18 +++++++++---------
.../LoopVectorize/iv_outside_user.ll | 6 +++---
3 files changed, 21 insertions(+), 18 deletions(-)
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 91879d16ef0da..22c0fcefe9c16 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -2730,13 +2730,16 @@ static void licm(VPlan &Plan) {
if (cannotHoistOrSinkRecipe(R))
continue;
- // narrowToSingleScalarRecipes should have already maximally narrowed
- // replicates to single-scalar replicates. TODO: When unrolling
- // replicatebyVF doesn't handle non-single-scalar replicates that are
- // sunk yet.
- if (auto *RepR = dyn_cast<VPReplicateRecipe>(&R))
- if (RepR->isPredicated() || !RepR->isSingleScalar())
+ if (auto *RepR = dyn_cast<VPReplicateRecipe>(&R)) {
+ assert(!RepR->isPredicated() &&
+ "Expected prior transformation of predicated replicates to "
+ "replicate regions");
+ // narrowToSingleScalarRecipes should have already maximally narrowed
+ // replicates to single-scalar replicates. TODO: When unrolling,
+ // replicatebyVF crashes on non-single-scalar replicates that are sunk.
+ if (!RepR->isSingleScalar())
continue;
+ }
// TODO: Use R.definedValues() instead of casting to VPSingleDefRecipe to
// support recipes with multiple defined values (e.g., interleaved loads).
diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-with-uniform-ops.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-with-uniform-ops.ll
index 408573273b962..9e1ed870d6ecc 100644
--- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-with-uniform-ops.ll
+++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-with-uniform-ops.ll
@@ -138,14 +138,14 @@ define i16 @for_phi_removed(ptr %src) {
; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 104
; UNROLL-NO-IC-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; UNROLL-NO-IC: [[MIDDLE_BLOCK]]:
-; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP0]], 0
-; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = select i1 [[TMP2]], i16 1, i16 0
+; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 0
+; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i16 1, i16 0
; UNROLL-NO-IC-NEXT: br label %[[SCALAR_PH:.*]]
; UNROLL-NO-IC: [[SCALAR_PH]]:
; UNROLL-NO-IC-NEXT: br label %[[LOOP:.*]]
; UNROLL-NO-IC: [[LOOP]]:
; UNROLL-NO-IC-NEXT: [[IV:%.*]] = phi i16 [ 104, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
-; UNROLL-NO-IC-NEXT: [[P:%.*]] = phi i16 [ [[TMP4]], %[[SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ]
+; UNROLL-NO-IC-NEXT: [[P:%.*]] = phi i16 [ [[TMP2]], %[[SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ]
; UNROLL-NO-IC-NEXT: [[L:%.*]] = load i32, ptr [[SRC]], align 4
; UNROLL-NO-IC-NEXT: [[C:%.*]] = icmp eq i32 [[L]], 0
; UNROLL-NO-IC-NEXT: [[SEL]] = select i1 [[C]], i16 1, i16 0
@@ -169,14 +169,14 @@ define i16 @for_phi_removed(ptr %src) {
; UNROLL-NO-VF-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 110
; UNROLL-NO-VF-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; UNROLL-NO-VF: [[MIDDLE_BLOCK]]:
-; UNROLL-NO-VF-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP0]], 0
-; UNROLL-NO-VF-NEXT: [[TMP4:%.*]] = select i1 [[TMP2]], i16 1, i16 0
+; UNROLL-NO-VF-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 0
+; UNROLL-NO-VF-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i16 1, i16 0
; UNROLL-NO-VF-NEXT: br label %[[SCALAR_PH:.*]]
; UNROLL-NO-VF: [[SCALAR_PH]]:
; UNROLL-NO-VF-NEXT: br label %[[LOOP:.*]]
; UNROLL-NO-VF: [[LOOP]]:
; UNROLL-NO-VF-NEXT: [[IV:%.*]] = phi i16 [ 110, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
-; UNROLL-NO-VF-NEXT: [[P:%.*]] = phi i16 [ [[TMP4]], %[[SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ]
+; UNROLL-NO-VF-NEXT: [[P:%.*]] = phi i16 [ [[TMP2]], %[[SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ]
; UNROLL-NO-VF-NEXT: [[L:%.*]] = load i32, ptr [[SRC]], align 4
; UNROLL-NO-VF-NEXT: [[C:%.*]] = icmp eq i32 [[L]], 0
; UNROLL-NO-VF-NEXT: [[SEL]] = select i1 [[C]], i16 1, i16 0
@@ -200,14 +200,14 @@ define i16 @for_phi_removed(ptr %src) {
; SINK-AFTER-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 108
; SINK-AFTER-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; SINK-AFTER: [[MIDDLE_BLOCK]]:
-; SINK-AFTER-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP0]], 0
-; SINK-AFTER-NEXT: [[TMP4:%.*]] = select i1 [[TMP2]], i16 1, i16 0
+; SINK-AFTER-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 0
+; SINK-AFTER-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i16 1, i16 0
; SINK-AFTER-NEXT: br label %[[SCALAR_PH:.*]]
; SINK-AFTER: [[SCALAR_PH]]:
; SINK-AFTER-NEXT: br label %[[LOOP:.*]]
; SINK-AFTER: [[LOOP]]:
; SINK-AFTER-NEXT: [[IV:%.*]] = phi i16 [ 108, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
-; SINK-AFTER-NEXT: [[P:%.*]] = phi i16 [ [[TMP4]], %[[SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ]
+; SINK-AFTER-NEXT: [[P:%.*]] = phi i16 [ [[TMP2]], %[[SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ]
; SINK-AFTER-NEXT: [[L:%.*]] = load i32, ptr [[SRC]], align 4
; SINK-AFTER-NEXT: [[C:%.*]] = icmp eq i32 [[L]], 0
; SINK-AFTER-NEXT: [[SEL]] = select i1 [[C]], i16 1, i16 0
diff --git a/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll b/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll
index 553350bc9a066..ecb51cb2acbe4 100644
--- a/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll
+++ b/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll
@@ -680,8 +680,8 @@ define i32 @postinc_not_iv_backedge_value(i32 %k) {
; INTERLEAVE-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
; INTERLEAVE-NEXT: br i1 [[TMP2]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], {{!llvm.loop ![0-9]+}}
; INTERLEAVE: [[MIDDLE_BLOCK]]:
-; INTERLEAVE-NEXT: [[TMP1:%.*]] = add i32 [[INDEX]], 1
-; INTERLEAVE-NEXT: [[TMP3:%.*]] = add i32 [[TMP1]], 2
+; INTERLEAVE-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 1
+; INTERLEAVE-NEXT: [[TMP1:%.*]] = add i32 [[TMP0]], 2
; INTERLEAVE-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[K]], [[N_VEC]]
; INTERLEAVE-NEXT: br i1 [[CMP_N]], label %[[FOR_END:.*]], label %[[SCALAR_PH]]
; INTERLEAVE: [[SCALAR_PH]]:
@@ -694,7 +694,7 @@ define i32 @postinc_not_iv_backedge_value(i32 %k) {
; INTERLEAVE-NEXT: [[CMP:%.*]] = icmp eq i32 [[INC]], [[K]]
; INTERLEAVE-NEXT: br i1 [[CMP]], label %[[FOR_END]], label %[[FOR_BODY]], {{!llvm.loop ![0-9]+}}
; INTERLEAVE: [[FOR_END]]:
-; INTERLEAVE-NEXT: [[INC_2_LCSSA:%.*]] = phi i32 [ [[INC_2]], %[[FOR_BODY]] ], [ [[TMP3]], %[[MIDDLE_BLOCK]] ]
+; INTERLEAVE-NEXT: [[INC_2_LCSSA:%.*]] = phi i32 [ [[INC_2]], %[[FOR_BODY]] ], [ [[TMP1]], %[[MIDDLE_BLOCK]] ]
; INTERLEAVE-NEXT: ret i32 [[INC_2_LCSSA]]
;
entry:
More information about the llvm-commits
mailing list