[llvm] [VPlan] Create edge mask for single-destination switch (PR #179107)

via llvm-commits llvm-commits at lists.llvm.org
Sun Feb 1 05:31:17 PST 2026


https://github.com/Noratrieb created https://github.com/llvm/llvm-project/pull/179107

When converting phis to blends, the `VPPredicator` expects an edge mask for every incoming edge of a phi node whose incoming blocks differ. No such mask was created when the phi's predecessor was a switch in which a case destination was the same as the default destination.

This was because `createSwitchEdgeMasks` sets edge masks in a loop over the *non-default* destinations. When there are no non-default destinations (but the switch still has at least one case; otherwise an earlier check would trigger and simply forward the source mask), this loop never executes, so the masks are never set.

To resolve this, we explicitly forward the source mask for these cases as well, which is correct because it is an unconditional branch, just a very convoluted one.

Fixes #179074

>From e868ee8d695a2933244bf6084196fd889dba5ade Mon Sep 17 00:00:00 2001
From: Noratrieb <48135649+Noratrieb at users.noreply.github.com>
Date: Sun, 1 Feb 2026 13:58:49 +0100
Subject: [PATCH] [VPlan] Create edge mask for single-destination switch

When converting phis to blends, the `VPPredicator` expects to have edge
masks to the phi node if the phi node has different incoming blocks.
This was not the case if the predecessor of the phi was a switch where
a conditional destination was the same as the default destination.

This was because when creating edge masks in `createSwitchEdgeMasks`,
edge masks are set in a loop through the *non-default* destinations.
But when there are no non-default destinations (but the switch still has
at least one case; otherwise an earlier check would trigger and simply
forward the source mask), this loop never executes, so the masks are
never set.

To resolve this, we explicitly forward the source mask for these cases
as well, which is correct because it is an unconditional branch, just a
very convoluted one.
---
 .../Transforms/Vectorize/VPlanPredicator.cpp  |  4 +
 .../LoopVectorize/predicate-switch.ll         | 77 ++++++++++++++++++-
 2 files changed, 78 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp b/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp
index f7e7fc29bc203..112e87492cf39 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanPredicator.cpp
@@ -225,6 +225,10 @@ void VPPredicator::createSwitchEdgeMasks(VPInstruction *SI) {
     DefaultMask = Builder.createNot(DefaultMask);
     if (SrcMask)
       DefaultMask = Builder.createLogicalAnd(SrcMask, DefaultMask);
+  } else {
+    // There are no other destinations than the default destination, so this is
+    // an unconditional branch.
+    DefaultMask = SrcMask;
   }
   setEdgeMask(Src, DefaultDst, DefaultMask);
 }
diff --git a/llvm/test/Transforms/LoopVectorize/predicate-switch.ll b/llvm/test/Transforms/LoopVectorize/predicate-switch.ll
index 3276528e54225..7943e42b622d3 100644
--- a/llvm/test/Transforms/LoopVectorize/predicate-switch.ll
+++ b/llvm/test/Transforms/LoopVectorize/predicate-switch.ll
@@ -508,6 +508,74 @@ define void @switch_unconditional(ptr %start) {
 ; IC2:       [[EXIT]]:
 ; IC2-NEXT:    ret void
 ;
+entry:
+  br label %loop.header
+
+loop.header:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
+  %gep = getelementptr i32, ptr %start, i64 %iv
+  %x = load i32, ptr %gep
+  switch i32 %x, label %foo []
+
+foo:
+  br label %loop.latch
+
+loop.latch:
+  store i32 0, ptr %gep
+  %iv.next = add i64 %iv, 1
+  %cmp = icmp eq i64 %iv.next, 100
+  br i1 %cmp, label %exit, label %loop.header
+
+exit:
+  ret void
+}
+
+define void @switch_unconditional_duplicate_target(ptr %start) {
+; IC1-LABEL: define void @switch_unconditional_duplicate_target(
+; IC1-SAME: ptr [[START:%.*]]) {
+; IC1-NEXT:  [[ENTRY:.*:]]
+; IC1-NEXT:    br label %[[VECTOR_PH:.*]]
+; IC1:       [[VECTOR_PH]]:
+; IC1-NEXT:    br label %[[VECTOR_BODY:.*]]
+; IC1:       [[VECTOR_BODY]]:
+; IC1-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; IC1-NEXT:    [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; IC1-NEXT:    [[TMP0:%.*]] = getelementptr i32, ptr [[START]], <2 x i64> [[VEC_IND]]
+; IC1-NEXT:    [[TMP1:%.*]] = extractelement <2 x ptr> [[TMP0]], i32 0
+; IC1-NEXT:    store <2 x i32> zeroinitializer, ptr [[TMP1]], align 4
+; IC1-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
+; IC1-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
+; IC1-NEXT:    [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
+; IC1-NEXT:    br i1 [[TMP2]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; IC1:       [[MIDDLE_BLOCK]]:
+; IC1-NEXT:    br label %[[EXIT:.*]]
+; IC1:       [[EXIT]]:
+; IC1-NEXT:    ret void
+;
+; IC2-LABEL: define void @switch_unconditional_duplicate_target(
+; IC2-SAME: ptr [[START:%.*]]) {
+; IC2-NEXT:  [[ENTRY:.*:]]
+; IC2-NEXT:    br label %[[VECTOR_PH:.*]]
+; IC2:       [[VECTOR_PH]]:
+; IC2-NEXT:    br label %[[VECTOR_BODY:.*]]
+; IC2:       [[VECTOR_BODY]]:
+; IC2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; IC2-NEXT:    [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; IC2-NEXT:    [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
+; IC2-NEXT:    [[TMP0:%.*]] = getelementptr i32, ptr [[START]], <2 x i64> [[VEC_IND]]
+; IC2-NEXT:    [[TMP1:%.*]] = extractelement <2 x ptr> [[TMP0]], i32 0
+; IC2-NEXT:    [[TMP2:%.*]] = getelementptr i32, ptr [[TMP1]], i64 2
+; IC2-NEXT:    store <2 x i32> zeroinitializer, ptr [[TMP1]], align 4
+; IC2-NEXT:    store <2 x i32> zeroinitializer, ptr [[TMP2]], align 4
+; IC2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; IC2-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD]], splat (i64 2)
+; IC2-NEXT:    [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
+; IC2-NEXT:    br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; IC2:       [[MIDDLE_BLOCK]]:
+; IC2-NEXT:    br label %[[EXIT:.*]]
+; IC2:       [[EXIT]]:
+; IC2-NEXT:    ret void
+;
 entry:
   br label %loop
 
@@ -515,12 +583,13 @@ loop:
   %iv = phi i64 [ 0, %entry ], [ %iv.next, %latch ]
   %gep = getelementptr i32, ptr %start, i64 %iv
   %x = load i32, ptr %gep
-  switch i32 %x, label %foo []
+  br i1 0, label %forward, label %latch
 
-foo:
-  br label %latch
+forward:
+  switch i32 %x, label %latch [ i32 0, label %latch ]
 
 latch:
+  %gep.1 = phi ptr [ %gep, %loop ], [ null, %forward ], [ null, %forward ]
   store i32 0, ptr %gep
   %iv.next = add i64 %iv, 1
   %cmp = icmp eq i64 %iv.next, 100
@@ -537,6 +606,7 @@ exit:
 ; IC1: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
 ; IC1: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
 ; IC1: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]], [[META2]]}
+; IC1: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]}
 ;.
 ; IC2: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
 ; IC2: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
@@ -544,4 +614,5 @@ exit:
 ; IC2: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
 ; IC2: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
 ; IC2: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]], [[META2]]}
+; IC2: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]}
 ;.



More information about the llvm-commits mailing list