[llvm] [SimplifyCFG] Fix `SimplifyCFG` pass to skip folding when both blocks contain convergence intrinsics. (PR #166452)

Lucie Choi via llvm-commits llvm-commits at lists.llvm.org
Tue Nov 18 13:58:51 PST 2025


https://github.com/luciechoi updated https://github.com/llvm/llvm-project/pull/166452

>From 8d678c16e056bc74bd9f16779ad568909f0da480 Mon Sep 17 00:00:00 2001
From: luciechoi <ychoi0407 at gmail.com>
Date: Tue, 4 Nov 2025 21:01:51 +0000
Subject: [PATCH 1/2] Fix simplifycfg pass

---
 llvm/lib/Transforms/Utils/BasicBlockUtils.cpp |  9 +++
 ...rging-duplicate-convergence-instrinsics.ll | 68 +++++++++++++++++++
 2 files changed, 77 insertions(+)
 create mode 100644 llvm/test/Transforms/SimplifyCFG/skip-merging-duplicate-convergence-instrinsics.ll

diff --git a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
index 11db0ec487328..c1b6140abb471 100644
--- a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -230,6 +230,15 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU,
   // Don't break self-loops.
   if (PredBB == BB) return false;
 
+  // Don't merge if both the basic block and the predecessor contain convergence
+  // control intrinsics.
+  for (Instruction &I : *BB)
+    if (isa<ConvergenceControlInst>(I)) {
+      for (Instruction &I : *PredBB)
+        if (isa<ConvergenceControlInst>(I))
+          return false;
+    }
+
   // Don't break unwinding instructions or terminators with other side-effects.
   Instruction *PTI = PredBB->getTerminator();
   if (PTI->isSpecialTerminator() || PTI->mayHaveSideEffects())
diff --git a/llvm/test/Transforms/SimplifyCFG/skip-merging-duplicate-convergence-instrinsics.ll b/llvm/test/Transforms/SimplifyCFG/skip-merging-duplicate-convergence-instrinsics.ll
new file mode 100644
index 0000000000000..d5ae64f6897e3
--- /dev/null
+++ b/llvm/test/Transforms/SimplifyCFG/skip-merging-duplicate-convergence-instrinsics.ll
@@ -0,0 +1,68 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -S -passes=simplifycfg | FileCheck %s
+
+declare token @llvm.experimental.convergence.entry() #0
+
+define void @nested(i32 %tidx, i32 %tidy, ptr %array) #0 {
+; CHECK-LABEL: @nested(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = tail call token @llvm.experimental.convergence.entry()
+; CHECK-NEXT:    [[TMP1:%.*]] = or i32 [[TIDY:%.*]], [[TIDX:%.*]]
+; CHECK-NEXT:    [[OR_COND_I:%.*]] = icmp eq i32 [[TMP1]], 0
+; CHECK-NEXT:    br label [[FOR_COND_I:%.*]]
+; CHECK:       for.cond.i:
+; CHECK-NEXT:    [[TMP2:%.*]] = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token [[TMP0]]) ]
+; CHECK-NEXT:    br label [[FOR_COND1_I:%.*]]
+; CHECK:       for.cond1.i:
+; CHECK-NEXT:    [[CMP2_I:%.*]] = phi i1 [ false, [[FOR_BODY4_I:%.*]] ], [ true, [[FOR_COND_I]] ]
+; CHECK-NEXT:    [[TMP3:%.*]] = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token [[TMP2]]) ]
+; CHECK-NEXT:    br i1 [[CMP2_I]], label [[FOR_BODY4_I]], label [[EXIT:%.*]]
+; CHECK:       for.body4.i:
+; CHECK-NEXT:    br i1 [[OR_COND_I]], label [[IF_THEN_I:%.*]], label [[FOR_COND1_I]]
+; CHECK:       if.then.i:
+; CHECK-NEXT:    [[HLSL_WAVE_ACTIVE_MAX7_I:%.*]] = call spir_func i32 @llvm.spv.wave.reduce.umax.i32(i32 0) [ "convergencectrl"(token [[TMP3]]) ]
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[ARRAY:%.*]], i32 0
+; CHECK-NEXT:    store i32 [[HLSL_WAVE_ACTIVE_MAX7_I]], ptr [[TMP4]], align 4
+; CHECK-NEXT:    br label [[EXIT]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret void
+;
+entry:
+  %0 = tail call token @llvm.experimental.convergence.entry()
+  %2 = or i32 %tidy, %tidx
+  %or.cond.i = icmp eq i32 %2, 0
+  br label %for.cond.i
+
+for.cond.i:
+  %3 = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token %0) ]
+  br label %for.cond1.i
+
+for.cond1.i:
+  %cmp2.i = phi i1 [ false, %for.body4.i ], [ true, %for.cond.i ]
+  %4 = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token %3) ]
+  br i1 %cmp2.i, label %for.body4.i, label %cleanup.i.loopexit
+
+for.body4.i:
+  br i1 %or.cond.i, label %if.then.i, label %for.cond1.i
+
+if.then.i:
+  %hlsl.wave.active.max7.i = call spir_func i32 @llvm.spv.wave.reduce.umax.i32(i32 0) [ "convergencectrl"(token %4) ]
+  %5 = getelementptr inbounds i32, ptr %array, i32 0
+  store i32 %hlsl.wave.active.max7.i, ptr %5, align 4
+  br label %cleanup.i
+
+cleanup.i.loopexit:
+  br label %cleanup.i
+
+cleanup.i:
+  br label %exit
+
+exit:
+  ret void
+}
+
+declare token @llvm.experimental.convergence.loop() #0
+
+declare i32 @llvm.spv.wave.reduce.umax.i32(i32) #0
+
+attributes #0 = { convergent }

>From 0cad7f0ef150783d1bd5f15da9d6d51bb87045af Mon Sep 17 00:00:00 2001
From: luciechoi <ychoi0407 at gmail.com>
Date: Tue, 18 Nov 2025 19:58:19 +0000
Subject: [PATCH 2/2] Remove hlsl&spv specific functions

---
 .../skip-merging-duplicate-convergence-instrinsics.ll  | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/llvm/test/Transforms/SimplifyCFG/skip-merging-duplicate-convergence-instrinsics.ll b/llvm/test/Transforms/SimplifyCFG/skip-merging-duplicate-convergence-instrinsics.ll
index d5ae64f6897e3..368ae96d0c3c2 100644
--- a/llvm/test/Transforms/SimplifyCFG/skip-merging-duplicate-convergence-instrinsics.ll
+++ b/llvm/test/Transforms/SimplifyCFG/skip-merging-duplicate-convergence-instrinsics.ll
@@ -20,9 +20,9 @@ define void @nested(i32 %tidx, i32 %tidy, ptr %array) #0 {
 ; CHECK:       for.body4.i:
 ; CHECK-NEXT:    br i1 [[OR_COND_I]], label [[IF_THEN_I:%.*]], label [[FOR_COND1_I]]
 ; CHECK:       if.then.i:
-; CHECK-NEXT:    [[HLSL_WAVE_ACTIVE_MAX7_I:%.*]] = call spir_func i32 @llvm.spv.wave.reduce.umax.i32(i32 0) [ "convergencectrl"(token [[TMP3]]) ]
+; CHECK-NEXT:    [[TEST_VAL:%.*]] = call spir_func i32 @func_test(i32 0) [ "convergencectrl"(token [[TMP3]]) ]
 ; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[ARRAY:%.*]], i32 0
-; CHECK-NEXT:    store i32 [[HLSL_WAVE_ACTIVE_MAX7_I]], ptr [[TMP4]], align 4
+; CHECK-NEXT:    store i32 [[TEST_VAL]], ptr [[TMP4]], align 4
 ; CHECK-NEXT:    br label [[EXIT]]
 ; CHECK:       exit:
 ; CHECK-NEXT:    ret void
@@ -46,9 +46,9 @@ for.body4.i:
   br i1 %or.cond.i, label %if.then.i, label %for.cond1.i
 
 if.then.i:
-  %hlsl.wave.active.max7.i = call spir_func i32 @llvm.spv.wave.reduce.umax.i32(i32 0) [ "convergencectrl"(token %4) ]
+  %test.val = call spir_func i32 @func_test(i32 0) [ "convergencectrl"(token %4) ]
   %5 = getelementptr inbounds i32, ptr %array, i32 0
-  store i32 %hlsl.wave.active.max7.i, ptr %5, align 4
+  store i32 %test.val, ptr %5, align 4
   br label %cleanup.i
 
 cleanup.i.loopexit:
@@ -63,6 +63,6 @@ exit:
 
 declare token @llvm.experimental.convergence.loop() #0
 
-declare i32 @llvm.spv.wave.reduce.umax.i32(i32) #0
+declare i32 @func_test(i32) #0
 
 attributes #0 = { convergent }



More information about the llvm-commits mailing list