[llvm] 9ffd2e4 - [SimplifyCFG] Fix `SimplifyCFG` pass to skip folding when both blocks contain convergence loop/entry intrinsics. (#166452)
via llvm-commits
llvm-commits at lists.llvm.org
Sat Nov 29 13:57:10 PST 2025
Author: Lucie Choi
Date: 2025-11-30T06:57:06+09:00
New Revision: 9ffd2e40c1c469e3ccb0798fa15fc38d6df42652
URL: https://github.com/llvm/llvm-project/commit/9ffd2e40c1c469e3ccb0798fa15fc38d6df42652
DIFF: https://github.com/llvm/llvm-project/commit/9ffd2e40c1c469e3ccb0798fa15fc38d6df42652.diff
LOG: [SimplifyCFG] Fix `SimplifyCFG` pass to skip folding when both blocks contain convergence loop/entry intrinsics. (#166452)
Fixes https://github.com/llvm/llvm-project/issues/165642. A [similar
fix](https://github.com/llvm/llvm-project/pull/165643) is being made in
the `IndVarSimplify` pass to account for convergence tokens.
The [LLVM
spec](https://llvm.org/docs/ConvergentOperations.html#llvm-experimental-convergence-loop)
states that a basic block may contain only a single loop/entry convergence
token.
This PR fixes the issue in the `SimplifyCFG` pass so that when a basic block
and its predecessor both contain such convergence intrinsics, it skips
merging the two blocks.
Added:
llvm/test/Transforms/SimplifyCFG/skip-merging-duplicate-convergence-instrinsics.ll
Modified:
llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
index 11db0ec487328..076c5da4393fc 100644
--- a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -92,6 +92,15 @@ emptyAndDetachBlock(BasicBlock *BB,
"applying corresponding DTU updates.");
}
+static bool HasLoopOrEntryConvergenceToken(const BasicBlock *BB) {
+ for (const Instruction &I : *BB) {
+ const ConvergenceControlInst *CCI = dyn_cast<ConvergenceControlInst>(&I);
+ if (CCI && (CCI->isLoop() || CCI->isEntry()))
+ return true;
+ }
+ return false;
+}
+
void llvm::detachDeadBlocks(ArrayRef<BasicBlock *> BBs,
SmallVectorImpl<DominatorTree::UpdateType> *Updates,
bool KeepOneInputPHIs) {
@@ -259,6 +268,13 @@ bool llvm::MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU,
if (llvm::is_contained(PN.incoming_values(), &PN))
return false;
+ // Don't merge if both the basic block and the predecessor contain loop or
+ // entry convergence intrinsics, since there may only be one such convergence
+ // token per block.
+ if (HasLoopOrEntryConvergenceToken(BB) &&
+ HasLoopOrEntryConvergenceToken(PredBB))
+ return false;
+
LLVM_DEBUG(dbgs() << "Merging: " << BB->getName() << " into "
<< PredBB->getName() << "\n");
diff --git a/llvm/test/Transforms/SimplifyCFG/skip-merging-duplicate-convergence-instrinsics.ll b/llvm/test/Transforms/SimplifyCFG/skip-merging-duplicate-convergence-instrinsics.ll
new file mode 100644
index 0000000000000..368ae96d0c3c2
--- /dev/null
+++ b/llvm/test/Transforms/SimplifyCFG/skip-merging-duplicate-convergence-instrinsics.ll
@@ -0,0 +1,68 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -S -passes=simplifycfg | FileCheck %s
+
+declare token @llvm.experimental.convergence.entry() #0
+
+define void @nested(i32 %tidx, i32 %tidy, ptr %array) #0 {
+; CHECK-LABEL: @nested(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = tail call token @llvm.experimental.convergence.entry()
+; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[TIDY:%.*]], [[TIDX:%.*]]
+; CHECK-NEXT: [[OR_COND_I:%.*]] = icmp eq i32 [[TMP1]], 0
+; CHECK-NEXT: br label [[FOR_COND_I:%.*]]
+; CHECK: for.cond.i:
+; CHECK-NEXT: [[TMP2:%.*]] = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token [[TMP0]]) ]
+; CHECK-NEXT: br label [[FOR_COND1_I:%.*]]
+; CHECK: for.cond1.i:
+; CHECK-NEXT: [[CMP2_I:%.*]] = phi i1 [ false, [[FOR_BODY4_I:%.*]] ], [ true, [[FOR_COND_I]] ]
+; CHECK-NEXT: [[TMP3:%.*]] = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token [[TMP2]]) ]
+; CHECK-NEXT: br i1 [[CMP2_I]], label [[FOR_BODY4_I]], label [[EXIT:%.*]]
+; CHECK: for.body4.i:
+; CHECK-NEXT: br i1 [[OR_COND_I]], label [[IF_THEN_I:%.*]], label [[FOR_COND1_I]]
+; CHECK: if.then.i:
+; CHECK-NEXT: [[TEST_VAL:%.*]] = call spir_func i32 @func_test(i32 0) [ "convergencectrl"(token [[TMP3]]) ]
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[ARRAY:%.*]], i32 0
+; CHECK-NEXT: store i32 [[TEST_VAL]], ptr [[TMP4]], align 4
+; CHECK-NEXT: br label [[EXIT]]
+; CHECK: exit:
+; CHECK-NEXT: ret void
+;
+entry:
+ %0 = tail call token @llvm.experimental.convergence.entry()
+ %2 = or i32 %tidy, %tidx
+ %or.cond.i = icmp eq i32 %2, 0
+ br label %for.cond.i
+
+for.cond.i:
+ %3 = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token %0) ]
+ br label %for.cond1.i
+
+for.cond1.i:
+ %cmp2.i = phi i1 [ false, %for.body4.i ], [ true, %for.cond.i ]
+ %4 = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token %3) ]
+ br i1 %cmp2.i, label %for.body4.i, label %cleanup.i.loopexit
+
+for.body4.i:
+ br i1 %or.cond.i, label %if.then.i, label %for.cond1.i
+
+if.then.i:
+ %test.val = call spir_func i32 @func_test(i32 0) [ "convergencectrl"(token %4) ]
+ %5 = getelementptr inbounds i32, ptr %array, i32 0
+ store i32 %test.val, ptr %5, align 4
+ br label %cleanup.i
+
+cleanup.i.loopexit:
+ br label %cleanup.i
+
+cleanup.i:
+ br label %exit
+
+exit:
+ ret void
+}
+
+declare token @llvm.experimental.convergence.loop() #0
+
+declare i32 @func_test(i32) #0
+
+attributes #0 = { convergent }
More information about the llvm-commits
mailing list