[llvm] [IndVarSimplify] Fix `IndVarSimplify` to skip on unfolding predicates when the loop contains control convergence operations. (PR #165643)
Lucie Choi via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 4 10:19:33 PST 2025
https://github.com/luciechoi updated https://github.com/llvm/llvm-project/pull/165643
>From e01b798113699f16db7c7776511bc838c866c69f Mon Sep 17 00:00:00 2001
From: luciechoi <ychoi0407 at gmail.com>
Date: Thu, 30 Oct 2025 03:36:25 +0000
Subject: [PATCH 1/4] Fix indvars pass to skip unfolding predicates on
control convergence operations
---
llvm/lib/Transforms/Scalar/IndVarSimplify.cpp | 31 ++++
.../skip-predication-convergence.ll | 98 ++++++++++++
.../skip-predictaion-nested-convergence.ll | 139 ++++++++++++++++++
3 files changed, 268 insertions(+)
create mode 100644 llvm/test/Transforms/IndVarSimplify/skip-predication-convergence.ll
create mode 100644 llvm/test/Transforms/IndVarSimplify/skip-predictaion-nested-convergence.ll
diff --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
index 7ebcc219efc15..421aad8872f9a 100644
--- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -1859,6 +1859,37 @@ bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
}
}
+ // If the loop body uses a convergence token defined within the loop, skip
+ // predication. This is to avoid changing the convergence behavior of the
+ // loop.
+ SmallVector<BasicBlock *, 16> blocks = ExitingBlocks;
+ SmallVector<Value *, 16> tokens = {};
+ size_t index = 0; // Assume Exiting Blocks are sorted.
+ while (index < blocks.size()) {
+ BasicBlock *BB = blocks[index];
+ index++;
+ const auto exitingBlockName = BB->getName();
+ for (Instruction &I : *BB) {
+ // Check if the instruction uses any convergence tokens.
+ if (auto *CB = dyn_cast<CallBase>(&I);
+ CB && !isa<ConvergenceControlInst>(&I)) {
+ auto token = CB->getConvergenceControlToken();
+ if (token && llvm::is_contained(tokens, token)) {
+ return false;
+ }
+ }
+ if (isa<ConvergenceControlInst>(&I)) {
+ tokens.push_back(cast<Value>(&I));
+ }
+ }
+
+ for (BasicBlock *Succ : successors(BB)) {
+ const auto succName = Succ->getName();
+ if (Succ != L->getLoopLatch() && !llvm::is_contained(blocks, Succ))
+ blocks.push_back(Succ);
+ }
+ }
+
bool Changed = false;
// Finally, do the actual predication for all predicatable blocks. A couple
// of notes here:
diff --git a/llvm/test/Transforms/IndVarSimplify/skip-predication-convergence.ll b/llvm/test/Transforms/IndVarSimplify/skip-predication-convergence.ll
new file mode 100644
index 0000000000000..12fca6778f15e
--- /dev/null
+++ b/llvm/test/Transforms/IndVarSimplify/skip-predication-convergence.ll
@@ -0,0 +1,98 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -passes=indvars -indvars-predicate-loops=1 -S | FileCheck %s
+
+; Loop with body using loop convergence token should be skipped by IndVarSimplify.
+
+%"class.hlsl::RWStructuredBuffer" = type { target("spirv.VulkanBuffer", [0 x i32], 12, 1), target("spirv.VulkanBuffer", i32, 12, 1) }
+
+@_ZL3Out = internal global %"class.hlsl::RWStructuredBuffer" poison, align 8
+@.str = private unnamed_addr constant [4 x i8] c"Out\00", align 1
+
+declare token @llvm.experimental.convergence.entry() #0
+
+define void @loop() local_unnamed_addr #1 {
+; CHECK-LABEL: @loop(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = tail call token @llvm.experimental.convergence.entry()
+; CHECK-NEXT: [[TMP1:%.*]] = tail call target("spirv.VulkanBuffer", [0 x i32], 12, 1) @llvm.spv.resource.handlefrombinding.tspirv.VulkanBuffer_a0i32_12_1t(i32 0, i32 0, i32 1, i32 0, ptr nonnull @.str)
+; CHECK-NEXT: [[TMP2:%.*]] = tail call target("spirv.VulkanBuffer", i32, 12, 1) @llvm.spv.resource.counterhandlefromimplicitbinding.tspirv.VulkanBuffer_i32_12_1t.tspirv.VulkanBuffer_a0i32_12_1t(target("spirv.VulkanBuffer", [0 x i32], 12, 1) [[TMP1]], i32 0, i32 0)
+; CHECK-NEXT: store target("spirv.VulkanBuffer", [0 x i32], 12, 1) [[TMP1]], ptr @_ZL3Out, align 8
+; CHECK-NEXT: store target("spirv.VulkanBuffer", i32, 12, 1) [[TMP2]], ptr getelementptr inbounds nuw (i8, ptr @_ZL3Out, i64 8), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.spv.thread.id.in.group.i32(i32 0)
+; CHECK-NEXT: br label [[FOR_COND_I:%.*]]
+; CHECK: for.cond.i:
+; CHECK-NEXT: [[I_0_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ]
+; CHECK-NEXT: [[TMP4:%.*]] = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token [[TMP0]]) ]
+; CHECK-NEXT: [[CMP_I:%.*]] = icmp ult i32 [[I_0_I]], 8
+; CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_Z4LOOPDV3_J_EXIT_LOOPEXIT:%.*]]
+; CHECK: for.body.i:
+; CHECK-NEXT: [[CMP1_I:%.*]] = icmp eq i32 [[I_0_I]], [[TMP3]]
+; CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[I_0_I]], 1
+; CHECK-NEXT: br i1 [[CMP1_I]], label [[IF_THEN_I:%.*]], label [[FOR_COND_I]]
+; CHECK: _Z4loopDv3_j.exit.loopexit:
+; CHECK-NEXT: br label [[_Z4LOOPDV3_J_EXIT:%.*]]
+; CHECK: if.then.i:
+; CHECK-NEXT: [[HLSL_WAVE_ACTIVE_MAX2_I:%.*]] = call spir_func i32 @llvm.spv.wave.reduce.umax.i32(i32 [[TMP3]]) [ "convergencectrl"(token [[TMP4]]) ]
+; CHECK-NEXT: [[TMP5:%.*]] = tail call noundef align 4 dereferenceable(4) ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.VulkanBuffer_a0i32_12_1t(target("spirv.VulkanBuffer", [0 x i32], 12, 1) [[TMP1]], i32 [[TMP3]])
+; CHECK-NEXT: store i32 [[HLSL_WAVE_ACTIVE_MAX2_I]], ptr addrspace(11) [[TMP5]], align 4
+; CHECK-NEXT: br label [[_Z4LOOPDV3_J_EXIT]]
+; CHECK: _Z4loopDv3_j.exit:
+; CHECK-NEXT: ret void
+;
+entry:
+ %0 = tail call token @llvm.experimental.convergence.entry()
+ %1 = tail call target("spirv.VulkanBuffer", [0 x i32], 12, 1) @llvm.spv.resource.handlefrombinding.tspirv.VulkanBuffer_a0i32_12_1t(i32 0, i32 0, i32 1, i32 0, ptr nonnull @.str)
+ %2 = tail call target("spirv.VulkanBuffer", i32, 12, 1) @llvm.spv.resource.counterhandlefromimplicitbinding.tspirv.VulkanBuffer_i32_12_1t.tspirv.VulkanBuffer_a0i32_12_1t(target("spirv.VulkanBuffer", [0 x i32], 12, 1) %1, i32 0, i32 0)
+ store target("spirv.VulkanBuffer", [0 x i32], 12, 1) %1, ptr @_ZL3Out, align 8
+ store target("spirv.VulkanBuffer", i32, 12, 1) %2, ptr getelementptr inbounds nuw (i8, ptr @_ZL3Out, i64 8), align 8
+ %3 = tail call i32 @llvm.spv.thread.id.in.group.i32(i32 0)
+ br label %for.cond.i
+
+; Loop:
+for.cond.i: ; preds = %for.body.i, %entry
+ %i.0.i = phi i32 [ 0, %entry ], [ %inc.i, %for.body.i ]
+ %4 = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token %0) ]
+ %cmp.i = icmp ult i32 %i.0.i, 8
+ br i1 %cmp.i, label %for.body.i, label %_Z4loopDv3_j.exit.loopexit
+
+for.body.i: ; preds = %for.cond.i
+ %cmp1.i = icmp eq i32 %i.0.i, %3
+ %inc.i = add nuw nsw i32 %i.0.i, 1
+ br i1 %cmp1.i, label %if.then.i, label %for.cond.i
+
+; Exit blocks
+_Z4loopDv3_j.exit.loopexit: ; preds = %for.cond.i
+ br label %_Z4loopDv3_j.exit
+
+if.then.i: ; preds = %for.body.i
+ %hlsl.wave.active.max2.i = call spir_func i32 @llvm.spv.wave.reduce.umax.i32(i32 %3) [ "convergencectrl"(token %4) ]
+ %5 = tail call noundef align 4 dereferenceable(4) ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.VulkanBuffer_a0i32_12_1t(target("spirv.VulkanBuffer", [0 x i32], 12, 1) %1, i32 %3)
+ store i32 %hlsl.wave.active.max2.i, ptr addrspace(11) %5, align 4
+ br label %_Z4loopDv3_j.exit
+
+_Z4loopDv3_j.exit: ; preds = %_Z4loopDv3_j.exit.loopexit, %if.then.i
+ ret void
+}
+
+; Function Attrs: mustprogress nofree nosync nounwind willreturn memory(none)
+declare i32 @llvm.spv.thread.id.in.group.i32(i32) #2
+
+; Function Attrs: convergent mustprogress nocallback nofree nosync nounwind willreturn memory(none)
+declare token @llvm.experimental.convergence.loop() #0
+
+; Function Attrs: convergent mustprogress nocallback nofree nosync nounwind willreturn memory(none)
+declare i32 @llvm.spv.wave.reduce.umax.i32(i32) #0
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none)
+declare target("spirv.VulkanBuffer", [0 x i32], 12, 1) @llvm.spv.resource.handlefrombinding.tspirv.VulkanBuffer_a0i32_12_1t(i32, i32, i32, i32, ptr) #4
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none)
+declare target("spirv.VulkanBuffer", i32, 12, 1) @llvm.spv.resource.counterhandlefromimplicitbinding.tspirv.VulkanBuffer_i32_12_1t.tspirv.VulkanBuffer_a0i32_12_1t(target("spirv.VulkanBuffer", [0 x i32], 12, 1), i32, i32) #4
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none)
+declare ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.VulkanBuffer_a0i32_12_1t(target("spirv.VulkanBuffer", [0 x i32], 12, 1), i32) #4
+
+attributes #0 = { convergent mustprogress nocallback nofree nosync nounwind willreturn memory(none) }
+attributes #1 = { convergent noinline norecurse "frame-pointer"="all" "hlsl.numthreads"="8,1,1" "hlsl.shader"="compute" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #2 = { mustprogress nofree nosync nounwind willreturn memory(none) }
+attributes #4 = { mustprogress nocallback nofree nosync nounwind willreturn memory(none) }
diff --git a/llvm/test/Transforms/IndVarSimplify/skip-predictaion-nested-convergence.ll b/llvm/test/Transforms/IndVarSimplify/skip-predictaion-nested-convergence.ll
new file mode 100644
index 0000000000000..22f25b1428556
--- /dev/null
+++ b/llvm/test/Transforms/IndVarSimplify/skip-predictaion-nested-convergence.ll
@@ -0,0 +1,139 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -passes=indvars -indvars-predicate-loops=1 -S | FileCheck %s
+
+; Nested loops with body using loop convergence token should be skipped by IndVarSimplify.
+
+%"class.hlsl::RWStructuredBuffer" = type { target("spirv.VulkanBuffer", [0 x i32], 12, 1), target("spirv.VulkanBuffer", i32, 12, 1) }
+
+@_ZL3Out = internal global %"class.hlsl::RWStructuredBuffer" poison, align 8
+@.str = private unnamed_addr constant [4 x i8] c"Out\00", align 1
+
+; Function Attrs: convergent mustprogress nocallback nofree nosync nounwind willreturn memory(none)
+declare token @llvm.experimental.convergence.entry() #0
+
+define void @nested() local_unnamed_addr #1 {
+; CHECK-LABEL: @nested(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = tail call token @llvm.experimental.convergence.entry()
+; CHECK-NEXT: [[TMP1:%.*]] = tail call target("spirv.VulkanBuffer", [0 x i32], 12, 1) @llvm.spv.resource.handlefrombinding.tspirv.VulkanBuffer_a0i32_12_1t(i32 0, i32 0, i32 1, i32 0, ptr nonnull @.str)
+; CHECK-NEXT: [[TMP2:%.*]] = tail call target("spirv.VulkanBuffer", i32, 12, 1) @llvm.spv.resource.counterhandlefromimplicitbinding.tspirv.VulkanBuffer_i32_12_1t.tspirv.VulkanBuffer_a0i32_12_1t(target("spirv.VulkanBuffer", [0 x i32], 12, 1) [[TMP1]], i32 0, i32 0)
+; CHECK-NEXT: store target("spirv.VulkanBuffer", [0 x i32], 12, 1) [[TMP1]], ptr @_ZL3Out, align 8
+; CHECK-NEXT: store target("spirv.VulkanBuffer", i32, 12, 1) [[TMP2]], ptr getelementptr inbounds nuw (i8, ptr @_ZL3Out, i64 8), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.spv.thread.id.in.group.i32(i32 0)
+; CHECK-NEXT: [[TMP4:%.*]] = tail call i32 @llvm.spv.thread.id.in.group.i32(i32 1)
+; CHECK-NEXT: [[MUL_I:%.*]] = shl nuw nsw i32 [[TMP3]], 3
+; CHECK-NEXT: [[ADD_I:%.*]] = add nuw nsw i32 [[MUL_I]], [[TMP4]]
+; CHECK-NEXT: br label [[FOR_COND_I:%.*]]
+; CHECK: for.cond.i:
+; CHECK-NEXT: [[I_0_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC10_I:%.*]], [[CLEANUP_I:%.*]] ]
+; CHECK-NEXT: [[TMP5:%.*]] = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token [[TMP0]]) ]
+; CHECK-NEXT: [[CMP_I:%.*]] = icmp ult i32 [[I_0_I]], 8
+; CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_COND1_I_PREHEADER:%.*]], label [[_Z4NESTEDDV3_J_EXIT:%.*]]
+; CHECK: for.cond1.i.preheader:
+; CHECK-NEXT: [[CMP5_I:%.*]] = icmp eq i32 [[I_0_I]], [[TMP3]]
+; CHECK-NEXT: br label [[FOR_COND1_I:%.*]]
+; CHECK: for.cond1.i:
+; CHECK-NEXT: [[J_0_I:%.*]] = phi i32 [ [[INC_I:%.*]], [[FOR_BODY4_I:%.*]] ], [ 0, [[FOR_COND1_I_PREHEADER]] ]
+; CHECK-NEXT: [[TMP6:%.*]] = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token [[TMP5]]) ]
+; CHECK-NEXT: [[CMP2_I:%.*]] = icmp ult i32 [[J_0_I]], 8
+; CHECK-NEXT: br i1 [[CMP2_I]], label [[FOR_BODY4_I]], label [[CLEANUP_I_LOOPEXIT:%.*]]
+; CHECK: for.body4.i:
+; CHECK-NEXT: [[CMP6_I:%.*]] = icmp eq i32 [[J_0_I]], [[TMP4]]
+; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[CMP5_I]], i1 [[CMP6_I]], i1 false
+; CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[J_0_I]], 1
+; CHECK-NEXT: br i1 [[OR_COND]], label [[IF_THEN_I:%.*]], label [[FOR_COND1_I]]
+; CHECK: cleanup.i.loopexit:
+; CHECK-NEXT: br label [[CLEANUP_I]]
+; CHECK: if.then.i:
+; CHECK-NEXT: [[HLSL_WAVE_ACTIVE_MAX7_I:%.*]] = call spir_func i32 @llvm.spv.wave.reduce.umax.i32(i32 [[ADD_I]]) [ "convergencectrl"(token [[TMP6]]) ]
+; CHECK-NEXT: [[TMP7:%.*]] = tail call noundef align 4 dereferenceable(4) ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.VulkanBuffer_a0i32_12_1t(target("spirv.VulkanBuffer", [0 x i32], 12, 1) [[TMP1]], i32 [[ADD_I]])
+; CHECK-NEXT: store i32 [[HLSL_WAVE_ACTIVE_MAX7_I]], ptr addrspace(11) [[TMP7]], align 4
+; CHECK-NEXT: br label [[CLEANUP_I]]
+; CHECK: cleanup.i:
+; CHECK-NEXT: [[INC10_I]] = add nuw nsw i32 [[I_0_I]], 1
+; CHECK-NEXT: br label [[FOR_COND_I]]
+; CHECK: _Z4nestedDv3_j.exit:
+; CHECK-NEXT: ret void
+;
+entry:
+ %0 = tail call token @llvm.experimental.convergence.entry()
+ %1 = tail call target("spirv.VulkanBuffer", [0 x i32], 12, 1) @llvm.spv.resource.handlefrombinding.tspirv.VulkanBuffer_a0i32_12_1t(i32 0, i32 0, i32 1, i32 0, ptr nonnull @.str)
+ %2 = tail call target("spirv.VulkanBuffer", i32, 12, 1) @llvm.spv.resource.counterhandlefromimplicitbinding.tspirv.VulkanBuffer_i32_12_1t.tspirv.VulkanBuffer_a0i32_12_1t(target("spirv.VulkanBuffer", [0 x i32], 12, 1) %1, i32 0, i32 0)
+ store target("spirv.VulkanBuffer", [0 x i32], 12, 1) %1, ptr @_ZL3Out, align 8
+ store target("spirv.VulkanBuffer", i32, 12, 1) %2, ptr getelementptr inbounds nuw (i8, ptr @_ZL3Out, i64 8), align 8
+ %3 = tail call i32 @llvm.spv.thread.id.in.group.i32(i32 0)
+ %4 = tail call i32 @llvm.spv.thread.id.in.group.i32(i32 1)
+ %mul.i = shl nuw nsw i32 %3, 3
+ %add.i = add nuw nsw i32 %mul.i, %4
+ br label %for.cond.i
+
+for.cond.i: ; preds = %cleanup.i, %entry
+ %i.0.i = phi i32 [ 0, %entry ], [ %inc10.i, %cleanup.i ]
+ %5 = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token %0) ]
+ %cmp.i = icmp ult i32 %i.0.i, 8
+ br i1 %cmp.i, label %for.cond1.i.preheader, label %_Z4nestedDv3_j.exit
+
+; Preheader:
+for.cond1.i.preheader: ; preds = %for.cond.i
+ %cmp5.i = icmp eq i32 %i.0.i, %3
+ br label %for.cond1.i
+
+; Loop:
+for.cond1.i: ; preds = %for.body4.i, %for.cond1.i.preheader
+ %j.0.i = phi i32 [ %inc.i, %for.body4.i ], [ 0, %for.cond1.i.preheader ]
+ %6 = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token %5) ]
+ %cmp2.i = icmp ult i32 %j.0.i, 8
+ br i1 %cmp2.i, label %for.body4.i, label %cleanup.i.loopexit
+
+for.body4.i: ; preds = %for.cond1.i
+ %cmp6.i = icmp eq i32 %j.0.i, %4
+ %or.cond = select i1 %cmp5.i, i1 %cmp6.i, i1 false
+ %inc.i = add nuw nsw i32 %j.0.i, 1
+ br i1 %or.cond, label %if.then.i, label %for.cond1.i
+
+; Exit blocks
+cleanup.i.loopexit: ; preds = %for.cond1.i
+ br label %cleanup.i
+
+if.then.i: ; preds = %for.body4.i
+ %hlsl.wave.active.max7.i = call spir_func i32 @llvm.spv.wave.reduce.umax.i32(i32 %add.i) [ "convergencectrl"(token %6) ]
+ %7 = tail call noundef align 4 dereferenceable(4) ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.VulkanBuffer_a0i32_12_1t(target("spirv.VulkanBuffer", [0 x i32], 12, 1) %1, i32 %add.i)
+ store i32 %hlsl.wave.active.max7.i, ptr addrspace(11) %7, align 4
+ br label %cleanup.i
+
+cleanup.i: ; preds = %cleanup.i.loopexit, %if.then.i
+ %inc10.i = add nuw nsw i32 %i.0.i, 1
+ br label %for.cond.i
+
+_Z4nestedDv3_j.exit: ; preds = %for.cond.i
+ ret void
+}
+
+; Function Attrs: mustprogress nofree nosync nounwind willreturn memory(none)
+declare i32 @llvm.spv.thread.id.in.group.i32(i32) #2
+
+; Function Attrs: convergent mustprogress nocallback nofree nosync nounwind willreturn memory(none)
+declare token @llvm.experimental.convergence.loop() #0
+
+; Function Attrs: convergent mustprogress nocallback nofree nosync nounwind willreturn memory(none)
+declare i32 @llvm.spv.wave.reduce.umax.i32(i32) #0
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none)
+declare target("spirv.VulkanBuffer", [0 x i32], 12, 1) @llvm.spv.resource.handlefrombinding.tspirv.VulkanBuffer_a0i32_12_1t(i32, i32, i32, i32, ptr) #4
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none)
+declare target("spirv.VulkanBuffer", i32, 12, 1) @llvm.spv.resource.counterhandlefromimplicitbinding.tspirv.VulkanBuffer_i32_12_1t.tspirv.VulkanBuffer_a0i32_12_1t(target("spirv.VulkanBuffer", [0 x i32], 12, 1), i32, i32) #4
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none)
+declare ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.VulkanBuffer_a0i32_12_1t(target("spirv.VulkanBuffer", [0 x i32], 12, 1), i32) #4
+
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite)
+declare void @llvm.experimental.noalias.scope.decl(metadata) #5
+
+attributes #0 = { convergent mustprogress nocallback nofree nosync nounwind willreturn memory(none) }
+attributes #1 = { convergent noinline norecurse "frame-pointer"="all" "hlsl.numthreads"="8,8,1" "hlsl.shader"="compute" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+attributes #2 = { mustprogress nofree nosync nounwind willreturn memory(none) }
+attributes #3 = { mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
+attributes #4 = { mustprogress nocallback nofree nosync nounwind willreturn memory(none) }
+attributes #5 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) }
+attributes #6 = { nounwind }
>From 4fc8d878d3459192aa8cbf5f8caf136d77598aa2 Mon Sep 17 00:00:00 2001
From: luciechoi <ychoi0407 at gmail.com>
Date: Thu, 30 Oct 2025 21:56:29 +0000
Subject: [PATCH 2/4] Use helper
---
llvm/lib/Transforms/Scalar/IndVarSimplify.cpp | 38 +++++--------------
1 file changed, 10 insertions(+), 28 deletions(-)
diff --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
index 421aad8872f9a..3a093b33ddfa7 100644
--- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -31,6 +31,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/iterator_range.h"
+#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/MemorySSA.h"
@@ -1859,35 +1860,16 @@ bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
}
}
- // If the loop body uses a convergence token defined within the loop, skip
- // predication. This is to avoid changing the convergence behavior of the
- // loop.
- SmallVector<BasicBlock *, 16> blocks = ExitingBlocks;
- SmallVector<Value *, 16> tokens = {};
- size_t index = 0; // Assume Exiting Blocks are sorted.
- while (index < blocks.size()) {
- BasicBlock *BB = blocks[index];
- index++;
- const auto exitingBlockName = BB->getName();
- for (Instruction &I : *BB) {
- // Check if the instruction uses any convergence tokens.
- if (auto *CB = dyn_cast<CallBase>(&I);
- CB && !isa<ConvergenceControlInst>(&I)) {
- auto token = CB->getConvergenceControlToken();
- if (token && llvm::is_contained(tokens, token)) {
- return false;
- }
- }
- if (isa<ConvergenceControlInst>(&I)) {
- tokens.push_back(cast<Value>(&I));
- }
- }
+ CodeMetrics Metrics;
+ SmallPtrSet<const Value *, 32> EphValues;
+ for (BasicBlock *BB : L->blocks()) {
+ Metrics.analyzeBasicBlock(BB, *TTI, EphValues, /* PrepareForLTO= */ false,
+ L);
+ }
- for (BasicBlock *Succ : successors(BB)) {
- const auto succName = Succ->getName();
- if (Succ != L->getLoopLatch() && !llvm::is_contained(blocks, Succ))
- blocks.push_back(Succ);
- }
+ if (Metrics.Convergence == ConvergenceKind::ExtendedLoop) {
+ // Do not predicate loops with extended convergence.
+ return false;
}
bool Changed = false;
>From 616241b7ce3532bfab40a3bba3596b9aacf7011e Mon Sep 17 00:00:00 2001
From: luciechoi <ychoi0407 at gmail.com>
Date: Sat, 1 Nov 2025 00:26:23 +0000
Subject: [PATCH 3/4] Check the loop blocks to see if the token is referenced
outside the loop
---
llvm/lib/Transforms/Scalar/IndVarSimplify.cpp | 24 ++++++++++---------
1 file changed, 13 insertions(+), 11 deletions(-)
diff --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
index 3a093b33ddfa7..ed176a8d0ffa9 100644
--- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -31,7 +31,6 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/iterator_range.h"
-#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/MemorySSA.h"
@@ -1860,16 +1859,19 @@ bool IndVarSimplify::predicateLoopExits(Loop *L, SCEVExpander &Rewriter) {
}
}
- CodeMetrics Metrics;
- SmallPtrSet<const Value *, 32> EphValues;
- for (BasicBlock *BB : L->blocks()) {
- Metrics.analyzeBasicBlock(BB, *TTI, EphValues, /* PrepareForLTO= */ false,
- L);
- }
-
- if (Metrics.Convergence == ConvergenceKind::ExtendedLoop) {
- // Do not predicate loops with extended convergence.
- return false;
+ // Skip if the loop has tokens referenced outside the loop to avoid
+ // changing convergence behavior.
+ for (BasicBlock *Block : L->blocks()) {
+ for (Instruction &I : *Block) {
+ if (I.getType()->isTokenTy()) {
+ for (User *U : I.users()) {
+ Instruction *UserInst = dyn_cast<Instruction>(U);
+ if (UserInst && !L->contains(UserInst)) {
+ return false;
+ }
+ }
+ }
+ }
}
bool Changed = false;
>From 2cdea747f40dbe6efc8bfe1e3cc6d8709e3124c6 Mon Sep 17 00:00:00 2001
From: luciechoi <ychoi0407 at gmail.com>
Date: Tue, 4 Nov 2025 18:19:15 +0000
Subject: [PATCH 4/4] Simplify unit tests
---
.../skip-predication-convergence.ll | 84 ++++-------
.../skip-predication-nested-convergence.ll | 95 ++++++++++++
.../skip-predictaion-nested-convergence.ll | 139 ------------------
3 files changed, 120 insertions(+), 198 deletions(-)
create mode 100644 llvm/test/Transforms/IndVarSimplify/skip-predication-nested-convergence.ll
delete mode 100644 llvm/test/Transforms/IndVarSimplify/skip-predictaion-nested-convergence.ll
diff --git a/llvm/test/Transforms/IndVarSimplify/skip-predication-convergence.ll b/llvm/test/Transforms/IndVarSimplify/skip-predication-convergence.ll
index 12fca6778f15e..59b84a3c082c2 100644
--- a/llvm/test/Transforms/IndVarSimplify/skip-predication-convergence.ll
+++ b/llvm/test/Transforms/IndVarSimplify/skip-predication-convergence.ll
@@ -3,96 +3,62 @@
; Loop with body using loop convergence token should be skipped by IndVarSimplify.
-%"class.hlsl::RWStructuredBuffer" = type { target("spirv.VulkanBuffer", [0 x i32], 12, 1), target("spirv.VulkanBuffer", i32, 12, 1) }
-
-@_ZL3Out = internal global %"class.hlsl::RWStructuredBuffer" poison, align 8
-@.str = private unnamed_addr constant [4 x i8] c"Out\00", align 1
-
declare token @llvm.experimental.convergence.entry() #0
-define void @loop() local_unnamed_addr #1 {
+define void @loop(i32 %tid, ptr %array) #0 {
; CHECK-LABEL: @loop(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = tail call token @llvm.experimental.convergence.entry()
-; CHECK-NEXT: [[TMP1:%.*]] = tail call target("spirv.VulkanBuffer", [0 x i32], 12, 1) @llvm.spv.resource.handlefrombinding.tspirv.VulkanBuffer_a0i32_12_1t(i32 0, i32 0, i32 1, i32 0, ptr nonnull @.str)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call target("spirv.VulkanBuffer", i32, 12, 1) @llvm.spv.resource.counterhandlefromimplicitbinding.tspirv.VulkanBuffer_i32_12_1t.tspirv.VulkanBuffer_a0i32_12_1t(target("spirv.VulkanBuffer", [0 x i32], 12, 1) [[TMP1]], i32 0, i32 0)
-; CHECK-NEXT: store target("spirv.VulkanBuffer", [0 x i32], 12, 1) [[TMP1]], ptr @_ZL3Out, align 8
-; CHECK-NEXT: store target("spirv.VulkanBuffer", i32, 12, 1) [[TMP2]], ptr getelementptr inbounds nuw (i8, ptr @_ZL3Out, i64 8), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.spv.thread.id.in.group.i32(i32 0)
; CHECK-NEXT: br label [[FOR_COND_I:%.*]]
; CHECK: for.cond.i:
; CHECK-NEXT: [[I_0_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC_I:%.*]], [[FOR_BODY_I:%.*]] ]
-; CHECK-NEXT: [[TMP4:%.*]] = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token [[TMP0]]) ]
+; CHECK-NEXT: [[TMP1:%.*]] = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token [[TMP0]]) ]
; CHECK-NEXT: [[CMP_I:%.*]] = icmp ult i32 [[I_0_I]], 8
-; CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[_Z4LOOPDV3_J_EXIT_LOOPEXIT:%.*]]
+; CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_BODY_I]], label [[EXIT_LOOPEXIT:%.*]]
; CHECK: for.body.i:
-; CHECK-NEXT: [[CMP1_I:%.*]] = icmp eq i32 [[I_0_I]], [[TMP3]]
+; CHECK-NEXT: [[CMP1_I:%.*]] = icmp eq i32 [[I_0_I]], [[TID:%.*]]
; CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[I_0_I]], 1
; CHECK-NEXT: br i1 [[CMP1_I]], label [[IF_THEN_I:%.*]], label [[FOR_COND_I]]
-; CHECK: _Z4loopDv3_j.exit.loopexit:
-; CHECK-NEXT: br label [[_Z4LOOPDV3_J_EXIT:%.*]]
+; CHECK: exit.loopexit:
+; CHECK-NEXT: br label [[EXIT:%.*]]
; CHECK: if.then.i:
-; CHECK-NEXT: [[HLSL_WAVE_ACTIVE_MAX2_I:%.*]] = call spir_func i32 @llvm.spv.wave.reduce.umax.i32(i32 [[TMP3]]) [ "convergencectrl"(token [[TMP4]]) ]
-; CHECK-NEXT: [[TMP5:%.*]] = tail call noundef align 4 dereferenceable(4) ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.VulkanBuffer_a0i32_12_1t(target("spirv.VulkanBuffer", [0 x i32], 12, 1) [[TMP1]], i32 [[TMP3]])
-; CHECK-NEXT: store i32 [[HLSL_WAVE_ACTIVE_MAX2_I]], ptr addrspace(11) [[TMP5]], align 4
-; CHECK-NEXT: br label [[_Z4LOOPDV3_J_EXIT]]
-; CHECK: _Z4loopDv3_j.exit:
+; CHECK-NEXT: [[HLSL_WAVE_ACTIVE_MAX2_I:%.*]] = call spir_func i32 @llvm.spv.wave.reduce.umax.i32(i32 [[TID]]) [ "convergencectrl"(token [[TMP1]]) ]
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[ARRAY:%.*]], i32 [[TID]]
+; CHECK-NEXT: store i32 [[HLSL_WAVE_ACTIVE_MAX2_I]], ptr [[TMP2]], align 4
+; CHECK-NEXT: br label [[EXIT]]
+; CHECK: exit:
; CHECK-NEXT: ret void
;
entry:
%0 = tail call token @llvm.experimental.convergence.entry()
- %1 = tail call target("spirv.VulkanBuffer", [0 x i32], 12, 1) @llvm.spv.resource.handlefrombinding.tspirv.VulkanBuffer_a0i32_12_1t(i32 0, i32 0, i32 1, i32 0, ptr nonnull @.str)
- %2 = tail call target("spirv.VulkanBuffer", i32, 12, 1) @llvm.spv.resource.counterhandlefromimplicitbinding.tspirv.VulkanBuffer_i32_12_1t.tspirv.VulkanBuffer_a0i32_12_1t(target("spirv.VulkanBuffer", [0 x i32], 12, 1) %1, i32 0, i32 0)
- store target("spirv.VulkanBuffer", [0 x i32], 12, 1) %1, ptr @_ZL3Out, align 8
- store target("spirv.VulkanBuffer", i32, 12, 1) %2, ptr getelementptr inbounds nuw (i8, ptr @_ZL3Out, i64 8), align 8
- %3 = tail call i32 @llvm.spv.thread.id.in.group.i32(i32 0)
br label %for.cond.i
-; Loop:
-for.cond.i: ; preds = %for.body.i, %entry
+for.cond.i:
%i.0.i = phi i32 [ 0, %entry ], [ %inc.i, %for.body.i ]
- %4 = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token %0) ]
+ %2 = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token %0) ]
%cmp.i = icmp ult i32 %i.0.i, 8
- br i1 %cmp.i, label %for.body.i, label %_Z4loopDv3_j.exit.loopexit
+ br i1 %cmp.i, label %for.body.i, label %exit.loopexit
-for.body.i: ; preds = %for.cond.i
- %cmp1.i = icmp eq i32 %i.0.i, %3
+for.body.i:
+ %cmp1.i = icmp eq i32 %i.0.i, %tid
%inc.i = add nuw nsw i32 %i.0.i, 1
br i1 %cmp1.i, label %if.then.i, label %for.cond.i
-; Exit blocks
-_Z4loopDv3_j.exit.loopexit: ; preds = %for.cond.i
- br label %_Z4loopDv3_j.exit
+exit.loopexit:
+ br label %exit
-if.then.i: ; preds = %for.body.i
- %hlsl.wave.active.max2.i = call spir_func i32 @llvm.spv.wave.reduce.umax.i32(i32 %3) [ "convergencectrl"(token %4) ]
- %5 = tail call noundef align 4 dereferenceable(4) ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.VulkanBuffer_a0i32_12_1t(target("spirv.VulkanBuffer", [0 x i32], 12, 1) %1, i32 %3)
- store i32 %hlsl.wave.active.max2.i, ptr addrspace(11) %5, align 4
- br label %_Z4loopDv3_j.exit
+if.then.i:
+ %hlsl.wave.active.max2.i = call spir_func i32 @llvm.spv.wave.reduce.umax.i32(i32 %tid) [ "convergencectrl"(token %2) ]
+ %3 = getelementptr inbounds i32, ptr %array, i32 %tid
+ store i32 %hlsl.wave.active.max2.i, ptr %3, align 4
+ br label %exit
-_Z4loopDv3_j.exit: ; preds = %_Z4loopDv3_j.exit.loopexit, %if.then.i
+exit:
ret void
}
-; Function Attrs: mustprogress nofree nosync nounwind willreturn memory(none)
-declare i32 @llvm.spv.thread.id.in.group.i32(i32) #2
-
-; Function Attrs: convergent mustprogress nocallback nofree nosync nounwind willreturn memory(none)
declare token @llvm.experimental.convergence.loop() #0
-; Function Attrs: convergent mustprogress nocallback nofree nosync nounwind willreturn memory(none)
declare i32 @llvm.spv.wave.reduce.umax.i32(i32) #0
-; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none)
-declare target("spirv.VulkanBuffer", [0 x i32], 12, 1) @llvm.spv.resource.handlefrombinding.tspirv.VulkanBuffer_a0i32_12_1t(i32, i32, i32, i32, ptr) #4
-
-; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none)
-declare target("spirv.VulkanBuffer", i32, 12, 1) @llvm.spv.resource.counterhandlefromimplicitbinding.tspirv.VulkanBuffer_i32_12_1t.tspirv.VulkanBuffer_a0i32_12_1t(target("spirv.VulkanBuffer", [0 x i32], 12, 1), i32, i32) #4
-
-; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none)
-declare ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.VulkanBuffer_a0i32_12_1t(target("spirv.VulkanBuffer", [0 x i32], 12, 1), i32) #4
-
-attributes #0 = { convergent mustprogress nocallback nofree nosync nounwind willreturn memory(none) }
-attributes #1 = { convergent noinline norecurse "frame-pointer"="all" "hlsl.numthreads"="8,1,1" "hlsl.shader"="compute" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
-attributes #2 = { mustprogress nofree nosync nounwind willreturn memory(none) }
-attributes #4 = { mustprogress nocallback nofree nosync nounwind willreturn memory(none) }
+attributes #0 = { convergent }
diff --git a/llvm/test/Transforms/IndVarSimplify/skip-predication-nested-convergence.ll b/llvm/test/Transforms/IndVarSimplify/skip-predication-nested-convergence.ll
new file mode 100644
index 0000000000000..0944205839aca
--- /dev/null
+++ b/llvm/test/Transforms/IndVarSimplify/skip-predication-nested-convergence.ll
@@ -0,0 +1,95 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -passes=indvars -indvars-predicate-loops=1 -S | FileCheck %s
+
+; Nested loops whose bodies use a loop convergence token should be skipped by IndVarSimplify.
+
+declare token @llvm.experimental.convergence.entry() #0
+
+define void @nested(i32 %tidx, i32 %tidy, ptr %array) #0 {
+; CHECK-LABEL: @nested(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = tail call token @llvm.experimental.convergence.entry()
+; CHECK-NEXT: [[MUL_I:%.*]] = shl nsw i32 [[TIDX:%.*]], 3
+; CHECK-NEXT: [[ADD_I:%.*]] = add nsw i32 [[MUL_I]], [[TIDY:%.*]]
+; CHECK-NEXT: br label [[FOR_COND_I:%.*]]
+; CHECK: for.cond.i:
+; CHECK-NEXT: [[I_0_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC10_I:%.*]], [[CLEANUP_I:%.*]] ]
+; CHECK-NEXT: [[TMP1:%.*]] = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token [[TMP0]]) ]
+; CHECK-NEXT: [[CMP_I:%.*]] = icmp ult i32 [[I_0_I]], 8
+; CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_COND1_I_PREHEADER:%.*]], label [[EXIT:%.*]]
+; CHECK: for.cond1.i.preheader:
+; CHECK-NEXT: [[CMP5_I:%.*]] = icmp eq i32 [[I_0_I]], [[TIDX]]
+; CHECK-NEXT: br label [[FOR_COND1_I:%.*]]
+; CHECK: for.cond1.i:
+; CHECK-NEXT: [[J_0_I:%.*]] = phi i32 [ [[INC_I:%.*]], [[FOR_BODY4_I:%.*]] ], [ 0, [[FOR_COND1_I_PREHEADER]] ]
+; CHECK-NEXT: [[TMP2:%.*]] = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token [[TMP1]]) ]
+; CHECK-NEXT: [[CMP2_I:%.*]] = icmp ult i32 [[J_0_I]], 8
+; CHECK-NEXT: br i1 [[CMP2_I]], label [[FOR_BODY4_I]], label [[CLEANUP_I_LOOPEXIT:%.*]]
+; CHECK: for.body4.i:
+; CHECK-NEXT: [[CMP6_I:%.*]] = icmp eq i32 [[J_0_I]], [[TIDY]]
+; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[CMP5_I]], i1 [[CMP6_I]], i1 false
+; CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[J_0_I]], 1
+; CHECK-NEXT: br i1 [[OR_COND]], label [[IF_THEN_I:%.*]], label [[FOR_COND1_I]]
+; CHECK: cleanup.i.loopexit:
+; CHECK-NEXT: br label [[CLEANUP_I]]
+; CHECK: if.then.i:
+; CHECK-NEXT: [[HLSL_WAVE_ACTIVE_MAX7_I:%.*]] = call spir_func i32 @llvm.spv.wave.reduce.umax.i32(i32 [[ADD_I]]) [ "convergencectrl"(token [[TMP2]]) ]
+; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[ARRAY:%.*]], i32 [[ADD_I]]
+; CHECK-NEXT: store i32 [[HLSL_WAVE_ACTIVE_MAX7_I]], ptr [[TMP3]], align 4
+; CHECK-NEXT: br label [[CLEANUP_I]]
+; CHECK: cleanup.i:
+; CHECK-NEXT: [[INC10_I]] = add nuw nsw i32 [[I_0_I]], 1
+; CHECK-NEXT: br label [[FOR_COND_I]]
+; CHECK: exit:
+; CHECK-NEXT: ret void
+;
+entry:
+ %0 = tail call token @llvm.experimental.convergence.entry()
+ %mul.i = shl nsw i32 %tidx, 3
+ %add.i = add nsw i32 %mul.i, %tidy
+ br label %for.cond.i
+
+for.cond.i:
+ %i.0.i = phi i32 [ 0, %entry ], [ %inc10.i, %cleanup.i ]
+ %2 = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token %0) ]
+ %cmp.i = icmp ult i32 %i.0.i, 8
+ br i1 %cmp.i, label %for.cond1.i.preheader, label %exit
+
+for.cond1.i.preheader:
+ %cmp5.i = icmp eq i32 %i.0.i, %tidx
+ br label %for.cond1.i
+
+for.cond1.i:
+ %j.0.i = phi i32 [ %inc.i, %for.body4.i ], [ 0, %for.cond1.i.preheader ]
+ %3 = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token %2) ]
+ %cmp2.i = icmp ult i32 %j.0.i, 8
+ br i1 %cmp2.i, label %for.body4.i, label %cleanup.i.loopexit
+
+for.body4.i:
+ %cmp6.i = icmp eq i32 %j.0.i, %tidy
+ %or.cond = select i1 %cmp5.i, i1 %cmp6.i, i1 false
+ %inc.i = add nsw i32 %j.0.i, 1
+ br i1 %or.cond, label %if.then.i, label %for.cond1.i
+
+cleanup.i.loopexit:
+ br label %cleanup.i
+
+if.then.i:
+ %hlsl.wave.active.max7.i = call spir_func i32 @llvm.spv.wave.reduce.umax.i32(i32 %add.i) [ "convergencectrl"(token %3) ]
+ %4 = getelementptr inbounds i32, ptr %array, i32 %add.i
+ store i32 %hlsl.wave.active.max7.i, ptr %4, align 4
+ br label %cleanup.i
+
+cleanup.i:
+ %inc10.i = add nsw i32 %i.0.i, 1
+ br label %for.cond.i
+
+exit:
+ ret void
+}
+
+declare token @llvm.experimental.convergence.loop() #0
+
+declare i32 @llvm.spv.wave.reduce.umax.i32(i32) #0
+
+attributes #0 = { convergent }
diff --git a/llvm/test/Transforms/IndVarSimplify/skip-predictaion-nested-convergence.ll b/llvm/test/Transforms/IndVarSimplify/skip-predictaion-nested-convergence.ll
deleted file mode 100644
index 22f25b1428556..0000000000000
--- a/llvm/test/Transforms/IndVarSimplify/skip-predictaion-nested-convergence.ll
+++ /dev/null
@@ -1,139 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -passes=indvars -indvars-predicate-loops=1 -S | FileCheck %s
-
-; Nested loops with body using loop convergence token should be skipped by IndVarSimplify.
-
-%"class.hlsl::RWStructuredBuffer" = type { target("spirv.VulkanBuffer", [0 x i32], 12, 1), target("spirv.VulkanBuffer", i32, 12, 1) }
-
- at _ZL3Out = internal global %"class.hlsl::RWStructuredBuffer" poison, align 8
- at .str = private unnamed_addr constant [4 x i8] c"Out\00", align 1
-
-; Function Attrs: convergent mustprogress nocallback nofree nosync nounwind willreturn memory(none)
-declare token @llvm.experimental.convergence.entry() #0
-
-define void @nested() local_unnamed_addr #1 {
-; CHECK-LABEL: @nested(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = tail call token @llvm.experimental.convergence.entry()
-; CHECK-NEXT: [[TMP1:%.*]] = tail call target("spirv.VulkanBuffer", [0 x i32], 12, 1) @llvm.spv.resource.handlefrombinding.tspirv.VulkanBuffer_a0i32_12_1t(i32 0, i32 0, i32 1, i32 0, ptr nonnull @.str)
-; CHECK-NEXT: [[TMP2:%.*]] = tail call target("spirv.VulkanBuffer", i32, 12, 1) @llvm.spv.resource.counterhandlefromimplicitbinding.tspirv.VulkanBuffer_i32_12_1t.tspirv.VulkanBuffer_a0i32_12_1t(target("spirv.VulkanBuffer", [0 x i32], 12, 1) [[TMP1]], i32 0, i32 0)
-; CHECK-NEXT: store target("spirv.VulkanBuffer", [0 x i32], 12, 1) [[TMP1]], ptr @_ZL3Out, align 8
-; CHECK-NEXT: store target("spirv.VulkanBuffer", i32, 12, 1) [[TMP2]], ptr getelementptr inbounds nuw (i8, ptr @_ZL3Out, i64 8), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.spv.thread.id.in.group.i32(i32 0)
-; CHECK-NEXT: [[TMP4:%.*]] = tail call i32 @llvm.spv.thread.id.in.group.i32(i32 1)
-; CHECK-NEXT: [[MUL_I:%.*]] = shl nuw nsw i32 [[TMP3]], 3
-; CHECK-NEXT: [[ADD_I:%.*]] = add nuw nsw i32 [[MUL_I]], [[TMP4]]
-; CHECK-NEXT: br label [[FOR_COND_I:%.*]]
-; CHECK: for.cond.i:
-; CHECK-NEXT: [[I_0_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC10_I:%.*]], [[CLEANUP_I:%.*]] ]
-; CHECK-NEXT: [[TMP5:%.*]] = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token [[TMP0]]) ]
-; CHECK-NEXT: [[CMP_I:%.*]] = icmp ult i32 [[I_0_I]], 8
-; CHECK-NEXT: br i1 [[CMP_I]], label [[FOR_COND1_I_PREHEADER:%.*]], label [[_Z4NESTEDDV3_J_EXIT:%.*]]
-; CHECK: for.cond1.i.preheader:
-; CHECK-NEXT: [[CMP5_I:%.*]] = icmp eq i32 [[I_0_I]], [[TMP3]]
-; CHECK-NEXT: br label [[FOR_COND1_I:%.*]]
-; CHECK: for.cond1.i:
-; CHECK-NEXT: [[J_0_I:%.*]] = phi i32 [ [[INC_I:%.*]], [[FOR_BODY4_I:%.*]] ], [ 0, [[FOR_COND1_I_PREHEADER]] ]
-; CHECK-NEXT: [[TMP6:%.*]] = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token [[TMP5]]) ]
-; CHECK-NEXT: [[CMP2_I:%.*]] = icmp ult i32 [[J_0_I]], 8
-; CHECK-NEXT: br i1 [[CMP2_I]], label [[FOR_BODY4_I]], label [[CLEANUP_I_LOOPEXIT:%.*]]
-; CHECK: for.body4.i:
-; CHECK-NEXT: [[CMP6_I:%.*]] = icmp eq i32 [[J_0_I]], [[TMP4]]
-; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[CMP5_I]], i1 [[CMP6_I]], i1 false
-; CHECK-NEXT: [[INC_I]] = add nuw nsw i32 [[J_0_I]], 1
-; CHECK-NEXT: br i1 [[OR_COND]], label [[IF_THEN_I:%.*]], label [[FOR_COND1_I]]
-; CHECK: cleanup.i.loopexit:
-; CHECK-NEXT: br label [[CLEANUP_I]]
-; CHECK: if.then.i:
-; CHECK-NEXT: [[HLSL_WAVE_ACTIVE_MAX7_I:%.*]] = call spir_func i32 @llvm.spv.wave.reduce.umax.i32(i32 [[ADD_I]]) [ "convergencectrl"(token [[TMP6]]) ]
-; CHECK-NEXT: [[TMP7:%.*]] = tail call noundef align 4 dereferenceable(4) ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.VulkanBuffer_a0i32_12_1t(target("spirv.VulkanBuffer", [0 x i32], 12, 1) [[TMP1]], i32 [[ADD_I]])
-; CHECK-NEXT: store i32 [[HLSL_WAVE_ACTIVE_MAX7_I]], ptr addrspace(11) [[TMP7]], align 4
-; CHECK-NEXT: br label [[CLEANUP_I]]
-; CHECK: cleanup.i:
-; CHECK-NEXT: [[INC10_I]] = add nuw nsw i32 [[I_0_I]], 1
-; CHECK-NEXT: br label [[FOR_COND_I]]
-; CHECK: _Z4nestedDv3_j.exit:
-; CHECK-NEXT: ret void
-;
-entry:
- %0 = tail call token @llvm.experimental.convergence.entry()
- %1 = tail call target("spirv.VulkanBuffer", [0 x i32], 12, 1) @llvm.spv.resource.handlefrombinding.tspirv.VulkanBuffer_a0i32_12_1t(i32 0, i32 0, i32 1, i32 0, ptr nonnull @.str)
- %2 = tail call target("spirv.VulkanBuffer", i32, 12, 1) @llvm.spv.resource.counterhandlefromimplicitbinding.tspirv.VulkanBuffer_i32_12_1t.tspirv.VulkanBuffer_a0i32_12_1t(target("spirv.VulkanBuffer", [0 x i32], 12, 1) %1, i32 0, i32 0)
- store target("spirv.VulkanBuffer", [0 x i32], 12, 1) %1, ptr @_ZL3Out, align 8
- store target("spirv.VulkanBuffer", i32, 12, 1) %2, ptr getelementptr inbounds nuw (i8, ptr @_ZL3Out, i64 8), align 8
- %3 = tail call i32 @llvm.spv.thread.id.in.group.i32(i32 0)
- %4 = tail call i32 @llvm.spv.thread.id.in.group.i32(i32 1)
- %mul.i = shl nuw nsw i32 %3, 3
- %add.i = add nuw nsw i32 %mul.i, %4
- br label %for.cond.i
-
-for.cond.i: ; preds = %cleanup.i, %entry
- %i.0.i = phi i32 [ 0, %entry ], [ %inc10.i, %cleanup.i ]
- %5 = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token %0) ]
- %cmp.i = icmp ult i32 %i.0.i, 8
- br i1 %cmp.i, label %for.cond1.i.preheader, label %_Z4nestedDv3_j.exit
-
-; Preheader:
-for.cond1.i.preheader: ; preds = %for.cond.i
- %cmp5.i = icmp eq i32 %i.0.i, %3
- br label %for.cond1.i
-
-; Loop:
-for.cond1.i: ; preds = %for.body4.i, %for.cond1.i.preheader
- %j.0.i = phi i32 [ %inc.i, %for.body4.i ], [ 0, %for.cond1.i.preheader ]
- %6 = call token @llvm.experimental.convergence.loop() [ "convergencectrl"(token %5) ]
- %cmp2.i = icmp ult i32 %j.0.i, 8
- br i1 %cmp2.i, label %for.body4.i, label %cleanup.i.loopexit
-
-for.body4.i: ; preds = %for.cond1.i
- %cmp6.i = icmp eq i32 %j.0.i, %4
- %or.cond = select i1 %cmp5.i, i1 %cmp6.i, i1 false
- %inc.i = add nuw nsw i32 %j.0.i, 1
- br i1 %or.cond, label %if.then.i, label %for.cond1.i
-
-; Exit blocks
-cleanup.i.loopexit: ; preds = %for.cond1.i
- br label %cleanup.i
-
-if.then.i: ; preds = %for.body4.i
- %hlsl.wave.active.max7.i = call spir_func i32 @llvm.spv.wave.reduce.umax.i32(i32 %add.i) [ "convergencectrl"(token %6) ]
- %7 = tail call noundef align 4 dereferenceable(4) ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.VulkanBuffer_a0i32_12_1t(target("spirv.VulkanBuffer", [0 x i32], 12, 1) %1, i32 %add.i)
- store i32 %hlsl.wave.active.max7.i, ptr addrspace(11) %7, align 4
- br label %cleanup.i
-
-cleanup.i: ; preds = %cleanup.i.loopexit, %if.then.i
- %inc10.i = add nuw nsw i32 %i.0.i, 1
- br label %for.cond.i
-
-_Z4nestedDv3_j.exit: ; preds = %for.cond.i
- ret void
-}
-
-; Function Attrs: mustprogress nofree nosync nounwind willreturn memory(none)
-declare i32 @llvm.spv.thread.id.in.group.i32(i32) #2
-
-; Function Attrs: convergent mustprogress nocallback nofree nosync nounwind willreturn memory(none)
-declare token @llvm.experimental.convergence.loop() #0
-
-; Function Attrs: convergent mustprogress nocallback nofree nosync nounwind willreturn memory(none)
-declare i32 @llvm.spv.wave.reduce.umax.i32(i32) #0
-
-; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none)
-declare target("spirv.VulkanBuffer", [0 x i32], 12, 1) @llvm.spv.resource.handlefrombinding.tspirv.VulkanBuffer_a0i32_12_1t(i32, i32, i32, i32, ptr) #4
-
-; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none)
-declare target("spirv.VulkanBuffer", i32, 12, 1) @llvm.spv.resource.counterhandlefromimplicitbinding.tspirv.VulkanBuffer_i32_12_1t.tspirv.VulkanBuffer_a0i32_12_1t(target("spirv.VulkanBuffer", [0 x i32], 12, 1), i32, i32) #4
-
-; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(none)
-declare ptr addrspace(11) @llvm.spv.resource.getpointer.p11.tspirv.VulkanBuffer_a0i32_12_1t(target("spirv.VulkanBuffer", [0 x i32], 12, 1), i32) #4
-
-; Function Attrs: nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite)
-declare void @llvm.experimental.noalias.scope.decl(metadata) #5
-
-attributes #0 = { convergent mustprogress nocallback nofree nosync nounwind willreturn memory(none) }
-attributes #1 = { convergent noinline norecurse "frame-pointer"="all" "hlsl.numthreads"="8,8,1" "hlsl.shader"="compute" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
-attributes #2 = { mustprogress nofree nosync nounwind willreturn memory(none) }
-attributes #3 = { mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
-attributes #4 = { mustprogress nocallback nofree nosync nounwind willreturn memory(none) }
-attributes #5 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) }
-attributes #6 = { nounwind }
More information about the llvm-commits
mailing list