[llvm] [SPIR-V] Add llvm.loop.unroll metadata lowering (PR #132062)
Dmitry Sidorov via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 19 10:13:16 PDT 2025
https://github.com/MrSidims created https://github.com/llvm/llvm-project/pull/132062
.enable lowers to Unroll LoopControl
.disable lowers to DontUnroll LoopControl
.count lowers to PartialCount LoopControl
.full lowers to Unroll + PartialCount LoopControls
TODO: enable structurizer for non-vulkan targets.
From d8ac7ce6c8b36abc8116e66d7f9265599f74a7b7 Mon Sep 17 00:00:00 2001
From: "Sidorov, Dmitry" <dmitry.sidorov at intel.com>
Date: Sun, 16 Mar 2025 03:28:49 -0700
Subject: [PATCH] [SPIR-V] Add llvm.loop.unroll metadata lowering
.enable lowers to Unroll LoopControl
.disable lowers to DontUnroll LoopControl
.count lowers to PartialCount LoopControl
.full lowers to Unroll + PartialCount LoopControls
TODO: enable structurizer for non-vulkan targets.
Signed-off-by: Sidorov, Dmitry <dmitry.sidorov at intel.com>
---
.../Target/SPIRV/SPIRVInstructionSelector.cpp | 7 +-
llvm/lib/Target/SPIRV/SPIRVStructurizer.cpp | 34 +++
.../Transforms/Utils/BreakCriticalEdges.cpp | 2 +
.../CodeGen/SPIRV/structurizer/loop-unroll.ll | 228 ++++++++++++++++++
4 files changed, 268 insertions(+), 3 deletions(-)
create mode 100644 llvm/test/CodeGen/SPIRV/structurizer/loop-unroll.ll
diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
index b188f36ca9a9e..ee93b9df3bc4e 100644
--- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
@@ -2985,10 +2985,11 @@ bool SPIRVInstructionSelector::selectIntrinsic(Register ResVReg,
case Intrinsic::spv_loop_merge: {
auto MIB = BuildMI(BB, I, I.getDebugLoc(), TII.get(SPIRV::OpLoopMerge));
for (unsigned i = 1; i < I.getNumExplicitOperands(); ++i) {
- assert(I.getOperand(i).isMBB());
- MIB.addMBB(I.getOperand(i).getMBB());
+ if (I.getOperand(i).isMBB())
+ MIB.addMBB(I.getOperand(i).getMBB());
+ else
+ MIB.addImm(foldImm(I.getOperand(i), MRI));
}
- MIB.addImm(SPIRV::SelectionControl::None);
return MIB.constrainAllUses(TII, TRI, RBI);
}
case Intrinsic::spv_selection_merge: {
diff --git a/llvm/lib/Target/SPIRV/SPIRVStructurizer.cpp b/llvm/lib/Target/SPIRV/SPIRVStructurizer.cpp
index d20ea85f75909..8ba41b3c20702 100644
--- a/llvm/lib/Target/SPIRV/SPIRVStructurizer.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVStructurizer.cpp
@@ -611,6 +611,40 @@ class SPIRVStructurizer : public FunctionPass {
auto MergeAddress = BlockAddress::get(Merge->getParent(), Merge);
auto ContinueAddress = BlockAddress::get(Continue->getParent(), Continue);
SmallVector<Value *, 2> Args = {MergeAddress, ContinueAddress};
+ unsigned LC = SPIRV::LoopControl::None;
+ // Currently used only to store PartialCount value. Later when other
+ // LoopControls are added - this map should be sorted before making
+ // them loop_merge operands to satisfy 3.23. Loop Control requirements.
+ std::vector<std::pair<unsigned, unsigned>> MaskToValueMap;
+ if (getBooleanLoopAttribute(L, "llvm.loop.unroll.disable")) {
+ LC |= SPIRV::LoopControl::DontUnroll;
+ } else {
+ if (getBooleanLoopAttribute(L, "llvm.loop.unroll.enable")) {
+ LC |= SPIRV::LoopControl::Unroll;
+ }
+ std::optional<int> Count =
+ getOptionalIntLoopAttribute(L, "llvm.loop.unroll.count");
+ if (Count && Count != 1) {
+ LC |= SPIRV::LoopControl::PartialCount;
+ MaskToValueMap.emplace_back(
+ std::make_pair(SPIRV::LoopControl::PartialCount, *Count));
+ }
+ if (getBooleanLoopAttribute(L, "llvm.loop.unroll.full")) {
+ // llvm.loop.unroll.full doesn't have a direct counterpart in SPIR-V,
+ // the closest thing we can do is to add Unroll mask and if the trip
+ // count is not known at compile time - either disable unrolling by
+ // setting PartialCount to 1 or reuse already available PartialCount.
+ LC |= SPIRV::LoopControl::Unroll;
+ if ((LC & SPIRV::LoopControl::PartialCount) == 0) {
+ LC |= SPIRV::LoopControl::PartialCount;
+ MaskToValueMap.emplace_back(
+ std::make_pair(SPIRV::LoopControl::PartialCount, 1));
+ }
+ }
+ }
+ Args.emplace_back(llvm::ConstantInt::get(Builder.getInt32Ty(), LC));
+ for (auto &[Mask, Val] : MaskToValueMap)
+ Args.emplace_back(llvm::ConstantInt::get(Builder.getInt32Ty(), Val));
Builder.CreateIntrinsic(Intrinsic::spv_loop_merge, {}, {Args});
Modified = true;
diff --git a/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp b/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp
index 62b4b545f29bb..17a84757237bc 100644
--- a/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp
+++ b/llvm/lib/Transforms/Utils/BreakCriticalEdges.cpp
@@ -175,6 +175,8 @@ llvm::SplitKnownCriticalEdge(Instruction *TI, unsigned SuccNum,
// Create our unconditional branch.
BranchInst *NewBI = BranchInst::Create(DestBB, NewBB);
NewBI->setDebugLoc(TI->getDebugLoc());
+ if (auto *LoopMD = TI->getMetadata(LLVMContext::MD_loop))
+ NewBI->setMetadata(LLVMContext::MD_loop, LoopMD);
// Insert the block into the function... right after the block TI lives in.
Function &F = *TIBB->getParent();
diff --git a/llvm/test/CodeGen/SPIRV/structurizer/loop-unroll.ll b/llvm/test/CodeGen/SPIRV/structurizer/loop-unroll.ll
new file mode 100644
index 0000000000000..296e7d3859f37
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/structurizer/loop-unroll.ll
@@ -0,0 +1,228 @@
+; RUN: llc -mtriple=spirv-unknown-vulkan-compute -O0 -verify-machineinstrs %s -o - | FileCheck %s
+; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-vulkan-compute %s -o - -filetype=obj | spirv-val %}
+
+; CHECK-DAG: OpName %[[#For:]] "for_loop"
+; CHECK-DAG: OpName %[[#While:]] "while_loop"
+; CHECK-DAG: OpName %[[#DoWhile:]] "do_while_loop"
+; CHECK-DAG: OpName %[[#Disable:]] "unroll_disable"
+; CHECK-DAG: OpName %[[#Count:]] "unroll_count"
+; CHECK-DAG: OpName %[[#Full:]] "unroll_full"
+; CHECK-DAG: OpName %[[#FullCount:]] "unroll_full_count"
+
+; CHECK: %[[#For]] = OpFunction
+; CHECK: OpLoopMerge %[[#]] %[[#]] Unroll
+
+; CHECK: %[[#While]] = OpFunction
+; CHECK: OpLoopMerge %[[#]] %[[#]] Unroll
+
+; CHECK: %[[#DoWhile]] = OpFunction
+; CHECK: OpLoopMerge %[[#]] %[[#]] Unroll
+
+; CHECK: %[[#Disable]] = OpFunction
+; CHECK: OpLoopMerge %[[#]] %[[#]] DontUnroll
+
+; CHECK: %[[#Count]] = OpFunction
+; CHECK: OpLoopMerge %[[#]] %[[#]] PartialCount 4
+
+; CHECK: %[[#Full]] = OpFunction
+; CHECK: OpLoopMerge %[[#]] %[[#]] Unroll|PartialCount 1
+
+; CHECK: %[[#FullCount]] = OpFunction
+; CHECK: OpLoopMerge %[[#]] %[[#]] Unroll|PartialCount 4
+
+define dso_local void @for_loop(ptr noundef %0, i32 noundef %1) {
+ %3 = alloca ptr, align 8
+ %4 = alloca i32, align 4
+ %5 = alloca i32, align 4
+ store ptr %0, ptr %3, align 8
+ store i32 %1, ptr %4, align 4
+ store i32 0, ptr %5, align 4
+ br label %6
+
+6: ; preds = %15, %2
+ %7 = load i32, ptr %5, align 4
+ %8 = load i32, ptr %4, align 4
+ %9 = icmp slt i32 %7, %8
+ br i1 %9, label %10, label %18
+
+10: ; preds = %6
+ %11 = load i32, ptr %5, align 4
+ %12 = load ptr, ptr %3, align 8
+ %13 = load i32, ptr %12, align 4
+ %14 = add nsw i32 %13, %11
+ store i32 %14, ptr %12, align 4
+ br label %15
+
+15: ; preds = %10
+ %16 = load i32, ptr %5, align 4
+ %17 = add nsw i32 %16, 1
+ store i32 %17, ptr %5, align 4
+ br label %6, !llvm.loop !1
+
+18: ; preds = %6
+ ret void
+}
+
+define dso_local void @while_loop(ptr noundef %0, i32 noundef %1) {
+ %3 = alloca ptr, align 8
+ %4 = alloca i32, align 4
+ %5 = alloca i32, align 4
+ store ptr %0, ptr %3, align 8
+ store i32 %1, ptr %4, align 4
+ store i32 0, ptr %5, align 4
+ br label %6
+
+6: ; preds = %10, %2
+ %7 = load i32, ptr %5, align 4
+ %8 = load i32, ptr %4, align 4
+ %9 = icmp slt i32 %7, %8
+ br i1 %9, label %10, label %17
+
+10: ; preds = %6
+ %11 = load i32, ptr %5, align 4
+ %12 = load ptr, ptr %3, align 8
+ %13 = load i32, ptr %12, align 4
+ %14 = add nsw i32 %13, %11
+ store i32 %14, ptr %12, align 4
+ %15 = load i32, ptr %5, align 4
+ %16 = add nsw i32 %15, 1
+ store i32 %16, ptr %5, align 4
+ br label %6, !llvm.loop !3
+
+17: ; preds = %6
+ ret void
+}
+
+define dso_local void @do_while_loop(ptr noundef %0, i32 noundef %1) {
+ %3 = alloca ptr, align 8
+ %4 = alloca i32, align 4
+ %5 = alloca i32, align 4
+ store ptr %0, ptr %3, align 8
+ store i32 %1, ptr %4, align 4
+ store i32 0, ptr %5, align 4
+ br label %6
+
+6: ; preds = %13, %2
+ %7 = load i32, ptr %5, align 4
+ %8 = load ptr, ptr %3, align 8
+ %9 = load i32, ptr %8, align 4
+ %10 = add nsw i32 %9, %7
+ store i32 %10, ptr %8, align 4
+ %11 = load i32, ptr %5, align 4
+ %12 = add nsw i32 %11, 1
+ store i32 %12, ptr %5, align 4
+ br label %13
+
+13: ; preds = %6
+ %14 = load i32, ptr %5, align 4
+ %15 = load i32, ptr %4, align 4
+ %16 = icmp slt i32 %14, %15
+ br i1 %16, label %6, label %17, !llvm.loop !4
+
+17: ; preds = %13
+ ret void
+}
+
+define dso_local void @unroll_disable(i32 noundef %0) {
+ %2 = alloca i32, align 4
+ %3 = alloca i32, align 4
+ store i32 %0, ptr %2, align 4
+ store i32 0, ptr %3, align 4
+ br label %4
+
+4: ; preds = %7, %1
+ %5 = load i32, ptr %3, align 4
+ %6 = add nsw i32 %5, 1
+ store i32 %6, ptr %3, align 4
+ br label %7
+
+7: ; preds = %4
+ %8 = load i32, ptr %3, align 4
+ %9 = load i32, ptr %2, align 4
+ %10 = icmp slt i32 %8, %9
+ br i1 %10, label %4, label %11, !llvm.loop !5
+
+11: ; preds = %7
+ ret void
+}
+
+define dso_local void @unroll_count(i32 noundef %0) {
+ %2 = alloca i32, align 4
+ %3 = alloca i32, align 4
+ store i32 %0, ptr %2, align 4
+ store i32 0, ptr %3, align 4
+ br label %4
+
+4: ; preds = %7, %1
+ %5 = load i32, ptr %3, align 4
+ %6 = add nsw i32 %5, 1
+ store i32 %6, ptr %3, align 4
+ br label %7
+
+7: ; preds = %4
+ %8 = load i32, ptr %3, align 4
+ %9 = load i32, ptr %2, align 4
+ %10 = icmp slt i32 %8, %9
+ br i1 %10, label %4, label %11, !llvm.loop !7
+
+11: ; preds = %7
+ ret void
+}
+
+define dso_local void @unroll_full(i32 noundef %0) {
+ %2 = alloca i32, align 4
+ %3 = alloca i32, align 4
+ store i32 %0, ptr %2, align 4
+ store i32 0, ptr %3, align 4
+ br label %4
+
+4: ; preds = %7, %1
+ %5 = load i32, ptr %3, align 4
+ %6 = add nsw i32 %5, 1
+ store i32 %6, ptr %3, align 4
+ br label %7
+
+7: ; preds = %4
+ %8 = load i32, ptr %3, align 4
+ %9 = load i32, ptr %2, align 4
+ %10 = icmp slt i32 %8, %9
+ br i1 %10, label %4, label %11, !llvm.loop !9
+
+11: ; preds = %7
+ ret void
+}
+
+define dso_local void @unroll_full_count(i32 noundef %0) {
+ %2 = alloca i32, align 4
+ %3 = alloca i32, align 4
+ store i32 %0, ptr %2, align 4
+ store i32 0, ptr %3, align 4
+ br label %4
+
+4: ; preds = %7, %1
+ %5 = load i32, ptr %3, align 4
+ %6 = add nsw i32 %5, 1
+ store i32 %6, ptr %3, align 4
+ br label %7
+
+7: ; preds = %4
+ %8 = load i32, ptr %3, align 4
+ %9 = load i32, ptr %2, align 4
+ %10 = icmp slt i32 %8, %9
+ br i1 %10, label %4, label %11, !llvm.loop !11
+
+11: ; preds = %7
+ ret void
+}
+
+!1 = distinct !{!1, !2}
+!2 = !{!"llvm.loop.unroll.enable"}
+!3 = distinct !{!3, !2}
+!4 = distinct !{!4, !2}
+!5 = distinct !{!5, !6}
+!6 = !{!"llvm.loop.unroll.disable"}
+!7 = distinct !{!7, !8}
+!8 = !{!"llvm.loop.unroll.count", i32 4}
+!9 = distinct !{!9, !10}
+!10 = !{!"llvm.loop.unroll.full"}
+!11 = distinct !{!11, !10, !8}
More information about the llvm-commits mailing list