[llvm] [MachinePipeliner] Support `#pragma clang loop pipeline(enable)` (PR #112502)
Ryotaro Kasuga via llvm-commits
llvm-commits at lists.llvm.org
Wed Oct 16 01:52:03 PDT 2024
https://github.com/kasuga-fj created https://github.com/llvm/llvm-project/pull/112502
Previously, `#pragma clang loop pipeline` only accepted `disable`. This patch adds `enable` as a valid argument for this pragma. Combined with the new hidden option `-pipeliner-apply-only-enabled-by-pragma`, this allows software pipelining to be applied only to loops that are explicitly marked with the pragma, instead of to all loops.
This is the LLVM part of the fix.
>From 31fa1f6131d2e6d6c0bff27a3fab43e39eff3fa9 Mon Sep 17 00:00:00 2001
From: Ryotaro Kasuga <kasuga.ryotaro at fujitsu.com>
Date: Fri, 11 Oct 2024 07:37:31 +0000
Subject: [PATCH] [MachinePipeliner] Support `#pragma clang loop
pipeline(enable)`
Previously, `#pragma clang loop pipeline` only accepted `disable`. This
patch adds `enable` as a valid argument for this pragma. Combined with
the new hidden option `-pipeliner-apply-only-enabled-by-pragma`, this
allows software pipelining to be applied only to loops that are
explicitly marked with the pragma, instead of to all loops.
This is the LLVM part of the fix.
---
llvm/include/llvm/CodeGen/MachinePipeliner.h | 6 +-
llvm/lib/CodeGen/MachinePipeliner.cpp | 21 +++++-
.../CodeGen/Hexagon/swp-pragma-enable.mir | 70 +++++++++++++++++++
.../Hexagon/swp-without-pragma-enable.mir | 70 +++++++++++++++++++
4 files changed, 163 insertions(+), 4 deletions(-)
create mode 100644 llvm/test/CodeGen/Hexagon/swp-pragma-enable.mir
create mode 100644 llvm/test/CodeGen/Hexagon/swp-without-pragma-enable.mir
diff --git a/llvm/include/llvm/CodeGen/MachinePipeliner.h b/llvm/include/llvm/CodeGen/MachinePipeliner.h
index 0cc862590d0c02..ec7dfbae297d99 100644
--- a/llvm/include/llvm/CodeGen/MachinePipeliner.h
+++ b/llvm/include/llvm/CodeGen/MachinePipeliner.h
@@ -74,7 +74,6 @@ class MachinePipeliner : public MachineFunctionPass {
const InstrItineraryData *InstrItins = nullptr;
const TargetInstrInfo *TII = nullptr;
RegisterClassInfo RegClassInfo;
- bool disabledByPragma = false;
unsigned II_setByPragma = 0;
#ifndef NDEBUG
@@ -104,6 +103,11 @@ class MachinePipeliner : public MachineFunctionPass {
void getAnalysisUsage(AnalysisUsage &AU) const override;
private:
+ enum class EnableState {
+ Unspecified,
+ Enabled,
+ Disabled,
+ } EState;
void preprocessPhiNodes(MachineBasicBlock &B);
bool canPipelineLoop(MachineLoop &L);
bool scheduleLoop(MachineLoop &L);
diff --git a/llvm/lib/CodeGen/MachinePipeliner.cpp b/llvm/lib/CodeGen/MachinePipeliner.cpp
index 5475743905032c..492703ba72a5f2 100644
--- a/llvm/lib/CodeGen/MachinePipeliner.cpp
+++ b/llvm/lib/CodeGen/MachinePipeliner.cpp
@@ -197,6 +197,10 @@ static cl::opt<bool>
MVECodeGen("pipeliner-mve-cg", cl::Hidden, cl::init(false),
cl::desc("Use the MVE code generator for software pipelining"));
+static cl::opt<bool> ApplyOnlyEnabledByPragma(
+ "pipeliner-apply-only-enabled-by-pragma", cl::Hidden, cl::init(false),
+ cl::desc("Apply Software Pipelining only if enabled by pragma"));
+
namespace llvm {
// A command line option to enable the CopyToPhi DAG mutation.
@@ -320,8 +324,8 @@ bool MachinePipeliner::scheduleLoop(MachineLoop &L) {
void MachinePipeliner::setPragmaPipelineOptions(MachineLoop &L) {
// Reset the pragma for the next loop in iteration.
- disabledByPragma = false;
II_setByPragma = 0;
+ EState = EnableState::Unspecified;
MachineBasicBlock *LBLK = L.getTopBlock();
@@ -360,8 +364,10 @@ void MachinePipeliner::setPragmaPipelineOptions(MachineLoop &L) {
II_setByPragma =
mdconst::extract<ConstantInt>(MD->getOperand(1))->getZExtValue();
assert(II_setByPragma >= 1 && "Pipeline initiation interval must be positive.");
+ } else if (S->getString() == "llvm.loop.pipeline.enable") {
+ EState = EnableState::Enabled;
} else if (S->getString() == "llvm.loop.pipeline.disable") {
- disabledByPragma = true;
+ EState = EnableState::Disabled;
}
}
}
@@ -380,7 +386,7 @@ bool MachinePipeliner::canPipelineLoop(MachineLoop &L) {
return false;
}
- if (disabledByPragma) {
+ if (EState == EnableState::Disabled) {
ORE->emit([&]() {
return MachineOptimizationRemarkAnalysis(DEBUG_TYPE, "canPipelineLoop",
L.getStartLoc(), L.getHeader())
@@ -389,6 +395,15 @@ bool MachinePipeliner::canPipelineLoop(MachineLoop &L) {
return false;
}
+ if (EState != EnableState::Enabled && ApplyOnlyEnabledByPragma) {
+ ORE->emit([&]() {
+ return MachineOptimizationRemarkAnalysis(DEBUG_TYPE, "canPipelineLoop",
+ L.getStartLoc(), L.getHeader())
+ << "Not enabled by Pragma.";
+ });
+ return false;
+ }
+
// Check if the branch can't be understood because we can't do pipelining
// if that's the case.
LI.TBB = nullptr;
diff --git a/llvm/test/CodeGen/Hexagon/swp-pragma-enable.mir b/llvm/test/CodeGen/Hexagon/swp-pragma-enable.mir
new file mode 100644
index 00000000000000..09a0b88fc06af8
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/swp-pragma-enable.mir
@@ -0,0 +1,70 @@
+# RUN: llc -mtriple=hexagon -run-pass pipeliner -debug-only=pipeliner -pipeliner-apply-only-enabled-by-pragma %s -o /dev/null 2>&1 | FileCheck %s
+# REQUIRES: asserts
+
+# Test that SWP is applied when enabled by pragma.
+
+# CHECK: Schedule Found?
+
+--- |
+ define dso_local void @f(ptr nocapture noundef %a, i32 noundef %n) {
+ entry:
+ %cmp3 = icmp sgt i32 %n, 0
+ br i1 %cmp3, label %for.body, label %for.cond.cleanup
+
+ for.cond.cleanup:
+ ret void
+
+ for.body:
+ %lsr.iv5 = phi ptr [ %cgep, %for.body ], [ %a, %entry ]
+ %lsr.iv = phi i32 [ %lsr.iv.next, %for.body ], [ %n, %entry ]
+ %0 = load i32, ptr %lsr.iv5, align 4, !tbaa !5
+ %add = add nsw i32 %0, 42
+ store i32 %add, ptr %lsr.iv5, align 4, !tbaa !5
+ %lsr.iv.next = add i32 %lsr.iv, -1
+ %exitcond.not = icmp eq i32 %lsr.iv.next, 0
+ %cgep = getelementptr i8, ptr %lsr.iv5, i32 4
+ br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !9
+ }
+
+ !5 = !{!6, !6, i64 0}
+ !6 = !{!"int", !7, i64 0}
+ !7 = !{!"omnipotent char", !8, i64 0}
+ !8 = !{!"Simple C/C++ TBAA"}
+ !9 = distinct !{!9, !10, !11, !12}
+ !10 = !{!"llvm.loop.mustprogress"}
+ !11 = !{!"llvm.loop.unroll.disable"}
+ !12 = !{!"llvm.loop.pipeline.enable"}
+
+...
+---
+name: f
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ successors: %bb.3, %bb.1
+ liveins: $r0, $r1
+
+ %5:intregs = COPY $r1
+ %4:intregs = COPY $r0
+ %6:predregs = C2_cmpgti %5, 0
+ J2_jumpf %6, %bb.1, implicit-def $pc
+
+ bb.3:
+ %10:intregs = COPY %5
+ J2_loop0r %bb.2, %10, implicit-def $lc0, implicit-def $sa0, implicit-def $usr
+ J2_jump %bb.2, implicit-def $pc
+
+ bb.1.for.cond.cleanup:
+ PS_jmpret $r31, implicit-def dead $pc
+
+ bb.2.for.body:
+ successors: %bb.1, %bb.2
+
+ %0:intregs = PHI %4, %bb.3, %3, %bb.2
+ %7:intregs = L2_loadri_io %0, 0 :: (load (s32) from %ir.lsr.iv5, !tbaa !5)
+ %8:intregs = nsw A2_addi killed %7, 42
+ %3:intregs = S2_storeri_pi %0, 4, killed %8 :: (store (s32) into %ir.lsr.iv5, !tbaa !5)
+ ENDLOOP0 %bb.2, implicit-def $pc, implicit-def $lc0, implicit $sa0, implicit $lc0
+ J2_jump %bb.1, implicit-def $pc
+
+...
diff --git a/llvm/test/CodeGen/Hexagon/swp-without-pragma-enable.mir b/llvm/test/CodeGen/Hexagon/swp-without-pragma-enable.mir
new file mode 100644
index 00000000000000..48efbb2da99a4b
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/swp-without-pragma-enable.mir
@@ -0,0 +1,70 @@
+# RUN: llc -mtriple=hexagon -run-pass pipeliner -debug-only=pipeliner -pipeliner-apply-only-enabled-by-pragma %s -o /dev/null 2>&1 | FileCheck %s
+# REQUIRES: asserts
+
+# Test that SWP is not applied to an otherwise pipelinable loop that lacks the
+# enable pragma when -pipeliner-apply-only-enabled-by-pragma is specified.
+
+# CHECK-NOT: Schedule Found?
+# CHECK: Can not pipeline loop
+--- |
+ define dso_local void @f(ptr nocapture noundef %a, i32 noundef %n) {
+ entry:
+ %cmp3 = icmp sgt i32 %n, 0
+ br i1 %cmp3, label %for.body, label %for.cond.cleanup
+
+ for.cond.cleanup:
+ ret void
+
+ for.body:
+ %lsr.iv5 = phi ptr [ %cgep, %for.body ], [ %a, %entry ]
+ %lsr.iv = phi i32 [ %lsr.iv.next, %for.body ], [ %n, %entry ]
+ %0 = load i32, ptr %lsr.iv5, align 4, !tbaa !5
+ %add = add nsw i32 %0, 42
+ store i32 %add, ptr %lsr.iv5, align 4, !tbaa !5
+ %lsr.iv.next = add i32 %lsr.iv, -1
+ %exitcond.not = icmp eq i32 %lsr.iv.next, 0
+ %cgep = getelementptr i8, ptr %lsr.iv5, i32 4
+ br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !9
+ }
+
+ !5 = !{!6, !6, i64 0}
+ !6 = !{!"int", !7, i64 0}
+ !7 = !{!"omnipotent char", !8, i64 0}
+ !8 = !{!"Simple C/C++ TBAA"}
+ !9 = distinct !{!9, !10, !11}
+ !10 = !{!"llvm.loop.mustprogress"}
+ !11 = !{!"llvm.loop.unroll.disable"}
+
+...
+---
+name: f
+tracksRegLiveness: true
+body: |
+ bb.0.entry:
+ successors: %bb.3, %bb.1
+ liveins: $r0, $r1
+
+ %5:intregs = COPY $r1
+ %4:intregs = COPY $r0
+ %6:predregs = C2_cmpgti %5, 0
+ J2_jumpf %6, %bb.1, implicit-def $pc
+
+ bb.3:
+ %10:intregs = COPY %5
+ J2_loop0r %bb.2, %10, implicit-def $lc0, implicit-def $sa0, implicit-def $usr
+ J2_jump %bb.2, implicit-def $pc
+
+ bb.1.for.cond.cleanup:
+ PS_jmpret $r31, implicit-def dead $pc
+
+ bb.2.for.body:
+ successors: %bb.1, %bb.2
+
+ %0:intregs = PHI %4, %bb.3, %3, %bb.2
+ %7:intregs = L2_loadri_io %0, 0 :: (load (s32) from %ir.lsr.iv5, !tbaa !5)
+ %8:intregs = nsw A2_addi killed %7, 42
+ %3:intregs = S2_storeri_pi %0, 4, killed %8 :: (store (s32) into %ir.lsr.iv5, !tbaa !5)
+ ENDLOOP0 %bb.2, implicit-def $pc, implicit-def $lc0, implicit $sa0, implicit $lc0
+ J2_jump %bb.1, implicit-def $pc
+
+...
More information about the llvm-commits
mailing list