[llvm] [MachinePipeliner] Support `#pragma clang loop pipeline(enable)` (PR #112502)

Ryotaro Kasuga via llvm-commits llvm-commits at lists.llvm.org
Wed Oct 16 01:52:03 PDT 2024


https://github.com/kasuga-fj created https://github.com/llvm/llvm-project/pull/112502

Previously, `#pragma clang loop pipeline` accepted only `disable`. This patch adds `enable` as a valid argument for the pragma. Together with the new `-pipeliner-apply-only-enabled-by-pragma` option, this allows software pipelining to be applied only to the loops that request it, instead of to all loops.

This is the LLVM part of the fix.

>From 31fa1f6131d2e6d6c0bff27a3fab43e39eff3fa9 Mon Sep 17 00:00:00 2001
From: Ryotaro Kasuga <kasuga.ryotaro at fujitsu.com>
Date: Fri, 11 Oct 2024 07:37:31 +0000
Subject: [PATCH] [MachinePipeliner] Support `#pragma clang loop
 pipeline(enable)`

Previously, `#pragma clang loop pipeline` accepted only `disable`. This
patch adds `enable` as a valid argument for the pragma, which allows the
software pipelining optimization to be applied only to selected loops
instead of to all loops.

This is the LLVM part of the fix.
---
 llvm/include/llvm/CodeGen/MachinePipeliner.h  |  6 +-
 llvm/lib/CodeGen/MachinePipeliner.cpp         | 21 +++++-
 .../CodeGen/Hexagon/swp-pragma-enable.mir     | 70 +++++++++++++++++++
 .../Hexagon/swp-without-pragma-enable.mir     | 70 +++++++++++++++++++
 4 files changed, 163 insertions(+), 4 deletions(-)
 create mode 100644 llvm/test/CodeGen/Hexagon/swp-pragma-enable.mir
 create mode 100644 llvm/test/CodeGen/Hexagon/swp-without-pragma-enable.mir

diff --git a/llvm/include/llvm/CodeGen/MachinePipeliner.h b/llvm/include/llvm/CodeGen/MachinePipeliner.h
index 0cc862590d0c02..ec7dfbae297d99 100644
--- a/llvm/include/llvm/CodeGen/MachinePipeliner.h
+++ b/llvm/include/llvm/CodeGen/MachinePipeliner.h
@@ -74,7 +74,6 @@ class MachinePipeliner : public MachineFunctionPass {
   const InstrItineraryData *InstrItins = nullptr;
   const TargetInstrInfo *TII = nullptr;
   RegisterClassInfo RegClassInfo;
-  bool disabledByPragma = false;
   unsigned II_setByPragma = 0;
 
 #ifndef NDEBUG
@@ -104,6 +103,11 @@ class MachinePipeliner : public MachineFunctionPass {
   void getAnalysisUsage(AnalysisUsage &AU) const override;
 
 private:
+  enum class EnableState {
+    Unspecified,
+    Enabled,
+    Disabled,
+  } EState;
   void preprocessPhiNodes(MachineBasicBlock &B);
   bool canPipelineLoop(MachineLoop &L);
   bool scheduleLoop(MachineLoop &L);
diff --git a/llvm/lib/CodeGen/MachinePipeliner.cpp b/llvm/lib/CodeGen/MachinePipeliner.cpp
index 5475743905032c..492703ba72a5f2 100644
--- a/llvm/lib/CodeGen/MachinePipeliner.cpp
+++ b/llvm/lib/CodeGen/MachinePipeliner.cpp
@@ -197,6 +197,10 @@ static cl::opt<bool>
     MVECodeGen("pipeliner-mve-cg", cl::Hidden, cl::init(false),
                cl::desc("Use the MVE code generator for software pipelining"));
 
+static cl::opt<bool> ApplyOnlyEnabledByPragma(
+    "pipeliner-apply-only-enabled-by-pragma", cl::Hidden, cl::init(false),
+    cl::desc("Apply Software Pipelining only if enabled by pragma"));
+
 namespace llvm {
 
 // A command line option to enable the CopyToPhi DAG mutation.
@@ -320,8 +324,8 @@ bool MachinePipeliner::scheduleLoop(MachineLoop &L) {
 
 void MachinePipeliner::setPragmaPipelineOptions(MachineLoop &L) {
   // Reset the pragma for the next loop in iteration.
-  disabledByPragma = false;
   II_setByPragma = 0;
+  EState = EnableState::Unspecified;
 
   MachineBasicBlock *LBLK = L.getTopBlock();
 
@@ -360,8 +364,10 @@ void MachinePipeliner::setPragmaPipelineOptions(MachineLoop &L) {
       II_setByPragma =
           mdconst::extract<ConstantInt>(MD->getOperand(1))->getZExtValue();
       assert(II_setByPragma >= 1 && "Pipeline initiation interval must be positive.");
+    } else if (S->getString() == "llvm.loop.pipeline.enable") {
+      EState = EnableState::Enabled;
     } else if (S->getString() == "llvm.loop.pipeline.disable") {
-      disabledByPragma = true;
+      EState = EnableState::Disabled;
     }
   }
 }
@@ -380,7 +386,7 @@ bool MachinePipeliner::canPipelineLoop(MachineLoop &L) {
     return false;
   }
 
-  if (disabledByPragma) {
+  if (EState == EnableState::Disabled) {
     ORE->emit([&]() {
       return MachineOptimizationRemarkAnalysis(DEBUG_TYPE, "canPipelineLoop",
                                                L.getStartLoc(), L.getHeader())
@@ -389,6 +395,15 @@ bool MachinePipeliner::canPipelineLoop(MachineLoop &L) {
     return false;
   }
 
+  if (EState != EnableState::Enabled && ApplyOnlyEnabledByPragma) {
+    ORE->emit([&]() {
+      return MachineOptimizationRemarkAnalysis(DEBUG_TYPE, "canPipelineLoop",
+                                               L.getStartLoc(), L.getHeader())
+             << "Not enabled by Pragma.";
+    });
+    return false;
+  }
+
   // Check if the branch can't be understood because we can't do pipelining
   // if that's the case.
   LI.TBB = nullptr;
diff --git a/llvm/test/CodeGen/Hexagon/swp-pragma-enable.mir b/llvm/test/CodeGen/Hexagon/swp-pragma-enable.mir
new file mode 100644
index 00000000000000..09a0b88fc06af8
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/swp-pragma-enable.mir
@@ -0,0 +1,70 @@
+# RUN: llc -mtriple=hexagon -run-pass pipeliner -debug-only=pipeliner -pipeliner-apply-only-enabled-by-pragma %s -o /dev/null 2>&1 | FileCheck %s
+# REQUIRES: asserts
+
+# Test that SWP is applied when enabled by pragma.
+
+# CHECK: Schedule Found?
+
+--- |
+  define dso_local void @f(ptr nocapture noundef %a, i32 noundef %n) {
+  entry:
+    %cmp3 = icmp sgt i32 %n, 0
+    br i1 %cmp3, label %for.body, label %for.cond.cleanup
+  
+  for.cond.cleanup:
+    ret void
+  
+  for.body:
+    %lsr.iv5 = phi ptr [ %cgep, %for.body ], [ %a, %entry ]
+    %lsr.iv = phi i32 [ %lsr.iv.next, %for.body ], [ %n, %entry ]
+    %0 = load i32, ptr %lsr.iv5, align 4, !tbaa !5
+    %add = add nsw i32 %0, 42
+    store i32 %add, ptr %lsr.iv5, align 4, !tbaa !5
+    %lsr.iv.next = add i32 %lsr.iv, -1
+    %exitcond.not = icmp eq i32 %lsr.iv.next, 0
+    %cgep = getelementptr i8, ptr %lsr.iv5, i32 4
+    br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !9
+  }
+
+  !5 = !{!6, !6, i64 0}
+  !6 = !{!"int", !7, i64 0}
+  !7 = !{!"omnipotent char", !8, i64 0}
+  !8 = !{!"Simple C/C++ TBAA"}
+  !9 = distinct !{!9, !10, !11, !12}
+  !10 = !{!"llvm.loop.mustprogress"}
+  !11 = !{!"llvm.loop.unroll.disable"}
+  !12 = !{!"llvm.loop.pipeline.enable"}
+
+...
+---
+name:            f
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    successors: %bb.3, %bb.1
+    liveins: $r0, $r1
+  
+    %5:intregs = COPY $r1
+    %4:intregs = COPY $r0
+    %6:predregs = C2_cmpgti %5, 0
+    J2_jumpf %6, %bb.1, implicit-def $pc
+  
+  bb.3:
+    %10:intregs = COPY %5
+    J2_loop0r %bb.2, %10, implicit-def $lc0, implicit-def $sa0, implicit-def $usr
+    J2_jump %bb.2, implicit-def $pc
+  
+  bb.1.for.cond.cleanup:
+    PS_jmpret $r31, implicit-def dead $pc
+  
+  bb.2.for.body:
+    successors: %bb.1, %bb.2
+  
+    %0:intregs = PHI %4, %bb.3, %3, %bb.2
+    %7:intregs = L2_loadri_io %0, 0 :: (load (s32) from %ir.lsr.iv5, !tbaa !5)
+    %8:intregs = nsw A2_addi killed %7, 42
+    %3:intregs = S2_storeri_pi %0, 4, killed %8 :: (store (s32) into %ir.lsr.iv5, !tbaa !5)
+    ENDLOOP0 %bb.2, implicit-def $pc, implicit-def $lc0, implicit $sa0, implicit $lc0
+    J2_jump %bb.1, implicit-def $pc
+
+...
diff --git a/llvm/test/CodeGen/Hexagon/swp-without-pragma-enable.mir b/llvm/test/CodeGen/Hexagon/swp-without-pragma-enable.mir
new file mode 100644
index 00000000000000..48efbb2da99a4b
--- /dev/null
+++ b/llvm/test/CodeGen/Hexagon/swp-without-pragma-enable.mir
@@ -0,0 +1,70 @@
+# RUN: llc -mtriple=hexagon -run-pass pipeliner -debug-only=pipeliner -pipeliner-apply-only-enabled-by-pragma %s -o /dev/null 2>&1 | FileCheck %s
+# REQUIRES: asserts
+
+# Test that SWP is not applied to an otherwise pipelinable loop when SWP is
+# restricted to pragma-enabled loops and this loop has no enable pragma.
+
+# CHECK-NOT: Schedule Found?
+# CHECK: Can not pipeline loop
+--- |
+  define dso_local void @f(ptr nocapture noundef %a, i32 noundef %n) {
+  entry:
+    %cmp3 = icmp sgt i32 %n, 0
+    br i1 %cmp3, label %for.body, label %for.cond.cleanup
+  
+  for.cond.cleanup:
+    ret void
+  
+  for.body:
+    %lsr.iv5 = phi ptr [ %cgep, %for.body ], [ %a, %entry ]
+    %lsr.iv = phi i32 [ %lsr.iv.next, %for.body ], [ %n, %entry ]
+    %0 = load i32, ptr %lsr.iv5, align 4, !tbaa !5
+    %add = add nsw i32 %0, 42
+    store i32 %add, ptr %lsr.iv5, align 4, !tbaa !5
+    %lsr.iv.next = add i32 %lsr.iv, -1
+    %exitcond.not = icmp eq i32 %lsr.iv.next, 0
+    %cgep = getelementptr i8, ptr %lsr.iv5, i32 4
+    br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !9
+  }
+
+  !5 = !{!6, !6, i64 0}
+  !6 = !{!"int", !7, i64 0}
+  !7 = !{!"omnipotent char", !8, i64 0}
+  !8 = !{!"Simple C/C++ TBAA"}
+  !9 = distinct !{!9, !10, !11}
+  !10 = !{!"llvm.loop.mustprogress"}
+  !11 = !{!"llvm.loop.unroll.disable"}
+
+...
+---
+name:            f
+tracksRegLiveness: true
+body:             |
+  bb.0.entry:
+    successors: %bb.3, %bb.1
+    liveins: $r0, $r1
+  
+    %5:intregs = COPY $r1
+    %4:intregs = COPY $r0
+    %6:predregs = C2_cmpgti %5, 0
+    J2_jumpf %6, %bb.1, implicit-def $pc
+  
+  bb.3:
+    %10:intregs = COPY %5
+    J2_loop0r %bb.2, %10, implicit-def $lc0, implicit-def $sa0, implicit-def $usr
+    J2_jump %bb.2, implicit-def $pc
+  
+  bb.1.for.cond.cleanup:
+    PS_jmpret $r31, implicit-def dead $pc
+  
+  bb.2.for.body:
+    successors: %bb.1, %bb.2
+  
+    %0:intregs = PHI %4, %bb.3, %3, %bb.2
+    %7:intregs = L2_loadri_io %0, 0 :: (load (s32) from %ir.lsr.iv5, !tbaa !5)
+    %8:intregs = nsw A2_addi killed %7, 42
+    %3:intregs = S2_storeri_pi %0, 4, killed %8 :: (store (s32) into %ir.lsr.iv5, !tbaa !5)
+    ENDLOOP0 %bb.2, implicit-def $pc, implicit-def $lc0, implicit $sa0, implicit $lc0
+    J2_jump %bb.1, implicit-def $pc
+
+...



More information about the llvm-commits mailing list