[llvm] r333629 - [AMDGPU] Track occupancy in MFI

Stanislav Mekhanoshin via llvm-commits llvm-commits at lists.llvm.org
Wed May 30 22:36:04 PDT 2018


Author: rampitec
Date: Wed May 30 22:36:04 2018
New Revision: 333629

URL: http://llvm.org/viewvc/llvm-project?rev=333629&view=rev
Log:
[AMDGPU] Track occupancy in MFI

Keep track of achieved occupancy in SIMachineFunctionInfo.
At the moment we have a lot of duplicated or even missing code to
query and maintain occupancy info. Record it in the MFI and
query it with a single call. Interfaces:

- getOccupancy() - returns current recorded achieved occupancy.
- getMinAllowedOccupancy() - returns the lesser of the achieved occupancy
and the lowest occupancy we are ready to tolerate. For example, if
a kernel is memory bound (or needs the wave limiter), we are ready to
tolerate 4 waves.
- limitOccupancy() - record occupancy level if we have to lower it.
- increaseOccupancy() - record occupancy if the scheduler managed to
increase it.

MFI takes care of integrating the different checks affecting occupancy,
including LDS use and the waves-per-eu attribute. Note that the scheduler
starts before register pressure is known, so it has to record either a
limit or an increase in occupancy after it is done. Later passes can
just query the resulting value.

The new interface is used in the active scheduler and is NFC (no
functional change) with respect to its behavior. Changes are also made
to the experimental schedulers to use it and record an occupancy after
they are done. Before the change, waves-per-eu was ignored by the
experimental schedulers and the tolerance window for memory bound
kernels was not used.

Differential Revision: https://reviews.llvm.org/D47509

Modified:
    llvm/trunk/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
    llvm/trunk/lib/Target/AMDGPU/GCNSchedStrategy.cpp
    llvm/trunk/lib/Target/AMDGPU/GCNSchedStrategy.h
    llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
    llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
    llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.h

Modified: llvm/trunk/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/GCNIterativeScheduler.cpp?rev=333629&r1=333628&r2=333629&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/GCNIterativeScheduler.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/GCNIterativeScheduler.cpp Wed May 30 22:36:04 2018
@@ -478,13 +478,19 @@ unsigned GCNIterativeScheduler::tryMaxim
   }
   LLVM_DEBUG(dbgs() << "New occupancy = " << NewOcc
                     << ", prev occupancy = " << Occ << '\n');
+  if (NewOcc > Occ) {
+    SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+    MFI->increaseOccupancy(MF, NewOcc);
+  }
+
   return std::max(NewOcc, Occ);
 }
 
 void GCNIterativeScheduler::scheduleLegacyMaxOccupancy(
   bool TryMaximizeOccupancy) {
   const auto &ST = MF.getSubtarget<SISubtarget>();
-  auto TgtOcc = ST.getOccupancyWithLocalMemSize(MF);
+  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+  auto TgtOcc = MFI->getMinAllowedOccupancy();
 
   sortRegionsByPressure(TgtOcc);
   auto Occ = Regions.front()->MaxPressure.getOccupancy(ST);
@@ -501,6 +507,7 @@ void GCNIterativeScheduler::scheduleLega
                        "target occupancy = "
                     << TgtOcc << '\n');
   GCNMaxOccupancySchedStrategy LStrgy(Context);
+  unsigned FinalOccupancy = std::min(Occ, MFI->getOccupancy());
 
   for (int I = 0; I < NumPasses; ++I) {
     // running first pass with TargetOccupancy = 0 mimics previous scheduling
@@ -525,8 +532,10 @@ void GCNIterativeScheduler::scheduleLega
           assert(R->MaxPressure.getOccupancy(ST) >= TgtOcc);
         }
       }
+      FinalOccupancy = std::min(FinalOccupancy, RP.getOccupancy(ST));
     }
   }
+  MFI->limitOccupancy(FinalOccupancy);
 }
 
 ///////////////////////////////////////////////////////////////////////////////
@@ -534,7 +543,8 @@ void GCNIterativeScheduler::scheduleLega
 
 void GCNIterativeScheduler::scheduleMinReg(bool force) {
   const auto &ST = MF.getSubtarget<SISubtarget>();
-  const auto TgtOcc = ST.getOccupancyWithLocalMemSize(MF);
+  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+  const auto TgtOcc = MFI->getOccupancy();
   sortRegionsByPressure(TgtOcc);
 
   auto MaxPressure = Regions.front()->MaxPressure;
@@ -567,9 +577,8 @@ void GCNIterativeScheduler::scheduleMinR
 void GCNIterativeScheduler::scheduleILP(
   bool TryMaximizeOccupancy) {
   const auto &ST = MF.getSubtarget<SISubtarget>();
-  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
-  auto TgtOcc = std::min(ST.getOccupancyWithLocalMemSize(MF),
-                         MFI->getMaxWavesPerEU());
+  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+  auto TgtOcc = MFI->getMinAllowedOccupancy();
 
   sortRegionsByPressure(TgtOcc);
   auto Occ = Regions.front()->MaxPressure.getOccupancy(ST);
@@ -582,6 +591,7 @@ void GCNIterativeScheduler::scheduleILP(
                        "target occupancy = "
                     << TgtOcc << '\n');
 
+  unsigned FinalOccupancy = std::min(Occ, MFI->getOccupancy());
   for (auto R : Regions) {
     BuildDAG DAG(*R, *this);
     const auto ILPSchedule = makeGCNILPScheduler(DAG.getBottomRoots(), *this);
@@ -599,6 +609,8 @@ void GCNIterativeScheduler::scheduleILP(
     } else {
       scheduleRegion(*R, ILPSchedule, RP);
       LLVM_DEBUG(printSchedResult(dbgs(), R, RP));
+      FinalOccupancy = std::min(FinalOccupancy, RP.getOccupancy(ST));
     }
   }
+  MFI->limitOccupancy(FinalOccupancy);
 }

Modified: llvm/trunk/lib/Target/AMDGPU/GCNSchedStrategy.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/GCNSchedStrategy.cpp?rev=333629&r1=333628&r2=333629&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/GCNSchedStrategy.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/GCNSchedStrategy.cpp Wed May 30 22:36:04 2018
@@ -308,9 +308,7 @@ GCNScheduleDAGMILive::GCNScheduleDAGMILi
   ScheduleDAGMILive(C, std::move(S)),
   ST(MF.getSubtarget<SISubtarget>()),
   MFI(*MF.getInfo<SIMachineFunctionInfo>()),
-  StartingOccupancy(std::min(ST.getOccupancyWithLocalMemSize(MFI.getLDSSize(),
-                                                             MF.getFunction()),
-                             MFI.getMaxWavesPerEU())),
+  StartingOccupancy(MFI.getOccupancy()),
   MinOccupancy(StartingOccupancy), Stage(0), RegionIdx(0) {
 
   LLVM_DEBUG(dbgs() << "Starting occupancy is " << StartingOccupancy << ".\n");
@@ -374,16 +372,15 @@ void GCNScheduleDAGMILive::schedule() {
   unsigned NewOccupancy = std::max(WavesAfter, WavesBefore);
   // Allow memory bound functions to drop to 4 waves if not limited by an
   // attribute.
-  unsigned MinMemBoundWaves = std::max(MFI.getMinWavesPerEU(), 4u);
   if (WavesAfter < WavesBefore && WavesAfter < MinOccupancy &&
-      WavesAfter >= MinMemBoundWaves &&
-      (MFI.isMemoryBound() || MFI.needsWaveLimiter())) {
+      WavesAfter >= MFI.getMinAllowedOccupancy()) {
     LLVM_DEBUG(dbgs() << "Function is memory bound, allow occupancy drop up to "
-                      << MinMemBoundWaves << " waves\n");
+                      << MFI.getMinAllowedOccupancy() << " waves\n");
     NewOccupancy = WavesAfter;
   }
   if (NewOccupancy < MinOccupancy) {
     MinOccupancy = NewOccupancy;
+    MFI.limitOccupancy(MinOccupancy);
     LLVM_DEBUG(dbgs() << "Occupancy lowered for the function to "
                       << MinOccupancy << ".\n");
   }

Modified: llvm/trunk/lib/Target/AMDGPU/GCNSchedStrategy.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/GCNSchedStrategy.h?rev=333629&r1=333628&r2=333629&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/GCNSchedStrategy.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/GCNSchedStrategy.h Wed May 30 22:36:04 2018
@@ -64,7 +64,7 @@ class GCNScheduleDAGMILive : public Sche
 
   const SISubtarget &ST;
 
-  const SIMachineFunctionInfo &MFI;
+  SIMachineFunctionInfo &MFI;
 
   // Occupancy target at the beginning of function scheduling cycle.
   unsigned StartingOccupancy;

Modified: llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp?rev=333629&r1=333628&r2=333629&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIISelLowering.cpp Wed May 30 22:36:04 2018
@@ -7729,6 +7729,8 @@ void SITargetLowering::finalizeLowering(
   MRI.replaceRegWith(AMDGPU::SCRATCH_WAVE_OFFSET_REG,
                      Info->getScratchWaveOffsetReg());
 
+  Info->limitOccupancy(MF);
+
   TargetLoweringBase::finalizeLowering(MF);
 }
 

Modified: llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp?rev=333629&r1=333628&r2=333629&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp Wed May 30 22:36:04 2018
@@ -55,6 +55,9 @@ SIMachineFunctionInfo::SIMachineFunction
   FlatWorkGroupSizes = ST.getFlatWorkGroupSizes(F);
   WavesPerEU = ST.getWavesPerEU(F);
 
+  Occupancy = getMaxWavesPerEU();
+  limitOccupancy(MF);
+
   if (!isEntryFunction()) {
     // Non-entry functions have no special inputs for now, other registers
     // required for scratch access.
@@ -176,6 +179,13 @@ SIMachineFunctionInfo::SIMachineFunction
     S.consumeInteger(0, HighBitsOf32BitAddress);
 }
 
+void SIMachineFunctionInfo::limitOccupancy(const MachineFunction &MF) {
+  limitOccupancy(getMaxWavesPerEU());
+  const SISubtarget& ST = MF.getSubtarget<SISubtarget>();
+  limitOccupancy(ST.getOccupancyWithLocalMemSize(getLDSSize(),
+                 MF.getFunction()));
+}
+
 unsigned SIMachineFunctionInfo::addPrivateSegmentBuffer(
   const SIRegisterInfo &TRI) {
   ArgInfo.PrivateSegmentBuffer =

Modified: llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.h?rev=333629&r1=333628&r2=333629&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIMachineFunctionInfo.h Wed May 30 22:36:04 2018
@@ -186,6 +186,9 @@ private:
 
   unsigned HighBitsOf32BitAddress;
 
+  // Current recorded maximum possible occupancy.
+  unsigned Occupancy;
+
   MCPhysReg getNextUserSGPR() const;
 
   MCPhysReg getNextSystemSGPR() const;
@@ -641,6 +644,29 @@ public:
       llvm::make_unique<AMDGPUImagePseudoSourceValue>(TII));
     return PSV.first->second.get();
   }
+
+  unsigned getOccupancy() const {
+    return Occupancy;
+  }
+
+  unsigned getMinAllowedOccupancy() const {
+    if (!isMemoryBound() && !needsWaveLimiter())
+      return Occupancy;
+    return (Occupancy < 4) ? Occupancy : 4;
+  }
+
+  void limitOccupancy(const MachineFunction &MF);
+
+  void limitOccupancy(unsigned Limit) {
+    if (Occupancy > Limit)
+      Occupancy = Limit;
+  }
+
+  void increaseOccupancy(const MachineFunction &MF, unsigned Limit) {
+    if (Occupancy < Limit)
+      Occupancy = Limit;
+    limitOccupancy(MF);
+  }
 };
 
 } // end namespace llvm




More information about the llvm-commits mailing list