[llvm] 477e3fe - Revert "AMDGPU: Don't consider global pressure when bundling soft clauses"

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Wed Feb 3 10:25:12 PST 2021


Author: Matt Arsenault
Date: 2021-02-03T13:25:05-05:00
New Revision: 477e3fe4f874b1c4d5896f3bfaf7b3b8a6d38103

URL: https://github.com/llvm/llvm-project/commit/477e3fe4f874b1c4d5896f3bfaf7b3b8a6d38103
DIFF: https://github.com/llvm/llvm-project/commit/477e3fe4f874b1c4d5896f3bfaf7b3b8a6d38103.diff

LOG: Revert "AMDGPU: Don't consider global pressure when bundling soft clauses"

This reverts commit 1e377a273f59375d8e6a424f66f069b3adfa1ca4.

A regression was reported.

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/GCNRegPressure.h
    llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp

Removed: 
    llvm/test/CodeGen/AMDGPU/soft-clause-bundle-local-pressure.mir


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
index d07da90aa944..ba8c85aa502b 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.h
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
@@ -127,21 +127,6 @@ class GCNRPTracker {
     return Res;
   }
 
-  /// Return the VGPR pressure at the current point
-  unsigned getCurrentNumVGPR() const {
-    return CurPressure.getVGPRNum();
-  }
-
-  /// Return the SGPR pressure at the current point
-  unsigned getCurrentNumSGPR() const {
-    return CurPressure.getSGPRNum();
-  }
-
-  /// Return the occupancy at the current point
-  unsigned getCurrentOccupancy(const GCNSubtarget &ST) const {
-    return CurPressure.getOccupancy(ST);
-  }
-
   decltype(LiveRegs) moveLiveRegs() {
     return std::move(LiveRegs);
   }

diff  --git a/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp b/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp
index acaa3b367415..59ce080c8e70 100644
--- a/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFormMemoryClauses.cpp
@@ -64,11 +64,10 @@ class SIFormMemoryClauses : public MachineFunctionPass {
 
   bool canBundle(const MachineInstr &MI, const RegUse &Defs,
                  const RegUse &Uses) const;
-  bool checkPressure(const MachineInstr &MI, GCNDownwardRPTracker &RPT,
-                     bool IsVMEMClause);
+  bool checkPressure(const MachineInstr &MI, GCNDownwardRPTracker &RPT);
   void collectRegUses(const MachineInstr &MI, RegUse &Defs, RegUse &Uses) const;
   bool processRegUses(const MachineInstr &MI, RegUse &Defs, RegUse &Uses,
-                      GCNDownwardRPTracker &RPT, bool IsVMEMClause);
+                      GCNDownwardRPTracker &RPT);
 
   const GCNSubtarget *ST;
   const SIRegisterInfo *TRI;
@@ -244,33 +243,21 @@ bool SIFormMemoryClauses::canBundle(const MachineInstr &MI, const RegUse &Defs,
 // Function returns false if pressure would hit the limit if instruction is
 // bundled into a memory clause.
 bool SIFormMemoryClauses::checkPressure(const MachineInstr &MI,
-                                        GCNDownwardRPTracker &RPT,
-                                        bool IsVMEMClause) {
-  unsigned OldOccupancy = RPT.getCurrentOccupancy(*ST);
-
+                                        GCNDownwardRPTracker &RPT) {
   // NB: skip advanceBeforeNext() call. Since all defs will be marked
   // early-clobber they will all stay alive at least to the end of the
-  // clause. Therefore we should not decrease pressure even if load pointer
-  // becomes dead and could otherwise be reused for destination.
+  // clause. Therefor we should not decrease pressure even if load
+  // pointer becomes dead and could otherwise be reused for destination.
   RPT.advanceToNext();
-
-  unsigned NewOccupancy = RPT.getCurrentOccupancy(*ST);
-  if (NewOccupancy < OldOccupancy &&
-      NewOccupancy < MFI->getMinAllowedOccupancy())
-    return false;
-
-  // Scalar loads/clauses cannot def VGPRs, and vector loads/clauses cannot def
-  // SGPRs (ignoring artificial implicit operands).
-  if (IsVMEMClause) {
-    if (RPT.getCurrentNumVGPR() > MaxVGPRs)
-      return false;
-  } else {
-    if (RPT.getCurrentNumSGPR() > MaxSGPRs)
-      return false;
+  GCNRegPressure MaxPressure = RPT.moveMaxPressure();
+  unsigned Occupancy = MaxPressure.getOccupancy(*ST);
+  if (Occupancy >= MFI->getMinAllowedOccupancy() &&
+      MaxPressure.getVGPRNum() <= MaxVGPRs &&
+      MaxPressure.getSGPRNum() <= MaxSGPRs) {
+    LastRecordedOccupancy = Occupancy;
+    return true;
   }
-
-  LastRecordedOccupancy = NewOccupancy;
-  return true;
+  return false;
 }
 
 // Collect register defs and uses along with their lane masks and states.
@@ -302,14 +289,13 @@ void SIFormMemoryClauses::collectRegUses(const MachineInstr &MI,
 // Check register def/use conflicts, occupancy limits and collect def/use maps.
 // Return true if instruction can be bundled with previous. It it cannot
 // def/use maps are not updated.
-bool SIFormMemoryClauses::processRegUses(const MachineInstr &MI, RegUse &Defs,
-                                         RegUse &Uses,
-                                         GCNDownwardRPTracker &RPT,
-                                         bool IsVMEMClause) {
+bool SIFormMemoryClauses::processRegUses(const MachineInstr &MI,
+                                         RegUse &Defs, RegUse &Uses,
+                                         GCNDownwardRPTracker &RPT) {
   if (!canBundle(MI, Defs, Uses))
     return false;
 
-  if (!checkPressure(MI, RPT, IsVMEMClause))
+  if (!checkPressure(MI, RPT))
     return false;
 
   collectRegUses(MI, Defs, Uses);
@@ -363,7 +349,7 @@ bool SIFormMemoryClauses::runOnMachineFunction(MachineFunction &MF) {
 
       const GCNRPTracker::LiveRegSet LiveRegsCopy(RPT.getLiveRegs());
       RegUse Defs, Uses;
-      if (!processRegUses(MI, Defs, Uses, RPT, IsVMEM)) {
+      if (!processRegUses(MI, Defs, Uses, RPT)) {
         RPT.reset(MI, &LiveRegsCopy);
         continue;
       }
@@ -381,7 +367,7 @@ bool SIFormMemoryClauses::runOnMachineFunction(MachineFunction &MF) {
         // A load from pointer which was loaded inside the same bundle is an
         // impossible clause because we will need to write and read the same
         // register inside. In this case processRegUses will return false.
-        if (!processRegUses(*Next, Defs, Uses, RPT, IsVMEM))
+        if (!processRegUses(*Next, Defs, Uses, RPT))
           break;
 
         ++Length;

diff  --git a/llvm/test/CodeGen/AMDGPU/soft-clause-bundle-local-pressure.mir b/llvm/test/CodeGen/AMDGPU/soft-clause-bundle-local-pressure.mir
deleted file mode 100644
index a9409dddcdd4..000000000000
--- a/llvm/test/CodeGen/AMDGPU/soft-clause-bundle-local-pressure.mir
+++ /dev/null
@@ -1,271 +0,0 @@
-# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -mattr=+xnack -run-pass=si-form-memory-clauses -verify-machineinstrs -o - %s | FileCheck %s
-
-# The VGPR pressure at the point the scalar clause would form decreases the
-# occupancy. However, this occupancy decrease is not due to the introduction of
-# the bundle so the bundle should still be inserted.
-
----
-name: form_sgpr_clause_high_vgpr_pressure
-tracksRegLiveness: true
-machineFunctionInfo:
-  isEntryFunction: true
-  waveLimiter:     false
-  memoryBound: false
-  scratchRSrcReg:  '$sgpr0_sgpr1_sgpr2_sgpr3'
-  stackPtrOffsetReg: '$sgpr32'
-  occupancy:       10
-
-body:             |
-  bb.0:
-    liveins: $sgpr4_sgpr5, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24
-    ; CHECK-LABEL: name: form_sgpr_clause_high_vgpr_pressure
-    ; CHECK: early-clobber %26:sreg_32_xm0_xexec, early-clobber %27:sreg_32_xm0_xexec = BUNDLE [[REG:%[0-9]+]] {
-    ; CHECK:   [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[REG]], 0, 0, 0 :: (load 4, addrspace 4)
-    ; CHECK:   [[S_LOAD_DWORD_IMM1:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM [[REG]], 8, 0, 0 :: (load 4, addrspace 4)
-    ; CHECK: }
-    %0:sreg_64 = COPY $sgpr4_sgpr5
-    %1:vgpr_32 = COPY $vgpr0
-    %2:vgpr_32 = COPY $vgpr1
-    %3:vgpr_32 = COPY $vgpr2
-    %4:vgpr_32 = COPY $vgpr3
-    %5:vgpr_32 = COPY $vgpr4
-    %6:vgpr_32 = COPY $vgpr5
-    %7:vgpr_32 = COPY $vgpr6
-    %8:vgpr_32 = COPY $vgpr7
-    %9:vgpr_32 = COPY $vgpr8
-    %10:vgpr_32 = COPY $vgpr9
-    %11:vgpr_32 = COPY $vgpr10
-    %12:vgpr_32 = COPY $vgpr11
-    %13:vgpr_32 = COPY $vgpr12
-    %14:vgpr_32 = COPY $vgpr13
-    %15:vgpr_32 = COPY $vgpr14
-    %16:vgpr_32 = COPY $vgpr15
-    %17:vgpr_32 = COPY $vgpr16
-    %18:vgpr_32 = COPY $vgpr17
-    %19:vgpr_32 = COPY $vgpr18
-    %20:vgpr_32 = COPY $vgpr19
-    %21:vgpr_32 = COPY $vgpr20
-    %22:vgpr_32 = COPY $vgpr21
-    %23:vgpr_32 = COPY $vgpr22
-    %24:vgpr_32 = COPY $vgpr23
-    %27:vgpr_32 = COPY $vgpr24
-    %25:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0, 0, 0, 0 :: (load 4, align 4, addrspace 4)
-    %26:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %0, 8, 0, 0 :: (load 4, align 4, addrspace 4)
-    S_NOP 0, implicit %1
-    S_NOP 0, implicit %2
-    S_NOP 0, implicit %3
-    S_NOP 0, implicit %4
-    S_NOP 0, implicit %5
-    S_NOP 0, implicit %6
-    S_NOP 0, implicit %7
-    S_NOP 0, implicit %8
-    S_NOP 0, implicit %9
-    S_NOP 0, implicit %10
-    S_NOP 0, implicit %11
-    S_NOP 0, implicit %12
-    S_NOP 0, implicit %13
-    S_NOP 0, implicit %14
-    S_NOP 0, implicit %15
-    S_NOP 0, implicit %16
-    S_NOP 0, implicit %17
-    S_NOP 0, implicit %18
-    S_NOP 0, implicit %19
-    S_NOP 0, implicit %20
-    S_NOP 0, implicit %21
-    S_NOP 0, implicit %22
-    S_NOP 0, implicit %23
-    S_NOP 0, implicit %24
-    S_NOP 0, implicit %27
-    S_ENDPGM 0, implicit %25, implicit %26
-
-...
-
-# The SGPR pressure at the point the vector clause would form decreases the
-# occupancy. However, this occupancy decrease is not due to the introduction of
-# the bundle so the bundle should still be inserted.
----
-name: form_vgpr_clause_high_sgpr_pressure
-tracksRegLiveness: true
-machineFunctionInfo:
-  isEntryFunction: true
-  waveLimiter:     false
-  memoryBound: false
-  scratchRSrcReg:  '$sgpr0_sgpr1_sgpr2_sgpr3'
-  stackPtrOffsetReg: '$sgpr32'
-  occupancy:       10
-
-body:             |
-  bb.0:
-    liveins: $vgpr0_vgpr1, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr32, $sgpr33, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80
-    ; CHECK-LABEL: name: form_vgpr_clause_high_sgpr_pressure
-    ; CHECK: early-clobber %82:vgpr_32, early-clobber %83:vgpr_32 = BUNDLE [[REG:%[0-9]+]], implicit $exec {
-    ; CHECK:   [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG]], 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
-    ; CHECK:   [[GLOBAL_LOAD_DWORD1:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[REG]], 8, 0, 0, 0, implicit $exec :: (load 4, addrspace 1)
-    ; CHECK: }
-    %0:vreg_64 = COPY $vgpr0_vgpr1
-    %1:sgpr_32 = COPY $sgpr0
-    %2:sgpr_32 = COPY $sgpr1
-    %3:sgpr_32 = COPY $sgpr2
-    %4:sgpr_32 = COPY $sgpr3
-    %5:sgpr_32 = COPY $sgpr4
-    %6:sgpr_32 = COPY $sgpr5
-    %7:sgpr_32 = COPY $sgpr6
-    %8:sgpr_32 = COPY $sgpr7
-    %9:sgpr_32 = COPY $sgpr8
-    %10:sgpr_32 = COPY $sgpr9
-    %11:sgpr_32 = COPY $sgpr10
-    %12:sgpr_32 = COPY $sgpr11
-    %13:sgpr_32 = COPY $sgpr12
-    %14:sgpr_32 = COPY $sgpr13
-    %15:sgpr_32 = COPY $sgpr14
-    %16:sgpr_32 = COPY $sgpr15
-    %17:sgpr_32 = COPY $sgpr16
-    %18:sgpr_32 = COPY $sgpr17
-    %19:sgpr_32 = COPY $sgpr18
-    %20:sgpr_32 = COPY $sgpr19
-    %21:sgpr_32 = COPY $sgpr20
-    %22:sgpr_32 = COPY $sgpr21
-    %23:sgpr_32 = COPY $sgpr22
-    %24:sgpr_32 = COPY $sgpr23
-    %25:sgpr_32 = COPY $sgpr24
-    %26:sgpr_32 = COPY $sgpr25
-    %27:sgpr_32 = COPY $sgpr26
-    %28:sgpr_32 = COPY $sgpr27
-    %29:sgpr_32 = COPY $sgpr28
-    %30:sgpr_32 = COPY $sgpr29
-    %31:sgpr_32 = COPY $sgpr30
-    %32:sgpr_32 = COPY $sgpr31
-    %33:sgpr_32 = COPY $sgpr32
-    %34:sgpr_32 = COPY $sgpr33
-    %35:sgpr_32 = COPY $sgpr34
-    %36:sgpr_32 = COPY $sgpr35
-    %37:sgpr_32 = COPY $sgpr36
-    %38:sgpr_32 = COPY $sgpr37
-    %39:sgpr_32 = COPY $sgpr38
-    %40:sgpr_32 = COPY $sgpr39
-    %41:sgpr_32 = COPY $sgpr40
-    %42:sgpr_32 = COPY $sgpr41
-    %43:sgpr_32 = COPY $sgpr42
-    %44:sgpr_32 = COPY $sgpr43
-    %45:sgpr_32 = COPY $sgpr44
-    %46:sgpr_32 = COPY $sgpr45
-    %47:sgpr_32 = COPY $sgpr46
-    %48:sgpr_32 = COPY $sgpr47
-    %49:sgpr_32 = COPY $sgpr48
-    %50:sgpr_32 = COPY $sgpr49
-    %51:sgpr_32 = COPY $sgpr50
-    %52:sgpr_32 = COPY $sgpr51
-    %53:sgpr_32 = COPY $sgpr52
-    %54:sgpr_32 = COPY $sgpr53
-    %55:sgpr_32 = COPY $sgpr54
-    %56:sgpr_32 = COPY $sgpr55
-    %57:sgpr_32 = COPY $sgpr56
-    %58:sgpr_32 = COPY $sgpr57
-    %59:sgpr_32 = COPY $sgpr58
-    %60:sgpr_32 = COPY $sgpr59
-    %61:sgpr_32 = COPY $sgpr60
-    %62:sgpr_32 = COPY $sgpr61
-    %63:sgpr_32 = COPY $sgpr62
-    %64:sgpr_32 = COPY $sgpr63
-    %65:sgpr_32 = COPY $sgpr64
-    %66:sgpr_32 = COPY $sgpr65
-    %67:sgpr_32 = COPY $sgpr66
-    %68:sgpr_32 = COPY $sgpr67
-    %69:sgpr_32 = COPY $sgpr68
-    %70:sgpr_32 = COPY $sgpr69
-    %71:sgpr_32 = COPY $sgpr70
-    %72:sgpr_32 = COPY $sgpr71
-    %73:sgpr_32 = COPY $sgpr72
-    %74:sgpr_32 = COPY $sgpr73
-    %75:sgpr_32 = COPY $sgpr74
-    %76:sgpr_32 = COPY $sgpr75
-    %77:sgpr_32 = COPY $sgpr76
-    %78:sgpr_32 = COPY $sgpr77
-    %79:sgpr_32 = COPY $sgpr78
-    %80:sgpr_32 = COPY $sgpr79
-    %81:sgpr_32 = COPY $sgpr80
-    %82:vgpr_32 = GLOBAL_LOAD_DWORD %0, 0, 0, 0, 0, implicit $exec :: (load 4, align 4, addrspace 1)
-    %83:vgpr_32 = GLOBAL_LOAD_DWORD %0, 8, 0, 0, 0, implicit $exec :: (load 4, align 4, addrspace 1)
-    S_NOP 0, implicit %1
-    S_NOP 0, implicit %2
-    S_NOP 0, implicit %3
-    S_NOP 0, implicit %4
-    S_NOP 0, implicit %5
-    S_NOP 0, implicit %6
-    S_NOP 0, implicit %7
-    S_NOP 0, implicit %8
-    S_NOP 0, implicit %9
-    S_NOP 0, implicit %10
-    S_NOP 0, implicit %11
-    S_NOP 0, implicit %12
-    S_NOP 0, implicit %13
-    S_NOP 0, implicit %14
-    S_NOP 0, implicit %15
-    S_NOP 0, implicit %16
-    S_NOP 0, implicit %17
-    S_NOP 0, implicit %18
-    S_NOP 0, implicit %19
-    S_NOP 0, implicit %20
-    S_NOP 0, implicit %21
-    S_NOP 0, implicit %22
-    S_NOP 0, implicit %23
-    S_NOP 0, implicit %24
-    S_NOP 0, implicit %25
-    S_NOP 0, implicit %26
-    S_NOP 0, implicit %27
-    S_NOP 0, implicit %28
-    S_NOP 0, implicit %29
-    S_NOP 0, implicit %30
-    S_NOP 0, implicit %31
-    S_NOP 0, implicit %32
-    S_NOP 0, implicit %33
-    S_NOP 0, implicit %34
-    S_NOP 0, implicit %35
-    S_NOP 0, implicit %36
-    S_NOP 0, implicit %37
-    S_NOP 0, implicit %38
-    S_NOP 0, implicit %39
-    S_NOP 0, implicit %40
-    S_NOP 0, implicit %41
-    S_NOP 0, implicit %42
-    S_NOP 0, implicit %43
-    S_NOP 0, implicit %44
-    S_NOP 0, implicit %45
-    S_NOP 0, implicit %46
-    S_NOP 0, implicit %47
-    S_NOP 0, implicit %48
-    S_NOP 0, implicit %49
-    S_NOP 0, implicit %50
-    S_NOP 0, implicit %51
-    S_NOP 0, implicit %52
-    S_NOP 0, implicit %53
-    S_NOP 0, implicit %54
-    S_NOP 0, implicit %55
-    S_NOP 0, implicit %56
-    S_NOP 0, implicit %57
-    S_NOP 0, implicit %58
-    S_NOP 0, implicit %59
-    S_NOP 0, implicit %60
-    S_NOP 0, implicit %61
-    S_NOP 0, implicit %62
-    S_NOP 0, implicit %63
-    S_NOP 0, implicit %64
-    S_NOP 0, implicit %65
-    S_NOP 0, implicit %66
-    S_NOP 0, implicit %67
-    S_NOP 0, implicit %68
-    S_NOP 0, implicit %69
-    S_NOP 0, implicit %70
-    S_NOP 0, implicit %71
-    S_NOP 0, implicit %72
-    S_NOP 0, implicit %73
-    S_NOP 0, implicit %74
-    S_NOP 0, implicit %75
-    S_NOP 0, implicit %76
-    S_NOP 0, implicit %77
-    S_NOP 0, implicit %78
-    S_NOP 0, implicit %79
-    S_NOP 0, implicit %80
-    S_NOP 0, implicit %81
-    S_ENDPGM 0, implicit %82, implicit %83
-...


        


More information about the llvm-commits mailing list