[llvm] b9e433b - Prevent machine licm if remattable with a vreg use

Stanislav Mekhanoshin via llvm-commits llvm-commits at lists.llvm.org
Mon Aug 16 12:18:41 PDT 2021


Author: Stanislav Mekhanoshin
Date: 2021-08-16T12:09:00-07:00
New Revision: b9e433b02a77830b9ba13406b459ab905371e346

URL: https://github.com/llvm/llvm-project/commit/b9e433b02a77830b9ba13406b459ab905371e346
DIFF: https://github.com/llvm/llvm-project/commit/b9e433b02a77830b9ba13406b459ab905371e346.diff

LOG: Prevent machine licm if remattable with a vreg use

Check that a rematerializable instruction does not have any virtual
register uses: even though it is marked rematerializable, RA might not
actually rematerialize it in that scenario. In such a case we do not
want to hoist the instruction out of the loop in the belief that RA
will sink it back if needed.
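
For illustration, a minimal MIR sketch in the spirit of the test updated
below (the register numbers here are illustrative, not real output):

    %2:vreg_64 = nofpexcept V_CVT_F64_I32_e32 %0:vgpr_32, implicit $mode, implicit $exec

The V_CVT is marked rematerializable, but it reads the virtual register
%0. Rematerializing it next to a use would require %0 to still be live
at that point, so the register allocator may refuse, and the hoisted
value then occupies a register across the whole loop.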

This already has an impact on the AMDGPU target, which does not check
for this condition in its isTriviallyReMaterializable implementation
and marks instructions with virtual register uses as rematerializable.
The other targets are not impacted at this point, although they will
be once D106408 lands.
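
For context, a minimal sketch of the kind of target-side filter that
would make the new generic gate redundant (MyTargetInstrInfo is
hypothetical; this is not AMDGPU's actual implementation):

    // Hypothetical override: refuse trivial rematerialization whenever the
    // instruction reads a virtual register, since RA cannot rematerialize
    // it without keeping that register live at the new location.
    bool MyTargetInstrInfo::isReallyTriviallyReMaterializable(
        const MachineInstr &MI, AAResults *AA) const {
      for (const MachineOperand &MO : MI.operands())
        if (MO.isReg() && MO.isUse() && MO.getReg().isVirtual())
          return false;
      return true; // remaining target-specific checks elided in this sketch
    }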

Differential Revision: https://reviews.llvm.org/D107677

Added: 
    

Modified: 
    llvm/lib/CodeGen/MachineLICM.cpp
    llvm/test/CodeGen/AMDGPU/licm-regpressure.mir

Removed: 
    


################################################################################
diff --git a/llvm/lib/CodeGen/MachineLICM.cpp b/llvm/lib/CodeGen/MachineLICM.cpp
index 883299c452b71..42708659c79e1 100644
--- a/llvm/lib/CodeGen/MachineLICM.cpp
+++ b/llvm/lib/CodeGen/MachineLICM.cpp
@@ -230,6 +230,9 @@ namespace {
 
     bool IsGuaranteedToExecute(MachineBasicBlock *BB);
 
+    bool isTriviallyReMaterializable(const MachineInstr &MI,
+                                     AAResults *AA) const;
+
     void EnterScope(MachineBasicBlock *MBB);
 
     void ExitScope(MachineBasicBlock *MBB);
@@ -659,6 +662,23 @@ bool MachineLICMBase::IsGuaranteedToExecute(MachineBasicBlock *BB) {
   return true;
 }
 
+/// Check if \p MI is trivially rematerializable and if it does not have any
+/// virtual register uses: even though rematerializable, RA might not actually
+/// rematerialize it in that scenario. In such a case we do not want to hoist
+/// the instruction out of the loop in the belief that RA will sink it back.
+bool MachineLICMBase::isTriviallyReMaterializable(const MachineInstr &MI,
+                                                  AAResults *AA) const {
+  if (!TII->isTriviallyReMaterializable(MI, AA))
+    return false;
+
+  for (const MachineOperand &MO : MI.operands()) {
+    if (MO.isReg() && MO.isUse() && MO.getReg().isVirtual())
+      return false;
+  }
+
+  return true;
+}
+
 void MachineLICMBase::EnterScope(MachineBasicBlock *MBB) {
   LLVM_DEBUG(dbgs() << "Entering " << printMBBReference(*MBB) << '\n');
 
@@ -1156,9 +1176,9 @@ bool MachineLICMBase::IsProfitableToHoist(MachineInstr &MI) {
     return false;
   }
 
-  // Rematerializable instructions should always be hoisted since the register
-  // allocator can just pull them down again when needed.
-  if (TII->isTriviallyReMaterializable(MI, AA))
+  // Rematerializable instructions should always be hoisted provided the
+  // register allocator can just pull them down again when needed.
+  if (isTriviallyReMaterializable(MI, AA))
     return true;
 
   // FIXME: If there are long latency loop-invariant instructions inside the
@@ -1211,7 +1231,7 @@ bool MachineLICMBase::IsProfitableToHoist(MachineInstr &MI) {
 
   // High register pressure situation, only hoist if the instruction is going
   // to be remat'ed.
-  if (!TII->isTriviallyReMaterializable(MI, AA) &&
+  if (!isTriviallyReMaterializable(MI, AA) &&
       !MI.isDereferenceableInvariantLoad(AA)) {
     LLVM_DEBUG(dbgs() << "Can't remat / high reg-pressure: " << MI);
     return false;

diff --git a/llvm/test/CodeGen/AMDGPU/licm-regpressure.mir b/llvm/test/CodeGen/AMDGPU/licm-regpressure.mir
index dcdbe6bd02dcd..1d033e117ede7 100644
--- a/llvm/test/CodeGen/AMDGPU/licm-regpressure.mir
+++ b/llvm/test/CodeGen/AMDGPU/licm-regpressure.mir
@@ -1,8 +1,8 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 # RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass machinelicm -o - %s | FileCheck -check-prefix=GCN %s
 
-# FIXME: MachineLICM hoists all V_CVT instructions out of the loop increasing
-# register pressure. VGPR budget at occupancy 10 is 24 vgprs.
+# MachineLICM must limit hoisting of V_CVT instructions out of the loop to keep
+# register pressure within budget. The VGPR budget at occupancy 10 is 24 VGPRs.
 
 ---
 name:            test
@@ -35,41 +35,41 @@ body:             |
   ; GCN:   %20:vreg_64 = nofpexcept V_CVT_F64_I32_e32 [[COPY2]], implicit $mode, implicit $exec
   ; GCN:   %21:vreg_64 = nofpexcept V_CVT_F64_I32_e32 [[COPY3]], implicit $mode, implicit $exec
   ; GCN:   %22:vreg_64 = nofpexcept V_CVT_F64_I32_e32 [[COPY4]], implicit $mode, implicit $exec
+  ; GCN: bb.1:
+  ; GCN:   successors: %bb.2(0x04000000), %bb.1(0x7c000000)
+  ; GCN:   liveins: $vcc
+  ; GCN:   $vcc = S_AND_B64 $exec, $vcc, implicit-def $scc
+  ; GCN:   $vcc = V_CMP_EQ_U64_e64 $vcc, %18, implicit $exec
+  ; GCN:   $vcc = V_CMP_EQ_U64_e64 $vcc, %19, implicit $exec
+  ; GCN:   $vcc = V_CMP_EQ_U64_e64 $vcc, %20, implicit $exec
+  ; GCN:   $vcc = V_CMP_EQ_U64_e64 $vcc, %21, implicit $exec
+  ; GCN:   $vcc = V_CMP_EQ_U64_e64 $vcc, %22, implicit $exec
   ; GCN:   %23:vreg_64 = nofpexcept V_CVT_F64_I32_e32 [[COPY5]], implicit $mode, implicit $exec
+  ; GCN:   $vcc = V_CMP_EQ_U64_e64 $vcc, killed %23, implicit $exec
   ; GCN:   %24:vreg_64 = nofpexcept V_CVT_F64_I32_e32 [[COPY6]], implicit $mode, implicit $exec
+  ; GCN:   $vcc = V_CMP_EQ_U64_e64 $vcc, killed %24, implicit $exec
   ; GCN:   %25:vreg_64 = nofpexcept V_CVT_F64_I32_e32 [[COPY7]], implicit $mode, implicit $exec
+  ; GCN:   $vcc = V_CMP_EQ_U64_e64 $vcc, killed %25, implicit $exec
   ; GCN:   %26:vreg_64 = nofpexcept V_CVT_F64_I32_e32 [[COPY8]], implicit $mode, implicit $exec
+  ; GCN:   $vcc = V_CMP_EQ_U64_e64 $vcc, killed %26, implicit $exec
   ; GCN:   %27:vreg_64 = nofpexcept V_CVT_F64_I32_e32 [[COPY9]], implicit $mode, implicit $exec
+  ; GCN:   $vcc = V_CMP_EQ_U64_e64 $vcc, killed %27, implicit $exec
   ; GCN:   %28:vreg_64 = nofpexcept V_CVT_F64_I32_e32 [[COPY10]], implicit $mode, implicit $exec
+  ; GCN:   $vcc = V_CMP_EQ_U64_e64 $vcc, killed %28, implicit $exec
   ; GCN:   %29:vreg_64 = nofpexcept V_CVT_F64_I32_e32 [[COPY11]], implicit $mode, implicit $exec
+  ; GCN:   $vcc = V_CMP_EQ_U64_e64 $vcc, killed %29, implicit $exec
   ; GCN:   %30:vreg_64 = nofpexcept V_CVT_F64_I32_e32 [[COPY12]], implicit $mode, implicit $exec
+  ; GCN:   $vcc = V_CMP_EQ_U64_e64 $vcc, killed %30, implicit $exec
   ; GCN:   %31:vreg_64 = nofpexcept V_CVT_F64_I32_e32 [[COPY13]], implicit $mode, implicit $exec
+  ; GCN:   $vcc = V_CMP_EQ_U64_e64 $vcc, killed %31, implicit $exec
   ; GCN:   %32:vreg_64 = nofpexcept V_CVT_F64_I32_e32 [[COPY14]], implicit $mode, implicit $exec
+  ; GCN:   $vcc = V_CMP_EQ_U64_e64 $vcc, killed %32, implicit $exec
   ; GCN:   %33:vreg_64 = nofpexcept V_CVT_F64_I32_e32 [[COPY15]], implicit $mode, implicit $exec
+  ; GCN:   $vcc = V_CMP_EQ_U64_e64 $vcc, killed %33, implicit $exec
   ; GCN:   %34:vreg_64 = nofpexcept V_CVT_F64_I32_e32 [[COPY16]], implicit $mode, implicit $exec
+  ; GCN:   $vcc = V_CMP_EQ_U64_e64 $vcc, killed %34, implicit $exec
   ; GCN:   %35:vreg_64 = nofpexcept V_CVT_F64_I32_e32 [[COPY17]], implicit $mode, implicit $exec
-  ; GCN: bb.1:
-  ; GCN:   successors: %bb.2(0x04000000), %bb.1(0x7c000000)
-  ; GCN:   liveins: $vcc
-  ; GCN:   $vcc = S_AND_B64 $exec, $vcc, implicit-def $scc
-  ; GCN:   $vcc = V_CMP_EQ_U64_e64 $vcc, %18, implicit $exec
-  ; GCN:   $vcc = V_CMP_EQ_U64_e64 $vcc, %19, implicit $exec
-  ; GCN:   $vcc = V_CMP_EQ_U64_e64 $vcc, %20, implicit $exec
-  ; GCN:   $vcc = V_CMP_EQ_U64_e64 $vcc, %21, implicit $exec
-  ; GCN:   $vcc = V_CMP_EQ_U64_e64 $vcc, %22, implicit $exec
-  ; GCN:   $vcc = V_CMP_EQ_U64_e64 $vcc, %23, implicit $exec
-  ; GCN:   $vcc = V_CMP_EQ_U64_e64 $vcc, %24, implicit $exec
-  ; GCN:   $vcc = V_CMP_EQ_U64_e64 $vcc, %25, implicit $exec
-  ; GCN:   $vcc = V_CMP_EQ_U64_e64 $vcc, %26, implicit $exec
-  ; GCN:   $vcc = V_CMP_EQ_U64_e64 $vcc, %27, implicit $exec
-  ; GCN:   $vcc = V_CMP_EQ_U64_e64 $vcc, %28, implicit $exec
-  ; GCN:   $vcc = V_CMP_EQ_U64_e64 $vcc, %29, implicit $exec
-  ; GCN:   $vcc = V_CMP_EQ_U64_e64 $vcc, %30, implicit $exec
-  ; GCN:   $vcc = V_CMP_EQ_U64_e64 $vcc, %31, implicit $exec
-  ; GCN:   $vcc = V_CMP_EQ_U64_e64 $vcc, %32, implicit $exec
-  ; GCN:   $vcc = V_CMP_EQ_U64_e64 $vcc, %33, implicit $exec
-  ; GCN:   $vcc = V_CMP_EQ_U64_e64 $vcc, %34, implicit $exec
-  ; GCN:   $vcc = V_CMP_EQ_U64_e64 $vcc, %35, implicit $exec
+  ; GCN:   $vcc = V_CMP_EQ_U64_e64 $vcc, killed %35, implicit $exec
   ; GCN:   S_CBRANCH_VCCNZ %bb.1, implicit $vcc
   ; GCN:   S_BRANCH %bb.2
   ; GCN: bb.2:
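
To try the updated test locally, the RUN line above can be invoked
directly (assuming llc and FileCheck from a build are on PATH and the
command runs from an llvm-project checkout):

    llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs \
        -run-pass machinelicm -o - \
        llvm/test/CodeGen/AMDGPU/licm-regpressure.mir \
      | FileCheck -check-prefix=GCN \
          llvm/test/CodeGen/AMDGPU/licm-regpressure.mir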




More information about the llvm-commits mailing list