[llvm] [MachineLICM] Give opportunity to analyze physregs for invariance. (PR #84779)

Sander de Smalen via llvm-commits llvm-commits at lists.llvm.org
Tue Mar 12 07:12:26 PDT 2024


https://github.com/sdesmalen-arm updated https://github.com/llvm/llvm-project/pull/84779

>From e81bdb1509013bedea62299aa35c89b56c83eb88 Mon Sep 17 00:00:00 2001
From: Sander de Smalen <sander.desmalen at arm.com>
Date: Fri, 8 Mar 2024 16:41:07 +0000
Subject: [PATCH 1/2] [MachineLICM] Give opportunity to analyze physregs for
 invariance.

At the moment MachineLoopInfo has a very simple way to determine if
a use of a physical register will be invariant: unless the register is a
constant value or the use is ignorable, it is not considered invariant.

>From a compile-time performance perspective this makes a lot of sense,
but it prevents code that uses implicit physical registers from being
hoisted until the later MachineLICM pass (after register allocation),
which has far fewer opportunities to hoist.

For AArch64 SME we use an implicit physical register ($vg) to avoid
rematerialization beyond certain instructions. Doing this led to
regressions because simple expressions were no longer hoisted by Early
MachineLICM.

This patch adds some extra checks to 'isLoopInvariant' to see if any of the
defs are found in the loop. If not, we can consider it loop invariant.

We expect the impact on compile-time to be negligible because there is
an incentive for users to reduce the need for the smstart/smstop
instructions that define $vg. In either case, we've put the functionality
under a target interface to limit this only to specific registers.
---
 llvm/include/llvm/CodeGen/MachineLoopInfo.h   |  3 +
 .../include/llvm/CodeGen/TargetRegisterInfo.h |  4 ++
 llvm/lib/CodeGen/MachineLoopInfo.cpp          | 19 +++++-
 .../Target/AArch64/AArch64RegisterInfo.cpp    |  5 ++
 llvm/lib/Target/AArch64/AArch64RegisterInfo.h |  2 +
 .../CodeGen/AArch64/sme-machine-licm-vg.mir   | 64 +++++++++++++++++++
 6 files changed, 96 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/CodeGen/AArch64/sme-machine-licm-vg.mir

diff --git a/llvm/include/llvm/CodeGen/MachineLoopInfo.h b/llvm/include/llvm/CodeGen/MachineLoopInfo.h
index 445c9b1c3bc00f..967c4a70ca4699 100644
--- a/llvm/include/llvm/CodeGen/MachineLoopInfo.h
+++ b/llvm/include/llvm/CodeGen/MachineLoopInfo.h
@@ -89,6 +89,9 @@ class MachineLoop : public LoopBase<MachineBasicBlock, MachineLoop> {
 private:
   friend class LoopInfoBase<MachineBasicBlock, MachineLoop>;
 
+  /// Returns true if the given physreg has no defs inside the loop.
+  bool isLoopInvariantImplicitPhysReg(Register Reg) const;
+
   explicit MachineLoop(MachineBasicBlock *MBB)
     : LoopBase<MachineBasicBlock, MachineLoop>(MBB) {}
 
diff --git a/llvm/include/llvm/CodeGen/TargetRegisterInfo.h b/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
index e7c9ecd2e1851a..bf7e770aea8091 100644
--- a/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
@@ -572,6 +572,10 @@ class TargetRegisterInfo : public MCRegisterInfo {
     return false;
   }
 
+  /// Returns true if MachineLoopInfo should analyze the given physreg
+  /// for loop invariance.
+  virtual bool shouldAnalyzePhysregInMachineLoopInfo(MCRegister R) const { return false; }
+
   /// Physical registers that may be modified within a function but are
   /// guaranteed to be restored before any uses. This is useful for targets that
   /// have call sequences where a GOT register may be updated by the caller
diff --git a/llvm/lib/CodeGen/MachineLoopInfo.cpp b/llvm/lib/CodeGen/MachineLoopInfo.cpp
index 1492c8c366fb41..6e7f4062f7e0f5 100644
--- a/llvm/lib/CodeGen/MachineLoopInfo.cpp
+++ b/llvm/lib/CodeGen/MachineLoopInfo.cpp
@@ -198,6 +198,23 @@ MDNode *MachineLoop::getLoopID() const {
   return LoopID;
 }
 
+bool MachineLoop::isLoopInvariantImplicitPhysReg(Register Reg) const {
+  MachineFunction *MF = getHeader()->getParent();
+  MachineRegisterInfo *MRI = &MF->getRegInfo();
+
+  if (MRI->isConstantPhysReg(Reg))
+    return true;
+
+  if (!MF->getSubtarget()
+           .getRegisterInfo()
+           ->shouldAnalyzePhysregInMachineLoopInfo(Reg))
+    return false;
+
+  return !llvm::any_of(
+      MRI->def_instructions(Reg),
+      [this, Reg](const MachineInstr &MI) { return this->contains(&MI); });
+}
+
 bool MachineLoop::isLoopInvariant(MachineInstr &I,
                                   const Register ExcludeReg) const {
   MachineFunction *MF = I.getParent()->getParent();
@@ -226,7 +243,7 @@ bool MachineLoop::isLoopInvariant(MachineInstr &I,
         // it could get allocated to something with a def during allocation.
         // However, if the physreg is known to always be caller saved/restored
         // then this use is safe to hoist.
-        if (!MRI->isConstantPhysReg(Reg) &&
+        if (!isLoopInvariantImplicitPhysReg(Reg) &&
             !(TRI->isCallerPreservedPhysReg(Reg.asMCReg(), *I.getMF())) &&
             !TII->isIgnorableUse(MO))
           return false;
diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
index ec59778cbb0efa..ad29003f1e8173 100644
--- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -1072,3 +1072,8 @@ bool AArch64RegisterInfo::shouldCoalesce(
 
   return true;
 }
+
+bool AArch64RegisterInfo::shouldAnalyzePhysregInMachineLoopInfo(
+    MCRegister R) const {
+  return R == AArch64::VG;
+}
diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.h b/llvm/lib/Target/AArch64/AArch64RegisterInfo.h
index e93352c13b3c72..5c8a5e029584fc 100644
--- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.h
@@ -145,6 +145,8 @@ class AArch64RegisterInfo final : public AArch64GenRegisterInfo {
 
   void getOffsetOpcodes(const StackOffset &Offset,
                         SmallVectorImpl<uint64_t> &Ops) const override;
+
+  bool shouldAnalyzePhysregInMachineLoopInfo(MCRegister R) const override;
 };
 
 } // end namespace llvm
diff --git a/llvm/test/CodeGen/AArch64/sme-machine-licm-vg.mir b/llvm/test/CodeGen/AArch64/sme-machine-licm-vg.mir
new file mode 100644
index 00000000000000..e6cce9af0daf0e
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sme-machine-licm-vg.mir
@@ -0,0 +1,64 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
+# RUN: llc -mtriple=aarch64--linux-gnu -run-pass=early-machinelicm %s -verify-machineinstrs -o - | FileCheck %s
+---
+name:            test_should_hoist_pfalse
+tracksRegLiveness: true
+body:             |
+  ; CHECK-LABEL: name: test_should_hoist_pfalse
+  ; CHECK: bb.0:
+  ; CHECK-NEXT:   successors: %bb.1(0x80000000)
+  ; CHECK-NEXT:   liveins: $x0, $x1
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:gpr64 = COPY $x1
+  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:gpr64 = COPY $x0
+  ; CHECK-NEXT:   MSRpstatesvcrImm1 1, 1, csr_aarch64_smstartstop, implicit-def dead $nzcv, implicit $vg, implicit-def $vg
+  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:gpr64all = COPY [[COPY1]]
+  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:gpr64all = COPY [[COPY]]
+  ; CHECK-NEXT:   [[PFALSE:%[0-9]+]]:ppr = PFALSE implicit $vg
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.1:
+  ; CHECK-NEXT:   successors: %bb.2(0x40000000), %bb.1(0x40000000)
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT:   [[PHI:%[0-9]+]]:gpr64common = PHI [[COPY2]], %bb.0, %5, %bb.1
+  ; CHECK-NEXT:   [[PHI1:%[0-9]+]]:gpr64sp = PHI [[COPY3]], %bb.0, %7, %bb.1
+  ; CHECK-NEXT:   STR_PXI [[PFALSE]], [[PHI]], 0
+  ; CHECK-NEXT:   [[SUBSXri:%[0-9]+]]:gpr64 = SUBSXri [[PHI1]], 1, 0, implicit-def $nzcv
+  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:gpr64all = COPY [[SUBSXri]]
+  ; CHECK-NEXT:   [[INCD_XPiI:%[0-9]+]]:gpr64 = INCD_XPiI [[PHI]], 31, 1
+  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:gpr64all = COPY [[INCD_XPiI]]
+  ; CHECK-NEXT:   Bcc 1, %bb.1, implicit $nzcv
+  ; CHECK-NEXT:   B %bb.2
+  ; CHECK-NEXT: {{  $}}
+  ; CHECK-NEXT: bb.2:
+  ; CHECK-NEXT:   MSRpstatesvcrImm1 1, 0, csr_aarch64_smstartstop, implicit-def dead $nzcv, implicit $vg, implicit-def $vg
+  ; CHECK-NEXT:   RET_ReallyLR
+  bb.0:
+    successors: %bb.1
+    liveins: $x0, $x1
+
+    %5:gpr64 = COPY $x1
+    %4:gpr64 = COPY $x0
+    MSRpstatesvcrImm1 1, 1, csr_aarch64_smstartstop, implicit-def dead $nzcv, implicit $vg, implicit-def $vg
+    %6:gpr64all = COPY %4
+    %7:gpr64all = COPY %5
+
+  bb.1:
+    successors: %bb.2, %bb.1
+
+    %0:gpr64common = PHI %6, %bb.0, %3, %bb.1
+    %1:gpr64sp = PHI %7, %bb.0, %2, %bb.1
+    %8:ppr = PFALSE implicit $vg
+    STR_PXI killed %8, %0, 0
+    %9:gpr64 = SUBSXri %1, 1, 0, implicit-def $nzcv
+    %2:gpr64all = COPY %9
+    %10:gpr64 = INCD_XPiI %0, 31, 1
+    %3:gpr64all = COPY %10
+
+
+    Bcc 1, %bb.1, implicit $nzcv
+    B %bb.2
+
+  bb.2:
+    MSRpstatesvcrImm1 1, 0, csr_aarch64_smstartstop, implicit-def dead $nzcv, implicit $vg, implicit-def $vg
+    RET_ReallyLR
+...

>From 4f1544d780bbbe533110663e4f45b36487988023 Mon Sep 17 00:00:00 2001
From: Sander de Smalen <sander.desmalen at arm.com>
Date: Tue, 12 Mar 2024 14:11:12 +0000
Subject: [PATCH 2/2] NFC: run clang-format

---
 llvm/include/llvm/CodeGen/TargetRegisterInfo.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/llvm/include/llvm/CodeGen/TargetRegisterInfo.h b/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
index bf7e770aea8091..117d3f71829747 100644
--- a/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
@@ -574,7 +574,9 @@ class TargetRegisterInfo : public MCRegisterInfo {
 
   /// Returns true if MachineLoopInfo should analyze the given physreg
   /// for loop invariance.
-  virtual bool shouldAnalyzePhysregInMachineLoopInfo(MCRegister R) const { return false; }
+  virtual bool shouldAnalyzePhysregInMachineLoopInfo(MCRegister R) const {
+    return false;
+  }
 
   /// Physical registers that may be modified within a function but are
   /// guaranteed to be restored before any uses. This is useful for targets that



More information about the llvm-commits mailing list