[llvm] [MachineScheduler][NFCI] Add Offset and OffsetIsScalable args to shouldClusterMemOps (PR #73778)

Alex Bradbury via llvm-commits llvm-commits at lists.llvm.org
Wed Dec 6 04:20:25 PST 2023


https://github.com/asb updated https://github.com/llvm/llvm-project/pull/73778

From ecc1ca6822882fb4a4287ae1eaad1896e1d52ff5 Mon Sep 17 00:00:00 2001
From: Alex Bradbury <asb at igalia.com>
Date: Wed, 29 Nov 2023 10:38:57 +0000
Subject: [PATCH 1/5] [MachineScheduler][NFCI] Add Offset and OffsetIsScalable
 args to shouldClusterMemOps

These values are picked up by getMemOperandsWithOffsetWidth but were not
being passed through to shouldClusterMemOps, forcing backends to collect
the information again if they want to use the kind of heuristics
typically used for the similar shouldScheduleLoadsNear function (e.g.
checking that the offset is within 1 cache line).

This patch just adds the parameters, but doesn't attempt to use them.
There is an opportunity to use them in the current PPC and AArch64
shouldClusterMemOps implementations, and I intend to use the offset in
the heuristic for RISC-V. I've left these for future patches in the
interest of being as incremental as possible.
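
(For context only, not part of the patch: a minimal sketch of the kind of
offset-based check the new parameters enable, assuming a 64-byte cache
line. The helper name, the default line size, and the scalable-offset
bail-out are illustrative assumptions rather than code from this series.)

  #include <cstdint>
  #include <cstdlib>

  // Hypothetical helper: decide whether two byte offsets from the same base
  // stay within one cache line, without re-querying
  // getMemOperandsWithOffsetWidth. A shouldClusterMemOps override could call
  // this with the new Offset1/OffsetIsScalable1/Offset2/OffsetIsScalable2
  // arguments.
  static bool offsetsWithinOneCacheLine(int64_t Offset1, bool OffsetIsScalable1,
                                        int64_t Offset2, bool OffsetIsScalable2,
                                        int64_t CacheLineBytes = 64) {
    // Scalable (vscale-relative) offsets cannot be compared as plain byte
    // distances, so conservatively decline to cluster them.
    if (OffsetIsScalable1 || OffsetIsScalable2)
      return false;
    return std::llabs(Offset2 - Offset1) < CacheLineBytes;
  }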
---
 llvm/include/llvm/CodeGen/TargetInstrInfo.h    |  2 ++
 llvm/lib/CodeGen/MachineScheduler.cpp          | 14 +++++++++-----
 llvm/lib/Target/AArch64/AArch64InstrInfo.cpp   |  5 +++--
 llvm/lib/Target/AArch64/AArch64InstrInfo.h     |  2 ++
 llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp |  5 ++++-
 llvm/lib/Target/AMDGPU/SIInstrInfo.cpp         |  2 ++
 llvm/lib/Target/AMDGPU/SIInstrInfo.h           |  2 ++
 llvm/lib/Target/PowerPC/PPCInstrInfo.cpp       |  5 +++--
 llvm/lib/Target/PowerPC/PPCInstrInfo.h         |  2 ++
 llvm/lib/Target/RISCV/RISCVInstrInfo.cpp       |  5 +++--
 llvm/lib/Target/RISCV/RISCVInstrInfo.h         |  2 ++
 11 files changed, 34 insertions(+), 12 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
index 665b7449ddb82..b7dc56f93c739 100644
--- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
@@ -1501,7 +1501,9 @@ class TargetInstrInfo : public MCInstrInfo {
   /// \p NumBytes is the number of bytes that will be loaded from all the
   /// clustered loads if this hook returns true.
   virtual bool shouldClusterMemOps(ArrayRef<const MachineOperand *> BaseOps1,
+                                   int64_t Offset1, bool OffsetIsScalable1,
                                    ArrayRef<const MachineOperand *> BaseOps2,
+                                   int64_t Offset2, bool OffsetIsScalable2,
                                    unsigned ClusterSize,
                                    unsigned NumBytes) const {
     llvm_unreachable("target did not implement shouldClusterMemOps()");
diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp
index 4add33ba0996a..cd5fe71ef0c1a 100644
--- a/llvm/lib/CodeGen/MachineScheduler.cpp
+++ b/llvm/lib/CodeGen/MachineScheduler.cpp
@@ -1698,11 +1698,12 @@ class BaseMemOpClusterMutation : public ScheduleDAGMutation {
     SmallVector<const MachineOperand *, 4> BaseOps;
     int64_t Offset;
     unsigned Width;
+    bool OffsetIsScalable;
 
     MemOpInfo(SUnit *SU, ArrayRef<const MachineOperand *> BaseOps,
-              int64_t Offset, unsigned Width)
+              int64_t Offset, bool OffsetIsScalable, unsigned Width)
         : SU(SU), BaseOps(BaseOps.begin(), BaseOps.end()), Offset(Offset),
-          Width(Width) {}
+          OffsetIsScalable(OffsetIsScalable), Width(Width) {}
 
     static bool Compare(const MachineOperand *const &A,
                         const MachineOperand *const &B) {
@@ -1831,8 +1832,10 @@ void BaseMemOpClusterMutation::clusterNeighboringMemOps(
           SUnit2ClusterInfo[MemOpa.SU->NodeNum].second + MemOpb.Width;
     }
 
-    if (!TII->shouldClusterMemOps(MemOpa.BaseOps, MemOpb.BaseOps, ClusterLength,
-                                  CurrentClusterBytes))
+    if (!TII->shouldClusterMemOps(MemOpa.BaseOps, MemOpa.Offset,
+                                  MemOpa.OffsetIsScalable, MemOpb.BaseOps,
+                                  MemOpb.Offset, MemOpb.OffsetIsScalable,
+                                  ClusterLength, CurrentClusterBytes))
       continue;
 
     SUnit *SUa = MemOpa.SU;
@@ -1899,7 +1902,8 @@ void BaseMemOpClusterMutation::collectMemOpRecords(
     unsigned Width;
     if (TII->getMemOperandsWithOffsetWidth(MI, BaseOps, Offset,
                                            OffsetIsScalable, Width, TRI)) {
-      MemOpRecords.push_back(MemOpInfo(&SU, BaseOps, Offset, Width));
+      MemOpRecords.push_back(
+          MemOpInfo(&SU, BaseOps, Offset, OffsetIsScalable, Width));
 
       LLVM_DEBUG(dbgs() << "Num BaseOps: " << BaseOps.size() << ", Offset: "
                         << Offset << ", OffsetIsScalable: " << OffsetIsScalable
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
index e97f17e3f49c5..6b49e17528ada 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -4230,8 +4230,9 @@ static bool shouldClusterFI(const MachineFrameInfo &MFI, int FI1,
 ///
 /// Only called for LdSt for which getMemOperandWithOffset returns true.
 bool AArch64InstrInfo::shouldClusterMemOps(
-    ArrayRef<const MachineOperand *> BaseOps1,
-    ArrayRef<const MachineOperand *> BaseOps2, unsigned ClusterSize,
+    ArrayRef<const MachineOperand *> BaseOps1, int64_t OpOffset1,
+    bool OffsetIsScalable1, ArrayRef<const MachineOperand *> BaseOps2,
+    int64_t OpOffset2, bool OffsetIsScalable2, unsigned ClusterSize,
     unsigned NumBytes) const {
   assert(BaseOps1.size() == 1 && BaseOps2.size() == 1);
   const MachineOperand &BaseOp1 = *BaseOps1.front();
diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/llvm/lib/Target/AArch64/AArch64InstrInfo.h
index cc588cdad6b8e..65e5fb49536da 100644
--- a/llvm/lib/Target/AArch64/AArch64InstrInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.h
@@ -179,7 +179,9 @@ class AArch64InstrInfo final : public AArch64GenInstrInfo {
                            int64_t &MinOffset, int64_t &MaxOffset);
 
   bool shouldClusterMemOps(ArrayRef<const MachineOperand *> BaseOps1,
+                           int64_t Offset1, bool OffsetIsScalable1,
                            ArrayRef<const MachineOperand *> BaseOps2,
+                           int64_t Offset2, bool OffsetIsScalable2,
                            unsigned ClusterSize,
                            unsigned NumBytes) const override;
 
diff --git a/llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp b/llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp
index 50f8ad4433c6d..442ae4dd7b34f 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp
@@ -232,7 +232,10 @@ class SIInsertHardClauses : public MachineFunctionPass {
               // scheduler it limits the size of the cluster to avoid increasing
               // register pressure too much, but this pass runs after register
               // allocation so there is no need for that kind of limit.
-              !SII->shouldClusterMemOps(CI.BaseOps, BaseOps, 2, 2)))) {
+              // We also lie about the Offset and OffsetIsScalable parameters,
+              // as they aren't used in the SIInstrInfo implementation.
+              !SII->shouldClusterMemOps(CI.BaseOps, 0, false, BaseOps, 0, false,
+                                        2, 2)))) {
           // Finish the current clause.
           Changed |= emitClause(CI, SII);
           CI = ClauseInfo();
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index b5b456d691254..0a06fa88b6b10 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -541,7 +541,9 @@ static bool memOpsHaveSameBasePtr(const MachineInstr &MI1,
 }
 
 bool SIInstrInfo::shouldClusterMemOps(ArrayRef<const MachineOperand *> BaseOps1,
+                                      int64_t Offset1, bool OffsetIsScalable1,
                                       ArrayRef<const MachineOperand *> BaseOps2,
+                                      int64_t Offset2, bool OffsetIsScalable2,
                                       unsigned ClusterSize,
                                       unsigned NumBytes) const {
   // If the mem ops (to be clustered) do not have the same base ptr, then they
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index eba817756e9c5..6da4f74dfe5f3 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -234,7 +234,9 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
       const TargetRegisterInfo *TRI) const final;
 
   bool shouldClusterMemOps(ArrayRef<const MachineOperand *> BaseOps1,
+                           int64_t Offset1, bool OffsetIsScalable1,
                            ArrayRef<const MachineOperand *> BaseOps2,
+                           int64_t Offset2, bool OffsetIsScalable2,
                            unsigned ClusterSize,
                            unsigned NumBytes) const override;
 
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
index 6784049348b16..0de795d9d5e81 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -2878,8 +2878,9 @@ static bool isClusterableLdStOpcPair(unsigned FirstOpc, unsigned SecondOpc,
 }
 
 bool PPCInstrInfo::shouldClusterMemOps(
-    ArrayRef<const MachineOperand *> BaseOps1,
-    ArrayRef<const MachineOperand *> BaseOps2, unsigned ClusterSize,
+    ArrayRef<const MachineOperand *> BaseOps1, int64_t OpOffset1,
+    bool OffsetIsScalable1, ArrayRef<const MachineOperand *> BaseOps2,
+    int64_t OpOffset2, bool OffsetIsScalable2, unsigned ClusterSize,
     unsigned NumBytes) const {
 
   assert(BaseOps1.size() == 1 && BaseOps2.size() == 1);
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
index 31e9859a41739..0c9ad607418ec 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.h
@@ -530,7 +530,9 @@ class PPCInstrInfo : public PPCGenInstrInfo {
   /// Returns true if the two given memory operations should be scheduled
   /// adjacent.
   bool shouldClusterMemOps(ArrayRef<const MachineOperand *> BaseOps1,
+                           int64_t Offset1, bool OffsetIsScalable1,
                            ArrayRef<const MachineOperand *> BaseOps2,
+                           int64_t Offset2, bool OffsetIsScalable2,
                            unsigned ClusterSize,
                            unsigned NumBytes) const override;
 
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 2918e5654db4f..f596a4cd37528 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -2266,8 +2266,9 @@ static bool memOpsHaveSameBasePtr(const MachineInstr &MI1,
 }
 
 bool RISCVInstrInfo::shouldClusterMemOps(
-    ArrayRef<const MachineOperand *> BaseOps1,
-    ArrayRef<const MachineOperand *> BaseOps2, unsigned ClusterSize,
+    ArrayRef<const MachineOperand *> BaseOps1, int64_t Offset1,
+    bool OffsetIsScalable1, ArrayRef<const MachineOperand *> BaseOps2,
+    int64_t Offset2, bool OffsetIsScalable2, unsigned ClusterSize,
     unsigned NumBytes) const {
   // If the mem ops (to be clustered) do not have the same base ptr, then they
   // should not be clustered
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
index 0954286a419bd..7e1d3f3118065 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
@@ -158,7 +158,9 @@ class RISCVInstrInfo : public RISCVGenInstrInfo {
       const TargetRegisterInfo *TRI) const override;
 
   bool shouldClusterMemOps(ArrayRef<const MachineOperand *> BaseOps1,
+                           int64_t Offset1, bool OffsetIsScalable1,
                            ArrayRef<const MachineOperand *> BaseOps2,
+                           int64_t Offset2, bool OffsetIsScalable2,
                            unsigned ClusterSize,
                            unsigned NumBytes) const override;
 

From 5a1d0a9e7a893ae90d7b71cc4b819ef75487d23f Mon Sep 17 00:00:00 2001
From: Alex Bradbury <asb at igalia.com>
Date: Wed, 29 Nov 2023 11:09:20 +0000
Subject: [PATCH 2/5] Add missing doc comment change

---
 llvm/include/llvm/CodeGen/TargetInstrInfo.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
index b7dc56f93c739..afff4e5894e85 100644
--- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
@@ -1496,6 +1496,9 @@ class TargetInstrInfo : public MCInstrInfo {
   /// to TargetPassConfig::createMachineScheduler() to have an effect.
   ///
   /// \p BaseOps1 and \p BaseOps2 are memory operands of two memory operations.
+  /// \p Offset1 and \p Offset2 are the byte offsets for the memory
+  /// operations, while \p OffsetIsScalable1 and \p OffsetIsScalable2 indicate
+  /// if the offset is scaled gby a runtime quantity.
   /// \p ClusterSize is the number of operations in the resulting load/store
   /// cluster if this hook returns true.
   /// \p NumBytes is the number of bytes that will be loaded from all the

From 2cada4e7e8754f556d7c8a9b2120a7f7747c981a Mon Sep 17 00:00:00 2001
From: Alex Bradbury <asb at igalia.com>
Date: Wed, 29 Nov 2023 11:12:17 +0000
Subject: [PATCH 3/5] Note that Offset2 will never be less than Offset1

---
 llvm/include/llvm/CodeGen/TargetInstrInfo.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
index afff4e5894e85..ba7f70d66a6c0 100644
--- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
@@ -1497,8 +1497,10 @@ class TargetInstrInfo : public MCInstrInfo {
   ///
   /// \p BaseOps1 and \p BaseOps2 are memory operands of two memory operations.
   /// \p Offset1 and \p Offset2 are the byte offsets for the memory
-  /// operations, while \p OffsetIsScalable1 and \p OffsetIsScalable2 indicate
-  /// if the offset is scaled gby a runtime quantity.
+  /// operations, and \p Offset2 is guaranteed to be greater than or equal to
+  /// Offset1.
+  /// \p OffsetIsScalable1 and \p OffsetIsScalable2 indicate if the offset is
+  /// scaled by a runtime quantity.
   /// \p ClusterSize is the number of operations in the resulting load/store
   /// cluster if this hook returns true.
   /// \p NumBytes is the number of bytes that will be loaded from all the

From 3b552e80d6bd43736abba4de388271628b9c74af Mon Sep 17 00:00:00 2001
From: Alex Bradbury <asb at igalia.com>
Date: Wed, 29 Nov 2023 12:12:39 +0000
Subject: [PATCH 4/5] Actually Offset2 may be less than Offset1

---
 llvm/include/llvm/CodeGen/TargetInstrInfo.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
index ba7f70d66a6c0..4ec46f9bde1ad 100644
--- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
@@ -1497,8 +1497,7 @@ class TargetInstrInfo : public MCInstrInfo {
   ///
   /// \p BaseOps1 and \p BaseOps2 are memory operands of two memory operations.
   /// \p Offset1 and \p Offset2 are the byte offsets for the memory
-  /// operations, and \p Offset2 is guaranteed to be greater than or equal to
-  /// Offset1.
+  /// operations.
   /// \p OffsetIsScalable1 and \p OffsetIsScalable2 indicate if the offset is
   /// scaled by a runtime quantity.
   /// \p ClusterSize is the number of operations in the resulting load/store
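
(For illustration only: the doc comment's "scaled by a runtime quantity"
can be read as the effective byte offset being Offset multiplied by the
runtime vscale when OffsetIsScalable is true. The helper name and the
caller-supplied VScale parameter below are assumptions, not part of the
patch.)

  #include <cstdint>

  // Illustrative reading of OffsetIsScalable: a scalable offset contributes
  // Offset * vscale bytes at runtime; a fixed offset contributes Offset bytes.
  static int64_t effectiveByteOffset(int64_t Offset, bool OffsetIsScalable,
                                     uint64_t VScale) {
    return OffsetIsScalable ? Offset * static_cast<int64_t>(VScale) : Offset;
  }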

From 6083ea31e5128635861a924ce8ea05a340a9ac08 Mon Sep 17 00:00:00 2001
From: Alex Bradbury <asb at igalia.com>
Date: Wed, 6 Dec 2023 12:19:59 +0000
Subject: [PATCH 5/5] Add FIXME about moving to something ElementCount-style

---
 llvm/include/llvm/CodeGen/TargetInstrInfo.h | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
index 4ec46f9bde1ad..0568af8a7db77 100644
--- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h
@@ -1414,6 +1414,8 @@ class TargetInstrInfo : public MCInstrInfo {
   /// Get the base operand and byte offset of an instruction that reads/writes
   /// memory. This is a convenience function for callers that are only prepared
   /// to handle a single base operand.
+  /// FIXME: Move Offset and OffsetIsScalable to some ElementCount-style
+  /// abstraction that supports negative offsets.
   bool getMemOperandWithOffset(const MachineInstr &MI,
                                const MachineOperand *&BaseOp, int64_t &Offset,
                                bool &OffsetIsScalable,
@@ -1426,6 +1428,8 @@ class TargetInstrInfo : public MCInstrInfo {
   /// It returns false if base operands and offset could not be determined.
   /// It is not guaranteed to always recognize base operands and offsets in all
   /// cases.
+  /// FIXME: Move Offset and OffsetIsScalable to some ElementCount-style
+  /// abstraction that supports negative offsets.
   virtual bool getMemOperandsWithOffsetWidth(
       const MachineInstr &MI, SmallVectorImpl<const MachineOperand *> &BaseOps,
       int64_t &Offset, bool &OffsetIsScalable, unsigned &Width,
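
(As a rough sketch of what the FIXME asks for, purely hypothetical and not
an existing LLVM type: an ElementCount/TypeSize-style pair keeping the
signed byte quantity together with its scalability, instead of threading
an int64_t and a bool separately.)

  #include <cstdint>

  // Hypothetical wrapper in the spirit of the FIXME: a signed byte quantity
  // plus a flag recording whether it is scaled by the runtime vscale.
  struct ScalableByteOffset {
    int64_t Value = 0;     // May be negative, unlike the unsigned Width.
    bool Scalable = false; // True if the effective offset is Value * vscale.
  };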


