[llvm] [RISCV] Support load clustering in the MachineScheduler (off by default) (PR #73754)

Alex Bradbury via llvm-commits llvm-commits at lists.llvm.org
Wed Nov 29 01:49:30 PST 2023


https://github.com/asb updated https://github.com/llvm/llvm-project/pull/73754

>From f8ce744e9ecc6c4ac184fa50ad8324fb73b8baec Mon Sep 17 00:00:00 2001
From: Alex Bradbury <asb at igalia.com>
Date: Wed, 29 Nov 2023 06:38:08 +0000
Subject: [PATCH 1/2] [RISCV] Support load clustering in the MachineScheduler
 (off by default)

This adds minimal support for load clustering, but disables it by
default. The intent is to iterate on the precise heuristic and the
question of turning this on by default in a separate PR. Although
previous discussion indicates hope that the MachineScheduler would
replace most uses of the SelectionDAG scheduler, it does seem most
targets aren't using MachineScheduler load clustering right now:
PPC+AArch64 seem to just use it to help with paired load/store formation
and although AMDGPU uses it for general clustering it also implements
ShouldScheduleLoadsNear for the SelectionDAG scheduler's clustering.
---
 llvm/lib/Target/RISCV/RISCVInstrInfo.cpp      | 55 +++++++++++++++++++
 llvm/lib/Target/RISCV/RISCVInstrInfo.h        |  4 ++
 llvm/lib/Target/RISCV/RISCVTargetMachine.cpp  | 15 ++++-
 .../CodeGen/RISCV/misched-load-clustering.ll  | 43 +++++++++++++++
 4 files changed, 114 insertions(+), 3 deletions(-)
 create mode 100644 llvm/test/CodeGen/RISCV/misched-load-clustering.ll

diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 6c5712dc795bc75..29fa3987938d9e5 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -19,6 +19,7 @@
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Analysis/MemoryLocation.h"
+#include "llvm/Analysis/ValueTracking.h"
 #include "llvm/CodeGen/LiveIntervals.h"
 #include "llvm/CodeGen/LiveVariables.h"
 #include "llvm/CodeGen/MachineCombinerPattern.h"
@@ -2231,6 +2232,60 @@ bool RISCVInstrInfo::getMemOperandsWithOffsetWidth(
   return true;
 }
 
+// TODO: This was copied from SIInstrInfo. Could it be lifted to a common
+// helper?
+static bool memOpsHaveSameBasePtr(const MachineInstr &MI1,
+                                  ArrayRef<const MachineOperand *> BaseOps1,
+                                  const MachineInstr &MI2,
+                                  ArrayRef<const MachineOperand *> BaseOps2) {
+  // Only examine the first "base" operand of each instruction, on the
+  // assumption that it represents the real base address of the memory access.
+  // Other operands are typically offsets or indices from this base address.
+  if (BaseOps1.front()->isIdenticalTo(*BaseOps2.front()))
+    return true;
+
+  if (!MI1.hasOneMemOperand() || !MI2.hasOneMemOperand())
+    return false;
+
+  auto MO1 = *MI1.memoperands_begin();
+  auto MO2 = *MI2.memoperands_begin();
+  if (MO1->getAddrSpace() != MO2->getAddrSpace())
+    return false;
+
+  auto Base1 = MO1->getValue();
+  auto Base2 = MO2->getValue();
+  if (!Base1 || !Base2)
+    return false;
+  Base1 = getUnderlyingObject(Base1);
+  Base2 = getUnderlyingObject(Base2);
+
+  if (isa<UndefValue>(Base1) || isa<UndefValue>(Base2))
+    return false;
+
+  return Base1 == Base2;
+}
+
+bool RISCVInstrInfo::shouldClusterMemOps(
+    ArrayRef<const MachineOperand *> BaseOps1,
+    ArrayRef<const MachineOperand *> BaseOps2, unsigned NumLoads,
+    unsigned NumBytes) const {
+  // If the mem ops (to be clustered) do not have the same base ptr, then they
+  // should not be clustered
+  if (!BaseOps1.empty() && !BaseOps2.empty()) {
+    const MachineInstr &FirstLdSt = *BaseOps1.front()->getParent();
+    const MachineInstr &SecondLdSt = *BaseOps2.front()->getParent();
+    if (!memOpsHaveSameBasePtr(FirstLdSt, BaseOps1, SecondLdSt, BaseOps2))
+      return false;
+  } else if (!BaseOps1.empty() || !BaseOps2.empty()) {
+    // If only one base op is empty, they do not have the same base ptr
+    return false;
+  }
+
+  // TODO: Use a more carefully chosen heuristic, e.g. only cluster if offsets
+  // indicate they likely share a cache line.
+  return NumLoads <= 4;
+}
+
 // Set BaseReg (the base register operand), Offset (the byte offset being
 // accessed) and the access Width of the passed instruction that reads/writes
 // memory. Returns false if the instruction does not read/write memory or the
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
index 8f860077c303170..0e282e90276e9eb 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
@@ -157,6 +157,10 @@ class RISCVInstrInfo : public RISCVGenInstrInfo {
       int64_t &Offset, bool &OffsetIsScalable, unsigned &Width,
       const TargetRegisterInfo *TRI) const override;
 
+  bool shouldClusterMemOps(ArrayRef<const MachineOperand *> BaseOps1,
+                           ArrayRef<const MachineOperand *> BaseOps2,
+                           unsigned NumLoads, unsigned NumBytes) const override;
+
   bool getMemOperandWithOffsetWidth(const MachineInstr &LdSt,
                                     const MachineOperand *&BaseOp,
                                     int64_t &Offset, unsigned &Width,
diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
index 524c1a5ca50c5d9..b6c194f03f54209 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -95,6 +95,11 @@ static cl::opt<bool>
                         cl::desc("Enable Split RegisterAlloc for RVV"),
                         cl::init(false));
 
+static cl::opt<bool> EnableMISchedLoadClustering(
+    "riscv-misched-load-clustering", cl::Hidden,
+    cl::desc("Enable load clustering in the machine scheduler"),
+    cl::init(false));
+
 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() {
   RegisterTargetMachine<RISCVTargetMachine> X(getTheRISCV32Target());
   RegisterTargetMachine<RISCVTargetMachine> Y(getTheRISCV64Target());
@@ -345,12 +350,16 @@ class RISCVPassConfig : public TargetPassConfig {
   ScheduleDAGInstrs *
   createMachineScheduler(MachineSchedContext *C) const override {
     const RISCVSubtarget &ST = C->MF->getSubtarget<RISCVSubtarget>();
+    ScheduleDAGMILive *DAG = nullptr;
+    if (EnableMISchedLoadClustering) {
+      DAG = createGenericSchedLive(C);
+      DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
+    }
     if (ST.hasMacroFusion()) {
-      ScheduleDAGMILive *DAG = createGenericSchedLive(C);
+      DAG = DAG ? DAG : createGenericSchedLive(C);
       DAG->addMutation(createRISCVMacroFusionDAGMutation());
-      return DAG;
     }
-    return nullptr;
+    return DAG;
   }
 
   ScheduleDAGInstrs *
diff --git a/llvm/test/CodeGen/RISCV/misched-load-clustering.ll b/llvm/test/CodeGen/RISCV/misched-load-clustering.ll
new file mode 100644
index 000000000000000..4eb969a357a9eea
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/misched-load-clustering.ll
@@ -0,0 +1,43 @@
+; REQUIRES: asserts
+; RUN: llc -mtriple=riscv32 -verify-misched -debug-only=machine-scheduler -o - 2>&1 < %s \
+; RUN:   | FileCheck -check-prefix=NOCLUSTER %s
+; RUN: llc -mtriple=riscv64 -verify-misched -debug-only=machine-scheduler -o - 2>&1 < %s \
+; RUN:   | FileCheck -check-prefix=NOCLUSTER %s
+; RUN: llc -mtriple=riscv32 -riscv-misched-load-clustering -verify-misched \
+; RUN:     -debug-only=machine-scheduler -o - 2>&1 < %s \
+; RUN:   | FileCheck -check-prefix=LDCLUSTER %s
+; RUN: llc -mtriple=riscv64 -riscv-misched-load-clustering -verify-misched \
+; RUN:     -debug-only=machine-scheduler -o - 2>&1 < %s \
+; RUN:   | FileCheck -check-prefix=LDCLUSTER %s
+
+
+define i32 @load_clustering_1(ptr nocapture %p) {
+; NOCLUSTER: ********** MI Scheduling **********
+; NOCLUSTER-LABEL: load_clustering_1:%bb.0
+; NOCLUSTER: *** Final schedule for %bb.0 ***
+; NOCLUSTER: SU(1): %1:gpr = LW %0:gpr, 12
+; NOCLUSTER: SU(2): %2:gpr = LW %0:gpr, 8
+; NOCLUSTER: SU(4): %4:gpr = LW %0:gpr, 4
+; NOCLUSTER: SU(5): %6:gpr = LW %0:gpr, 16
+;
+; LDCLUSTER: ********** MI Scheduling **********
+; LDCLUSTER-LABEL: load_clustering_1:%bb.0
+; LDCLUSTER: *** Final schedule for %bb.0 ***
+; LDCLUSTER: SU(5): %6:gpr = LW %0:gpr, 16
+; LDCLUSTER: SU(1): %1:gpr = LW %0:gpr, 12
+; LDCLUSTER: SU(2): %2:gpr = LW %0:gpr, 8
+; LDCLUSTER: SU(4): %4:gpr = LW %0:gpr, 4
+entry:
+  %arrayidx0 = getelementptr inbounds i32, ptr %p, i32 3
+  %val0 = load i32, i32* %arrayidx0
+  %arrayidx1 = getelementptr inbounds i32, ptr %p, i32 2
+  %val1 = load i32, i32* %arrayidx1
+  %tmp0 = add i32 %val0, %val1
+  %arrayidx2 = getelementptr inbounds i32, ptr %p, i32 1
+  %val2 = load i32, i32* %arrayidx2
+  %tmp1 = add i32 %tmp0, %val2
+  %arrayidx3 = getelementptr inbounds i32, ptr %p, i32 4
+  %val3 = load i32, i32* %arrayidx3
+  %tmp2 = add i32 %tmp1, %val3
+  ret i32 %tmp2
+}

>From 499bebe0be74dce5421f50de98da228c74703884 Mon Sep 17 00:00:00 2001
From: Alex Bradbury <asb at igalia.com>
Date: Wed, 29 Nov 2023 09:48:26 +0000
Subject: [PATCH 2/2] NumLoads => ClusterSize to match #73757

---
 llvm/lib/Target/RISCV/RISCVInstrInfo.cpp | 4 ++--
 llvm/lib/Target/RISCV/RISCVInstrInfo.h   | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 29fa3987938d9e5..2918e5654db4f9f 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -2267,7 +2267,7 @@ static bool memOpsHaveSameBasePtr(const MachineInstr &MI1,
 
 bool RISCVInstrInfo::shouldClusterMemOps(
     ArrayRef<const MachineOperand *> BaseOps1,
-    ArrayRef<const MachineOperand *> BaseOps2, unsigned NumLoads,
+    ArrayRef<const MachineOperand *> BaseOps2, unsigned ClusterSize,
     unsigned NumBytes) const {
   // If the mem ops (to be clustered) do not have the same base ptr, then they
   // should not be clustered
@@ -2283,7 +2283,7 @@ bool RISCVInstrInfo::shouldClusterMemOps(
 
   // TODO: Use a more carefully chosen heuristic, e.g. only cluster if offsets
   // indicate they likely share a cache line.
-  return NumLoads <= 4;
+  return ClusterSize <= 4;
 }
 
 // Set BaseReg (the base register operand), Offset (the byte offset being
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
index 0e282e90276e9eb..d85ba99819d5ecc 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.h
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.h
@@ -159,7 +159,7 @@ class RISCVInstrInfo : public RISCVGenInstrInfo {
 
   bool shouldClusterMemOps(ArrayRef<const MachineOperand *> BaseOps1,
                            ArrayRef<const MachineOperand *> BaseOps2,
-                           unsigned NumLoads, unsigned NumBytes) const override;
+                           unsigned ClusterSize, unsigned NumBytes) const override;
 
   bool getMemOperandWithOffsetWidth(const MachineInstr &LdSt,
                                     const MachineOperand *&BaseOp,



More information about the llvm-commits mailing list