[llvm] [RISCV] add load/store misched/PostRA cluster options (PR #149409)

Daniel Henrique Barboza via llvm-commits llvm-commits at lists.llvm.org
Fri Jul 18 11:20:24 PDT 2025


https://github.com/danielhb updated https://github.com/llvm/llvm-project/pull/149409

>From 9de53291604b926eb3bead15c55da5c8a1ad66f8 Mon Sep 17 00:00:00 2001
From: Daniel Henrique Barboza <dbarboza at ventanamicro.com>
Date: Mon, 2 Jun 2025 12:07:53 -0700
Subject: [PATCH 1/2] [RISCV] add load/store misched/PostRA cluster options

Some processors benefit more from store clustering than load clustering,
and vice-versa, depending on factors that are exclusive to each one
(e.g. macrofusions implemented).

Likewise, certain optimizations benefits more from misched clustering
than postRA clustering. Macrofusions are again an example: in a
processor with store pair macrofusions, like the veyron-v1, it is
observed that misched clustering increases the amount of macrofusions
more than postRA clustering.  This of course isn't necessarily true for
other processors, but it shows that processors can benefit from a more
fine grained control of clustering mutations, and each one is able to do
it differently.

Add 4 new clustering options that deprecates the existing
riscv-misched-load-store-clustering and
riscv-postmisched-load-store-clustering options:

- riscv-misched-load-clustering and riscv-misched-store-clustering:
  enable/disable load/store clustering during misched;

- riscv-postmisched-load-clustering and
  riscv-postmisched-store-clustering: enable/disable load/store
  clustering during PostRA.

To preserve the existing clustering behavior all 4 options are defaulted to
'true'.
---
 llvm/lib/Target/RISCV/RISCVTargetMachine.cpp  | 34 +++++++++++++------
 .../CodeGen/RISCV/misched-load-clustering.ll  |  6 ++--
 .../CodeGen/RISCV/misched-mem-clustering.mir  |  6 ++--
 3 files changed, 32 insertions(+), 14 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
index b43b915d0ad4f..4c0beb3e0b0bc 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -94,14 +94,24 @@ static cl::opt<bool>
                            cl::desc("Enable the loop data prefetch pass"),
                            cl::init(true));
 
-static cl::opt<bool> EnableMISchedLoadStoreClustering(
-    "riscv-misched-load-store-clustering", cl::Hidden,
-    cl::desc("Enable load and store clustering in the machine scheduler"),
+static cl::opt<bool> EnableMISchedLoadClustering(
+    "riscv-misched-load-clustering", cl::Hidden,
+    cl::desc("Enable load clustering in the machine scheduler"),
     cl::init(true));
 
-static cl::opt<bool> EnablePostMISchedLoadStoreClustering(
-    "riscv-postmisched-load-store-clustering", cl::Hidden,
-    cl::desc("Enable PostRA load and store clustering in the machine scheduler"),
+static cl::opt<bool> EnableMISchedStoreClustering(
+    "riscv-misched-store-clustering", cl::Hidden,
+    cl::desc("Enable store clustering in the machine scheduler"),
+    cl::init(true));
+
+static cl::opt<bool> EnablePostMISchedLoadClustering(
+    "riscv-postmisched-load-clustering", cl::Hidden,
+    cl::desc("Enable PostRA load clustering in the machine scheduler"),
+    cl::init(true));
+
+static cl::opt<bool> EnablePostMISchedStoreClustering(
+    "riscv-postmisched-store-clustering", cl::Hidden,
+    cl::desc("Enable PostRA store clustering in the machine scheduler"),
     cl::init(true));
 
 static cl::opt<bool>
@@ -300,12 +310,14 @@ bool RISCVTargetMachine::isNoopAddrSpaceCast(unsigned SrcAS,
 ScheduleDAGInstrs *
 RISCVTargetMachine::createMachineScheduler(MachineSchedContext *C) const {
   ScheduleDAGMILive *DAG = createSchedLive(C);
-  if (EnableMISchedLoadStoreClustering) {
+
+  if (EnableMISchedLoadClustering)
     DAG->addMutation(createLoadClusterDAGMutation(
         DAG->TII, DAG->TRI, /*ReorderWhileClustering=*/true));
+
+  if (EnableMISchedStoreClustering)
     DAG->addMutation(createStoreClusterDAGMutation(
         DAG->TII, DAG->TRI, /*ReorderWhileClustering=*/true));
-  }
 
   const RISCVSubtarget &ST = C->MF->getSubtarget<RISCVSubtarget>();
   if (!DisableVectorMaskMutation && ST.hasVInstructions())
@@ -317,12 +329,14 @@ RISCVTargetMachine::createMachineScheduler(MachineSchedContext *C) const {
 ScheduleDAGInstrs *
 RISCVTargetMachine::createPostMachineScheduler(MachineSchedContext *C) const {
   ScheduleDAGMI *DAG = createSchedPostRA(C);
-  if (EnablePostMISchedLoadStoreClustering) {
+
+  if (EnablePostMISchedLoadClustering)
     DAG->addMutation(createLoadClusterDAGMutation(
         DAG->TII, DAG->TRI, /*ReorderWhileClustering=*/true));
+
+  if (EnablePostMISchedStoreClustering)
     DAG->addMutation(createStoreClusterDAGMutation(
         DAG->TII, DAG->TRI, /*ReorderWhileClustering=*/true));
-  }
 
   return DAG;
 }
diff --git a/llvm/test/CodeGen/RISCV/misched-load-clustering.ll b/llvm/test/CodeGen/RISCV/misched-load-clustering.ll
index 160f0aefa36a7..7988fcfe5a743 100644
--- a/llvm/test/CodeGen/RISCV/misched-load-clustering.ll
+++ b/llvm/test/CodeGen/RISCV/misched-load-clustering.ll
@@ -1,8 +1,10 @@
 ; REQUIRES: asserts
-; RUN: llc -mtriple=riscv32 -verify-misched -riscv-misched-load-store-clustering=false \
+; RUN: llc -mtriple=riscv32 -verify-misched -riscv-misched-load-clustering=false \
+; RUN:     -riscv-misched-store-clustering=false \
 ; RUN:     -debug-only=machine-scheduler -o - 2>&1 < %s \
 ; RUN:   | FileCheck -check-prefix=NOCLUSTER %s
-; RUN: llc -mtriple=riscv64 -verify-misched -riscv-misched-load-store-clustering=false \
+; RUN: llc -mtriple=riscv64 -verify-misched -riscv-misched-load-clustering=false \
+; RUN:     -riscv-misched-store-clustering=false \
 ; RUN:     -debug-only=machine-scheduler -o - 2>&1 < %s \
 ; RUN:   | FileCheck -check-prefix=NOCLUSTER %s
 ; RUN: llc -mtriple=riscv32 -verify-misched \
diff --git a/llvm/test/CodeGen/RISCV/misched-mem-clustering.mir b/llvm/test/CodeGen/RISCV/misched-mem-clustering.mir
index 21398d315ec93..a57f8ef0b58b9 100644
--- a/llvm/test/CodeGen/RISCV/misched-mem-clustering.mir
+++ b/llvm/test/CodeGen/RISCV/misched-mem-clustering.mir
@@ -1,10 +1,12 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
 # RUN: llc -mtriple=riscv64 -x mir -mcpu=sifive-p470 -verify-misched -enable-post-misched=false \
-# RUN:     -riscv-postmisched-load-store-clustering=false -debug-only=machine-scheduler \
+# RUN:     -riscv-postmisched-load-clustering=false \
+# RUN:     -riscv-postmisched-store-clustering=false -debug-only=machine-scheduler \
 # RUN:     -start-before=machine-scheduler -stop-after=postmisched -misched-regpressure=false -o - 2>&1 < %s \
 # RUN:   | FileCheck -check-prefix=NOPOSTMISCHED %s
 # RUN: llc -mtriple=riscv64 -x mir -mcpu=sifive-p470 -mattr=+use-postra-scheduler -verify-misched -enable-post-misched=true \
-# RUN:     -riscv-postmisched-load-store-clustering=false -debug-only=machine-scheduler \
+# RUN:     -riscv-postmisched-load-clustering=false \
+# RUN:     -riscv-postmisched-store-clustering=false -debug-only=machine-scheduler \
 # RUN:     -start-before=machine-scheduler -stop-after=postmisched -misched-regpressure=false -o - 2>&1 < %s \
 # RUN:   | FileCheck -check-prefix=NOCLUSTER %s
 # RUN: llc -mtriple=riscv64 -x mir -mcpu=sifive-p470 -mattr=+use-postra-scheduler -verify-misched -enable-post-misched=true \

>From 7603a095a3dcf5c024ceefdc00a23ad3e953e9c4 Mon Sep 17 00:00:00 2001
From: Daniel Henrique Barboza <dbarboza at ventanamicro.com>
Date: Fri, 18 Jul 2025 11:06:11 -0700
Subject: [PATCH 2/2] misched-load-clustering.ll: exercise new clustering flags

- add store-clustering=false to the existing LDCLUSTER test;
- add a new STCLUSTER test with load-clustering=false;
- add a new DEFAULTCLUSTER test with default clustering options.
---
 .../CodeGen/RISCV/misched-load-clustering.ll  | 35 +++++++++++++++++++
 1 file changed, 35 insertions(+)

diff --git a/llvm/test/CodeGen/RISCV/misched-load-clustering.ll b/llvm/test/CodeGen/RISCV/misched-load-clustering.ll
index 7988fcfe5a743..3d167bd8ebfd3 100644
--- a/llvm/test/CodeGen/RISCV/misched-load-clustering.ll
+++ b/llvm/test/CodeGen/RISCV/misched-load-clustering.ll
@@ -7,12 +7,31 @@
 ; RUN:     -riscv-misched-store-clustering=false \
 ; RUN:     -debug-only=machine-scheduler -o - 2>&1 < %s \
 ; RUN:   | FileCheck -check-prefix=NOCLUSTER %s
+;
+; RUN: llc -mtriple=riscv32 -verify-misched \
+; RUN:     -riscv-misched-load-clustering=false \
+; RUN:     -debug-only=machine-scheduler -o - 2>&1 < %s \
+; RUN:   | FileCheck -check-prefix=STCLUSTER %s
+; RUN: llc -mtriple=riscv64 -verify-misched \
+; RUN:     -riscv-misched-load-clustering=false \
+; RUN:     -debug-only=machine-scheduler -o - 2>&1 < %s \
+; RUN:   | FileCheck -check-prefix=STCLUSTER %s
+;
 ; RUN: llc -mtriple=riscv32 -verify-misched \
+; RUN:     -riscv-misched-store-clustering=false \
 ; RUN:     -debug-only=machine-scheduler -o - 2>&1 < %s \
 ; RUN:   | FileCheck -check-prefix=LDCLUSTER %s
 ; RUN: llc -mtriple=riscv64 -verify-misched \
+; RUN:     -riscv-misched-store-clustering=false \
 ; RUN:     -debug-only=machine-scheduler -o - 2>&1 < %s \
 ; RUN:   | FileCheck -check-prefix=LDCLUSTER %s
+;
+; RUN: llc -mtriple=riscv32 -verify-misched \
+; RUN:     -debug-only=machine-scheduler -o - 2>&1 < %s \
+; RUN:   | FileCheck -check-prefix=DEFAULTCLUSTER %s
+; RUN: llc -mtriple=riscv64 -verify-misched \
+; RUN:     -debug-only=machine-scheduler -o - 2>&1 < %s \
+; RUN:   | FileCheck -check-prefix=DEFAULTCLUSTER %s
 
 
 define i32 @load_clustering_1(ptr nocapture %p) {
@@ -24,6 +43,14 @@ define i32 @load_clustering_1(ptr nocapture %p) {
 ; NOCLUSTER: SU(4): %4:gpr = LW %0:gpr, 4
 ; NOCLUSTER: SU(5): %6:gpr = LW %0:gpr, 16
 ;
+; STCLUSTER: ********** MI Scheduling **********
+; STCLUSTER-LABEL: load_clustering_1:%bb.0
+; STCLUSTER: *** Final schedule for %bb.0 ***
+; STCLUSTER: SU(1): %1:gpr = LW %0:gpr, 12
+; STCLUSTER: SU(2): %2:gpr = LW %0:gpr, 8
+; STCLUSTER: SU(4): %4:gpr = LW %0:gpr, 4
+; STCLUSTER: SU(5): %6:gpr = LW %0:gpr, 16
+;
 ; LDCLUSTER: ********** MI Scheduling **********
 ; LDCLUSTER-LABEL: load_clustering_1:%bb.0
 ; LDCLUSTER: *** Final schedule for %bb.0 ***
@@ -31,6 +58,14 @@ define i32 @load_clustering_1(ptr nocapture %p) {
 ; LDCLUSTER: SU(2): %2:gpr = LW %0:gpr, 8
 ; LDCLUSTER: SU(1): %1:gpr = LW %0:gpr, 12
 ; LDCLUSTER: SU(5): %6:gpr = LW %0:gpr, 16
+;
+; DEFAULTCLUSTER: ********** MI Scheduling **********
+; DEFAULTCLUSTER-LABEL: load_clustering_1:%bb.0
+; DEFAULTCLUSTER: *** Final schedule for %bb.0 ***
+; DEFAULTCLUSTER: SU(4): %4:gpr = LW %0:gpr, 4
+; DEFAULTCLUSTER: SU(2): %2:gpr = LW %0:gpr, 8
+; DEFAULTCLUSTER: SU(1): %1:gpr = LW %0:gpr, 12
+; DEFAULTCLUSTER: SU(5): %6:gpr = LW %0:gpr, 16
 entry:
   %arrayidx0 = getelementptr inbounds i32, ptr %p, i32 3
   %val0 = load i32, ptr %arrayidx0



More information about the llvm-commits mailing list