[llvm] [RISCV] add load/store misched/PostRA cluster options (PR #149409)
Daniel Henrique Barboza via llvm-commits
llvm-commits at lists.llvm.org
Fri Jul 18 11:20:24 PDT 2025
https://github.com/danielhb updated https://github.com/llvm/llvm-project/pull/149409
>From 9de53291604b926eb3bead15c55da5c8a1ad66f8 Mon Sep 17 00:00:00 2001
From: Daniel Henrique Barboza <dbarboza at ventanamicro.com>
Date: Mon, 2 Jun 2025 12:07:53 -0700
Subject: [PATCH 1/2] [RISCV] add load/store misched/PostRA cluster options
Some processors benefit more from store clustering than load clustering,
and vice-versa, depending on factors that are exclusive to each one
(e.g. macrofusions implemented).
Likewise, certain optimizations benefits more from misched clustering
than postRA clustering. Macrofusions are again an example: in a
processor with store pair macrofusions, like the veyron-v1, it is
observed that misched clustering increases the amount of macrofusions
more than postRA clustering. This of course isn't necessarily true for
other processors, but it shows that processors can benefit from a more
fine grained control of clustering mutations, and each one is able to do
it differently.
Add 4 new clustering options that deprecates the existing
riscv-misched-load-store-clustering and
riscv-postmisched-load-store-clustering options:
- riscv-misched-load-clustering and riscv-misched-store-clustering:
enable/disable load/store clustering during misched;
- riscv-postmisched-load-clustering and
riscv-postmisched-store-clustering: enable/disable load/store
clustering during PostRA.
To preserve the existing clustering behavior all 4 options are defaulted to
'true'.
---
llvm/lib/Target/RISCV/RISCVTargetMachine.cpp | 34 +++++++++++++------
.../CodeGen/RISCV/misched-load-clustering.ll | 6 ++--
.../CodeGen/RISCV/misched-mem-clustering.mir | 6 ++--
3 files changed, 32 insertions(+), 14 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
index b43b915d0ad4f..4c0beb3e0b0bc 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -94,14 +94,24 @@ static cl::opt<bool>
cl::desc("Enable the loop data prefetch pass"),
cl::init(true));
-static cl::opt<bool> EnableMISchedLoadStoreClustering(
- "riscv-misched-load-store-clustering", cl::Hidden,
- cl::desc("Enable load and store clustering in the machine scheduler"),
+static cl::opt<bool> EnableMISchedLoadClustering(
+ "riscv-misched-load-clustering", cl::Hidden,
+ cl::desc("Enable load clustering in the machine scheduler"),
cl::init(true));
-static cl::opt<bool> EnablePostMISchedLoadStoreClustering(
- "riscv-postmisched-load-store-clustering", cl::Hidden,
- cl::desc("Enable PostRA load and store clustering in the machine scheduler"),
+static cl::opt<bool> EnableMISchedStoreClustering(
+ "riscv-misched-store-clustering", cl::Hidden,
+ cl::desc("Enable store clustering in the machine scheduler"),
+ cl::init(true));
+
+static cl::opt<bool> EnablePostMISchedLoadClustering(
+ "riscv-postmisched-load-clustering", cl::Hidden,
+ cl::desc("Enable PostRA load clustering in the machine scheduler"),
+ cl::init(true));
+
+static cl::opt<bool> EnablePostMISchedStoreClustering(
+ "riscv-postmisched-store-clustering", cl::Hidden,
+ cl::desc("Enable PostRA store clustering in the machine scheduler"),
cl::init(true));
static cl::opt<bool>
@@ -300,12 +310,14 @@ bool RISCVTargetMachine::isNoopAddrSpaceCast(unsigned SrcAS,
ScheduleDAGInstrs *
RISCVTargetMachine::createMachineScheduler(MachineSchedContext *C) const {
ScheduleDAGMILive *DAG = createSchedLive(C);
- if (EnableMISchedLoadStoreClustering) {
+
+ if (EnableMISchedLoadClustering)
DAG->addMutation(createLoadClusterDAGMutation(
DAG->TII, DAG->TRI, /*ReorderWhileClustering=*/true));
+
+ if (EnableMISchedStoreClustering)
DAG->addMutation(createStoreClusterDAGMutation(
DAG->TII, DAG->TRI, /*ReorderWhileClustering=*/true));
- }
const RISCVSubtarget &ST = C->MF->getSubtarget<RISCVSubtarget>();
if (!DisableVectorMaskMutation && ST.hasVInstructions())
@@ -317,12 +329,14 @@ RISCVTargetMachine::createMachineScheduler(MachineSchedContext *C) const {
ScheduleDAGInstrs *
RISCVTargetMachine::createPostMachineScheduler(MachineSchedContext *C) const {
ScheduleDAGMI *DAG = createSchedPostRA(C);
- if (EnablePostMISchedLoadStoreClustering) {
+
+ if (EnablePostMISchedLoadClustering)
DAG->addMutation(createLoadClusterDAGMutation(
DAG->TII, DAG->TRI, /*ReorderWhileClustering=*/true));
+
+ if (EnablePostMISchedStoreClustering)
DAG->addMutation(createStoreClusterDAGMutation(
DAG->TII, DAG->TRI, /*ReorderWhileClustering=*/true));
- }
return DAG;
}
diff --git a/llvm/test/CodeGen/RISCV/misched-load-clustering.ll b/llvm/test/CodeGen/RISCV/misched-load-clustering.ll
index 160f0aefa36a7..7988fcfe5a743 100644
--- a/llvm/test/CodeGen/RISCV/misched-load-clustering.ll
+++ b/llvm/test/CodeGen/RISCV/misched-load-clustering.ll
@@ -1,8 +1,10 @@
; REQUIRES: asserts
-; RUN: llc -mtriple=riscv32 -verify-misched -riscv-misched-load-store-clustering=false \
+; RUN: llc -mtriple=riscv32 -verify-misched -riscv-misched-load-clustering=false \
+; RUN: -riscv-misched-store-clustering=false \
; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \
; RUN: | FileCheck -check-prefix=NOCLUSTER %s
-; RUN: llc -mtriple=riscv64 -verify-misched -riscv-misched-load-store-clustering=false \
+; RUN: llc -mtriple=riscv64 -verify-misched -riscv-misched-load-clustering=false \
+; RUN: -riscv-misched-store-clustering=false \
; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \
; RUN: | FileCheck -check-prefix=NOCLUSTER %s
; RUN: llc -mtriple=riscv32 -verify-misched \
diff --git a/llvm/test/CodeGen/RISCV/misched-mem-clustering.mir b/llvm/test/CodeGen/RISCV/misched-mem-clustering.mir
index 21398d315ec93..a57f8ef0b58b9 100644
--- a/llvm/test/CodeGen/RISCV/misched-mem-clustering.mir
+++ b/llvm/test/CodeGen/RISCV/misched-mem-clustering.mir
@@ -1,10 +1,12 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
# RUN: llc -mtriple=riscv64 -x mir -mcpu=sifive-p470 -verify-misched -enable-post-misched=false \
-# RUN: -riscv-postmisched-load-store-clustering=false -debug-only=machine-scheduler \
+# RUN: -riscv-postmisched-load-clustering=false \
+# RUN: -riscv-postmisched-store-clustering=false -debug-only=machine-scheduler \
# RUN: -start-before=machine-scheduler -stop-after=postmisched -misched-regpressure=false -o - 2>&1 < %s \
# RUN: | FileCheck -check-prefix=NOPOSTMISCHED %s
# RUN: llc -mtriple=riscv64 -x mir -mcpu=sifive-p470 -mattr=+use-postra-scheduler -verify-misched -enable-post-misched=true \
-# RUN: -riscv-postmisched-load-store-clustering=false -debug-only=machine-scheduler \
+# RUN: -riscv-postmisched-load-clustering=false \
+# RUN: -riscv-postmisched-store-clustering=false -debug-only=machine-scheduler \
# RUN: -start-before=machine-scheduler -stop-after=postmisched -misched-regpressure=false -o - 2>&1 < %s \
# RUN: | FileCheck -check-prefix=NOCLUSTER %s
# RUN: llc -mtriple=riscv64 -x mir -mcpu=sifive-p470 -mattr=+use-postra-scheduler -verify-misched -enable-post-misched=true \
>From 7603a095a3dcf5c024ceefdc00a23ad3e953e9c4 Mon Sep 17 00:00:00 2001
From: Daniel Henrique Barboza <dbarboza at ventanamicro.com>
Date: Fri, 18 Jul 2025 11:06:11 -0700
Subject: [PATCH 2/2] misched-load-clustering.ll: exercise new clustering flags
- add store-clustering=false to the existing LDCLUSTER test;
- add a new STCLUSTER test with load-clustering=false;
- add a new DEFAULTCLUSTER test with default clustering options.
---
.../CodeGen/RISCV/misched-load-clustering.ll | 35 +++++++++++++++++++
1 file changed, 35 insertions(+)
diff --git a/llvm/test/CodeGen/RISCV/misched-load-clustering.ll b/llvm/test/CodeGen/RISCV/misched-load-clustering.ll
index 7988fcfe5a743..3d167bd8ebfd3 100644
--- a/llvm/test/CodeGen/RISCV/misched-load-clustering.ll
+++ b/llvm/test/CodeGen/RISCV/misched-load-clustering.ll
@@ -7,12 +7,31 @@
; RUN: -riscv-misched-store-clustering=false \
; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \
; RUN: | FileCheck -check-prefix=NOCLUSTER %s
+;
+; RUN: llc -mtriple=riscv32 -verify-misched \
+; RUN: -riscv-misched-load-clustering=false \
+; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \
+; RUN: | FileCheck -check-prefix=STCLUSTER %s
+; RUN: llc -mtriple=riscv64 -verify-misched \
+; RUN: -riscv-misched-load-clustering=false \
+; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \
+; RUN: | FileCheck -check-prefix=STCLUSTER %s
+;
; RUN: llc -mtriple=riscv32 -verify-misched \
+; RUN: -riscv-misched-store-clustering=false \
; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \
; RUN: | FileCheck -check-prefix=LDCLUSTER %s
; RUN: llc -mtriple=riscv64 -verify-misched \
+; RUN: -riscv-misched-store-clustering=false \
; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \
; RUN: | FileCheck -check-prefix=LDCLUSTER %s
+;
+; RUN: llc -mtriple=riscv32 -verify-misched \
+; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \
+; RUN: | FileCheck -check-prefix=DEFAULTCLUSTER %s
+; RUN: llc -mtriple=riscv64 -verify-misched \
+; RUN: -debug-only=machine-scheduler -o - 2>&1 < %s \
+; RUN: | FileCheck -check-prefix=DEFAULTCLUSTER %s
define i32 @load_clustering_1(ptr nocapture %p) {
@@ -24,6 +43,14 @@ define i32 @load_clustering_1(ptr nocapture %p) {
; NOCLUSTER: SU(4): %4:gpr = LW %0:gpr, 4
; NOCLUSTER: SU(5): %6:gpr = LW %0:gpr, 16
;
+; STCLUSTER: ********** MI Scheduling **********
+; STCLUSTER-LABEL: load_clustering_1:%bb.0
+; STCLUSTER: *** Final schedule for %bb.0 ***
+; STCLUSTER: SU(1): %1:gpr = LW %0:gpr, 12
+; STCLUSTER: SU(2): %2:gpr = LW %0:gpr, 8
+; STCLUSTER: SU(4): %4:gpr = LW %0:gpr, 4
+; STCLUSTER: SU(5): %6:gpr = LW %0:gpr, 16
+;
; LDCLUSTER: ********** MI Scheduling **********
; LDCLUSTER-LABEL: load_clustering_1:%bb.0
; LDCLUSTER: *** Final schedule for %bb.0 ***
@@ -31,6 +58,14 @@ define i32 @load_clustering_1(ptr nocapture %p) {
; LDCLUSTER: SU(2): %2:gpr = LW %0:gpr, 8
; LDCLUSTER: SU(1): %1:gpr = LW %0:gpr, 12
; LDCLUSTER: SU(5): %6:gpr = LW %0:gpr, 16
+;
+; DEFAULTCLUSTER: ********** MI Scheduling **********
+; DEFAULTCLUSTER-LABEL: load_clustering_1:%bb.0
+; DEFAULTCLUSTER: *** Final schedule for %bb.0 ***
+; DEFAULTCLUSTER: SU(4): %4:gpr = LW %0:gpr, 4
+; DEFAULTCLUSTER: SU(2): %2:gpr = LW %0:gpr, 8
+; DEFAULTCLUSTER: SU(1): %1:gpr = LW %0:gpr, 12
+; DEFAULTCLUSTER: SU(5): %6:gpr = LW %0:gpr, 16
entry:
%arrayidx0 = getelementptr inbounds i32, ptr %p, i32 3
%val0 = load i32, ptr %arrayidx0
More information about the llvm-commits
mailing list