[llvm] 00e7a02 - [ScheduleDAG] Allow disabling the SchedModel / Itineraries during Scheduling (#138057)
via llvm-commits
llvm-commits at lists.llvm.org
Mon May 5 14:07:25 PDT 2025
Author: Jeffrey Byrnes
Date: 2025-05-05T14:07:23-07:00
New Revision: 00e7a0229525075362dc6e8625fa662799eac5fc
URL: https://github.com/llvm/llvm-project/commit/00e7a0229525075362dc6e8625fa662799eac5fc
DIFF: https://github.com/llvm/llvm-project/commit/00e7a0229525075362dc6e8625fa662799eac5fc.diff
LOG: [ScheduleDAG] Allow disabling the SchedModel / Itineraries during Scheduling (#138057)
This provides the `disable-schedmodel-in-sched-mi` flag. Using this, we
will disable the SchedModel / Itineraries during scheduling. This has
the effect of not using any latency / hardware resource information for
scheduling decisions.
We have the `schedmodel` flag, but this disables the `SchedModel` for
all passes. This allows disabling only for scheduling while preserving
the behavior of other passes (e.g. MachineLICM). This is conceptually
similar to other flags like `enable-aa-sched-mi`
Added:
llvm/test/CodeGen/AMDGPU/sched-no-schedmodel.mir
Modified:
llvm/include/llvm/CodeGen/TargetSchedule.h
llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
llvm/lib/CodeGen/TargetSchedule.cpp
llvm/test/CodeGen/AMDGPU/mai-hazards-gfx942.mir
Removed:
################################################################################
diff --git a/llvm/include/llvm/CodeGen/TargetSchedule.h b/llvm/include/llvm/CodeGen/TargetSchedule.h
index bfe4234abf8eb..69108a15e6cfe 100644
--- a/llvm/include/llvm/CodeGen/TargetSchedule.h
+++ b/llvm/include/llvm/CodeGen/TargetSchedule.h
@@ -45,6 +45,16 @@ class TargetSchedModel {
unsigned computeInstrLatency(const MCSchedClassDesc &SCDesc) const;
+ // EnableSchedModel and EnableSchedItins are used to control whether or not to
+ // use the Target's {SchedMachineModel, InstrItins} for hardware infor based
+ // Scheduling decisions. If both are enabled, as is the default, preference
+ // will be given to one based on the API implementation. By disabling one, we
+ // can force preference of the other. By disabling both, we will throw away
+ // any target specific hardware details for scheduling decisions, and fall
+ // into things that provide generic info such as defaultDefLatency.
+ bool EnableSchedModel = true;
+ bool EnableSchedItins = true;
+
public:
TargetSchedModel() : SchedModel(MCSchedModel::Default) {}
@@ -53,7 +63,8 @@ class TargetSchedModel {
/// The machine model API keeps a copy of the top-level MCSchedModel table
/// indices and may query TargetSubtargetInfo and TargetInstrInfo to resolve
/// dynamic properties.
- void init(const TargetSubtargetInfo *TSInfo);
+ void init(const TargetSubtargetInfo *TSInfo, bool EnableSModel = true,
+ bool EnableSItins = true);
/// Return the MCSchedClassDesc for this instruction.
const MCSchedClassDesc *resolveSchedClass(const MachineInstr *MI) const;
diff --git a/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp b/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
index a26804707dd1f..eae2e8c1187e6 100644
--- a/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -69,6 +69,14 @@ static cl::opt<bool>
static cl::opt<bool> UseTBAA("use-tbaa-in-sched-mi", cl::Hidden,
cl::init(true), cl::desc("Enable use of TBAA during MI DAG construction"));
+static cl::opt<bool>
+ EnableSchedModel("schedmodel", cl::Hidden, cl::init(true),
+ cl::desc("Use TargetSchedModel for latency lookup"));
+
+static cl::opt<bool>
+ EnableSchedItins("scheditins", cl::Hidden, cl::init(true),
+ cl::desc("Use InstrItineraryData for latency lookup"));
+
// Note: the two options below might be used in tuning compile time vs
// output quality. Setting HugeRegion so large that it will never be
// reached means best-effort, but may be slow.
@@ -121,7 +129,7 @@ ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf,
DbgValues.clear();
const TargetSubtargetInfo &ST = mf.getSubtarget();
- SchedModel.init(&ST);
+ SchedModel.init(&ST, EnableSchedModel, EnableSchedItins);
}
/// If this machine instr has memory reference information and it can be
diff --git a/llvm/lib/CodeGen/TargetSchedule.cpp b/llvm/lib/CodeGen/TargetSchedule.cpp
index db884b4940395..7ae9e0e37bbab 100644
--- a/llvm/lib/CodeGen/TargetSchedule.cpp
+++ b/llvm/lib/CodeGen/TargetSchedule.cpp
@@ -29,12 +29,6 @@
using namespace llvm;
-static cl::opt<bool> EnableSchedModel("schedmodel", cl::Hidden, cl::init(true),
- cl::desc("Use TargetSchedModel for latency lookup"));
-
-static cl::opt<bool> EnableSchedItins("scheditins", cl::Hidden, cl::init(true),
- cl::desc("Use InstrItineraryData for latency lookup"));
-
static cl::opt<bool> ForceEnableIntervals(
"sched-model-force-enable-intervals", cl::Hidden, cl::init(false),
cl::desc("Force the use of resource intervals in the schedule model"));
@@ -47,12 +41,16 @@ bool TargetSchedModel::hasInstrItineraries() const {
return EnableSchedItins && !InstrItins.isEmpty();
}
-void TargetSchedModel::init(const TargetSubtargetInfo *TSInfo) {
+void TargetSchedModel::init(const TargetSubtargetInfo *TSInfo,
+ bool EnableSModel, bool EnableSItins) {
STI = TSInfo;
SchedModel = TSInfo->getSchedModel();
TII = TSInfo->getInstrInfo();
STI->initInstrItins(InstrItins);
+ EnableSchedModel = EnableSModel;
+ EnableSchedItins = EnableSItins;
+
unsigned NumRes = SchedModel.getNumProcResourceKinds();
ResourceFactors.resize(NumRes);
ResourceLCM = SchedModel.IssueWidth;
diff --git a/llvm/test/CodeGen/AMDGPU/mai-hazards-gfx942.mir b/llvm/test/CodeGen/AMDGPU/mai-hazards-gfx942.mir
index d029043f90a85..8f4f57a5d37c5 100644
--- a/llvm/test/CodeGen/AMDGPU/mai-hazards-gfx942.mir
+++ b/llvm/test/CodeGen/AMDGPU/mai-hazards-gfx942.mir
@@ -1,5 +1,6 @@
# RUN: llc -mtriple=amdgcn -mcpu=gfx942 -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefixes=GCN,GFX942 %s
# RUN: llc -mtriple=amdgcn -mcpu=gfx950 -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefixes=GCN,GFX950 %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx950 -verify-machineinstrs -run-pass post-RA-hazard-rec --schedmodel=0 %s -o - | FileCheck -check-prefixes=GCN,GFX950 %s
# GCN-LABEL: name: valu_write_vgpr_sgemm_mfma_read
# GCN: V_MOV_B32
diff --git a/llvm/test/CodeGen/AMDGPU/sched-no-schedmodel.mir b/llvm/test/CodeGen/AMDGPU/sched-no-schedmodel.mir
new file mode 100644
index 0000000000000..09b326c5a63a1
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/sched-no-schedmodel.mir
@@ -0,0 +1,50 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -misched-cluster=false --misched-prera-direction=topdown -run-pass=machine-scheduler --schedmodel=1 -o - %s | FileCheck -check-prefix=GCN %s
+# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -misched-cluster=false --misched-prera-direction=topdown -run-pass=machine-scheduler --schedmodel=0 -o - %s | FileCheck -check-prefix=GCN-NO-SCHEDMODEL %s
+
+---
+name: sched_group_barrier_1_VMEM_READ_1_VALU_5_MFMA_1_VMEM_READ_3_VALU_2_VMEM_WRITE
+tracksRegLiveness: true
+body: |
+ bb.0:
+
+ ; GCN-LABEL: name: sched_group_barrier_1_VMEM_READ_1_VALU_5_MFMA_1_VMEM_READ_3_VALU_2_VMEM_WRITE
+ ; GCN: [[DEF:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; GCN-NEXT: [[DEF1:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; GCN-NEXT: early-clobber %2:vreg_512_align2 = contract V_MFMA_F32_32X32X16_FP8_FP8_vgprcd_e64 [[DEF]].sub0_sub1, [[DEF1]].sub0_sub1, 0, 0, 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN-NEXT: dead [[DS_READ_U16_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U16_gfx9 [[DEF2]], 0, 0, implicit $exec
+ ; GCN-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN-NEXT: dead [[DS_READ_U16_gfx9_1:%[0-9]+]]:vgpr_32 = DS_READ_U16_gfx9 [[DEF3]], 0, 0, implicit $exec
+ ; GCN-NEXT: [[DEF4:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN-NEXT: dead [[DS_READ_U16_gfx9_2:%[0-9]+]]:vgpr_32 = DS_READ_U16_gfx9 [[DEF4]], 0, 0, implicit $exec
+ ; GCN-NEXT: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 %2.sub0, %2.sub1, implicit $exec
+ ; GCN-NEXT: early-clobber %3:vreg_512_align2 = contract V_MFMA_F32_32X32X16_FP8_FP8_vgprcd_e64 [[DEF]].sub0_sub1, [[DEF1]].sub0_sub1, 0, 0, 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: S_ENDPGM 0, implicit %2, implicit %3, implicit [[V_MUL_LO_U32_e64_]]
+ ;
+ ; GCN-NO-SCHEDMODEL-LABEL: name: sched_group_barrier_1_VMEM_READ_1_VALU_5_MFMA_1_VMEM_READ_3_VALU_2_VMEM_WRITE
+ ; GCN-NO-SCHEDMODEL: [[DEF:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; GCN-NO-SCHEDMODEL-NEXT: [[DEF1:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; GCN-NO-SCHEDMODEL-NEXT: early-clobber %2:vreg_512_align2 = contract V_MFMA_F32_32X32X16_FP8_FP8_vgprcd_e64 [[DEF]].sub0_sub1, [[DEF1]].sub0_sub1, 0, 0, 0, 0, implicit $mode, implicit $exec
+ ; GCN-NO-SCHEDMODEL-NEXT: early-clobber %3:vreg_512_align2 = contract V_MFMA_F32_32X32X16_FP8_FP8_vgprcd_e64 [[DEF]].sub0_sub1, [[DEF1]].sub0_sub1, 0, 0, 0, 0, implicit $mode, implicit $exec
+ ; GCN-NO-SCHEDMODEL-NEXT: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 %2.sub0, %2.sub1, implicit $exec
+ ; GCN-NO-SCHEDMODEL-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN-NO-SCHEDMODEL-NEXT: dead [[DS_READ_U16_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_U16_gfx9 [[DEF2]], 0, 0, implicit $exec
+ ; GCN-NO-SCHEDMODEL-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN-NO-SCHEDMODEL-NEXT: dead [[DS_READ_U16_gfx9_1:%[0-9]+]]:vgpr_32 = DS_READ_U16_gfx9 [[DEF3]], 0, 0, implicit $exec
+ ; GCN-NO-SCHEDMODEL-NEXT: [[DEF4:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN-NO-SCHEDMODEL-NEXT: dead [[DS_READ_U16_gfx9_2:%[0-9]+]]:vgpr_32 = DS_READ_U16_gfx9 [[DEF4]], 0, 0, implicit $exec
+ ; GCN-NO-SCHEDMODEL-NEXT: S_ENDPGM 0, implicit %2, implicit %3, implicit [[V_MUL_LO_U32_e64_]]
+ %0:vreg_128_align2 = IMPLICIT_DEF
+ %1:vreg_128_align2 = IMPLICIT_DEF
+ %2:vreg_512_align2 = contract V_MFMA_F32_32X32X16_FP8_FP8_vgprcd_e64 %0.sub0_sub1:vreg_128_align2, %1.sub0_sub1:vreg_128_align2, 0, 0, 0, 0, implicit $mode, implicit $exec
+ %3:vreg_512_align2 = contract V_MFMA_F32_32X32X16_FP8_FP8_vgprcd_e64 %0.sub0_sub1:vreg_128_align2, %1.sub0_sub1:vreg_128_align2, 0, 0, 0, 0, implicit $mode, implicit $exec
+ %4:vgpr_32 = nsw V_MUL_LO_U32_e64 %2.sub0, %2.sub1, implicit $exec
+ %5:vgpr_32 = IMPLICIT_DEF
+ %6:vgpr_32 = DS_READ_U16_gfx9 %5, 0, 0, implicit $exec
+ %7:vgpr_32 = IMPLICIT_DEF
+ %8:vgpr_32 = DS_READ_U16_gfx9 %7, 0, 0, implicit $exec
+ %9:vgpr_32 = IMPLICIT_DEF
+ %10:vgpr_32 = DS_READ_U16_gfx9 %9, 0, 0, implicit $exec
+ S_ENDPGM 0, implicit %2, implicit %3, implicit %4
+...
More information about the llvm-commits
mailing list