[llvm] [AMDGPU] Constrain AV->VReg if we do not exceed RP thresholds (PR #150086)
Jeffrey Byrnes via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 23 17:31:59 PDT 2025
https://github.com/jrbyrnes updated https://github.com/llvm/llvm-project/pull/150086
>From c75b24e52d42f6001813028e529c9955237f130a Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Fri, 18 Jul 2025 16:55:44 -0700
Subject: [PATCH 1/4] [AMDGPU] Track AV Register Pressure separately
Change-Id: Ifcd242c111b139f62109b12b588bb6af764fe4df
---
llvm/lib/Target/AMDGPU/GCNRegPressure.cpp | 6 +++-
llvm/lib/Target/AMDGPU/GCNRegPressure.h | 41 ++++++++++++++++-------
2 files changed, 33 insertions(+), 14 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
index 7d6723a6108be..334afd3a2a5b4 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -38,7 +38,11 @@ bool llvm::isEqual(const GCNRPTracker::LiveRegSet &S1,
unsigned GCNRegPressure::getRegKind(const TargetRegisterClass *RC,
const SIRegisterInfo *STI) {
- return STI->isSGPRClass(RC) ? SGPR : (STI->isAGPRClass(RC) ? AGPR : VGPR);
+ return STI->isSGPRClass(RC)
+ ? SGPR
+ : (STI->isAGPRClass(RC)
+ ? AGPR
+ : (STI->isVectorSuperClass(RC) ? AVGPR : VGPR));
}
void GCNRegPressure::inc(unsigned Reg,
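
(Unrolled for readability: the nested conditional above is equivalent to the following if/else sketch. The helper name getRegKindSketch is invented for illustration; the real queries are the SIRegisterInfo methods used in the patch.)

  static unsigned getRegKindSketch(const TargetRegisterClass *RC,
                                   const SIRegisterInfo *STI) {
    if (STI->isSGPRClass(RC))
      return GCNRegPressure::SGPR;  // scalar register classes
    if (STI->isAGPRClass(RC))
      return GCNRegPressure::AGPR;  // accumulator register classes
    if (STI->isVectorSuperClass(RC))
      return GCNRegPressure::AVGPR; // AV_* classes, now tracked separately
    return GCNRegPressure::VGPR;    // plain architectural VGPRs
  }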
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
index 3749b6d1efc63..5ec898351f922 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.h
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
@@ -29,43 +29,58 @@ class raw_ostream;
class SlotIndex;
struct GCNRegPressure {
- enum RegKind { SGPR, VGPR, AGPR, TOTAL_KINDS };
+ enum RegKind { SGPR, VGPR, AGPR, AVGPR, TOTAL_KINDS };
GCNRegPressure() {
clear();
}
- bool empty() const { return !Value[SGPR] && !Value[VGPR] && !Value[AGPR]; }
+ bool empty() const {
+ return !Value[SGPR] && !Value[VGPR] && !Value[AGPR] && !Value[AVGPR];
+ }
void clear() { std::fill(&Value[0], &Value[ValueArraySize], 0); }
/// \returns the SGPR32 pressure
unsigned getSGPRNum() const { return Value[SGPR]; }
- /// \returns the aggregated ArchVGPR32, AccVGPR32 pressure dependent upon \p
- /// UnifiedVGPRFile
+ /// \returns the aggregated ArchVGPR32, AccVGPR32, and Pseudo AVGPR pressure
+ /// dependent upon \p UnifiedVGPRFile
unsigned getVGPRNum(bool UnifiedVGPRFile) const {
if (UnifiedVGPRFile) {
- return Value[AGPR] ? getUnifiedVGPRNum(Value[VGPR], Value[AGPR])
- : Value[VGPR];
+ return Value[AGPR]
+ ? getUnifiedVGPRNum(Value[VGPR], Value[AGPR], Value[AVGPR])
+ : Value[VGPR] + Value[AVGPR];
}
- return std::max(Value[VGPR], Value[AGPR]);
+ // Until we hit the VGPRThreshold, we will assign AV as VGPR. After that
+ // point, we will assign as AGPR.
+ return std::max(Value[VGPR] + Value[AVGPR], Value[AGPR]);
}
/// Returns the aggregated VGPR pressure, assuming \p NumArchVGPRs ArchVGPRs
- /// and \p NumAGPRs AGPRS, for a target with a unified VGPR file.
+ /// \p NumAGPRs AGPRs, and \p NumAVGPRs AVGPRs for a target with a unified
+ /// VGPR file.
inline static unsigned getUnifiedVGPRNum(unsigned NumArchVGPRs,
- unsigned NumAGPRs) {
- return alignTo(NumArchVGPRs, AMDGPU::IsaInfo::getArchVGPRAllocGranule()) +
+ unsigned NumAGPRs,
+ unsigned NumAVGPRs) {
+
+ // Until we hit the VGPRThreshold, we will assign AV as VGPR. After that
+ // point, we will assign as AGPR.
+ return alignTo(NumArchVGPRs + NumAVGPRs,
+ AMDGPU::IsaInfo::getArchVGPRAllocGranule()) +
NumAGPRs;
}
- /// \returns the ArchVGPR32 pressure
- unsigned getArchVGPRNum() const { return Value[VGPR]; }
+ /// \returns the ArchVGPR32 pressure, plus the AVGPRs which we assume will be
+ /// allocated as VGPR
+ unsigned getArchVGPRNum() const { return Value[VGPR] + Value[AVGPR]; }
/// \returns the AccVGPR32 pressure
unsigned getAGPRNum() const { return Value[AGPR]; }
+ /// \returns the AVGPR32 pressure
+ unsigned getAVGPRNum() const { return Value[AVGPR]; }
unsigned getVGPRTuplesWeight() const {
- return std::max(Value[TOTAL_KINDS + VGPR], Value[TOTAL_KINDS + AGPR]);
+ return std::max(Value[TOTAL_KINDS + VGPR] + Value[TOTAL_KINDS + AVGPR],
+ Value[TOTAL_KINDS + AGPR]);
}
unsigned getSGPRTuplesWeight() const { return Value[TOTAL_KINDS + SGPR]; }
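
With the new AVGPR kind, both queries charge AV registers to the ArchVGPR side until allocation decides otherwise. A worked example of the arithmetic, assuming an allocation granule of 4 (illustrative only; the real value comes from AMDGPU::IsaInfo::getArchVGPRAllocGranule() and is target-dependent):

  // Suppose Value[VGPR] = 10, Value[AVGPR] = 3, Value[AGPR] = 5.
  //
  // Unified VGPR file (argument order: ArchVGPRs, AGPRs, AVGPRs):
  //   getUnifiedVGPRNum(10, 5, 3) = alignTo(10 + 3, 4) + 5
  //                               = alignTo(13, 4) + 5 = 16 + 5 = 21
  //
  // Split register files:
  //   getVGPRNum(false) = max(10 + 3, 5) = 13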
>From e3dd5accb24340ec6bca5124fb62cf53a8cf697f Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Tue, 15 Jul 2025 15:10:41 -0700
Subject: [PATCH 2/4] [AMDGPU] Constrain AV->VReg if we do not exceed RP
thresholds
Change-Id: I17cb012504946fa9dca88b32548f922e2ce4b7a9
---
llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 88 ++++++++
llvm/lib/Target/AMDGPU/GCNSchedStrategy.h | 30 ++-
.../AMDGPU/llvm.amdgcn.iglp.opt.single.2b.mir | 40 ++--
.../AMDGPU/llvm.amdgcn.iglp.opt.single.2c.mir | 16 +-
.../AMDGPU/schedule-reconstrain-avgpr.mir | 188 ++++++++++++++++++
5 files changed, 329 insertions(+), 33 deletions(-)
create mode 100644 llvm/test/CodeGen/AMDGPU/schedule-reconstrain-avgpr.mir
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index a6553083d722b..9189361324a1c 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -528,6 +528,7 @@ GCNMaxOccupancySchedStrategy::GCNMaxOccupancySchedStrategy(
const MachineSchedContext *C, bool IsLegacyScheduler)
: GCNSchedStrategy(C) {
SchedStages.push_back(GCNSchedStageID::OccInitialSchedule);
+ SchedStages.push_back(GCNSchedStageID::AVGPRRewriteSchedule);
SchedStages.push_back(GCNSchedStageID::UnclusteredHighRPReschedule);
SchedStages.push_back(GCNSchedStageID::ClusteredLowOccupancyReschedule);
SchedStages.push_back(GCNSchedStageID::PreRARematerialize);
@@ -778,6 +779,8 @@ GCNScheduleDAGMILive::createSchedStage(GCNSchedStageID SchedStageID) {
switch (SchedStageID) {
case GCNSchedStageID::OccInitialSchedule:
return std::make_unique<OccInitialScheduleStage>(SchedStageID, *this);
+ case GCNSchedStageID::AVGPRRewriteSchedule:
+ return std::make_unique<AVGPRRewriteScheduleStage>(SchedStageID, *this);
case GCNSchedStageID::UnclusteredHighRPReschedule:
return std::make_unique<UnclusteredHighRPStage>(SchedStageID, *this);
case GCNSchedStageID::ClusteredLowOccupancyReschedule:
@@ -941,10 +944,14 @@ void GCNScheduleDAGMILive::finalizeSchedule() {
Pressure.resize(Regions.size());
RegionsWithHighRP.resize(Regions.size());
RegionsWithExcessRP.resize(Regions.size());
+ RegionsWithAVRegs.resize(Regions.size());
+ RegionsWithExcessVGPRRP.resize(Regions.size());
RegionsWithMinOcc.resize(Regions.size());
RegionsWithIGLPInstrs.resize(Regions.size());
RegionsWithHighRP.reset();
RegionsWithExcessRP.reset();
+ RegionsWithAVRegs.reset();
+ RegionsWithExcessVGPRRP.reset();
RegionsWithMinOcc.reset();
RegionsWithIGLPInstrs.reset();
@@ -1003,6 +1010,9 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const GCNSchedStageID &StageID) {
case GCNSchedStageID::OccInitialSchedule:
OS << "Max Occupancy Initial Schedule";
break;
+ case GCNSchedStageID::AVGPRRewriteSchedule:
+ OS << "AVGPR Rewriting Reschedule";
+ break;
case GCNSchedStageID::UnclusteredHighRPReschedule:
OS << "Unclustered High Register Pressure Reschedule";
break;
@@ -1036,6 +1046,78 @@ bool GCNSchedStage::initGCNSchedStage() {
return true;
}
+bool AVGPRRewriteScheduleStage::reconstrainRegClass(
+ Register Reg, const TargetRegisterClass *NewRC) const {
+ const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
+ const TargetRegisterClass *OldRC = DAG.MRI.getRegClass(Reg);
+ const TargetRegisterInfo *TRI = DAG.MRI.getTargetRegisterInfo();
+ const TargetRegisterClass *ConstrainRC = NewRC;
+ const SIRegisterInfo *SRI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();
+
+ // Stop early if there is nothing to do.
+ if (!NewRC || NewRC == OldRC)
+ return false;
+
+ // Accumulate constraints from all uses.
+ for (MachineOperand &MO : DAG.MRI.reg_nodbg_operands(Reg)) {
+ // Fold the constraint of the given operand into the candidate class.
+ MachineInstr *MI = MO.getParent();
+ unsigned OpNo = &MO - &MI->getOperand(0);
+ ConstrainRC = MI->getRegClassConstraintEffect(OpNo, ConstrainRC, TII, TRI);
+ if (!ConstrainRC)
+ return false;
+ if (MI->isCopy()) {
+ MachineOperand &OtherOp = MI->getOperand(1 - OpNo);
+ if (!OtherOp.isReg())
+ continue;
+
+ if (!SRI->isVGPR(DAG.MRI, OtherOp.getReg()))
+ return false;
+ }
+ }
+ DAG.MRI.setRegClass(Reg, ConstrainRC);
+ return true;
+}
+
+bool AVGPRRewriteScheduleStage::initGCNSchedStage() {
+ const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+
+ // The main benefit of AVReg usage is that the register can be assigned to
+ // either VGPR or AGPR. However, for the unified RF case, we should only be
+ // using AGPR if strictly necessary. That is, if the required number of VGPRs
+ // exceeds the addressable limit. Otherwise, we should be strictly using VGPRs
+ // to minimize cross RC copies. Thus, if we are underc this limit, we should
+ // constrain AVReg -> VReg.
+ // TODO: AVReg constraining for the non-unified case.
+ if (!ST.hasGFX90AInsts() || DAG.RegionsWithAVRegs.empty() ||
+ DAG.RegionsWithExcessVGPRRP.any())
+ return false;
+
+ const SIRegisterInfo *SRI = ST.getRegisterInfo();
+
+ for (unsigned I = 0, E = DAG.MRI.getNumVirtRegs(); I != E; ++I) {
+ Register Reg = Register::index2VirtReg(I);
+ if (!DAG.LIS->hasInterval(Reg))
+ continue;
+ const TargetRegisterClass *RC = DAG.MRI.getRegClass(Reg);
+ if (!SRI->isVectorSuperClass(RC))
+ continue;
+
+ reconstrainRegClass(Reg, SRI->getEquivalentVGPRClass(RC));
+ }
+
+ // TODO -- opposite case, inflate to AV when we have AVGPR + VGPR RP greater
+ // than the addressable limit.
+
+ // TODO - after we separate out AVGPR pressure from e.g. the getVGPRNum
+ // pressure queries, we may need to update the cached RP.
+
+ // TODO - there is a benefit to rescheduling with the constraints, as the
+ // generic trackers do not track AVGPR pressure. But we should teach the
+ // default trackers about AVGPR rather than doing rescheduling here.
+ return false;
+}
+
bool UnclusteredHighRPStage::initGCNSchedStage() {
if (DisableUnclusterHighRP)
return false;
@@ -1278,6 +1360,9 @@ void GCNSchedStage::checkScheduling() {
LLVM_DEBUG(dbgs() << "Pressure after scheduling: " << print(PressureAfter));
LLVM_DEBUG(dbgs() << "Region: " << RegionIdx << ".\n");
+ if (PressureAfter.getAVGPRNum())
+ DAG.RegionsWithAVRegs[RegionIdx] = true;
+
unsigned DynamicVGPRBlockSize = DAG.MFI.getDynamicVGPRBlockSize();
if (PressureAfter.getSGPRNum() <= S.SGPRCriticalLimit &&
@@ -1331,6 +1416,9 @@ void GCNSchedStage::checkScheduling() {
unsigned MaxArchVGPRs = std::min(MaxVGPRs, ST.getAddressableNumArchVGPRs());
unsigned MaxSGPRs = ST.getMaxNumSGPRs(MF);
+ if (PressureAfter.getArchVGPRNum() > ST.getAddressableNumArchVGPRs())
+ DAG.RegionsWithExcessVGPRRP[RegionIdx] = true;
+
if (PressureAfter.getVGPRNum(ST.hasGFX90AInsts()) > MaxVGPRs ||
PressureAfter.getArchVGPRNum() > MaxArchVGPRs ||
PressureAfter.getAGPRNum() > MaxArchVGPRs ||
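
The two new BitVectors feed the stage's entry check: checkScheduling records a region in RegionsWithAVRegs when its post-schedule pressure includes any AVGPRs, and in RegionsWithExcessVGPRRP when its ArchVGPR pressure exceeds the addressable limit. Condensed into a standalone predicate (a sketch mirroring the early-return at the top of initGCNSchedStage; the name shouldRewriteAVToVGPR is invented):

  bool shouldRewriteAVToVGPR(const GCNSubtarget &ST,
                             const GCNScheduleDAGMILive &DAG) {
    // Unified-RF target, AV usage recorded, and no region whose ArchVGPR
    // pressure exceeded the addressable limit.
    return ST.hasGFX90AInsts() && !DAG.RegionsWithAVRegs.empty() &&
           !DAG.RegionsWithExcessVGPRRP.any();
  }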
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
index 94cd795bbc8f6..7575d7611bbcb 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h
@@ -28,11 +28,12 @@ class GCNSchedStage;
enum class GCNSchedStageID : unsigned {
OccInitialSchedule = 0,
- UnclusteredHighRPReschedule = 1,
- ClusteredLowOccupancyReschedule = 2,
- PreRARematerialize = 3,
- ILPInitialSchedule = 4,
- MemoryClauseInitialSchedule = 5
+ AVGPRRewriteSchedule = 1,
+ UnclusteredHighRPReschedule = 2,
+ ClusteredLowOccupancyReschedule = 3,
+ PreRARematerialize = 4,
+ ILPInitialSchedule = 5,
+ MemoryClauseInitialSchedule = 6
};
#ifndef NDEBUG
@@ -224,6 +225,7 @@ using RegionBoundaries =
class GCNScheduleDAGMILive final : public ScheduleDAGMILive {
friend class GCNSchedStage;
friend class OccInitialScheduleStage;
+ friend class AVGPRRewriteScheduleStage;
friend class UnclusteredHighRPStage;
friend class ClusteredLowOccStage;
friend class PreRARematStage;
@@ -250,9 +252,15 @@ class GCNScheduleDAGMILive final : public ScheduleDAGMILive {
// limit. Register pressure in these regions usually will result in spilling.
BitVector RegionsWithExcessRP;
+ // Regions that have VGPR RP which exceeds the addressable limit.
+ BitVector RegionsWithExcessVGPRRP;
+
// Regions that has the same occupancy as the latest MinOccupancy
BitVector RegionsWithMinOcc;
+ // Regions which use the AV RC.
+ BitVector RegionsWithAVRegs;
+
// Regions that have IGLP instructions (SCHED_GROUP_BARRIER or IGLP_OPT).
BitVector RegionsWithIGLPInstrs;
@@ -401,6 +409,18 @@ class OccInitialScheduleStage : public GCNSchedStage {
: GCNSchedStage(StageID, DAG) {}
};
+class AVGPRRewriteScheduleStage : public GCNSchedStage {
+private:
+ bool reconstrainRegClass(Register Reg,
+ const TargetRegisterClass *NewRC) const;
+
+public:
+ bool initGCNSchedStage() override;
+
+ AVGPRRewriteScheduleStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
+ : GCNSchedStage(StageID, DAG) {}
+};
+
class UnclusteredHighRPStage : public GCNSchedStage {
private:
// Save the initial occupancy before starting this stage.
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.iglp.opt.single.2b.mir b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.iglp.opt.single.2b.mir
index e93595b9ef273..0ec67f44e2cbb 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.iglp.opt.single.2b.mir
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.iglp.opt.single.2b.mir
@@ -23,10 +23,10 @@ body: |
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[DEF:%[0-9]+]]:areg_512_align2 = IMPLICIT_DEF
; GCN-NEXT: [[DEF1:%[0-9]+]]:areg_512_align2 = IMPLICIT_DEF
- ; GCN-NEXT: [[DEF2:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
- ; GCN-NEXT: [[DEF3:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
- ; GCN-NEXT: [[DEF4:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
- ; GCN-NEXT: [[DEF5:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; GCN-NEXT: [[DEF2:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; GCN-NEXT: [[DEF3:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; GCN-NEXT: [[DEF4:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
+ ; GCN-NEXT: [[DEF5:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; GCN-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GCN-NEXT: [[DEF7:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GCN-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
@@ -75,40 +75,40 @@ body: |
; GCN-NEXT: bb.1:
; GCN-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:av_128_align2 = DS_READ_B128_gfx9 [[DEF6]], 0, 0, implicit $exec :: (load (s128) from %ir.in0, !alias.scope !0, addrspace 3)
- ; GCN-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:av_128_align2 = DS_READ_B128_gfx9 [[DEF7]], 0, 0, implicit $exec :: (load (s128) from %ir.in4, !alias.scope !0, addrspace 3)
- ; GCN-NEXT: [[DS_READ_B128_gfx9_2:%[0-9]+]]:av_128_align2 = DS_READ_B128_gfx9 [[DEF6]], 1040, 0, implicit $exec :: (load (s128) from %ir.in1, !alias.scope !0, addrspace 3)
- ; GCN-NEXT: [[DS_READ_B128_gfx9_3:%[0-9]+]]:av_128_align2 = DS_READ_B128_gfx9 [[DEF7]], 2064, 0, implicit $exec :: (load (s128) from %ir.in5, !alias.scope !0, addrspace 3)
+ ; GCN-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF6]], 0, 0, implicit $exec :: (load (s128) from %ir.in0, !alias.scope !0, addrspace 3)
+ ; GCN-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF7]], 0, 0, implicit $exec :: (load (s128) from %ir.in4, !alias.scope !0, addrspace 3)
+ ; GCN-NEXT: [[DS_READ_B128_gfx9_2:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF6]], 1040, 0, implicit $exec :: (load (s128) from %ir.in1, !alias.scope !0, addrspace 3)
+ ; GCN-NEXT: [[DS_READ_B128_gfx9_3:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF7]], 2064, 0, implicit $exec :: (load (s128) from %ir.in5, !alias.scope !0, addrspace 3)
; GCN-NEXT: [[DEF:%[0-9]+]]:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 [[DS_READ_B128_gfx9_]].sub0_sub1, [[DS_READ_B128_gfx9_1]].sub0_sub1, [[DEF]], 0, 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: [[DS_READ_B128_gfx9_4:%[0-9]+]]:av_128_align2 = DS_READ_B128_gfx9 [[DEF6]], 2080, 0, implicit $exec :: (load (s128) from %ir.in2, !alias.scope !0, addrspace 3)
+ ; GCN-NEXT: [[DS_READ_B128_gfx9_4:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF6]], 2080, 0, implicit $exec :: (load (s128) from %ir.in2, !alias.scope !0, addrspace 3)
; GCN-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF42]], [[DEF8]], implicit $exec
; GCN-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF42]], [[DEF9]], implicit $exec
; GCN-NEXT: dead [[V_MFMA_F32_32X32X8F16_mac_e64_:%[0-9]+]]:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 [[DS_READ_B128_gfx9_]].sub2_sub3, [[DS_READ_B128_gfx9_1]].sub2_sub3, [[DEF]], 0, 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: [[DS_READ_B128_gfx9_5:%[0-9]+]]:av_128_align2 = DS_READ_B128_gfx9 [[DEF6]], 3120, 0, implicit $exec :: (load (s128) from %ir.in3, !alias.scope !0, addrspace 3)
+ ; GCN-NEXT: [[DS_READ_B128_gfx9_5:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF6]], 3120, 0, implicit $exec :: (load (s128) from %ir.in3, !alias.scope !0, addrspace 3)
; GCN-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF42]], [[DEF10]], implicit $exec
; GCN-NEXT: [[V_ADD_U32_e32_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF42]], [[DEF11]], implicit $exec
; GCN-NEXT: [[DEF:%[0-9]+]]:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 [[DS_READ_B128_gfx9_2]].sub0_sub1, [[DS_READ_B128_gfx9_3]].sub0_sub1, [[DEF]], 0, 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: [[DS_READ_B128_gfx9_6:%[0-9]+]]:av_128_align2 = DS_READ_B128_gfx9 [[DEF7]], 4128, 0, implicit $exec :: (load (s128) from %ir.in6, !alias.scope !0, addrspace 3)
+ ; GCN-NEXT: [[DS_READ_B128_gfx9_6:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF7]], 4128, 0, implicit $exec :: (load (s128) from %ir.in6, !alias.scope !0, addrspace 3)
; GCN-NEXT: [[V_ADD_U32_e32_4:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF42]], [[DEF12]], implicit $exec
; GCN-NEXT: [[V_ADD_U32_e32_5:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF42]], [[DEF13]], implicit $exec
; GCN-NEXT: [[DEF:%[0-9]+]]:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 [[DS_READ_B128_gfx9_2]].sub2_sub3, [[DS_READ_B128_gfx9_3]].sub2_sub3, [[DEF]], 0, 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: [[DS_READ_B128_gfx9_7:%[0-9]+]]:av_128_align2 = DS_READ_B128_gfx9 [[DEF7]], 6192, 0, implicit $exec :: (load (s128) from %ir.in7, !alias.scope !0, addrspace 3)
+ ; GCN-NEXT: [[DS_READ_B128_gfx9_7:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF7]], 6192, 0, implicit $exec :: (load (s128) from %ir.in7, !alias.scope !0, addrspace 3)
; GCN-NEXT: [[V_ADD_U32_e32_6:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF42]], [[DEF14]], implicit $exec
; GCN-NEXT: [[V_ADD_U32_e32_7:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF42]], [[DEF15]], implicit $exec
; GCN-NEXT: [[DEF:%[0-9]+]]:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 [[DS_READ_B128_gfx9_4]].sub0_sub1, [[DS_READ_B128_gfx9_6]].sub0_sub1, [[DEF]], 0, 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: [[DS_READ_B128_gfx9_8:%[0-9]+]]:av_128_align2 = DS_READ_B128_gfx9 [[DEF7]], 1024, 0, implicit $exec :: (load (s128) from %ir.in8, !alias.scope !0, addrspace 3)
+ ; GCN-NEXT: [[DS_READ_B128_gfx9_8:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF7]], 1024, 0, implicit $exec :: (load (s128) from %ir.in8, !alias.scope !0, addrspace 3)
; GCN-NEXT: [[V_ADD_U32_e32_8:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF42]], [[DEF16]], implicit $exec
; GCN-NEXT: [[V_ADD_U32_e32_9:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF42]], [[DEF17]], implicit $exec
; GCN-NEXT: [[DEF:%[0-9]+]]:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 [[DS_READ_B128_gfx9_4]].sub2_sub3, [[DS_READ_B128_gfx9_6]].sub2_sub3, [[DEF]], 0, 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: [[DS_READ_B128_gfx9_9:%[0-9]+]]:av_128_align2 = DS_READ_B128_gfx9 [[DEF7]], 3088, 0, implicit $exec :: (load (s128) from %ir.in9, !alias.scope !0, addrspace 3)
+ ; GCN-NEXT: [[DS_READ_B128_gfx9_9:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF7]], 3088, 0, implicit $exec :: (load (s128) from %ir.in9, !alias.scope !0, addrspace 3)
; GCN-NEXT: [[V_ADD_U32_e32_10:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF42]], [[DEF18]], implicit $exec
; GCN-NEXT: [[V_ADD_U32_e32_11:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF42]], [[DEF19]], implicit $exec
; GCN-NEXT: [[DEF:%[0-9]+]]:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 [[DS_READ_B128_gfx9_5]].sub0_sub1, [[DS_READ_B128_gfx9_7]].sub0_sub1, [[DEF]], 0, 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: [[DS_READ_B128_gfx9_10:%[0-9]+]]:av_128_align2 = DS_READ_B128_gfx9 [[DEF7]], 5152, 0, implicit $exec :: (load (s128) from %ir.in10, !alias.scope !0, addrspace 3)
+ ; GCN-NEXT: [[DS_READ_B128_gfx9_10:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF7]], 5152, 0, implicit $exec :: (load (s128) from %ir.in10, !alias.scope !0, addrspace 3)
; GCN-NEXT: [[V_ADD_U32_e32_12:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF42]], [[DEF20]], implicit $exec
; GCN-NEXT: [[V_ADD_U32_e32_13:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF42]], [[DEF21]], implicit $exec
; GCN-NEXT: [[DEF:%[0-9]+]]:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 [[DS_READ_B128_gfx9_5]].sub2_sub3, [[DS_READ_B128_gfx9_7]].sub2_sub3, [[DEF]], 0, 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: [[DS_READ_B128_gfx9_11:%[0-9]+]]:av_128_align2 = DS_READ_B128_gfx9 [[DEF7]], 7216, 0, implicit $exec :: (load (s128) from %ir.in11, !alias.scope !0, addrspace 3)
+ ; GCN-NEXT: [[DS_READ_B128_gfx9_11:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF7]], 7216, 0, implicit $exec :: (load (s128) from %ir.in11, !alias.scope !0, addrspace 3)
; GCN-NEXT: [[DEF1:%[0-9]+]]:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 [[DS_READ_B128_gfx9_]].sub0_sub1, [[DS_READ_B128_gfx9_8]].sub0_sub1, [[DEF1]], 0, 0, 0, implicit $mode, implicit $exec
; GCN-NEXT: undef [[V_PERM_B32_e64_:%[0-9]+]].sub1:vreg_128_align2 = V_PERM_B32_e64 [[DEF28]], [[DEF29]], [[DEF44]], implicit $exec
; GCN-NEXT: [[V_PERM_B32_e64_:%[0-9]+]].sub0:vreg_128_align2 = V_PERM_B32_e64 [[DEF30]], [[DEF31]], [[DEF44]], implicit $exec
@@ -145,20 +145,20 @@ body: |
; GCN-NEXT: DS_WRITE_B128_gfx9 [[DEF41]], [[DEF2]], 0, 0, implicit $exec :: (store (s128) into %ir.in2, !alias.scope !0, addrspace 3)
; GCN-NEXT: [[V_ADD_U32_e32_16:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 -16, [[DEF45]], implicit $exec
; GCN-NEXT: [[V_ADD_U32_e32_17:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 -16, [[DEF46]], implicit $exec
- ; GCN-NEXT: [[DEF2:%[0-9]+]]:av_128_align2 = BUFFER_LOAD_DWORDX4_OFFEN [[V_ADD_U32_e32_16]], [[DEF48]], 0, 0, 0, 0, implicit $exec :: (load (s128) from %ir.in26, !alias.scope !0, addrspace 7)
+ ; GCN-NEXT: [[DEF2:%[0-9]+]]:vreg_128_align2 = BUFFER_LOAD_DWORDX4_OFFEN [[V_ADD_U32_e32_16]], [[DEF48]], 0, 0, 0, 0, implicit $exec :: (load (s128) from %ir.in26, !alias.scope !0, addrspace 7)
; GCN-NEXT: [[DEF1:%[0-9]+]]:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 [[DS_READ_B128_gfx9_4]].sub2_sub3, [[DS_READ_B128_gfx9_10]].sub2_sub3, [[DEF1]], 0, 0, 0, implicit $mode, implicit $exec
; GCN-NEXT: DS_WRITE_B128_gfx9 [[DEF41]], [[DEF3]], 2064, 0, implicit $exec :: (store (s128) into %ir.in3, !alias.scope !0, addrspace 3)
- ; GCN-NEXT: [[DEF3:%[0-9]+]]:av_128_align2 = BUFFER_LOAD_DWORDX4_OFFEN [[DEF45]], [[DEF48]], 0, 0, 0, 0, implicit $exec :: (load (s128) from %ir.in27, !alias.scope !0, addrspace 7)
+ ; GCN-NEXT: [[DEF3:%[0-9]+]]:vreg_128_align2 = BUFFER_LOAD_DWORDX4_OFFEN [[DEF45]], [[DEF48]], 0, 0, 0, 0, implicit $exec :: (load (s128) from %ir.in27, !alias.scope !0, addrspace 7)
; GCN-NEXT: [[DEF45:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 128, [[DEF45]], implicit $exec
; GCN-NEXT: [[DEF23:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF43]], [[DEF23]], implicit $exec
; GCN-NEXT: [[DEF1:%[0-9]+]]:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 [[DS_READ_B128_gfx9_5]].sub0_sub1, [[DS_READ_B128_gfx9_11]].sub0_sub1, [[DEF1]], 0, 0, 0, implicit $mode, implicit $exec
; GCN-NEXT: DS_WRITE_B128_gfx9 [[DEF41]], [[DEF4]], 2080, 0, implicit $exec :: (store (s128) into %ir.in4, !alias.scope !0, addrspace 3)
- ; GCN-NEXT: [[DEF4:%[0-9]+]]:av_128_align2 = BUFFER_LOAD_DWORDX4_OFFEN [[DEF46]], [[DEF48]], 0, 0, 0, 0, implicit $exec :: (load (s128) from %ir.in28, !alias.scope !0, addrspace 7)
+ ; GCN-NEXT: [[DEF4:%[0-9]+]]:vreg_128_align2 = BUFFER_LOAD_DWORDX4_OFFEN [[DEF46]], [[DEF48]], 0, 0, 0, 0, implicit $exec :: (load (s128) from %ir.in28, !alias.scope !0, addrspace 7)
; GCN-NEXT: [[DEF46:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 128, [[DEF46]], implicit $exec
; GCN-NEXT: [[DEF22:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF43]], [[DEF22]], implicit $exec
; GCN-NEXT: [[DEF1:%[0-9]+]]:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 [[DS_READ_B128_gfx9_5]].sub2_sub3, [[DS_READ_B128_gfx9_11]].sub2_sub3, [[DEF1]], 0, 0, 0, implicit $mode, implicit $exec
; GCN-NEXT: DS_WRITE_B128_gfx9 [[DEF41]], [[DEF5]], 16, 0, implicit $exec :: (store (s128) into %ir.in5, !alias.scope !0, addrspace 3)
- ; GCN-NEXT: [[DEF5:%[0-9]+]]:av_128_align2 = BUFFER_LOAD_DWORDX4_OFFEN [[V_ADD_U32_e32_17]], [[DEF48]], 0, 0, 0, 0, implicit $exec :: (load (s128) from %ir.in29, !alias.scope !0, addrspace 7)
+ ; GCN-NEXT: [[DEF5:%[0-9]+]]:vreg_128_align2 = BUFFER_LOAD_DWORDX4_OFFEN [[V_ADD_U32_e32_17]], [[DEF48]], 0, 0, 0, 0, implicit $exec :: (load (s128) from %ir.in29, !alias.scope !0, addrspace 7)
; GCN-NEXT: IGLP_OPT 1
; GCN-NEXT: [[DEF49:%[0-9]+]]:sreg_32 = nsw S_ADD_I32 [[DEF49]], -1, implicit-def dead $scc
; GCN-NEXT: S_CMP_LG_U32 [[DEF49]], 0, implicit-def $scc
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.iglp.opt.single.2c.mir b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.iglp.opt.single.2c.mir
index a85478df10eb2..c7bc310b767c1 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.iglp.opt.single.2c.mir
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.iglp.opt.single.2c.mir
@@ -35,7 +35,7 @@ body: |
; GCN-NEXT: [[DEF13:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GCN-NEXT: [[DEF14:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GCN-NEXT: [[DEF15:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
- ; GCN-NEXT: [[DEF16:%[0-9]+]]:av_128_align2 = IMPLICIT_DEF
+ ; GCN-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; GCN-NEXT: [[DEF17:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; GCN-NEXT: [[DEF18:%[0-9]+]]:vreg_128_align2 = IMPLICIT_DEF
; GCN-NEXT: [[DEF19:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
@@ -57,22 +57,22 @@ body: |
; GCN-NEXT: bb.1:
; GCN-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
; GCN-NEXT: {{ $}}
- ; GCN-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:av_128_align2 = DS_READ_B128_gfx9 [[DEF2]], 0, 0, implicit $exec :: (load (s128) from %ir.in0, !alias.scope !0, addrspace 3)
- ; GCN-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:av_128_align2 = DS_READ_B128_gfx9 [[DEF3]], 0, 0, implicit $exec :: (load (s128) from %ir.in2, !alias.scope !0, addrspace 3)
- ; GCN-NEXT: [[DS_READ_B128_gfx9_2:%[0-9]+]]:av_128_align2 = DS_READ_B128_gfx9 [[DEF2]], 1040, 0, implicit $exec :: (load (s128) from %ir.in1, !alias.scope !0, addrspace 3)
- ; GCN-NEXT: [[DS_READ_B128_gfx9_3:%[0-9]+]]:av_128_align2 = DS_READ_B128_gfx9 [[DEF3]], 2064, 0, implicit $exec :: (load (s128) from %ir.in3, !alias.scope !0, addrspace 3)
+ ; GCN-NEXT: [[DS_READ_B128_gfx9_:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF2]], 0, 0, implicit $exec :: (load (s128) from %ir.in0, !alias.scope !0, addrspace 3)
+ ; GCN-NEXT: [[DS_READ_B128_gfx9_1:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF3]], 0, 0, implicit $exec :: (load (s128) from %ir.in2, !alias.scope !0, addrspace 3)
+ ; GCN-NEXT: [[DS_READ_B128_gfx9_2:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF2]], 1040, 0, implicit $exec :: (load (s128) from %ir.in1, !alias.scope !0, addrspace 3)
+ ; GCN-NEXT: [[DS_READ_B128_gfx9_3:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF3]], 2064, 0, implicit $exec :: (load (s128) from %ir.in3, !alias.scope !0, addrspace 3)
; GCN-NEXT: [[COPY:%[0-9]+]]:areg_512_align2 = COPY [[DEF1]]
; GCN-NEXT: [[COPY:%[0-9]+]]:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 [[DS_READ_B128_gfx9_]].sub0_sub1, [[DS_READ_B128_gfx9_1]].sub0_sub1, [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: [[DS_READ_B128_gfx9_4:%[0-9]+]]:av_128_align2 = DS_READ_B128_gfx9 [[DEF3]], 1024, 0, implicit $exec :: (load (s128) from %ir.in4, !alias.scope !0, addrspace 3)
+ ; GCN-NEXT: [[DS_READ_B128_gfx9_4:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF3]], 1024, 0, implicit $exec :: (load (s128) from %ir.in4, !alias.scope !0, addrspace 3)
; GCN-NEXT: [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF19]], [[DEF33]], implicit $exec
; GCN-NEXT: [[V_ADD_U32_e32_1:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF19]], [[DEF21]], implicit $exec
; GCN-NEXT: [[COPY:%[0-9]+]]:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 [[DS_READ_B128_gfx9_]].sub2_sub3, [[DS_READ_B128_gfx9_1]].sub2_sub3, [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
- ; GCN-NEXT: [[DS_READ_B128_gfx9_5:%[0-9]+]]:av_128_align2 = DS_READ_B128_gfx9 [[DEF3]], 3088, 0, implicit $exec :: (load (s128) from %ir.in5, !alias.scope !0, addrspace 3)
+ ; GCN-NEXT: [[DS_READ_B128_gfx9_5:%[0-9]+]]:vreg_128_align2 = DS_READ_B128_gfx9 [[DEF3]], 3088, 0, implicit $exec :: (load (s128) from %ir.in5, !alias.scope !0, addrspace 3)
; GCN-NEXT: [[V_ADD_U32_e32_2:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF19]], [[DEF22]], implicit $exec
; GCN-NEXT: [[V_ADD_U32_e32_3:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[DEF19]], [[DEF23]], implicit $exec
; GCN-NEXT: [[COPY:%[0-9]+]]:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 [[DS_READ_B128_gfx9_2]].sub0_sub1, [[DS_READ_B128_gfx9_3]].sub0_sub1, [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
; GCN-NEXT: DS_WRITE_B128_gfx9 [[DEF4]], [[DEF16]], 0, 0, implicit $exec :: (store (s128) into %ir.in6, !alias.scope !0, addrspace 3)
- ; GCN-NEXT: [[DEF16:%[0-9]+]]:av_128_align2 = BUFFER_LOAD_DWORDX4_OFFEN [[DEF6]], [[DEF7]], 0, 0, 0, 0, implicit $exec :: (load (s128) from %ir.in7, !alias.scope !0, addrspace 7)
+ ; GCN-NEXT: [[DEF16:%[0-9]+]]:vreg_128_align2 = BUFFER_LOAD_DWORDX4_OFFEN [[DEF6]], [[DEF7]], 0, 0, 0, 0, implicit $exec :: (load (s128) from %ir.in7, !alias.scope !0, addrspace 7)
; GCN-NEXT: dead [[COPY:%[0-9]+]]:areg_512_align2 = contract V_MFMA_F32_32X32X8F16_mac_e64 [[DS_READ_B128_gfx9_2]].sub2_sub3, [[DS_READ_B128_gfx9_3]].sub2_sub3, [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
; GCN-NEXT: [[COPY1:%[0-9]+]]:areg_512_align2 = COPY [[DEF]]
; GCN-NEXT: undef [[DEF17:%[0-9]+]].sub2:vreg_128_align2 = V_PERM_B32_e64 [[DEF13]], [[DEF12]], [[DEF30]], implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/schedule-reconstrain-avgpr.mir b/llvm/test/CodeGen/AMDGPU/schedule-reconstrain-avgpr.mir
new file mode 100644
index 0000000000000..9350604f73191
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/schedule-reconstrain-avgpr.mir
@@ -0,0 +1,188 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn -mcpu=gfx950 -run-pass=machine-scheduler -o - %s | FileCheck -check-prefix=UNIFIED %s
+# RUN: llc -mtriple=amdgcn -mcpu=gfx908 -run-pass=machine-scheduler -o - %s | FileCheck -check-prefix=SPLIT %s
+
+---
+name: reconstrain
+tracksRegLiveness: true
+machineFunctionInfo:
+ isEntryFunction: true
+ scratchRSrcReg: '$sgpr24_sgpr25_sgpr26_sgpr27'
+ frameOffsetReg: '$sgpr32'
+ stackPtrOffsetReg: '$sgpr32'
+ argumentInfo:
+ privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
+ privateSegmentWaveByteOffset: { reg: '$sgpr33' }
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+ ; UNIFIED-LABEL: name: reconstrain
+ ; UNIFIED: liveins: $vgpr0, $vgpr1
+ ; UNIFIED-NEXT: {{ $}}
+ ; UNIFIED-NEXT: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+ ; UNIFIED-NEXT: S_NOP 0, implicit [[DEF]]
+ ; UNIFIED-NEXT: S_ENDPGM 0
+ ;
+ ; SPLIT-LABEL: name: reconstrain
+ ; SPLIT: liveins: $vgpr0, $vgpr1
+ ; SPLIT-NEXT: {{ $}}
+ ; SPLIT-NEXT: [[DEF:%[0-9]+]]:av_64_align2 = IMPLICIT_DEF
+ ; SPLIT-NEXT: S_NOP 0, implicit [[DEF]]
+ ; SPLIT-NEXT: S_ENDPGM 0
+ %0:av_64_align2 = IMPLICIT_DEF
+ S_NOP 0, implicit %0
+ S_ENDPGM 0
+...
+
+---
+name: no_reconstrain_use
+tracksRegLiveness: true
+machineFunctionInfo:
+ isEntryFunction: true
+ scratchRSrcReg: '$sgpr24_sgpr25_sgpr26_sgpr27'
+ frameOffsetReg: '$sgpr32'
+ stackPtrOffsetReg: '$sgpr32'
+ argumentInfo:
+ privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
+ privateSegmentWaveByteOffset: { reg: '$sgpr33' }
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+ ; UNIFIED-LABEL: name: no_reconstrain_use
+ ; UNIFIED: liveins: $vgpr0, $vgpr1
+ ; UNIFIED-NEXT: {{ $}}
+ ; UNIFIED-NEXT: [[DEF:%[0-9]+]]:av_64_align2 = IMPLICIT_DEF
+ ; UNIFIED-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 3473417 /* reguse:AReg_64 */, [[DEF]]
+ ; UNIFIED-NEXT: S_NOP 0, implicit [[DEF]]
+ ; UNIFIED-NEXT: S_ENDPGM 0
+ ;
+ ; SPLIT-LABEL: name: no_reconstrain_use
+ ; SPLIT: liveins: $vgpr0, $vgpr1
+ ; SPLIT-NEXT: {{ $}}
+ ; SPLIT-NEXT: [[DEF:%[0-9]+]]:av_64_align2 = IMPLICIT_DEF
+ ; SPLIT-NEXT: INLINEASM &"; use $0", 0 /* attdialect */, 3473417 /* reguse:AReg_64 */, [[DEF]]
+ ; SPLIT-NEXT: S_NOP 0, implicit [[DEF]]
+ ; SPLIT-NEXT: S_ENDPGM 0
+ %0:av_64_align2 = IMPLICIT_DEF
+ INLINEASM &"; use $0", 0 /* attdialect */, 3473417 /* reguse:AReg_64 */, %0
+ S_NOP 0, implicit %0
+ S_ENDPGM 0
+...
+
+---
+name: no_reconstrain_rp
+tracksRegLiveness: true
+machineFunctionInfo:
+ isEntryFunction: true
+ scratchRSrcReg: '$sgpr24_sgpr25_sgpr26_sgpr27'
+ frameOffsetReg: '$sgpr32'
+ stackPtrOffsetReg: '$sgpr32'
+ argumentInfo:
+ privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
+ privateSegmentWaveByteOffset: { reg: '$sgpr33' }
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+ ; UNIFIED-LABEL: name: no_reconstrain_rp
+ ; UNIFIED: liveins: $vgpr0, $vgpr1
+ ; UNIFIED-NEXT: {{ $}}
+ ; UNIFIED-NEXT: [[DEF:%[0-9]+]]:av_64_align2 = IMPLICIT_DEF
+ ; UNIFIED-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; UNIFIED-NEXT: S_NOP 0, implicit [[DEF]]
+ ; UNIFIED-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; UNIFIED-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; UNIFIED-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; UNIFIED-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; UNIFIED-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; UNIFIED-NEXT: [[DEF7:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; UNIFIED-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; UNIFIED-NEXT: S_NOP 0, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]], implicit [[DEF5]], implicit [[DEF6]], implicit [[DEF7]], implicit [[DEF8]]
+ ; UNIFIED-NEXT: S_ENDPGM 0, amdgpu_allvgprs
+ ;
+ ; SPLIT-LABEL: name: no_reconstrain_rp
+ ; SPLIT: liveins: $vgpr0, $vgpr1
+ ; SPLIT-NEXT: {{ $}}
+ ; SPLIT-NEXT: [[DEF:%[0-9]+]]:av_64_align2 = IMPLICIT_DEF
+ ; SPLIT-NEXT: S_NOP 0, implicit [[DEF]]
+ ; SPLIT-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; SPLIT-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; SPLIT-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; SPLIT-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; SPLIT-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; SPLIT-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; SPLIT-NEXT: [[DEF7:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; SPLIT-NEXT: [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; SPLIT-NEXT: S_NOP 0, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]], implicit [[DEF5]], implicit [[DEF6]], implicit [[DEF7]], implicit [[DEF8]]
+ ; SPLIT-NEXT: S_ENDPGM 0, amdgpu_allvgprs
+ %0:av_64_align2 = IMPLICIT_DEF
+ %1:vreg_1024 = IMPLICIT_DEF
+ %2:vreg_1024 = IMPLICIT_DEF
+ %3:vreg_1024 = IMPLICIT_DEF
+ %4:vreg_1024 = IMPLICIT_DEF
+ %5:vreg_1024 = IMPLICIT_DEF
+ %6:vreg_1024 = IMPLICIT_DEF
+ %7:vreg_1024 = IMPLICIT_DEF
+ %8:vreg_1024 = IMPLICIT_DEF
+ S_NOP 0, implicit %0
+ S_NOP 0, implicit %0, implicit %1, implicit %2, implicit %3, implicit %4, implicit %5, implicit %6, implicit %7, implicit %8
+ S_ENDPGM 0, amdgpu_allvgprs
+...
+
+---
+name: reconstrain_rp
+tracksRegLiveness: true
+machineFunctionInfo:
+ isEntryFunction: true
+ scratchRSrcReg: '$sgpr24_sgpr25_sgpr26_sgpr27'
+ frameOffsetReg: '$sgpr32'
+ stackPtrOffsetReg: '$sgpr32'
+ argumentInfo:
+ privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
+ privateSegmentWaveByteOffset: { reg: '$sgpr33' }
+body: |
+ bb.0:
+ liveins: $vgpr0, $vgpr1
+ ; UNIFIED-LABEL: name: reconstrain_rp
+ ; UNIFIED: liveins: $vgpr0, $vgpr1
+ ; UNIFIED-NEXT: {{ $}}
+ ; UNIFIED-NEXT: [[DEF:%[0-9]+]]:vreg_64_align2 = IMPLICIT_DEF
+ ; UNIFIED-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; UNIFIED-NEXT: S_NOP 0, implicit [[DEF]]
+ ; UNIFIED-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; UNIFIED-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; UNIFIED-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; UNIFIED-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; UNIFIED-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; UNIFIED-NEXT: [[DEF7:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; UNIFIED-NEXT: S_NOP 0, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]], implicit [[DEF5]], implicit [[DEF6]], implicit [[DEF7]]
+ ; UNIFIED-NEXT: dead [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; UNIFIED-NEXT: S_ENDPGM 0, amdgpu_allvgprs
+ ;
+ ; SPLIT-LABEL: name: reconstrain_rp
+ ; SPLIT: liveins: $vgpr0, $vgpr1
+ ; SPLIT-NEXT: {{ $}}
+ ; SPLIT-NEXT: [[DEF:%[0-9]+]]:av_64_align2 = IMPLICIT_DEF
+ ; SPLIT-NEXT: S_NOP 0, implicit [[DEF]]
+ ; SPLIT-NEXT: [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; SPLIT-NEXT: [[DEF2:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; SPLIT-NEXT: [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; SPLIT-NEXT: [[DEF4:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; SPLIT-NEXT: [[DEF5:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; SPLIT-NEXT: [[DEF6:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; SPLIT-NEXT: [[DEF7:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; SPLIT-NEXT: S_NOP 0, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]], implicit [[DEF5]], implicit [[DEF6]], implicit [[DEF7]]
+ ; SPLIT-NEXT: dead [[DEF8:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
+ ; SPLIT-NEXT: S_ENDPGM 0, amdgpu_allvgprs
+ %0:av_64_align2 = IMPLICIT_DEF
+ %1:vreg_1024 = IMPLICIT_DEF
+ %2:vreg_1024 = IMPLICIT_DEF
+ %3:vreg_1024 = IMPLICIT_DEF
+ %4:vreg_1024 = IMPLICIT_DEF
+ %5:vreg_1024 = IMPLICIT_DEF
+ %6:vreg_1024 = IMPLICIT_DEF
+ %7:vreg_1024 = IMPLICIT_DEF
+ %8:vreg_1024 = IMPLICIT_DEF
+ S_NOP 0, implicit %0
+ S_NOP 0, implicit %0, implicit %1, implicit %2, implicit %3, implicit %4, implicit %5, implicit %6, implicit %7
+ S_ENDPGM 0, amdgpu_allvgprs
+...
>From 041a72d7de8e64de1088d946f86fae2a45af863d Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <jeffrey.byrnes at amd.com>
Date: Wed, 23 Jul 2025 17:31:47 -0700
Subject: [PATCH 3/4] Update llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
Co-authored-by: Matt Arsenault <arsenm2 at gmail.com>
---
llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index 9189361324a1c..7dddd3f8ffd6b 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -1048,7 +1048,7 @@ bool GCNSchedStage::initGCNSchedStage() {
bool AVGPRRewriteScheduleStage::reconstrainRegClass(
Register Reg, const TargetRegisterClass *NewRC) const {
- const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
+ const SIInstrInfo *TII = MF.getSubtarget<GCNSubtarget>().getInstrInfo();
const TargetRegisterClass *OldRC = DAG.MRI.getRegClass(Reg);
const TargetRegisterInfo *TRI = DAG.MRI.getTargetRegisterInfo();
const TargetRegisterClass *ConstrainRC = NewRC;
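
The fix swaps the base-class accessor for the subtarget-specific one; querying the subtarget by its derived type yields the SI variants of both info objects directly. A minimal sketch of the idiom (assuming only that MF is an AMDGPU MachineFunction):

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();        // SIInstrInfo, not TargetInstrInfo
  const SIRegisterInfo *TRI = ST.getRegisterInfo();  // SIRegisterInfo likewise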
>From 1d419524fc7dea734f919add2540230bf642e458 Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <jeffrey.byrnes at amd.com>
Date: Wed, 23 Jul 2025 17:31:53 -0700
Subject: [PATCH 4/4] Update llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
Co-authored-by: Matt Arsenault <arsenm2 at gmail.com>
---
llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index 7dddd3f8ffd6b..d4e6e1b6d581a 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -1086,7 +1086,7 @@ bool AVGPRRewriteScheduleStage::initGCNSchedStage() {
// either VGPR or AGPR. However, for the unified RF case, we should only be
// using AGPR if strictly necessary. That is, if the required number of VGPRs
// exceeds the addressable limit. Otherwise, we should be strictly using VGPRs
- // to minimize cross RC copies. Thus, if we are underc this limit, we should
+ // to minimize cross RC copies. Thus, if we are under this limit, we should
// constrain AVReg -> VReg.
// TODO: AVReg constraining for the non-unified case.
if (!ST.hasGFX90AInsts() || DAG.RegionsWithAVRegs.empty() ||