[llvm] [AMDGPU] Prefer lower total register usage in regions with spilling (PR #71882)
Jeffrey Byrnes via llvm-commits
llvm-commits at lists.llvm.org
Thu Dec 14 13:40:39 PST 2023
https://github.com/jrbyrnes updated https://github.com/llvm/llvm-project/pull/71882
>From e8295773ac212824daa71875430cce70bdbbf8d0 Mon Sep 17 00:00:00 2001
From: Jeffrey Byrnes <Jeffrey.Byrnes at amd.com>
Date: Thu, 9 Nov 2023 12:03:06 -0800
Subject: [PATCH] [AMDGPU] Prefer lower total register usage in regions with
spilling
Change-Id: Ib9872c0f675a60b098f73913bdc27d4bf52c7176
---
.../Target/AMDGPU/GCNIterativeScheduler.cpp | 12 +-
llvm/lib/Target/AMDGPU/GCNRegPressure.cpp | 75 +++-
llvm/lib/Target/AMDGPU/GCNRegPressure.h | 16 +-
llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 6 +-
llvm/lib/Target/AMDGPU/GCNSubtarget.h | 6 +
.../Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 4 +-
llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h | 4 +
.../CodeGen/AMDGPU/spill-regpressure-less.mir | 353 ++++++++++++++++++
8 files changed, 459 insertions(+), 17 deletions(-)
create mode 100644 llvm/test/CodeGen/AMDGPU/spill-regpressure-less.mir
diff --git a/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp b/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
index d89c9b1febded0..34108dd5b330bc 100644
--- a/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNIterativeScheduler.cpp
@@ -409,9 +409,8 @@ void GCNIterativeScheduler::scheduleRegion(Region &R, Range &&Schedule,
// Sort recorded regions by pressure - highest at the front
void GCNIterativeScheduler::sortRegionsByPressure(unsigned TargetOcc) {
- const auto &ST = MF.getSubtarget<GCNSubtarget>();
- llvm::sort(Regions, [&ST, TargetOcc](const Region *R1, const Region *R2) {
- return R2->MaxPressure.less(ST, R1->MaxPressure, TargetOcc);
+ llvm::sort(Regions, [this, TargetOcc](const Region *R1, const Region *R2) {
+ return R2->MaxPressure.less(MF, R1->MaxPressure, TargetOcc);
});
}
@@ -517,26 +516,25 @@ void GCNIterativeScheduler::scheduleLegacyMaxOccupancy(
// Minimal Register Strategy
void GCNIterativeScheduler::scheduleMinReg(bool force) {
- const auto &ST = MF.getSubtarget<GCNSubtarget>();
const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
const auto TgtOcc = MFI->getOccupancy();
sortRegionsByPressure(TgtOcc);
auto MaxPressure = Regions.front()->MaxPressure;
for (auto *R : Regions) {
- if (!force && R->MaxPressure.less(ST, MaxPressure, TgtOcc))
+ if (!force && R->MaxPressure.less(MF, MaxPressure, TgtOcc))
break;
BuildDAG DAG(*R, *this);
const auto MinSchedule = makeMinRegSchedule(DAG.getTopRoots(), *this);
const auto RP = getSchedulePressure(*R, MinSchedule);
- LLVM_DEBUG(if (R->MaxPressure.less(ST, RP, TgtOcc)) {
+ LLVM_DEBUG(if (R->MaxPressure.less(MF, RP, TgtOcc)) {
dbgs() << "\nWarning: Pressure becomes worse after minreg!";
printSchedRP(dbgs(), R->MaxPressure, RP);
});
- if (!force && MaxPressure.less(ST, RP, TgtOcc))
+ if (!force && MaxPressure.less(MF, RP, TgtOcc))
break;
scheduleRegion(*R, MinSchedule, RP);
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
index a04c470b7b9762..f3b28031f0185d 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.cpp
@@ -88,9 +88,10 @@ void GCNRegPressure::inc(unsigned Reg,
}
}
-bool GCNRegPressure::less(const GCNSubtarget &ST,
- const GCNRegPressure& O,
+bool GCNRegPressure::less(const MachineFunction &MF, const GCNRegPressure &O,
unsigned MaxOccupancy) const {
+ const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
+
const auto SGPROcc = std::min(MaxOccupancy,
ST.getOccupancyWithNumSGPRs(getSGPRNum()));
const auto VGPROcc =
@@ -104,18 +105,82 @@ bool GCNRegPressure::less(const GCNSubtarget &ST,
const auto Occ = std::min(SGPROcc, VGPROcc);
const auto OtherOcc = std::min(OtherSGPROcc, OtherVGPROcc);
+
+ // Give first precedence to the better occupancy.
if (Occ != OtherOcc)
return Occ > OtherOcc;
+ unsigned MaxVGPRs = ST.getMaxNumVGPRs(MF);
+ unsigned MaxSGPRs = ST.getMaxNumSGPRs(MF);
+
+ // SGPR excess pressure conditions
+ unsigned ExcessSGPR = std::max(static_cast<int>(getSGPRNum() - MaxSGPRs), 0);
+ unsigned OtherExcessSGPR =
+ std::max(static_cast<int>(O.getSGPRNum() - MaxSGPRs), 0);
+
+ auto WaveSize = ST.getWavefrontSize();
+ // The number of virtual VGPRs required to handle excess SGPR
+ unsigned SGPRSpills = (ExcessSGPR + (WaveSize - 1)) / WaveSize;
+ unsigned OtherSGPRSpills = (OtherExcessSGPR + (WaveSize - 1)) / WaveSize;
+
+ unsigned MaxArchVGPRs = ST.getAddressableNumArchVGPRs();
+
+ // Unified excess pressure conditions, accounting for VGPRs used for SGPR
+ // spills
+ unsigned ExcessVGPR = std::max(
+ static_cast<int>(getVGPRNum(ST.hasGFX90AInsts()) + SGPRSpills - MaxVGPRs),
+ 0);
+ unsigned OtherExcessVGPR =
+ std::max(static_cast<int>(O.getVGPRNum(ST.hasGFX90AInsts()) +
+ OtherSGPRSpills - MaxVGPRs),
+ 0);
+ // Arch VGPR excess pressure conditions, accounting for VGPRs used for SGPR
+ // spills
+ unsigned ExcessArchVGPR = std::max(
+ static_cast<int>(getVGPRNum(false) + SGPRSpills - MaxArchVGPRs), 0);
+ unsigned OtherExcessArchVGPR = std::max(
+ static_cast<int>(O.getVGPRNum(false) + OtherSGPRSpills - MaxArchVGPRs),
+ 0);
+ // AGPR excess pressure conditions
+ unsigned ExcessAGPR = std::max(
+ static_cast<int>(ST.hasGFX90AInsts() ? (getAGPRNum() - MaxArchVGPRs)
+ : (getAGPRNum() - MaxVGPRs)),
+ 0);
+ unsigned OtherExcessAGPR = std::max(
+ static_cast<int>(ST.hasGFX90AInsts() ? (O.getAGPRNum() - MaxArchVGPRs)
+ : (O.getAGPRNum() - MaxVGPRs)),
+ 0);
+
+ bool ExcessRP = ExcessSGPR || ExcessVGPR || ExcessArchVGPR || ExcessAGPR;
+ bool OtherExcessRP = OtherExcessSGPR || OtherExcessVGPR ||
+ OtherExcessArchVGPR || OtherExcessAGPR;
+
+ // Give second precedence to the reduced number of spills to hold the register
+ // pressure.
+ if (ExcessRP || OtherExcessRP) {
+ // The difference in excess VGPR pressure, after including VGPRs used for
+ // SGPR spills
+ int VGPRDiff = ((OtherExcessVGPR + OtherExcessArchVGPR + OtherExcessAGPR) -
+ (ExcessVGPR + ExcessArchVGPR + ExcessAGPR));
+
+ int SGPRDiff = OtherExcessSGPR - ExcessSGPR;
+
+ // If excess VGPR is the same, prefer the lower excess SGPR
+ if (VGPRDiff != 0)
+ return VGPRDiff > 0;
+ else if (SGPRDiff != 0)
+ return SGPRDiff > 0;
+ }
+
bool SGPRImportant = SGPROcc < VGPROcc;
const bool OtherSGPRImportant = OtherSGPROcc < OtherVGPROcc;
- // if both pressures disagree on what is more important compare vgprs
+ // If both pressures disagree on what is more important compare vgprs.
if (SGPRImportant != OtherSGPRImportant) {
SGPRImportant = false;
}
- // compare large regs pressure
+ // Give third precedence to lower register tuple pressure.
bool SGPRFirst = SGPRImportant;
for (int I = 2; I > 0; --I, SGPRFirst = !SGPRFirst) {
if (SGPRFirst) {
@@ -130,6 +195,8 @@ bool GCNRegPressure::less(const GCNSubtarget &ST,
return VW < OtherVW;
}
}
+
+ // Give final precedence to lower general RP.
return SGPRImportant ? (getSGPRNum() < O.getSGPRNum()):
(getVGPRNum(ST.hasGFX90AInsts()) <
O.getVGPRNum(ST.hasGFX90AInsts()));
diff --git a/llvm/lib/Target/AMDGPU/GCNRegPressure.h b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
index c750fe74749e2b..aa7457cd3f8f75 100644
--- a/llvm/lib/Target/AMDGPU/GCNRegPressure.h
+++ b/llvm/lib/Target/AMDGPU/GCNRegPressure.h
@@ -74,8 +74,20 @@ struct GCNRegPressure {
return getOccupancy(ST) > O.getOccupancy(ST);
}
- bool less(const GCNSubtarget &ST, const GCNRegPressure& O,
- unsigned MaxOccupancy = std::numeric_limits<unsigned>::max()) const;
+ /// Compares \p this GCNRegPressure to \p O, returning true if \p this is
+ /// less. Since GCNRegPressure contains different types of pressures, and due
+ /// to target-specific peculiarities (e.g. we care about occupancy rather than
+ /// raw register usage), we determine if \p this GCNRegPressure is less than
+ /// \p O based on the following tiered comparisons (in order of
+ /// precedence):
+ /// 1. Better occupancy
+ /// 2. Less spilling (first preference to VGPR spills, then to SGPR spills)
+ /// 3. Less tuple register pressure (first preference to VGPR tuples if we
+ /// determine that SGPR pressure is not important)
+ /// 4. Less raw register pressure (first preference to VGPRs if we
+ /// determine that SGPR pressure is not important)
+ bool less(const MachineFunction &MF, const GCNRegPressure &O,
+ unsigned MaxOccupancy = std::numeric_limits<unsigned>::max()) const;
bool operator==(const GCNRegPressure &O) const {
return std::equal(&Value[0], &Value[TOTAL_KINDS], O.Value);
diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
index c3d60b635d3240..f7d2b458151b9d 100644
--- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp
@@ -961,6 +961,7 @@ void GCNSchedStage::checkScheduling() {
unsigned MaxVGPRs = ST.getMaxNumVGPRs(MF);
unsigned MaxSGPRs = ST.getMaxNumSGPRs(MF);
+
if (PressureAfter.getVGPRNum(false) > MaxVGPRs ||
PressureAfter.getAGPRNum() > MaxVGPRs ||
PressureAfter.getSGPRNum() > MaxSGPRs) {
@@ -1183,9 +1184,8 @@ bool ILPInitialScheduleStage::shouldRevertScheduling(unsigned WavesAfter) {
}
bool GCNSchedStage::mayCauseSpilling(unsigned WavesAfter) {
- if (WavesAfter <= MFI.getMinWavesPerEU() &&
- !PressureAfter.less(ST, PressureBefore) &&
- isRegionWithExcessRP()) {
+ if (WavesAfter <= MFI.getMinWavesPerEU() && isRegionWithExcessRP() &&
+ !PressureAfter.less(MF, PressureBefore)) {
LLVM_DEBUG(dbgs() << "New pressure will result in more spilling.\n");
return true;
}
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index 4c624ed59427c9..3b8edc001f372c 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -1292,6 +1292,12 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
return AMDGPU::IsaInfo::getTotalNumVGPRs(this);
}
+ /// \returns Addressable number of architectural VGPRs supported by the
+ /// subtarget.
+ unsigned getAddressableNumArchVGPRs() const {
+ return AMDGPU::IsaInfo::getAddressableNumArchVGPRs(this);
+ }
+
/// \returns Addressable number of VGPRs supported by the subtarget.
unsigned getAddressableNumVGPRs() const {
return AMDGPU::IsaInfo::getAddressableNumVGPRs(this);
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 7b8a37532c9fad..c9c56b86187dc9 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -1026,10 +1026,12 @@ unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI) {
return IsWave32 ? 1024 : 512;
}
+unsigned getAddressableNumArchVGPRs(const MCSubtargetInfo *STI) { return 256; }
+
unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI) {
if (STI->getFeatureBits().test(FeatureGFX90AInsts))
return 512;
- return 256;
+ return getAddressableNumArchVGPRs(STI);
}
unsigned getNumWavesPerEUWithNumVGPRs(const MCSubtargetInfo *STI,
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index 1e0994d0862cf5..ceee6915716db6 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -288,6 +288,10 @@ unsigned getVGPREncodingGranule(
/// \returns Total number of VGPRs for given subtarget \p STI.
unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI);
+/// \returns Addressable number of architectural VGPRs for a given subtarget \p
+/// STI.
+unsigned getAddressableNumArchVGPRs(const MCSubtargetInfo *STI);
+
/// \returns Addressable number of VGPRs for given subtarget \p STI.
unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI);
diff --git a/llvm/test/CodeGen/AMDGPU/spill-regpressure-less.mir b/llvm/test/CodeGen/AMDGPU/spill-regpressure-less.mir
new file mode 100644
index 00000000000000..f50688240fe8bd
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/spill-regpressure-less.mir
@@ -0,0 +1,353 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3
+# RUN: llc -march=amdgcn -mcpu=gfx90a -run-pass=machine-scheduler -verify-misched -o - %s | FileCheck -check-prefix=GCN %s
+
+--- |
+ define amdgpu_kernel void @spill_regpressure_less() #0 {
+ ret void
+ }
+
+ attributes #0 = { "amdgpu-waves-per-eu"="8,8" }
+...
+
+---
+name: spill_regpressure_less
+tracksRegLiveness: true
+machineFunctionInfo:
+ stackPtrOffsetReg: '$sgpr32'
+ occupancy: 8
+body: |
+ bb.0:
+ ; GCN-LABEL: name: spill_regpressure_less
+ ; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN-NEXT: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN-NEXT: [[DEF3:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN-NEXT: [[DEF4:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN-NEXT: [[DEF5:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN-NEXT: [[DEF6:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN-NEXT: [[DEF7:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN-NEXT: [[DEF8:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN-NEXT: [[DEF9:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN-NEXT: [[DEF10:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN-NEXT: [[DEF11:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN-NEXT: [[DEF12:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN-NEXT: [[DEF13:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN-NEXT: [[DEF14:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN-NEXT: [[DEF15:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN-NEXT: [[DEF16:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN-NEXT: [[DEF17:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN-NEXT: [[DEF18:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN-NEXT: [[DEF19:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN-NEXT: [[DEF20:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN-NEXT: [[DEF21:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN-NEXT: [[DEF22:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN-NEXT: [[DEF23:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN-NEXT: [[DEF24:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN-NEXT: [[DEF25:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN-NEXT: [[DEF26:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN-NEXT: [[DEF27:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN-NEXT: [[DEF28:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN-NEXT: [[DEF29:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN-NEXT: [[DEF30:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN-NEXT: [[DEF31:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN-NEXT: [[DEF32:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN-NEXT: [[DEF33:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN-NEXT: [[DEF34:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN-NEXT: [[DEF35:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN-NEXT: [[DEF36:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN-NEXT: [[DEF37:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN-NEXT: [[DEF38:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN-NEXT: [[DEF39:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN-NEXT: [[DEF40:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN-NEXT: [[DEF41:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN-NEXT: [[DEF42:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN-NEXT: [[DEF43:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN-NEXT: [[DEF44:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN-NEXT: [[DEF45:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN-NEXT: [[DEF46:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN-NEXT: [[DEF47:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN-NEXT: [[DEF48:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN-NEXT: [[DEF49:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN-NEXT: [[DEF50:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN-NEXT: [[DEF51:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN-NEXT: [[DEF52:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN-NEXT: [[DEF53:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN-NEXT: [[DEF54:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN-NEXT: [[DEF55:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN-NEXT: [[DEF56:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN-NEXT: [[DEF57:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN-NEXT: [[DEF58:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN-NEXT: [[DEF59:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN-NEXT: [[DEF60:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN-NEXT: [[DEF61:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN-NEXT: [[DEF62:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN-NEXT: [[DEF63:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN-NEXT: [[DEF64:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN-NEXT: [[DEF65:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN-NEXT: [[DEF66:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF]], implicit [[DEF1]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]], implicit [[DEF5]], implicit [[DEF6]], implicit [[DEF7]], implicit [[DEF8]], implicit [[DEF9]], implicit [[DEF10]], implicit [[DEF11]], implicit [[DEF12]], implicit [[DEF13]], implicit [[DEF14]], implicit [[DEF15]], implicit [[DEF16]], implicit [[DEF17]], implicit [[DEF18]], implicit [[DEF19]], implicit [[DEF20]], implicit [[DEF21]], implicit [[DEF22]], implicit [[DEF23]], implicit [[DEF24]], implicit [[DEF25]], implicit [[DEF26]], implicit [[DEF27]], implicit [[DEF28]], implicit [[DEF29]], implicit [[DEF30]], implicit [[DEF31]], implicit [[DEF32]], implicit [[DEF33]], implicit [[DEF34]], implicit [[DEF35]], implicit [[DEF36]], implicit [[DEF37]], implicit [[DEF38]], implicit [[DEF39]], implicit [[DEF40]], implicit [[DEF41]], implicit [[DEF42]], implicit [[DEF43]], implicit [[DEF44]], implicit [[DEF45]], implicit [[DEF46]], implicit [[DEF47]], implicit [[DEF48]], implicit [[DEF49]], implicit [[DEF50]], implicit [[DEF51]], implicit [[DEF52]], implicit [[DEF53]], implicit [[DEF54]], implicit [[DEF55]], implicit [[DEF56]], implicit [[DEF57]], implicit [[DEF58]], implicit [[DEF59]], implicit [[DEF60]], implicit [[DEF61]], implicit [[DEF62]], implicit [[DEF63]], implicit [[DEF64]], implicit [[DEF65]], implicit [[DEF66]]
+ ; GCN-NEXT: KILL [[DEF]]
+ ; GCN-NEXT: KILL [[DEF1]]
+ ; GCN-NEXT: KILL [[DEF10]]
+ ; GCN-NEXT: KILL [[DEF12]]
+ ; GCN-NEXT: KILL [[DEF13]]
+ ; GCN-NEXT: KILL [[DEF14]]
+ ; GCN-NEXT: KILL [[DEF15]]
+ ; GCN-NEXT: KILL [[DEF16]]
+ ; GCN-NEXT: [[DEF67:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN-NEXT: KILL [[DEF17]]
+ ; GCN-NEXT: [[DEF68:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN-NEXT: [[DEF69:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
+ ; GCN-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF69]], implicit [[DEF23]], implicit [[DEF24]], implicit [[DEF25]], implicit [[DEF26]], implicit [[DEF27]], implicit [[DEF28]]
+ ; GCN-NEXT: KILL [[DEF2]]
+ ; GCN-NEXT: KILL [[DEF3]]
+ ; GCN-NEXT: KILL [[DEF4]]
+ ; GCN-NEXT: KILL [[DEF5]]
+ ; GCN-NEXT: KILL [[DEF6]]
+ ; GCN-NEXT: KILL [[DEF7]]
+ ; GCN-NEXT: KILL [[DEF8]]
+ ; GCN-NEXT: KILL [[DEF9]]
+ ; GCN-NEXT: KILL [[DEF18]]
+ ; GCN-NEXT: KILL [[DEF19]]
+ ; GCN-NEXT: [[DEF70:%[0-9]+]]:sgpr_128 = IMPLICIT_DEF
+ ; GCN-NEXT: INLINEASM &"", 1 /* sideeffect attdialect */, implicit [[DEF70]], implicit [[DEF2]], implicit [[DEF3]], implicit [[DEF4]], implicit [[DEF5]], implicit [[DEF6]], implicit [[DEF7]], implicit [[DEF8]], implicit [[DEF9]]
+ ; GCN-NEXT: KILL [[DEF69]], implicit-def %70, implicit-def %71, implicit-def %72, implicit-def %73, implicit-def %74, implicit-def %75, implicit-def %76, implicit-def %77
+ ; GCN-NEXT: [[DEF71:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN-NEXT: [[DEF72:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN-NEXT: KILL [[DEF20]]
+ ; GCN-NEXT: [[DEF73:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN-NEXT: KILL [[DEF11]]
+ ; GCN-NEXT: [[DEF74:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN-NEXT: KILL [[DEF21]]
+ ; GCN-NEXT: [[DEF75:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN-NEXT: KILL [[DEF22]]
+ ; GCN-NEXT: [[DEF76:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
+ ; GCN-NEXT: KILL [[DEF23]]
+ ; GCN-NEXT: KILL [[DEF24]]
+ ; GCN-NEXT: KILL [[DEF25]]
+ ; GCN-NEXT: KILL [[DEF26]]
+ ; GCN-NEXT: KILL [[DEF27]]
+ ; GCN-NEXT: KILL [[DEF28]]
+ ; GCN-NEXT: KILL [[DEF29]]
+ ; GCN-NEXT: KILL [[DEF30]]
+ ; GCN-NEXT: KILL [[DEF31]]
+ ; GCN-NEXT: KILL [[DEF32]]
+ ; GCN-NEXT: KILL [[DEF33]]
+ ; GCN-NEXT: KILL [[DEF34]]
+ ; GCN-NEXT: KILL [[DEF35]]
+ ; GCN-NEXT: KILL [[DEF36]]
+ ; GCN-NEXT: KILL [[DEF37]]
+ ; GCN-NEXT: KILL [[DEF38]]
+ ; GCN-NEXT: KILL [[DEF39]]
+ ; GCN-NEXT: KILL [[DEF40]]
+ ; GCN-NEXT: KILL [[DEF41]]
+ ; GCN-NEXT: KILL [[DEF42]]
+ ; GCN-NEXT: KILL [[DEF43]]
+ ; GCN-NEXT: KILL [[DEF44]]
+ ; GCN-NEXT: KILL [[DEF45]]
+ ; GCN-NEXT: KILL [[DEF46]]
+ ; GCN-NEXT: KILL [[DEF47]]
+ ; GCN-NEXT: KILL [[DEF48]]
+ ; GCN-NEXT: KILL [[DEF49]]
+ ; GCN-NEXT: KILL [[DEF50]]
+ ; GCN-NEXT: KILL [[DEF51]]
+ ; GCN-NEXT: KILL [[DEF52]]
+ ; GCN-NEXT: KILL [[DEF53]]
+ ; GCN-NEXT: KILL [[DEF54]]
+ ; GCN-NEXT: KILL [[DEF55]]
+ ; GCN-NEXT: KILL [[DEF56]]
+ ; GCN-NEXT: KILL [[DEF57]]
+ ; GCN-NEXT: KILL [[DEF58]]
+ ; GCN-NEXT: KILL [[DEF59]]
+ ; GCN-NEXT: KILL [[DEF60]]
+ ; GCN-NEXT: KILL [[DEF61]]
+ ; GCN-NEXT: KILL [[DEF62]]
+ ; GCN-NEXT: KILL [[DEF63]]
+ ; GCN-NEXT: KILL [[DEF64]]
+ ; GCN-NEXT: KILL [[DEF65]]
+ ; GCN-NEXT: KILL [[DEF66]]
+ ; GCN-NEXT: KILL [[DEF67]]
+ ; GCN-NEXT: KILL [[DEF68]]
+ ; GCN-NEXT: KILL [[DEF71]]
+ ; GCN-NEXT: KILL [[DEF72]]
+ ; GCN-NEXT: KILL [[DEF73]]
+ ; GCN-NEXT: KILL [[DEF74]]
+ ; GCN-NEXT: KILL [[DEF75]]
+ ; GCN-NEXT: KILL [[DEF76]]
+ ; GCN-NEXT: KILL [[DEF70]]
+ ; GCN-NEXT: KILL %70
+ ; GCN-NEXT: KILL %71
+ ; GCN-NEXT: KILL %72
+ ; GCN-NEXT: KILL %73
+ ; GCN-NEXT: KILL %74
+ ; GCN-NEXT: KILL %75
+ ; GCN-NEXT: KILL %76
+ ; GCN-NEXT: KILL %77
+ %0:vgpr_32 = IMPLICIT_DEF
+ %1:vgpr_32 = IMPLICIT_DEF
+ %2:vgpr_32 = IMPLICIT_DEF
+ %3:vgpr_32 = IMPLICIT_DEF
+ %4:vgpr_32 = IMPLICIT_DEF
+ %5:vgpr_32 = IMPLICIT_DEF
+ %6:vgpr_32 = IMPLICIT_DEF
+ %7:vgpr_32 = IMPLICIT_DEF
+ %8:vgpr_32 = IMPLICIT_DEF
+ %9:vgpr_32 = IMPLICIT_DEF
+ %10:vgpr_32 = IMPLICIT_DEF
+ %11:vgpr_32 = IMPLICIT_DEF
+ %12:vgpr_32 = IMPLICIT_DEF
+ %13:vgpr_32 = IMPLICIT_DEF
+ %14:vgpr_32 = IMPLICIT_DEF
+ %15:vgpr_32 = IMPLICIT_DEF
+ %16:vgpr_32 = IMPLICIT_DEF
+ %17:vgpr_32 = IMPLICIT_DEF
+ %18:vgpr_32 = IMPLICIT_DEF
+ %19:vgpr_32 = IMPLICIT_DEF
+ %20:vgpr_32 = IMPLICIT_DEF
+ %21:vgpr_32 = IMPLICIT_DEF
+ %22:vgpr_32 = IMPLICIT_DEF
+ %23:vgpr_32 = IMPLICIT_DEF
+ %24:vgpr_32 = IMPLICIT_DEF
+ %25:vgpr_32 = IMPLICIT_DEF
+ %26:vgpr_32 = IMPLICIT_DEF
+ %27:vgpr_32 = IMPLICIT_DEF
+ %28:vgpr_32 = IMPLICIT_DEF
+ %29:vgpr_32 = IMPLICIT_DEF
+ %30:vgpr_32 = IMPLICIT_DEF
+ %31:vgpr_32 = IMPLICIT_DEF
+ %32:vgpr_32 = IMPLICIT_DEF
+ %33:vgpr_32 = IMPLICIT_DEF
+ %34:vgpr_32 = IMPLICIT_DEF
+ %35:vgpr_32 = IMPLICIT_DEF
+ %36:vgpr_32 = IMPLICIT_DEF
+ %37:vgpr_32 = IMPLICIT_DEF
+ %38:vgpr_32 = IMPLICIT_DEF
+ %39:vgpr_32 = IMPLICIT_DEF
+ %40:vgpr_32 = IMPLICIT_DEF
+ %41:vgpr_32 = IMPLICIT_DEF
+ %42:vgpr_32 = IMPLICIT_DEF
+ %43:vgpr_32 = IMPLICIT_DEF
+ %44:vgpr_32 = IMPLICIT_DEF
+ %45:vgpr_32 = IMPLICIT_DEF
+ %46:vgpr_32 = IMPLICIT_DEF
+ %47:vgpr_32 = IMPLICIT_DEF
+ %48:vgpr_32 = IMPLICIT_DEF
+ %49:vgpr_32 = IMPLICIT_DEF
+ %50:vgpr_32 = IMPLICIT_DEF
+ %51:vgpr_32 = IMPLICIT_DEF
+ %52:vgpr_32 = IMPLICIT_DEF
+ %53:vgpr_32 = IMPLICIT_DEF
+ %54:vgpr_32 = IMPLICIT_DEF
+ %55:vgpr_32 = IMPLICIT_DEF
+ %56:vgpr_32 = IMPLICIT_DEF
+ %57:vgpr_32 = IMPLICIT_DEF
+ %58:vgpr_32 = IMPLICIT_DEF
+ %59:vgpr_32 = IMPLICIT_DEF
+ %60:vgpr_32 = IMPLICIT_DEF
+ %61:vgpr_32 = IMPLICIT_DEF
+ %62:vgpr_32 = IMPLICIT_DEF
+ %63:vgpr_32 = IMPLICIT_DEF
+ %64:vgpr_32 = IMPLICIT_DEF
+ %65:vgpr_32 = IMPLICIT_DEF
+ %66:vgpr_32 = IMPLICIT_DEF
+ %67:vgpr_32 = IMPLICIT_DEF
+ %68:vgpr_32 = IMPLICIT_DEF
+ INLINEASM &"", 1, implicit %0, implicit %1, implicit %2, implicit %3, implicit %4, implicit %5, implicit %6, implicit %7, implicit %8, implicit %9, implicit %10, implicit %11, implicit %12, implicit %13, implicit %14, implicit %15, implicit %16, implicit %17, implicit %18, implicit %19, implicit %20, implicit %21, implicit %22, implicit %23, implicit %24, implicit %25, implicit %26, implicit %27, implicit %28, implicit %29, implicit %30, implicit %31, implicit %32, implicit %33, implicit %34, implicit %35, implicit %36, implicit %37, implicit %38, implicit %39, implicit %40, implicit %41, implicit %42, implicit %43, implicit %44, implicit %45, implicit %46, implicit %47, implicit %48, implicit %49, implicit %50, implicit %51, implicit %52, implicit %53, implicit %54, implicit %55, implicit %56, implicit %57, implicit %58, implicit %59, implicit %60, implicit %61, implicit %62, implicit %63, implicit %64, implicit %65, implicit %66
+ %69:sgpr_128 = IMPLICIT_DEF
+ INLINEASM &"", 1, implicit %69, implicit %23, implicit %24, implicit %25, implicit %26, implicit %27, implicit %28
+ KILL %0
+ KILL %1
+ KILL %2
+ KILL %3
+ KILL %4
+ KILL %5
+ KILL %6
+ KILL %7
+ KILL %8
+ KILL %9
+ KILL %10
+ KILL %12
+ KILL %13
+ KILL %14
+ KILL %15
+ KILL %16
+ KILL %17
+ KILL %18
+ KILL %19
+ KILL %69:sgpr_128, implicit-def %77:vgpr_32, implicit-def %78:vgpr_32, implicit-def %79:vgpr_32, implicit-def %80:vgpr_32, implicit-def %81:vgpr_32, implicit-def %82:vgpr_32, implicit-def %83:vgpr_32, implicit-def %84:vgpr_32
+ %70:vgpr_32 = IMPLICIT_DEF
+ %71:vgpr_32 = IMPLICIT_DEF
+ %72:vgpr_32 = IMPLICIT_DEF
+ %73:vgpr_32 = IMPLICIT_DEF
+ %74:vgpr_32 = IMPLICIT_DEF
+ %75:vgpr_32 = IMPLICIT_DEF
+ %76:sgpr_128 = IMPLICIT_DEF
+ INLINEASM &"", 1, implicit %76, implicit %2, implicit %3, implicit %4, implicit %5, implicit %6, implicit %7, implicit %8, implicit %9
+ KILL %20
+ KILL %11
+ KILL %21
+ KILL %22
+ KILL %23
+ KILL %24
+ KILL %25
+ KILL %26
+ KILL %27
+ KILL %28
+ KILL %29
+ KILL %30
+ KILL %31
+ KILL %32
+ KILL %33
+ KILL %34
+ KILL %35
+ KILL %36
+ KILL %37
+ KILL %38
+ KILL %39
+ KILL %40
+ KILL %41
+ KILL %42
+ KILL %43
+ KILL %44
+ KILL %45
+ KILL %46
+ KILL %47
+ KILL %48
+ KILL %49
+ KILL %50
+ KILL %51
+ KILL %52
+ KILL %53
+ KILL %54
+ KILL %55
+ KILL %56
+ KILL %57
+ KILL %58
+ KILL %59
+ KILL %60
+ KILL %61
+ KILL %62
+ KILL %63
+ KILL %64
+ KILL %65
+ KILL %66
+ KILL %67
+ KILL %68
+ KILL %70
+ KILL %71
+ KILL %72
+ KILL %73
+ KILL %74
+ KILL %75
+ KILL %76
+ KILL %77
+ KILL %78
+ KILL %79
+ KILL %80
+ KILL %81
+ KILL %82
+ KILL %83
+ KILL %84
+...
+## NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+# GCN: {{.*}}
More information about the llvm-commits
mailing list