[llvm] 5bc703f - [AMDGPU] Replace getPhysRegClass with getPhysRegBaseClass
Carl Ritson via llvm-commits
llvm-commits at lists.llvm.org
Mon Dec 19 23:23:31 PST 2022
Author: Carl Ritson
Date: 2022-12-20T16:22:14+09:00
New Revision: 5bc703f755579c0a0e38d8e77146e8e3dd8a955d
URL: https://github.com/llvm/llvm-project/commit/5bc703f755579c0a0e38d8e77146e8e3dd8a955d
DIFF: https://github.com/llvm/llvm-project/commit/5bc703f755579c0a0e38d8e77146e8e3dd8a955d.diff
LOG: [AMDGPU] Replace getPhysRegClass with getPhysRegBaseClass
Accelerate finding the base class for a physical register by
building a static mapping table from physical registers
to base classes using TableGen.
Replace uses of SIRegisterInfo::getPhysRegClass with
TargetRegisterInfo::getPhysRegBaseClass in order to use
the computed table.
Reviewed By: arsenm, foad
Differential Revision: https://reviews.llvm.org/D139422
Added:
Modified:
llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
llvm/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
llvm/lib/Target/AMDGPU/SIRegisterInfo.h
llvm/lib/Target/AMDGPU/SIRegisterInfo.td
llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
index 0260c0e44d9ae..9aa3222757eae 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelDAGToDAG.cpp
@@ -356,7 +356,7 @@ const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
const SIRegisterInfo *TRI
= static_cast<const GCNSubtarget *>(Subtarget)->getRegisterInfo();
- return TRI->getPhysRegClass(Reg);
+ return TRI->getPhysRegBaseClass(Reg);
}
return nullptr;
@@ -1429,8 +1429,10 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Parent,
static bool IsCopyFromSGPR(const SIRegisterInfo &TRI, SDValue Val) {
if (Val.getOpcode() != ISD::CopyFromReg)
return false;
- auto RC =
- TRI.getPhysRegClass(cast<RegisterSDNode>(Val.getOperand(1))->getReg());
+ auto Reg = cast<RegisterSDNode>(Val.getOperand(1))->getReg();
+ if (!Reg.isPhysical())
+ return false;
+ auto RC = TRI.getPhysRegBaseClass(Reg);
return RC && TRI.isSGPRClass(RC);
}
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index 8b06ba2ef2e32..26aec15279ef0 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -1210,7 +1210,7 @@ bool GCNHazardRecognizer::fixSMEMtoVectorWriteHazards(MachineInstr *MI) {
const MachineOperand *SDST = TII->getNamedOperand(*MI, SDSTName);
if (!SDST) {
for (const auto &MO : MI->implicit_operands()) {
- if (MO.isDef() && TRI->isSGPRClass(TRI->getPhysRegClass(MO.getReg()))) {
+ if (MO.isDef() && TRI->isSGPRClass(TRI->getPhysRegBaseClass(MO.getReg()))) {
SDST = &MO;
break;
}
@@ -1291,7 +1291,7 @@ bool GCNHazardRecognizer::fixVcmpxExecWARHazard(MachineInstr *MI) {
if (TII->getNamedOperand(MI, AMDGPU::OpName::sdst))
return true;
for (auto MO : MI.implicit_operands())
- if (MO.isDef() && TRI->isSGPRClass(TRI->getPhysRegClass(MO.getReg())))
+ if (MO.isDef() && TRI->isSGPRClass(TRI->getPhysRegBaseClass(MO.getReg())))
return true;
}
if (MI.getOpcode() == AMDGPU::S_WAITCNT_DEPCTR &&
diff --git a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
index f0dfa5f8827d1..e5a028823e725 100644
--- a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
@@ -187,14 +187,14 @@ getCopyRegClasses(const MachineInstr &Copy,
const TargetRegisterClass *SrcRC = SrcReg.isVirtual()
? MRI.getRegClass(SrcReg)
- : TRI.getPhysRegClass(SrcReg);
+ : TRI.getPhysRegBaseClass(SrcReg);
// We don't really care about the subregister here.
// SrcRC = TRI.getSubRegClass(SrcRC, Copy.getOperand(1).getSubReg());
const TargetRegisterClass *DstRC = DstReg.isVirtual()
? MRI.getRegClass(DstReg)
- : TRI.getPhysRegClass(DstReg);
+ : TRI.getPhysRegBaseClass(DstReg);
return std::pair(SrcRC, DstRC);
}
diff --git a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
index 4e82ef17a3466..1d7d0dfd9a949 100644
--- a/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -338,7 +338,7 @@ class PrologEpilogSGPRSpillBuilder {
ST(MF.getSubtarget<GCNSubtarget>()), MFI(MF.getFrameInfo()),
FuncInfo(MF.getInfo<SIMachineFunctionInfo>()), TII(TII), TRI(TRI),
SuperReg(Reg), SI(SI), LiveRegs(LiveRegs), DL(DL), FrameReg(FrameReg) {
- const TargetRegisterClass *RC = TRI.getPhysRegClass(SuperReg);
+ const TargetRegisterClass *RC = TRI.getPhysRegBaseClass(SuperReg);
SplitParts = TRI.getRegSplitParts(RC, EltSize);
NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();
@@ -1322,7 +1322,7 @@ void SIFrameLowering::processFunctionBeforeFrameFinalized(
// Allocate spill slots for WWM reserved VGPRs.
if (!FuncInfo->isEntryFunction()) {
for (Register Reg : FuncInfo->getWWMReservedRegs()) {
- const TargetRegisterClass *RC = TRI->getPhysRegClass(Reg);
+ const TargetRegisterClass *RC = TRI->getPhysRegBaseClass(Reg);
FuncInfo->allocateWWMSpill(MF, Reg, TRI->getSpillSize(*RC),
TRI->getSpillAlign(*RC));
}
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 6fc66187a1c6e..76fd98a174b64 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -12342,7 +12342,7 @@ SITargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI_,
auto Ret = TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
if (Ret.first)
- Ret.second = TRI->getPhysRegClass(Ret.first);
+ Ret.second = TRI->getPhysRegBaseClass(Ret.first);
return Ret;
}
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 60077111d89a3..b29fa1ae77184 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -712,13 +712,13 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
const DebugLoc &DL, MCRegister DestReg,
MCRegister SrcReg, bool KillSrc) const {
- const TargetRegisterClass *RC = RI.getPhysRegClass(DestReg);
+ const TargetRegisterClass *RC = RI.getPhysRegBaseClass(DestReg);
// FIXME: This is hack to resolve copies between 16 bit and 32 bit
// registers until all patterns are fixed.
if (Fix16BitCopies &&
((RI.getRegSizeInBits(*RC) == 16) ^
- (RI.getRegSizeInBits(*RI.getPhysRegClass(SrcReg)) == 16))) {
+ (RI.getRegSizeInBits(*RI.getPhysRegBaseClass(SrcReg)) == 16))) {
MCRegister &RegToFix = (RI.getRegSizeInBits(*RC) == 16) ? DestReg : SrcReg;
MCRegister Super = RI.get32BitRegister(RegToFix);
assert(RI.getSubReg(Super, AMDGPU::lo16) == RegToFix);
@@ -730,7 +730,7 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
return;
}
- RC = RI.getPhysRegClass(DestReg);
+ RC = RI.getPhysRegBaseClass(DestReg);
}
if (RC == &AMDGPU::VGPR_32RegClass) {
@@ -920,7 +920,7 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
return;
}
- const TargetRegisterClass *SrcRC = RI.getPhysRegClass(SrcReg);
+ const TargetRegisterClass *SrcRC = RI.getPhysRegBaseClass(SrcReg);
if (RC == RI.getVGPR64Class() && (SrcRC == RC || RI.isSGPRClass(SrcRC))) {
if (ST.hasMovB64()) {
BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B64_e32), DestReg)
@@ -3211,7 +3211,7 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
Src0Inlined = true;
} else if ((Src0->getReg().isPhysical() &&
(ST.getConstantBusLimit(Opc) <= 1 &&
- RI.isSGPRClass(RI.getPhysRegClass(Src0->getReg())))) ||
+ RI.isSGPRClass(RI.getPhysRegBaseClass(Src0->getReg())))) ||
(Src0->getReg().isVirtual() &&
(ST.getConstantBusLimit(Opc) <= 1 &&
RI.isSGPRClass(MRI->getRegClass(Src0->getReg())))))
@@ -3228,7 +3228,7 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
commuteInstruction(UseMI)) {
Src0->ChangeToImmediate(Def->getOperand(1).getImm());
} else if ((Src1->getReg().isPhysical() &&
- RI.isSGPRClass(RI.getPhysRegClass(Src1->getReg()))) ||
+ RI.isSGPRClass(RI.getPhysRegBaseClass(Src1->getReg()))) ||
(Src1->getReg().isVirtual() &&
RI.isSGPRClass(MRI->getRegClass(Src1->getReg()))))
return false;
@@ -4991,7 +4991,7 @@ const TargetRegisterClass *SIInstrInfo::getOpRegClass(const MachineInstr &MI,
if (Reg.isVirtual())
return MRI.getRegClass(Reg);
- return RI.getPhysRegClass(Reg);
+ return RI.getPhysRegBaseClass(Reg);
}
unsigned RCID = Desc.OpInfo[OpNo].RegClass;
@@ -8468,7 +8468,7 @@ SIInstrInfo::getInstructionUniformity(const MachineInstr &MI) const {
if (opcode == AMDGPU::COPY) {
const MachineOperand &srcOp = MI.getOperand(1);
if (srcOp.isReg() && srcOp.getReg().isPhysical()) {
- const TargetRegisterClass *regClass = RI.getPhysRegClass(srcOp.getReg());
+ const TargetRegisterClass *regClass = RI.getPhysRegBaseClass(srcOp.getReg());
return RI.isSGPRClass(regClass) ? InstructionUniformity::AlwaysUniform
: InstructionUniformity::NeverUniform;
}
@@ -8498,7 +8498,7 @@ SIInstrInfo::getInstructionUniformity(const MachineInstr &MI) const {
// Handling $vpgr reads
for (auto srcOp : MI.operands()) {
if (srcOp.isReg() && srcOp.getReg().isPhysical()) {
- const TargetRegisterClass *regClass = RI.getPhysRegClass(srcOp.getReg());
+ const TargetRegisterClass *regClass = RI.getPhysRegBaseClass(srcOp.getReg());
if (RI.isVGPRClass(regClass))
return InstructionUniformity::NeverUniform;
}
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 15660c60e45d5..9c524d7cb2e8d 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -11,13 +11,13 @@
//
//===----------------------------------------------------------------------===//
-#include "SIRegisterInfo.h"
#include "AMDGPU.h"
#include "AMDGPURegisterBankInfo.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUInstPrinter.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIMachineFunctionInfo.h"
+#include "SIRegisterInfo.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineDominators.h"
@@ -122,7 +122,7 @@ struct SGPRSpillBuilder {
Index(Index), RS(RS), MBB(MI->getParent()), MF(*MBB->getParent()),
MFI(*MF.getInfo<SIMachineFunctionInfo>()), TII(TII), TRI(TRI),
IsWave32(IsWave32) {
- const TargetRegisterClass *RC = TRI.getPhysRegClass(SuperReg);
+ const TargetRegisterClass *RC = TRI.getPhysRegBaseClass(SuperReg);
SplitParts = TRI.getRegSplitParts(RC, EltSize);
NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();
@@ -2824,104 +2824,13 @@ SIRegisterInfo::getSGPRClassForBitWidth(unsigned BitWidth) {
return nullptr;
}
-// FIXME: This is very slow. It might be worth creating a map from physreg to
-// register class.
-const TargetRegisterClass *
-SIRegisterInfo::getPhysRegClass(MCRegister Reg) const {
- static const TargetRegisterClass *const BaseClasses[] = {
- &AMDGPU::VGPR_LO16RegClass,
- &AMDGPU::VGPR_HI16RegClass,
- &AMDGPU::SReg_LO16RegClass,
- &AMDGPU::AGPR_LO16RegClass,
- &AMDGPU::VGPR_32RegClass,
- &AMDGPU::SReg_32RegClass,
- &AMDGPU::AGPR_32RegClass,
- &AMDGPU::AGPR_32RegClass,
- &AMDGPU::VReg_64_Align2RegClass,
- &AMDGPU::VReg_64RegClass,
- &AMDGPU::SReg_64RegClass,
- &AMDGPU::AReg_64_Align2RegClass,
- &AMDGPU::AReg_64RegClass,
- &AMDGPU::VReg_96_Align2RegClass,
- &AMDGPU::VReg_96RegClass,
- &AMDGPU::SReg_96RegClass,
- &AMDGPU::AReg_96_Align2RegClass,
- &AMDGPU::AReg_96RegClass,
- &AMDGPU::VReg_128_Align2RegClass,
- &AMDGPU::VReg_128RegClass,
- &AMDGPU::SReg_128RegClass,
- &AMDGPU::AReg_128_Align2RegClass,
- &AMDGPU::AReg_128RegClass,
- &AMDGPU::VReg_160_Align2RegClass,
- &AMDGPU::VReg_160RegClass,
- &AMDGPU::SReg_160RegClass,
- &AMDGPU::AReg_160_Align2RegClass,
- &AMDGPU::AReg_160RegClass,
- &AMDGPU::VReg_192_Align2RegClass,
- &AMDGPU::VReg_192RegClass,
- &AMDGPU::SReg_192RegClass,
- &AMDGPU::AReg_192_Align2RegClass,
- &AMDGPU::AReg_192RegClass,
- &AMDGPU::VReg_224_Align2RegClass,
- &AMDGPU::VReg_224RegClass,
- &AMDGPU::SReg_224RegClass,
- &AMDGPU::AReg_224_Align2RegClass,
- &AMDGPU::AReg_224RegClass,
- &AMDGPU::VReg_256_Align2RegClass,
- &AMDGPU::VReg_256RegClass,
- &AMDGPU::SReg_256RegClass,
- &AMDGPU::AReg_256_Align2RegClass,
- &AMDGPU::AReg_256RegClass,
- &AMDGPU::VReg_288_Align2RegClass,
- &AMDGPU::VReg_288RegClass,
- &AMDGPU::SReg_288RegClass,
- &AMDGPU::AReg_288_Align2RegClass,
- &AMDGPU::AReg_288RegClass,
- &AMDGPU::VReg_320_Align2RegClass,
- &AMDGPU::VReg_320RegClass,
- &AMDGPU::SReg_320RegClass,
- &AMDGPU::AReg_320_Align2RegClass,
- &AMDGPU::AReg_320RegClass,
- &AMDGPU::VReg_352_Align2RegClass,
- &AMDGPU::VReg_352RegClass,
- &AMDGPU::SReg_352RegClass,
- &AMDGPU::AReg_352_Align2RegClass,
- &AMDGPU::AReg_352RegClass,
- &AMDGPU::VReg_384_Align2RegClass,
- &AMDGPU::VReg_384RegClass,
- &AMDGPU::SReg_384RegClass,
- &AMDGPU::AReg_384_Align2RegClass,
- &AMDGPU::AReg_384RegClass,
- &AMDGPU::VReg_512_Align2RegClass,
- &AMDGPU::VReg_512RegClass,
- &AMDGPU::SReg_512RegClass,
- &AMDGPU::AReg_512_Align2RegClass,
- &AMDGPU::AReg_512RegClass,
- &AMDGPU::SReg_1024RegClass,
- &AMDGPU::VReg_1024_Align2RegClass,
- &AMDGPU::VReg_1024RegClass,
- &AMDGPU::AReg_1024_Align2RegClass,
- &AMDGPU::AReg_1024RegClass,
- &AMDGPU::SCC_CLASSRegClass,
- &AMDGPU::Pseudo_SReg_32RegClass,
- &AMDGPU::Pseudo_SReg_128RegClass,
- };
-
- for (const TargetRegisterClass *BaseClass : BaseClasses) {
- if (BaseClass->contains(Reg)) {
- return BaseClass;
- }
- }
- return nullptr;
-}
-
bool SIRegisterInfo::isSGPRReg(const MachineRegisterInfo &MRI,
Register Reg) const {
const TargetRegisterClass *RC;
if (Reg.isVirtual())
RC = MRI.getRegClass(Reg);
else
- RC = getPhysRegClass(Reg);
+ RC = getPhysRegBaseClass(Reg);
return RC ? isSGPRClass(RC) : false;
}
@@ -3038,7 +2947,7 @@ ArrayRef<int16_t> SIRegisterInfo::getRegSplitParts(const TargetRegisterClass *RC
const TargetRegisterClass*
SIRegisterInfo::getRegClassForReg(const MachineRegisterInfo &MRI,
Register Reg) const {
- return Reg.isVirtual() ? MRI.getRegClass(Reg) : getPhysRegClass(Reg);
+ return Reg.isVirtual() ? MRI.getRegClass(Reg) : getPhysRegBaseClass(Reg);
}
const TargetRegisterClass *
@@ -3248,7 +3157,7 @@ MachineInstr *SIRegisterInfo::findReachingDef(Register Reg, unsigned SubReg,
}
MCPhysReg SIRegisterInfo::get32BitRegister(MCPhysReg Reg) const {
- assert(getRegSizeInBits(*getPhysRegClass(Reg)) <= 32);
+ assert(getRegSizeInBits(*getPhysRegBaseClass(Reg)) <= 32);
for (const TargetRegisterClass &RC : { AMDGPU::VGPR_32RegClass,
AMDGPU::SReg_32RegClass,
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
index 17ce8d21980fd..d864f7e867adc 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
@@ -182,10 +182,6 @@ class SIRegisterInfo final : public AMDGPUGenRegisterInfo {
LLVM_READONLY
static const TargetRegisterClass *getSGPRClassForBitWidth(unsigned BitWidth);
- /// Return the 'base' register class for this register.
- /// e.g. SGPR0 => SReg_32, VGPR => VGPR_32 SGPR0_SGPR1 -> SReg_32, etc.
- const TargetRegisterClass *getPhysRegClass(MCRegister Reg) const;
-
/// \returns true if this class contains only SGPR registers
static bool isSGPRClass(const TargetRegisterClass *RC) {
return hasSGPRs(RC) && !hasVGPRs(RC) && !hasAGPRs(RC);
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
index ccc65f0bef9b2..12053c4b87245 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td
@@ -373,6 +373,7 @@ def SCC_CLASS : SIRegisterClass<"AMDGPU", [i1], 1, (add SCC)> {
let CopyCost = -1;
let isAllocatable = 0;
let HasSGPR = 1;
+ let BaseClassOrder = 10000;
}
def M0_CLASS : SIRegisterClass<"AMDGPU", [i32], 32, (add M0)> {
@@ -598,6 +599,7 @@ def VGPR_LO16 : SIRegisterClass<"AMDGPU", Reg16Types.types, 16,
let AllocationPriority = 0;
let Size = 16;
let GeneratePressureSet = 0;
+ let BaseClassOrder = 16;
}
def VGPR_HI16 : SIRegisterClass<"AMDGPU", Reg16Types.types, 16,
@@ -605,6 +607,7 @@ def VGPR_HI16 : SIRegisterClass<"AMDGPU", Reg16Types.types, 16,
let AllocationPriority = 0;
let Size = 16;
let GeneratePressureSet = 0;
+ let BaseClassOrder = 17;
}
// VGPR 32-bit registers
@@ -614,6 +617,7 @@ def VGPR_32 : SIRegisterClass<"AMDGPU", !listconcat(Reg32Types.types, Reg16Types
let AllocationPriority = 0;
let Size = 32;
let Weight = 1;
+ let BaseClassOrder = 32;
}
// Identical to VGPR_32 except it only contains the low 128 (Lo128) registers.
@@ -671,6 +675,7 @@ def AGPR_LO16 : SIRegisterClass<"AMDGPU", Reg16Types.types, 16,
let isAllocatable = 0;
let Size = 16;
let GeneratePressureSet = 0;
+ let BaseClassOrder = 16;
}
// AccVGPR 32-bit registers
@@ -679,6 +684,7 @@ def AGPR_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32,
let AllocationPriority = 0;
let Size = 32;
let Weight = 1;
+ let BaseClassOrder = 32;
}
} // End HasAGPR = 1
@@ -730,6 +736,7 @@ def Pseudo_SReg_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16
let isAllocatable = 0;
let CopyCost = -1;
let HasSGPR = 1;
+ let BaseClassOrder = 10000;
}
def Pseudo_SReg_128 : SIRegisterClass<"AMDGPU", [v4i32, v2i64, v2f64, v8i16, v8f16], 32,
@@ -737,6 +744,7 @@ def Pseudo_SReg_128 : SIRegisterClass<"AMDGPU", [v4i32, v2i64, v2f64, v8i16, v8f
let isAllocatable = 0;
let CopyCost = -1;
let HasSGPR = 1;
+ let BaseClassOrder = 10000;
}
def LDS_DIRECT_CLASS : RegisterClass<"AMDGPU", [i32], 32,
@@ -767,6 +775,7 @@ def SReg_LO16 : SIRegisterClass<"AMDGPU", [i16, f16], 16,
SRC_EXECZ_LO16, SRC_SCC_LO16, EXEC_LO_LO16, EXEC_HI_LO16, M0_CLASS_LO16)> {
let Size = 16;
let AllocationPriority = 0;
+ let BaseClassOrder = 16;
}
def SReg_32_XEXEC : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1], 32,
@@ -791,6 +800,7 @@ def SReg_32 : SIRegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16, i1],
(add SReg_32_XM0, M0_CLASS)> {
let AllocationPriority = 0;
let HasSGPR = 1;
+ let BaseClassOrder = 32;
}
let GeneratePressureSet = 0 in {
@@ -826,6 +836,7 @@ def SReg_64 : SIRegisterClass<"AMDGPU", [v2i32, i64, v2f32, f64, i1, v4i16, v4f1
let CopyCost = 1;
let AllocationPriority = 1;
let HasSGPR = 1;
+ let BaseClassOrder = 64;
}
def SReg_1_XEXEC : SIRegisterClass<"AMDGPU", [i1], 32,
@@ -869,6 +880,7 @@ multiclass SRegClass<int numRegs,
!dag(add, [!cast<RegisterClass>(ttmpName)], ["ttmp"]),
(add)))> {
let isAllocatable = 0;
+ let BaseClassOrder = !mul(numRegs, 32);
}
}
}
@@ -911,10 +923,15 @@ class VRegClassBase<int numRegs, list<ValueType> regTypes, dag regList> :
multiclass VRegClass<int numRegs, list<ValueType> regTypes, dag regList> {
let HasVGPR = 1 in {
// Define the regular class.
- def "" : VRegClassBase<numRegs, regTypes, regList>;
+ def "" : VRegClassBase<numRegs, regTypes, regList> {
+ let BaseClassOrder = !mul(numRegs, 32);
+ }
// Define 2-aligned variant
- def _Align2 : VRegClassBase<numRegs, regTypes, (decimate regList, 2)>;
+ def _Align2 : VRegClassBase<numRegs, regTypes, (decimate regList, 2)> {
+ // Give aligned class higher priority in base class resolution
+ let BaseClassOrder = !sub(!mul(numRegs, 32), 1);
+ }
}
}
@@ -940,10 +957,15 @@ defm VReg_1024 : VRegClass<32, [v32i32, v32f32, v16i64, v16f64], (add VGPR_1024)
multiclass ARegClass<int numRegs, list<ValueType> regTypes, dag regList> {
let CopyCost = !add(numRegs, numRegs, 1), HasAGPR = 1 in {
// Define the regular class.
- def "" : VRegClassBase<numRegs, regTypes, regList>;
+ def "" : VRegClassBase<numRegs, regTypes, regList> {
+ let BaseClassOrder = !mul(numRegs, 32);
+ }
// Define 2-aligned variant
- def _Align2 : VRegClassBase<numRegs, regTypes, (decimate regList, 2)>;
+ def _Align2 : VRegClassBase<numRegs, regTypes, (decimate regList, 2)> {
+ // Give aligned class higher priority in base class resolution
+ let BaseClassOrder = !sub(!mul(numRegs, 32), 1);
+ }
}
}
diff --git a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
index d1775bb9ae2de..4d6669f8f94da 100644
--- a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
+++ b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
@@ -607,7 +607,7 @@ char SIWholeQuadMode::scanInstructions(MachineFunction &MF,
Register Reg = MO.getReg();
if (!Reg.isVirtual() &&
- TRI->hasVectorRegisters(TRI->getPhysRegClass(Reg))) {
+ TRI->hasVectorRegisters(TRI->getPhysRegBaseClass(Reg))) {
Flags = StateWQM;
break;
}
More information about the llvm-commits
mailing list