[llvm] 7834194 - TableGen: Introduce generated getSubRegisterClass function
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Mon Sep 12 06:03:45 PDT 2022
Author: Matt Arsenault
Date: 2022-09-12T09:03:37-04:00
New Revision: 78341948370b56cd98c0b70dd9c51612b97d7621
URL: https://github.com/llvm/llvm-project/commit/78341948370b56cd98c0b70dd9c51612b97d7621
DIFF: https://github.com/llvm/llvm-project/commit/78341948370b56cd98c0b70dd9c51612b97d7621.diff
LOG: TableGen: Introduce generated getSubRegisterClass function
Currently there isn't a generic way to get the smaller register class
that is produced by taking a subregister of a larger class. This adds a
TableGen-generated getSubRegisterClass function and uses it to replace
the manually implemented SIRegisterInfo::getSubRegClass in AMDGPU. It
will be used to improve subregister support in the allocator.
Added:
Modified:
llvm/include/llvm/CodeGen/TargetRegisterInfo.h
llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
llvm/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
llvm/lib/Target/AMDGPU/SIRegisterInfo.h
llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
llvm/utils/TableGen/RegisterInfoEmitter.cpp
Removed:
################################################################################
diff --git a/llvm/include/llvm/CodeGen/TargetRegisterInfo.h b/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
index ab3ec53909b39..8439093210a19 100644
--- a/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
+++ b/llvm/include/llvm/CodeGen/TargetRegisterInfo.h
@@ -637,6 +637,14 @@ class TargetRegisterInfo : public MCRegisterInfo {
return RC;
}
+ /// Return a register class that can be used for a subregister copy from/into
+ /// \p SuperRC at \p SubRegIdx.
+ virtual const TargetRegisterClass *
+ getSubRegisterClass(const TargetRegisterClass *SuperRC,
+ unsigned SubRegIdx) const {
+ return nullptr;
+ }
+
/// Return the subregister index you get from composing
/// two subregister indices.
///
diff --git a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
index 0a9734a791e79..ff17a9b87772a 100644
--- a/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
@@ -309,14 +309,10 @@ static bool foldVGPRCopyIntoRegSequence(MachineInstr &MI,
bool IsAGPR = TRI->isAGPRClass(DstRC);
for (unsigned I = 1, N = MI.getNumOperands(); I != N; I += 2) {
- Register SrcReg = MI.getOperand(I).getReg();
- unsigned SrcSubReg = MI.getOperand(I).getSubReg();
-
- const TargetRegisterClass *SrcRC = MRI.getRegClass(SrcReg);
+ const TargetRegisterClass *SrcRC =
+ TRI->getRegClassForOperandReg(MRI, MI.getOperand(I));
assert(TRI->isSGPRClass(SrcRC) &&
"Expected SGPR REG_SEQUENCE to only have SGPR inputs");
-
- SrcRC = TRI->getSubRegClass(SrcRC, SrcSubReg);
const TargetRegisterClass *NewSrcRC = TRI->getEquivalentVGPRClass(SrcRC);
Register TmpReg = MRI.createVirtualRegister(NewSrcRC);
@@ -1109,8 +1105,8 @@ void SIFixSGPRCopies::lowerVGPR2SGPRCopies(MachineFunction &MF) {
Register DstReg = MI->getOperand(0).getReg();
Register SrcReg = MI->getOperand(1).getReg();
unsigned SubReg = MI->getOperand(1).getSubReg();
- const TargetRegisterClass *SrcRC = TRI->getRegClassForReg(*MRI, SrcReg);
- SrcRC = TRI->getSubRegClass(SrcRC, SubReg);
+ const TargetRegisterClass *SrcRC =
+ TRI->getRegClassForOperandReg(*MRI, MI->getOperand(1));
size_t SrcSize = TRI->getRegSizeInBits(*SrcRC);
if (SrcSize == 16) {
// HACK to handle possible 16bit VGPR source
diff --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index 275a87e033baf..21e755474de1d 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -900,9 +900,8 @@ void SIFoldOperands::foldOperand(
TRI->getRegClassForReg(*MRI, OpToFold.getReg());
if (TRI->hasVectorRegisters(RC) && OpToFold.getSubReg()) {
unsigned SubReg = OpToFold.getSubReg();
- const TargetRegisterClass *SubRC = TRI->getSubRegClass(RC, SubReg);
- RC = TRI->getCompatibleSubRegClass(RC, SubRC, SubReg);
- if (RC)
+ if (const TargetRegisterClass *SubRC =
+ TRI->getSubRegisterClass(RC, SubReg))
RC = SubRC;
}
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index fe5ceb2778110..99b3c2b17d0f2 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -4071,9 +4071,9 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
: &AMDGPU::VReg_64RegClass;
const TargetRegisterClass *Src0SubRC =
- TRI->getSubRegClass(Src0RC, AMDGPU::sub0);
+ TRI->getSubRegisterClass(Src0RC, AMDGPU::sub0);
const TargetRegisterClass *Src1SubRC =
- TRI->getSubRegClass(Src1RC, AMDGPU::sub1);
+ TRI->getSubRegisterClass(Src1RC, AMDGPU::sub1);
MachineOperand SrcReg0Sub0 = TII->buildExtractSubRegOrImm(
MI, MRI, Src0, Src0RC, AMDGPU::sub0, Src0SubRC);
@@ -4159,7 +4159,7 @@ MachineBasicBlock *SITargetLowering::EmitInstrWithCustomInserter(
.addImm(0);
} else {
const TargetRegisterClass *SubRC =
- TRI->getSubRegClass(Src2RC, AMDGPU::sub0);
+ TRI->getSubRegisterClass(Src2RC, AMDGPU::sub0);
MachineOperand Src2Sub0 = TII->buildExtractSubRegOrImm(
MII, MRI, Src2, Src2RC, AMDGPU::sub0, SubRC);
MachineOperand Src2Sub1 = TII->buildExtractSubRegOrImm(
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index d699166287f5d..4ee17cd4e8dbf 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -4104,7 +4104,7 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
const TargetRegisterClass *RC = RI.getRegClassForReg(MRI, Reg);
if (RI.hasVectorRegisters(RC) && MO.getSubReg()) {
const TargetRegisterClass *SubRC =
- RI.getSubRegClass(RC, MO.getSubReg());
+ RI.getSubRegisterClass(RC, MO.getSubReg());
RC = RI.getCompatibleSubRegClass(RC, SubRC, MO.getSubReg());
if (RC)
RC = SubRC;
@@ -6718,14 +6718,16 @@ void SIInstrInfo::splitScalar64BitUnaryOp(
MRI.getRegClass(Src0.getReg()) :
&AMDGPU::SGPR_32RegClass;
- const TargetRegisterClass *Src0SubRC = RI.getSubRegClass(Src0RC, AMDGPU::sub0);
+ const TargetRegisterClass *Src0SubRC =
+ RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
MachineOperand SrcReg0Sub0 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
AMDGPU::sub0, Src0SubRC);
const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC);
- const TargetRegisterClass *NewDestSubRC = RI.getSubRegClass(NewDestRC, AMDGPU::sub0);
+ const TargetRegisterClass *NewDestSubRC =
+ RI.getSubRegisterClass(NewDestRC, AMDGPU::sub0);
Register DestSub0 = MRI.createVirtualRegister(NewDestSubRC);
MachineInstr &LoHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub0).add(SrcReg0Sub0);
@@ -6782,8 +6784,10 @@ void SIInstrInfo::splitScalar64BitAddSub(SetVectorType &Worklist,
const TargetRegisterClass *Src0RC = MRI.getRegClass(Src0.getReg());
const TargetRegisterClass *Src1RC = MRI.getRegClass(Src1.getReg());
- const TargetRegisterClass *Src0SubRC = RI.getSubRegClass(Src0RC, AMDGPU::sub0);
- const TargetRegisterClass *Src1SubRC = RI.getSubRegClass(Src1RC, AMDGPU::sub0);
+ const TargetRegisterClass *Src0SubRC =
+ RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
+ const TargetRegisterClass *Src1SubRC =
+ RI.getSubRegisterClass(Src1RC, AMDGPU::sub0);
MachineOperand SrcReg0Sub0 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
AMDGPU::sub0, Src0SubRC);
@@ -6848,12 +6852,14 @@ void SIInstrInfo::splitScalar64BitBinaryOp(SetVectorType &Worklist,
MRI.getRegClass(Src0.getReg()) :
&AMDGPU::SGPR_32RegClass;
- const TargetRegisterClass *Src0SubRC = RI.getSubRegClass(Src0RC, AMDGPU::sub0);
+ const TargetRegisterClass *Src0SubRC =
+ RI.getSubRegisterClass(Src0RC, AMDGPU::sub0);
const TargetRegisterClass *Src1RC = Src1.isReg() ?
MRI.getRegClass(Src1.getReg()) :
&AMDGPU::SGPR_32RegClass;
- const TargetRegisterClass *Src1SubRC = RI.getSubRegClass(Src1RC, AMDGPU::sub0);
+ const TargetRegisterClass *Src1SubRC =
+ RI.getSubRegisterClass(Src1RC, AMDGPU::sub0);
MachineOperand SrcReg0Sub0 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC,
AMDGPU::sub0, Src0SubRC);
@@ -6866,7 +6872,8 @@ void SIInstrInfo::splitScalar64BitBinaryOp(SetVectorType &Worklist,
const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg());
const TargetRegisterClass *NewDestRC = RI.getEquivalentVGPRClass(DestRC);
- const TargetRegisterClass *NewDestSubRC = RI.getSubRegClass(NewDestRC, AMDGPU::sub0);
+ const TargetRegisterClass *NewDestSubRC =
+ RI.getSubRegisterClass(NewDestRC, AMDGPU::sub0);
Register DestSub0 = MRI.createVirtualRegister(NewDestSubRC);
MachineInstr &LoHalf = *BuildMI(MBB, MII, DL, InstDesc, DestSub0)
@@ -6955,7 +6962,8 @@ void SIInstrInfo::splitScalar64BitBCNT(
Register MidReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
Register ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
- const TargetRegisterClass *SrcSubRC = RI.getSubRegClass(SrcRC, AMDGPU::sub0);
+ const TargetRegisterClass *SrcSubRC =
+ RI.getSubRegisterClass(SrcRC, AMDGPU::sub0);
MachineOperand SrcRegSub0 = buildExtractSubRegOrImm(MII, MRI, Src, SrcRC,
AMDGPU::sub0, SrcSubRC);
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index e20e1ebe0c0e9..6b52f8e82dbf8 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -2723,26 +2723,6 @@ SIRegisterInfo::getEquivalentSGPRClass(const TargetRegisterClass *VRC) const {
return SRC;
}
-const TargetRegisterClass *SIRegisterInfo::getSubRegClass(
- const TargetRegisterClass *RC, unsigned SubIdx) const {
- if (SubIdx == AMDGPU::NoSubRegister)
- return RC;
-
- // We can assume that each lane corresponds to one 32-bit register.
- unsigned Size = getNumChannelsFromSubReg(SubIdx) * 32;
- if (isAGPRClass(RC)) {
- RC = getAGPRClassForBitWidth(Size);
- } else if (isVGPRClass(RC)) {
- RC = getVGPRClassForBitWidth(Size);
- } else if (isVectorSuperClass(RC)) {
- RC = getVectorSuperClassForBitWidth(Size);
- } else {
- RC = getSGPRClassForBitWidth(Size);
- }
- assert(RC && "Invalid sub-register class size");
- return RC;
-}
-
const TargetRegisterClass *
SIRegisterInfo::getCompatibleSubRegClass(const TargetRegisterClass *SuperRC,
const TargetRegisterClass *SubRC,
@@ -2833,6 +2813,13 @@ SIRegisterInfo::getRegClassForReg(const MachineRegisterInfo &MRI,
return Reg.isVirtual() ? MRI.getRegClass(Reg) : getPhysRegClass(Reg);
}
+const TargetRegisterClass *
+SIRegisterInfo::getRegClassForOperandReg(const MachineRegisterInfo &MRI,
+ const MachineOperand &MO) const {
+ const TargetRegisterClass *SrcRC = getRegClassForReg(MRI, MO.getReg());
+ return getSubRegisterClass(SrcRC, MO.getSubReg());
+}
+
bool SIRegisterInfo::isVGPR(const MachineRegisterInfo &MRI,
Register Reg) const {
const TargetRegisterClass *RC = getRegClassForReg(MRI, Reg);
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
index 289c5ba75e9da..de54c5eb9ac61 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
@@ -247,12 +247,6 @@ class SIRegisterInfo final : public AMDGPUGenRegisterInfo {
const TargetRegisterClass *
getEquivalentSGPRClass(const TargetRegisterClass *VRC) const;
- /// \returns The canonical register class that is used for a sub-register of
- /// \p RC for the given \p SubIdx. If \p SubIdx equals NoSubRegister, \p RC
- /// will be returned.
- const TargetRegisterClass *getSubRegClass(const TargetRegisterClass *RC,
- unsigned SubIdx) const;
-
/// Returns a register class which is compatible with \p SuperRC, such that a
/// subregister exists with class \p SubRC with subregister index \p
/// SubIdx. If this is impossible (e.g., an unaligned subregister index within
@@ -283,6 +277,10 @@ class SIRegisterInfo final : public AMDGPUGenRegisterInfo {
const TargetRegisterClass *getRegClassForReg(const MachineRegisterInfo &MRI,
Register Reg) const;
+ const TargetRegisterClass *
+ getRegClassForOperandReg(const MachineRegisterInfo &MRI,
+ const MachineOperand &MO) const;
+
bool isVGPR(const MachineRegisterInfo &MRI, Register Reg) const;
bool isAGPR(const MachineRegisterInfo &MRI, Register Reg) const;
bool isVectorRegister(const MachineRegisterInfo &MRI, Register Reg) const {
diff --git a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
index a5798afab5958..f4986376742f9 100644
--- a/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
+++ b/llvm/lib/Target/AMDGPU/SIWholeQuadMode.cpp
@@ -1448,14 +1448,10 @@ void SIWholeQuadMode::lowerCopyInstrs() {
assert(MI->getNumExplicitOperands() == 2);
const Register Reg = MI->getOperand(0).getReg();
- const unsigned SubReg = MI->getOperand(0).getSubReg();
-
- if (TRI->isVGPR(*MRI, Reg)) {
- const TargetRegisterClass *regClass =
- Reg.isVirtual() ? MRI->getRegClass(Reg) : TRI->getPhysRegClass(Reg);
- if (SubReg)
- regClass = TRI->getSubRegClass(regClass, SubReg);
+ const TargetRegisterClass *regClass =
+ TRI->getRegClassForOperandReg(*MRI, MI->getOperand(0));
+ if (TRI->isVGPRClass(regClass)) {
const unsigned MovOp = TII->getMovOpcode(regClass);
MI->setDesc(TII->get(MovOp));
diff --git a/llvm/utils/TableGen/RegisterInfoEmitter.cpp b/llvm/utils/TableGen/RegisterInfoEmitter.cpp
index f6ae647ff27f5..3f221884a26f5 100644
--- a/llvm/utils/TableGen/RegisterInfoEmitter.cpp
+++ b/llvm/utils/TableGen/RegisterInfoEmitter.cpp
@@ -1169,6 +1169,8 @@ RegisterInfoEmitter::runTargetHeader(raw_ostream &OS, CodeGenTarget &Target,
<< " LaneBitmask reverseComposeSubRegIndexLaneMaskImpl"
<< "(unsigned, LaneBitmask) const override;\n"
<< " const TargetRegisterClass *getSubClassWithSubReg"
+ << "(const TargetRegisterClass *, unsigned) const override;\n"
+ << " const TargetRegisterClass *getSubRegisterClass"
<< "(const TargetRegisterClass *, unsigned) const override;\n";
}
OS << " const RegClassWeight &getRegClassWeight("
@@ -1511,16 +1513,16 @@ RegisterInfoEmitter::runTargetDesc(raw_ostream &OS, CodeGenTarget &Target,
emitComposeSubRegIndexLaneMask(OS, RegBank, ClassName);
}
- // Emit getSubClassWithSubReg.
if (!SubRegIndices.empty()) {
+ // Emit getSubClassWithSubReg.
OS << "const TargetRegisterClass *" << ClassName
<< "::getSubClassWithSubReg(const TargetRegisterClass *RC, unsigned Idx)"
<< " const {\n";
// Use the smallest type that can hold a regclass ID with room for a
// sentinel.
- if (RegisterClasses.size() < UINT8_MAX)
+ if (RegisterClasses.size() <= UINT8_MAX)
OS << " static const uint8_t Table[";
- else if (RegisterClasses.size() < UINT16_MAX)
+ else if (RegisterClasses.size() <= UINT16_MAX)
OS << " static const uint16_t Table[";
else
PrintFatalError("Too many register classes.");
@@ -1541,6 +1543,53 @@ RegisterInfoEmitter::runTargetDesc(raw_ostream &OS, CodeGenTarget &Target,
<< " assert(Idx < " << SubRegIndicesSize << " && \"Bad subreg\");\n"
<< " unsigned TV = Table[RC->getID()][Idx];\n"
<< " return TV ? getRegClass(TV - 1) : nullptr;\n}\n\n";
+
+ // Emit getSubRegisterClass
+ OS << "const TargetRegisterClass *" << ClassName
+ << "::getSubRegisterClass(const TargetRegisterClass *RC, unsigned Idx)"
+ << " const {\n";
+
+ // Use the smallest type that can hold a regclass ID with room for a
+ // sentinel.
+ if (RegisterClasses.size() <= UINT8_MAX)
+ OS << " static const uint8_t Table[";
+ else if (RegisterClasses.size() <= UINT16_MAX)
+ OS << " static const uint16_t Table[";
+ else
+ PrintFatalError("Too many register classes.");
+
+ OS << RegisterClasses.size() << "][" << SubRegIndicesSize << "] = {\n";
+
+ for (const auto &RC : RegisterClasses) {
+ OS << " {\t// " << RC.getName() << '\n';
+ for (auto &Idx : SubRegIndices) {
+ Optional<std::pair<CodeGenRegisterClass *, CodeGenRegisterClass *>>
+ MatchingSubClass = RC.getMatchingSubClassWithSubRegs(RegBank, &Idx);
+
+ unsigned EnumValue = 0;
+ if (MatchingSubClass) {
+ CodeGenRegisterClass *SubRegClass = MatchingSubClass->second;
+ EnumValue = SubRegClass->EnumValue + 1;
+ }
+
+ OS << " " << EnumValue << ",\t// "
+ << RC.getName() << ':' << Idx.getName();
+
+ if (MatchingSubClass) {
+ CodeGenRegisterClass *SubRegClass = MatchingSubClass->second;
+ OS << " -> " << SubRegClass->getName();
+ }
+
+ OS << '\n';
+ }
+
+ OS << " },\n";
+ }
+ OS << " };\n assert(RC && \"Missing regclass\");\n"
+ << " if (!Idx) return RC;\n --Idx;\n"
+ << " assert(Idx < " << SubRegIndicesSize << " && \"Bad subreg\");\n"
+ << " unsigned TV = Table[RC->getID()][Idx];\n"
+ << " return TV ? getRegClass(TV - 1) : nullptr;\n}\n\n";
}
EmitRegUnitPressure(OS, RegBank, ClassName);
More information about the llvm-commits
mailing list