[llvm] 01549dd - [AMDGPU] Base getSubRegFromChannel on TableGen data
Carl Ritson via llvm-commits
llvm-commits at lists.llvm.org
Wed Oct 14 04:26:05 PDT 2020
Author: Carl Ritson
Date: 2020-10-14T20:25:09+09:00
New Revision: 01549dd976faa93dcf18d7363ccf8b2509833f7c
URL: https://github.com/llvm/llvm-project/commit/01549dd976faa93dcf18d7363ccf8b2509833f7c
DIFF: https://github.com/llvm/llvm-project/commit/01549dd976faa93dcf18d7363ccf8b2509833f7c.diff
LOG: [AMDGPU] Base getSubRegFromChannel on TableGen data
Generate (at runtime) the table used to drive getSubRegFromChannel,
based on AMDGPUSubRegIdxRanges from TableGen data.
This is a step closer to it being statically generated by TableGen and
allows getSubRegFromChannel to handle all bitwidths in the meantime.
Reviewed By: rampitec, arsenm, foad
Differential Revision: https://reviews.llvm.org/D89217
Added:
Modified:
llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
llvm/lib/Target/AMDGPU/SIRegisterInfo.h
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 4123799bc570..7b1970f45a6c 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -40,6 +40,14 @@ static cl::opt<bool> EnableSpillSGPRToVGPR(
cl::init(true));
std::array<std::vector<int16_t>, 16> SIRegisterInfo::RegSplitParts;
+std::array<std::array<uint16_t, 32>, 9> SIRegisterInfo::SubRegFromChannelTable;
+
+// Map numbers of DWORDs to indexes in SubRegFromChannelTable.
+// Valid indexes are shifted 1, such that a 0 mapping means unsupported.
+// e.g. for 8 DWORDs (256-bit), SubRegFromChannelTableWidthMap[8] = 8,
+// meaning index 7 in SubRegFromChannelTable.
+static const std::array<unsigned, 17> SubRegFromChannelTableWidthMap = {
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 0, 0, 0, 0, 0, 0, 0, 9};
SIRegisterInfo::SIRegisterInfo(const GCNSubtarget &ST)
: AMDGPUGenRegisterInfo(AMDGPU::PC_REG, ST.getAMDGPUDwarfFlavour()), ST(ST),
@@ -78,8 +86,27 @@ SIRegisterInfo::SIRegisterInfo(const GCNSubtarget &ST)
}
};
+ static llvm::once_flag InitializeSubRegFromChannelTableFlag;
+
+ static auto InitializeSubRegFromChannelTableOnce = [this]() {
+ for (auto &Row : SubRegFromChannelTable)
+ Row.fill(AMDGPU::NoSubRegister); // Default: no subreg for this slot.
+ for (uint16_t Idx = 1; Idx < getNumSubRegIndices(); ++Idx) {
+ unsigned Width = AMDGPUSubRegIdxRanges[Idx].Size / 32; // In DWORDs.
+ unsigned Offset = AMDGPUSubRegIdxRanges[Idx].Offset / 32; // In DWORDs.
+ assert(Width < SubRegFromChannelTableWidthMap.size());
+ Width = SubRegFromChannelTableWidthMap[Width]; // 1-based row; 0 = none.
+ if (Width == 0)
+ continue;
+ assert((Width - 1) < SubRegFromChannelTable.size());
+ assert(Offset < SubRegFromChannelTable[Width - 1].size()); // Same row.
+ SubRegFromChannelTable[Width - 1][Offset] = Idx;
+ }
+ };
llvm::call_once(InitializeRegSplitPartsFlag, InitializeRegSplitPartsOnce);
+ llvm::call_once(InitializeSubRegFromChannelTableFlag,
+ InitializeSubRegFromChannelTableOnce);
}
void SIRegisterInfo::reserveRegisterTuples(BitVector &Reserved,
@@ -156,71 +183,13 @@ const uint32_t *SIRegisterInfo::getAllAllocatableSRegMask() const {
return CSR_AMDGPU_AllAllocatableSRegs_RegMask;
}
-// FIXME: TableGen should generate something to make this manageable for all
-// register classes. At a minimum we could use the opposite of
-// composeSubRegIndices and go up from the base 32-bit subreg.
unsigned SIRegisterInfo::getSubRegFromChannel(unsigned Channel,
unsigned NumRegs) {
- // Table of NumRegs sized pieces at every 32-bit offset.
- static const uint16_t SubRegFromChannelTable[][32] = {
- {AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3,
- AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7,
- AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11,
- AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15,
- AMDGPU::sub16, AMDGPU::sub17, AMDGPU::sub18, AMDGPU::sub19,
- AMDGPU::sub20, AMDGPU::sub21, AMDGPU::sub22, AMDGPU::sub23,
- AMDGPU::sub24, AMDGPU::sub25, AMDGPU::sub26, AMDGPU::sub27,
- AMDGPU::sub28, AMDGPU::sub29, AMDGPU::sub30, AMDGPU::sub31},
- {AMDGPU::sub0_sub1, AMDGPU::sub1_sub2, AMDGPU::sub2_sub3,
- AMDGPU::sub3_sub4, AMDGPU::sub4_sub5, AMDGPU::sub5_sub6,
- AMDGPU::sub6_sub7, AMDGPU::sub7_sub8, AMDGPU::sub8_sub9,
- AMDGPU::sub9_sub10, AMDGPU::sub10_sub11, AMDGPU::sub11_sub12,
- AMDGPU::sub12_sub13, AMDGPU::sub13_sub14, AMDGPU::sub14_sub15,
- AMDGPU::sub15_sub16, AMDGPU::sub16_sub17, AMDGPU::sub17_sub18,
- AMDGPU::sub18_sub19, AMDGPU::sub19_sub20, AMDGPU::sub20_sub21,
- AMDGPU::sub21_sub22, AMDGPU::sub22_sub23, AMDGPU::sub23_sub24,
- AMDGPU::sub24_sub25, AMDGPU::sub25_sub26, AMDGPU::sub26_sub27,
- AMDGPU::sub27_sub28, AMDGPU::sub28_sub29, AMDGPU::sub29_sub30,
- AMDGPU::sub30_sub31, AMDGPU::NoSubRegister},
- {AMDGPU::sub0_sub1_sub2, AMDGPU::sub1_sub2_sub3,
- AMDGPU::sub2_sub3_sub4, AMDGPU::sub3_sub4_sub5,
- AMDGPU::sub4_sub5_sub6, AMDGPU::sub5_sub6_sub7,
- AMDGPU::sub6_sub7_sub8, AMDGPU::sub7_sub8_sub9,
- AMDGPU::sub8_sub9_sub10, AMDGPU::sub9_sub10_sub11,
- AMDGPU::sub10_sub11_sub12, AMDGPU::sub11_sub12_sub13,
- AMDGPU::sub12_sub13_sub14, AMDGPU::sub13_sub14_sub15,
- AMDGPU::sub14_sub15_sub16, AMDGPU::sub15_sub16_sub17,
- AMDGPU::sub16_sub17_sub18, AMDGPU::sub17_sub18_sub19,
- AMDGPU::sub18_sub19_sub20, AMDGPU::sub19_sub20_sub21,
- AMDGPU::sub20_sub21_sub22, AMDGPU::sub21_sub22_sub23,
- AMDGPU::sub22_sub23_sub24, AMDGPU::sub23_sub24_sub25,
- AMDGPU::sub24_sub25_sub26, AMDGPU::sub25_sub26_sub27,
- AMDGPU::sub26_sub27_sub28, AMDGPU::sub27_sub28_sub29,
- AMDGPU::sub28_sub29_sub30, AMDGPU::sub29_sub30_sub31,
- AMDGPU::NoSubRegister, AMDGPU::NoSubRegister},
- {AMDGPU::sub0_sub1_sub2_sub3, AMDGPU::sub1_sub2_sub3_sub4,
- AMDGPU::sub2_sub3_sub4_sub5, AMDGPU::sub3_sub4_sub5_sub6,
- AMDGPU::sub4_sub5_sub6_sub7, AMDGPU::sub5_sub6_sub7_sub8,
- AMDGPU::sub6_sub7_sub8_sub9, AMDGPU::sub7_sub8_sub9_sub10,
- AMDGPU::sub8_sub9_sub10_sub11, AMDGPU::sub9_sub10_sub11_sub12,
- AMDGPU::sub10_sub11_sub12_sub13, AMDGPU::sub11_sub12_sub13_sub14,
- AMDGPU::sub12_sub13_sub14_sub15, AMDGPU::sub13_sub14_sub15_sub16,
- AMDGPU::sub14_sub15_sub16_sub17, AMDGPU::sub15_sub16_sub17_sub18,
- AMDGPU::sub16_sub17_sub18_sub19, AMDGPU::sub17_sub18_sub19_sub20,
- AMDGPU::sub18_sub19_sub20_sub21, AMDGPU::sub19_sub20_sub21_sub22,
- AMDGPU::sub20_sub21_sub22_sub23, AMDGPU::sub21_sub22_sub23_sub24,
- AMDGPU::sub22_sub23_sub24_sub25, AMDGPU::sub23_sub24_sub25_sub26,
- AMDGPU::sub24_sub25_sub26_sub27, AMDGPU::sub25_sub26_sub27_sub28,
- AMDGPU::sub26_sub27_sub28_sub29, AMDGPU::sub27_sub28_sub29_sub30,
- AMDGPU::sub28_sub29_sub30_sub31, AMDGPU::NoSubRegister,
- AMDGPU::NoSubRegister, AMDGPU::NoSubRegister}};
-
- const unsigned NumRegIndex = NumRegs - 1;
-
- assert(NumRegIndex < array_lengthof(SubRegFromChannelTable) &&
- "Not implemented");
- assert(Channel < array_lengthof(SubRegFromChannelTable[0]));
- return SubRegFromChannelTable[NumRegIndex][Channel];
+ assert(NumRegs < SubRegFromChannelTableWidthMap.size());
+ unsigned NumRegIndex = SubRegFromChannelTableWidthMap[NumRegs];
+ assert(NumRegIndex && "Not implemented");
+ assert(Channel < SubRegFromChannelTable[NumRegIndex - 1].size());
+ return SubRegFromChannelTable[NumRegIndex - 1][Channel];
}
MCRegister SIRegisterInfo::reservedPrivateSegmentBufferReg(
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
index 62d9f1174337..5b5002605445 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h
@@ -40,6 +40,11 @@ class SIRegisterInfo final : public AMDGPUGenRegisterInfo {
/// all elements of the inner vector combined give a full lane mask.
static std::array<std::vector<int16_t>, 16> RegSplitParts;
+ // Table representing sub reg of given width and offset.
+ // First index is subreg size: 32, 64, 96, 128, 160, 192, 224, 256, 512.
+ // Second index is 32 different dword offsets.
+ static std::array<std::array<uint16_t, 32>, 9> SubRegFromChannelTable;
+
void reserveRegisterTuples(BitVector &, MCRegister Reg) const;
public:
More information about the llvm-commits
mailing list