[llvm] [AMDGPU][True16][CodeGen] update waitcnt for true16 (PR #128927)
Brox Chen via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 10 11:01:11 PDT 2025
================
@@ -748,27 +748,33 @@ RegInterval WaitcntBrackets::getRegInterval(const MachineInstr *MI,
RegInterval Result;
- unsigned Reg = TRI->getEncodingValue(AMDGPU::getMCReg(Op.getReg(), *ST)) &
- AMDGPU::HWEncoding::REG_IDX_MASK;
+ MCRegister MCReg = AMDGPU::getMCReg(Op.getReg(), *ST);
+ unsigned RegIdx = TRI->getHWRegIndex(MCReg);
+ assert(isUInt<8>(RegIdx));
+ unsigned Reg = (RegIdx << 1) | (AMDGPU::isHi16Reg(MCReg, *TRI) ? 1 : 0);
+ const TargetRegisterClass *RC = TRI->getPhysRegBaseClass(Op.getReg());
+ unsigned Size = TRI->getRegSizeInBits(*RC);
+
+ // AGPRs/VGPRs are tracked every 16 bits, SGPRs by 32 bits
if (TRI->isVectorRegister(*MRI, Op.getReg())) {
assert(Reg <= SQ_MAX_PGM_VGPRS);
Result.first = Reg;
if (TRI->isAGPR(*MRI, Op.getReg()))
Result.first += AGPR_OFFSET;
assert(Result.first >= 0 && Result.first < SQ_MAX_PGM_VGPRS);
- } else if (TRI->isSGPRReg(*MRI, Op.getReg()) && Reg < SQ_MAX_PGM_SGPRS) {
+ assert(Size % 16 == 0);
+ Result.second = Result.first + (Size / 16);
+ } else if (TRI->isSGPRReg(*MRI, Op.getReg()) &&
+ (Reg >> 1) < SQ_MAX_PGM_SGPRS) {
// SGPRs including VCC, TTMPs and EXEC but excluding read-only scalar
// sources like SRC_PRIVATE_BASE.
- Result.first = Reg + NUM_ALL_VGPRS;
+ Result.first = (Reg >> 1) + NUM_ALL_VGPRS;
+ Result.second = Result.first + ((Size + 16) / 32);
----------------
broxigarchen wrote:
updated
https://github.com/llvm/llvm-project/pull/128927
More information about the llvm-commits mailing list