[llvm] r359754 - [AMDGPU] gfx1010 constant bus limit
Stanislav Mekhanoshin via llvm-commits
llvm-commits at lists.llvm.org
Wed May 1 20:47:23 PDT 2019
Author: rampitec
Date: Wed May 1 20:47:23 2019
New Revision: 359754
URL: http://llvm.org/viewvc/llvm-project?rev=359754&view=rev
Log:
[AMDGPU] gfx1010 constant bus limit
Constant bus limit has increased to 2 with GFX10.
Differential Revision: https://reviews.llvm.org/D61404
Modified:
llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h
llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp?rev=359754&r1=359753&r2=359754&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.cpp Wed May 1 20:47:23 2019
@@ -256,6 +256,26 @@ GCNSubtarget::GCNSubtarget(const Triple
*this, *static_cast<AMDGPURegisterBankInfo *>(RegBankInfo.get()), TM));
}
+unsigned GCNSubtarget::getConstantBusLimit(unsigned Opcode) const {
+ if (getGeneration() < GFX10)
+ return 1;
+
+ switch (Opcode) {
+ case AMDGPU::V_LSHLREV_B64:
+ case AMDGPU::V_LSHLREV_B64_gfx10:
+ case AMDGPU::V_LSHL_B64:
+ case AMDGPU::V_LSHRREV_B64:
+ case AMDGPU::V_LSHRREV_B64_gfx10:
+ case AMDGPU::V_LSHR_B64:
+ case AMDGPU::V_ASHRREV_I64:
+ case AMDGPU::V_ASHRREV_I64_gfx10:
+ case AMDGPU::V_ASHR_I64:
+ return 1;
+ }
+
+ return 2;
+}
+
unsigned AMDGPUSubtarget::getMaxLocalMemSizeWithWaveCount(unsigned NWaves,
const Function &F) const {
if (NWaves == 1)
Modified: llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h?rev=359754&r1=359753&r2=359754&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/AMDGPUSubtarget.h Wed May 1 20:47:23 2019
@@ -443,6 +443,8 @@ public:
return MaxPrivateElementSize;
}
+ unsigned getConstantBusLimit(unsigned Opcode) const;
+
bool hasIntClamp() const {
return HasIntClamp;
}
Modified: llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp?rev=359754&r1=359753&r2=359754&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp Wed May 1 20:47:23 2019
@@ -2443,6 +2443,8 @@ bool AMDGPUAsmParser::validateConstantBu
const unsigned Opcode = Inst.getOpcode();
const MCInstrDesc &Desc = MII.get(Opcode);
unsigned ConstantBusUseCount = 0;
+ unsigned NumLiterals = 0;
+ unsigned LiteralSize;
if (Desc.TSFlags &
(SIInstrFlags::VOPC |
@@ -2454,8 +2456,10 @@ bool AMDGPUAsmParser::validateConstantBu
++ConstantBusUseCount;
}
+ SmallDenseSet<unsigned> SGPRsUsed;
unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
if (SGPRUsed != AMDGPU::NoRegister) {
+ SGPRsUsed.insert(SGPRUsed);
++ConstantBusUseCount;
}
@@ -2478,16 +2482,42 @@ bool AMDGPUAsmParser::validateConstantBu
// flat_scratch_lo, flat_scratch_hi
// are theoretically valid but they are disabled anyway.
// Note that this code mimics SIInstrInfo::verifyInstruction
- if (Reg != SGPRUsed) {
+ if (!SGPRsUsed.count(Reg)) {
+ SGPRsUsed.insert(Reg);
++ConstantBusUseCount;
}
SGPRUsed = Reg;
} else { // Expression or a literal
- ++ConstantBusUseCount;
+
+ if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
+ continue; // special operand like VINTERP attr_chan
+
+ // An instruction may use only one literal.
+ // This has been validated on the previous step.
+ // See validateVOP3Literal.
+ // This literal may be used as more than one operand.
+ // If all these operands are of the same size,
+ // this literal counts as one scalar value.
+ // Otherwise it counts as 2 scalar values.
+ // See "GFX10 Shader Programming", section 3.6.2.3.
+
+ unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
+ if (Size < 4) Size = 4;
+
+ if (NumLiterals == 0) {
+ NumLiterals = 1;
+ LiteralSize = Size;
+ } else if (LiteralSize != Size) {
+ NumLiterals = 2;
+ }
}
}
}
}
+ ConstantBusUseCount += NumLiterals;
+
+ if (isGFX10())
+ return ConstantBusUseCount <= 2;
return ConstantBusUseCount <= 1;
}
Modified: llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp?rev=359754&r1=359753&r2=359754&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInstrInfo.cpp Wed May 1 20:47:23 2019
@@ -2151,9 +2151,11 @@ bool SIInstrInfo::FoldImmediate(MachineI
Src0->ChangeToImmediate(Def->getOperand(1).getImm());
Src0Inlined = true;
} else if ((RI.isPhysicalRegister(Src0->getReg()) &&
- RI.isSGPRClass(RI.getPhysRegClass(Src0->getReg()))) ||
+ (ST.getConstantBusLimit(Opc) <= 1 &&
+ RI.isSGPRClass(RI.getPhysRegClass(Src0->getReg())))) ||
(RI.isVirtualRegister(Src0->getReg()) &&
- RI.isSGPRClass(MRI->getRegClass(Src0->getReg()))))
+ (ST.getConstantBusLimit(Opc) <= 1 &&
+ RI.isSGPRClass(MRI->getRegClass(Src0->getReg())))))
return false;
// VGPR is okay as Src0 - fallthrough
}
@@ -2350,7 +2352,9 @@ MachineInstr *SIInstrInfo::convertToThre
if (!IsFMA && !Src0Mods && !Src1Mods && !Clamp && !Omod &&
// If we have an SGPR input, we will violate the constant bus restriction.
- (!Src0->isReg() || !RI.isSGPRReg(MBB->getParent()->getRegInfo(), Src0->getReg()))) {
+ (ST.getConstantBusLimit(Opc) > 1 ||
+ !Src0->isReg() ||
+ !RI.isSGPRReg(MBB->getParent()->getRegInfo(), Src0->getReg()))) {
if (auto Imm = getFoldableImm(Src2)) {
return BuildMI(*MBB, MI, MI.getDebugLoc(),
get(IsF16 ? AMDGPU::V_MADAK_F16 : AMDGPU::V_MADAK_F32))
@@ -3090,9 +3094,12 @@ bool SIInstrInfo::verifyInstruction(cons
if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1)
++ConstantBusCount;
+ SmallVector<unsigned, 2> SGPRsUsed;
unsigned SGPRUsed = findImplicitSGPRRead(MI);
- if (SGPRUsed != AMDGPU::NoRegister)
+ if (SGPRUsed != AMDGPU::NoRegister) {
++ConstantBusCount;
+ SGPRsUsed.push_back(SGPRUsed);
+ }
for (int OpIdx : OpIndices) {
if (OpIdx == -1)
@@ -3100,23 +3107,37 @@ bool SIInstrInfo::verifyInstruction(cons
const MachineOperand &MO = MI.getOperand(OpIdx);
if (usesConstantBus(MRI, MO, MI.getDesc().OpInfo[OpIdx])) {
if (MO.isReg()) {
- if (MO.getReg() != SGPRUsed)
- ++ConstantBusCount;
SGPRUsed = MO.getReg();
+ if (llvm::all_of(SGPRsUsed, [this, SGPRUsed](unsigned SGPR) {
+ return !RI.regsOverlap(SGPRUsed, SGPR);
+ })) {
+ ++ConstantBusCount;
+ SGPRsUsed.push_back(SGPRUsed);
+ }
} else {
++ConstantBusCount;
++LiteralCount;
}
}
}
- if (ConstantBusCount > 1) {
- ErrInfo = "VOP* instruction uses the constant bus more than once";
+ const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
+ // v_writelane_b32 is an exception from constant bus restriction:
+ // vsrc0 can be sgpr, const or m0 and lane select sgpr, m0 or inline-const
+ if (ConstantBusCount > ST.getConstantBusLimit(Opcode) &&
+ Opcode != AMDGPU::V_WRITELANE_B32) {
+ ErrInfo = "VOP* instruction violates constant bus restriction";
return false;
}
if (isVOP3(MI) && LiteralCount) {
- ErrInfo = "VOP3 instruction uses literal";
- return false;
+ if (LiteralCount && !ST.hasVOP3Literal()) {
+ ErrInfo = "VOP3 instruction uses literal";
+ return false;
+ }
+ if (LiteralCount > 1) {
+ ErrInfo = "VOP3 instruction uses more than one literal";
+ return false;
+ }
}
}
@@ -3509,31 +3530,47 @@ bool SIInstrInfo::isLegalVSrcOperand(con
bool SIInstrInfo::isOperandLegal(const MachineInstr &MI, unsigned OpIdx,
const MachineOperand *MO) const {
- const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
+ const MachineFunction &MF = *MI.getParent()->getParent();
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
const MCInstrDesc &InstDesc = MI.getDesc();
const MCOperandInfo &OpInfo = InstDesc.OpInfo[OpIdx];
+ const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
const TargetRegisterClass *DefinedRC =
OpInfo.RegClass != -1 ? RI.getRegClass(OpInfo.RegClass) : nullptr;
if (!MO)
MO = &MI.getOperand(OpIdx);
+ int ConstantBusLimit = ST.getConstantBusLimit(MI.getOpcode());
+ int VOP3LiteralLimit = ST.hasVOP3Literal() ? 1 : 0;
if (isVALU(MI) && usesConstantBus(MRI, *MO, OpInfo)) {
+ if (isVOP3(MI) && isLiteralConstantLike(*MO, OpInfo) && !VOP3LiteralLimit--)
+ return false;
- RegSubRegPair SGPRUsed;
+ SmallDenseSet<RegSubRegPair> SGPRsUsed;
if (MO->isReg())
- SGPRUsed = RegSubRegPair(MO->getReg(), MO->getSubReg());
+ SGPRsUsed.insert(RegSubRegPair(MO->getReg(), MO->getSubReg()));
for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
if (i == OpIdx)
continue;
const MachineOperand &Op = MI.getOperand(i);
if (Op.isReg()) {
- if ((Op.getReg() != SGPRUsed.Reg || Op.getSubReg() != SGPRUsed.SubReg) &&
+ RegSubRegPair SGPR(Op.getReg(), Op.getSubReg());
+ if (!SGPRsUsed.count(SGPR) &&
usesConstantBus(MRI, Op, InstDesc.OpInfo[i])) {
- return false;
+ if (--ConstantBusLimit <= 0)
+ return false;
+ SGPRsUsed.insert(SGPR);
}
} else if (InstDesc.OpInfo[i].OperandType == AMDGPU::OPERAND_KIMM32) {
- return false;
+ if (--ConstantBusLimit <= 0)
+ return false;
+ } else if (isVOP3(MI) && AMDGPU::isSISrcOperand(InstDesc, i) &&
+ isLiteralConstantLike(Op, InstDesc.OpInfo[i])) {
+ if (!VOP3LiteralLimit--)
+ return false;
+ if (--ConstantBusLimit <= 0)
+ return false;
}
}
}
@@ -3569,7 +3606,7 @@ void SIInstrInfo::legalizeOperandsVOP2(M
// disabled for the operand type for instructions because they will always
// violate the one constant bus use rule.
bool HasImplicitSGPR = findImplicitSGPRRead(MI) != AMDGPU::NoRegister;
- if (HasImplicitSGPR) {
+ if (HasImplicitSGPR && ST.getConstantBusLimit(Opc) <= 1) {
int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
MachineOperand &Src0 = MI.getOperand(Src0Idx);
@@ -3680,7 +3717,14 @@ void SIInstrInfo::legalizeOperandsVOP3(M
};
// Find the one SGPR operand we are allowed to use.
+ int ConstantBusLimit = ST.getConstantBusLimit(Opc);
+ int LiteralLimit = ST.hasVOP3Literal() ? 1 : 0;
+ SmallDenseSet<unsigned> SGPRsUsed;
unsigned SGPRReg = findUsedSGPR(MI, VOP3Idx);
+ if (SGPRReg != AMDGPU::NoRegister) {
+ SGPRsUsed.insert(SGPRReg);
+ --ConstantBusLimit;
+ }
for (unsigned i = 0; i < 3; ++i) {
int Idx = VOP3Idx[i];
@@ -3688,16 +3732,32 @@ void SIInstrInfo::legalizeOperandsVOP3(M
break;
MachineOperand &MO = MI.getOperand(Idx);
- // We should never see a VOP3 instruction with an illegal immediate operand.
- if (!MO.isReg())
+ if (!MO.isReg()) {
+ if (!isLiteralConstantLike(MO, get(Opc).OpInfo[Idx]))
+ continue;
+
+ if (LiteralLimit > 0 && ConstantBusLimit > 0) {
+ --LiteralLimit;
+ --ConstantBusLimit;
+ continue;
+ }
+
+ --LiteralLimit;
+ --ConstantBusLimit;
+ legalizeOpWithMove(MI, Idx);
continue;
+ }
if (!RI.isSGPRClass(MRI.getRegClass(MO.getReg())))
continue; // VGPRs are legal
- if (SGPRReg == AMDGPU::NoRegister || SGPRReg == MO.getReg()) {
- SGPRReg = MO.getReg();
- // We can use one SGPR in each VOP3 instruction.
+ // We can use one SGPR in each VOP3 instruction prior to GFX10
+ // and two starting from GFX10.
+ if (SGPRsUsed.count(MO.getReg()))
+ continue;
+ if (ConstantBusLimit > 0) {
+ SGPRsUsed.insert(MO.getReg());
+ --ConstantBusLimit;
continue;
}
More information about the llvm-commits
mailing list