[llvm] r359224 - [AMDGPU] gfx1010 utility functions
Stanislav Mekhanoshin via llvm-commits
llvm-commits at lists.llvm.org
Thu Apr 25 11:53:41 PDT 2019
Author: rampitec
Date: Thu Apr 25 11:53:41 2019
New Revision: 359224
URL: http://llvm.org/viewvc/llvm-project?rev=359224&view=rev
Log:
[AMDGPU] gfx1010 utility functions
Differential Revision: https://reviews.llvm.org/D61094
Modified:
llvm/trunk/lib/Target/AMDGPU/SIDefines.h
llvm/trunk/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
Modified: llvm/trunk/lib/Target/AMDGPU/SIDefines.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIDefines.h?rev=359224&r1=359223&r2=359224&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIDefines.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIDefines.h Thu Apr 25 11:53:41 2019
@@ -121,6 +121,8 @@ namespace AMDGPU {
OPERAND_REG_IMM_FP32,
OPERAND_REG_IMM_FP64,
OPERAND_REG_IMM_FP16,
+ OPERAND_REG_IMM_V2FP16,
+ OPERAND_REG_IMM_V2INT16,
/// Operands with register or inline constant
OPERAND_REG_INLINE_C_INT16,
@@ -133,7 +135,7 @@ namespace AMDGPU {
OPERAND_REG_INLINE_C_V2INT16,
OPERAND_REG_IMM_FIRST = OPERAND_REG_IMM_INT32,
- OPERAND_REG_IMM_LAST = OPERAND_REG_IMM_FP16,
+ OPERAND_REG_IMM_LAST = OPERAND_REG_IMM_V2INT16,
OPERAND_REG_INLINE_C_FIRST = OPERAND_REG_INLINE_C_INT16,
OPERAND_REG_INLINE_C_LAST = OPERAND_REG_INLINE_C_V2INT16,
Modified: llvm/trunk/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/SIInsertWaitcnts.cpp?rev=359224&r1=359223&r2=359224&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/SIInsertWaitcnts.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/SIInsertWaitcnts.cpp Thu Apr 25 11:53:41 2019
@@ -826,7 +826,7 @@ bool SIInsertWaitcnts::generateWaitcntIn
// with knowledge of the called routines.
if (MI.getOpcode() == AMDGPU::SI_RETURN_TO_EPILOG ||
MI.getOpcode() == AMDGPU::S_SETPC_B64_return) {
- Wait = AMDGPU::Waitcnt::allZero();
+ Wait = AMDGPU::Waitcnt::allZero(IV);
}
// Resolve vm waits before gs-done.
else if ((MI.getOpcode() == AMDGPU::S_SENDMSG ||
@@ -998,7 +998,7 @@ bool SIInsertWaitcnts::generateWaitcntIn
// requiring a WAITCNT beforehand.
if (MI.getOpcode() == AMDGPU::S_BARRIER &&
!ST->hasAutoWaitcntBeforeBarrier()) {
- Wait = AMDGPU::Waitcnt::allZero();
+ Wait = AMDGPU::Waitcnt::allZero(IV);
}
// TODO: Remove this work-around, enable the assert for Bug 457939
@@ -1030,7 +1030,7 @@ bool SIInsertWaitcnts::generateWaitcntIn
}
if (ForceEmitZeroWaitcnts)
- Wait = AMDGPU::Waitcnt::allZero();
+ Wait = AMDGPU::Waitcnt::allZero(IV);
if (ForceEmitWaitcnt[VM_CNT])
Wait.VmCnt = 0;
@@ -1311,7 +1311,7 @@ bool SIInsertWaitcnts::insertWaitcntInBl
Inst.getOpcode() == AMDGPU::DS_GWS_SEMA_P ||
Inst.getOpcode() == AMDGPU::DS_GWS_BARRIER) {
// TODO: && context->target_info->GwsRequiresMemViolTest() ) {
- ScoreBrackets.applyWaitcnt(AMDGPU::Waitcnt::allZero());
+ ScoreBrackets.applyWaitcnt(AMDGPU::Waitcnt::allZeroExceptVsCnt());
}
// TODO: Remove this work-around after fixing the scheduler and enable the
Modified: llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp?rev=359224&r1=359223&r2=359224&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp (original)
+++ llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp Thu Apr 25 11:53:41 2019
@@ -84,7 +84,9 @@ unsigned getExpcntBitWidth() { return 3;
unsigned getLgkmcntBitShift() { return 8; }
/// \returns Lgkmcnt bit width.
-unsigned getLgkmcntBitWidth() { return 4; }
+unsigned getLgkmcntBitWidth(unsigned VersionMajor) {
+ return (VersionMajor >= 10) ? 6 : 4;
+}
/// \returns Vmcnt bit shift (higher bits).
unsigned getVmcntBitShiftHi() { return 14; }
@@ -98,14 +100,6 @@ namespace llvm {
namespace AMDGPU {
-struct MIMGInfo {
- uint16_t Opcode;
- uint16_t BaseOpcode;
- uint8_t MIMGEncoding;
- uint8_t VDataDwords;
- uint8_t VAddrDwords;
-};
-
#define GET_MIMGBaseOpcodesTable_IMPL
#define GET_MIMGDimInfoTable_IMPL
#define GET_MIMGInfoTable_IMPL
@@ -119,6 +113,11 @@ int getMIMGOpcode(unsigned BaseOpcode, u
return Info ? Info->Opcode : -1;
}
+const MIMGBaseOpcodeInfo *getMIMGBaseOpcode(unsigned Opc) {
+ const MIMGInfo *Info = getMIMGInfo(Opc);
+ return Info ? getMIMGBaseOpcodeInfo(Info->BaseOpcode) : nullptr;
+}
+
int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels) {
const MIMGInfo *OrigInfo = getMIMGInfo(Opc);
const MIMGInfo *NewInfo =
@@ -279,6 +278,8 @@ unsigned getWavesPerWorkGroup(const MCSu
unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI) {
IsaVersion Version = getIsaVersion(STI->getCPU());
+ if (Version.Major >= 10)
+ return getAddressableNumSGPRs(STI);
if (Version.Major >= 8)
return 16;
return 8;
@@ -300,6 +301,8 @@ unsigned getAddressableNumSGPRs(const MC
return FIXED_NUM_SGPRS_FOR_INIT_BUG;
IsaVersion Version = getIsaVersion(STI->getCPU());
+ if (Version.Major >= 10)
+ return 106;
if (Version.Major >= 8)
return 102;
return 104;
@@ -308,6 +311,10 @@ unsigned getAddressableNumSGPRs(const MC
unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
assert(WavesPerEU != 0);
+ IsaVersion Version = getIsaVersion(STI->getCPU());
+ if (Version.Major >= 10)
+ return 0;
+
if (WavesPerEU >= getMaxWavesPerEU())
return 0;
@@ -322,8 +329,10 @@ unsigned getMaxNumSGPRs(const MCSubtarge
bool Addressable) {
assert(WavesPerEU != 0);
- IsaVersion Version = getIsaVersion(STI->getCPU());
unsigned AddressableNumSGPRs = getAddressableNumSGPRs(STI);
+ IsaVersion Version = getIsaVersion(STI->getCPU());
+ if (Version.Major >= 10)
+ return Addressable ? AddressableNumSGPRs : 108;
if (Version.Major >= 8 && !Addressable)
AddressableNumSGPRs = 112;
unsigned MaxNumSGPRs = getTotalNumSGPRs(STI) / WavesPerEU;
@@ -340,6 +349,9 @@ unsigned getNumExtraSGPRs(const MCSubtar
ExtraSGPRs = 2;
IsaVersion Version = getIsaVersion(STI->getCPU());
+ if (Version.Major >= 10)
+ return ExtraSGPRs;
+
if (Version.Major < 8) {
if (FlatScrUsed)
ExtraSGPRs = 4;
@@ -540,13 +552,14 @@ unsigned getExpcntBitMask(const IsaVersi
}
unsigned getLgkmcntBitMask(const IsaVersion &Version) {
- return (1 << getLgkmcntBitWidth()) - 1;
+ return (1 << getLgkmcntBitWidth(Version.Major)) - 1;
}
unsigned getWaitcntBitMask(const IsaVersion &Version) {
unsigned VmcntLo = getBitMask(getVmcntBitShiftLo(), getVmcntBitWidthLo());
unsigned Expcnt = getBitMask(getExpcntBitShift(), getExpcntBitWidth());
- unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(), getLgkmcntBitWidth());
+ unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(),
+ getLgkmcntBitWidth(Version.Major));
unsigned Waitcnt = VmcntLo | Expcnt | Lgkmcnt;
if (Version.Major < 9)
return Waitcnt;
@@ -572,7 +585,8 @@ unsigned decodeExpcnt(const IsaVersion &
}
unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt) {
- return unpackBits(Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
+ return unpackBits(Waitcnt, getLgkmcntBitShift(),
+ getLgkmcntBitWidth(Version.Major));
}
void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt,
@@ -608,7 +622,8 @@ unsigned encodeExpcnt(const IsaVersion &
unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
unsigned Lgkmcnt) {
- return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
+ return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(),
+ getLgkmcntBitWidth(Version.Major));
}
unsigned encodeWaitcnt(const IsaVersion &Version,
@@ -800,10 +815,13 @@ bool isSISrcFPOperand(const MCInstrDesc
case AMDGPU::OPERAND_REG_IMM_FP32:
case AMDGPU::OPERAND_REG_IMM_FP64:
case AMDGPU::OPERAND_REG_IMM_FP16:
+ case AMDGPU::OPERAND_REG_IMM_V2FP16:
+ case AMDGPU::OPERAND_REG_IMM_V2INT16:
case AMDGPU::OPERAND_REG_INLINE_C_FP32:
case AMDGPU::OPERAND_REG_INLINE_C_FP64:
case AMDGPU::OPERAND_REG_INLINE_C_FP16:
case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
+ case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
return true;
default:
return false;
@@ -934,6 +952,13 @@ bool isInlinableLiteral16(int16_t Litera
bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi) {
assert(HasInv2Pi);
+ if (isInt<16>(Literal) || isUInt<16>(Literal)) {
+ int16_t Trunc = static_cast<int16_t>(Literal);
+ return AMDGPU::isInlinableLiteral16(Trunc, HasInv2Pi);
+ }
+ if (!(Literal & 0xffff))
+ return AMDGPU::isInlinableLiteral16(Literal >> 16, HasInv2Pi);
+
int16_t Lo16 = static_cast<int16_t>(Literal);
int16_t Hi16 = static_cast<int16_t>(Literal >> 16);
return Lo16 == Hi16 && isInlinableLiteral16(Lo16, HasInv2Pi);
@@ -965,15 +990,19 @@ bool isArgPassedInSGPR(const Argument *A
}
}
+static bool hasSMEMByteOffset(const MCSubtargetInfo &ST) {
+ return isGCN3Encoding(ST) || isGFX10(ST);
+}
+
int64_t getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
- if (isGCN3Encoding(ST))
+ if (hasSMEMByteOffset(ST))
return ByteOffset;
return ByteOffset >> 2;
}
bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
int64_t EncodedOffset = getSMRDEncodedOffset(ST, ByteOffset);
- return isGCN3Encoding(ST) ?
+ return (hasSMEMByteOffset(ST)) ?
isUInt<20>(EncodedOffset) : isUInt<8>(EncodedOffset);
}
Modified: llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h?rev=359224&r1=359223&r2=359224&view=diff
==============================================================================
--- llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h (original)
+++ llvm/trunk/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h Thu Apr 25 11:53:41 2019
@@ -203,7 +203,13 @@ struct MIMGDimInfo {
};
LLVM_READONLY
-const MIMGDimInfo *getMIMGDimInfo(unsigned Dim);
+const MIMGDimInfo *getMIMGDimInfo(unsigned DimEnum);
+
+LLVM_READONLY
+const MIMGDimInfo *getMIMGDimInfoByEncoding(uint8_t DimEnc);
+
+LLVM_READONLY
+const MIMGDimInfo *getMIMGDimInfoByAsmSuffix(StringRef AsmSuffix);
struct MIMGLZMappingInfo {
MIMGBaseOpcode L;
@@ -220,6 +226,17 @@ int getMIMGOpcode(unsigned BaseOpcode, u
LLVM_READONLY
int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels);
+struct MIMGInfo {
+ uint16_t Opcode;
+ uint16_t BaseOpcode;
+ uint8_t MIMGEncoding;
+ uint8_t VDataDwords;
+ uint8_t VAddrDwords;
+};
+
+LLVM_READONLY
+const MIMGInfo *getMIMGInfo(unsigned Opc);
+
LLVM_READONLY
int getMUBUFBaseOpcode(unsigned Opc);
@@ -285,21 +302,30 @@ struct Waitcnt {
unsigned VmCnt = ~0u;
unsigned ExpCnt = ~0u;
unsigned LgkmCnt = ~0u;
+ unsigned VsCnt = ~0u;
Waitcnt() {}
- Waitcnt(unsigned VmCnt, unsigned ExpCnt, unsigned LgkmCnt)
- : VmCnt(VmCnt), ExpCnt(ExpCnt), LgkmCnt(LgkmCnt) {}
+ Waitcnt(unsigned VmCnt, unsigned ExpCnt, unsigned LgkmCnt, unsigned VsCnt)
+ : VmCnt(VmCnt), ExpCnt(ExpCnt), LgkmCnt(LgkmCnt), VsCnt(VsCnt) {}
- static Waitcnt allZero() { return Waitcnt(0, 0, 0); }
+ static Waitcnt allZero(const IsaVersion &Version) {
+ return Waitcnt(0, 0, 0, Version.Major >= 10 ? 0 : ~0u);
+ }
+ static Waitcnt allZeroExceptVsCnt() { return Waitcnt(0, 0, 0, ~0u); }
+
+ bool hasWait() const {
+ return VmCnt != ~0u || ExpCnt != ~0u || LgkmCnt != ~0u || VsCnt != ~0u;
+ }
bool dominates(const Waitcnt &Other) const {
return VmCnt <= Other.VmCnt && ExpCnt <= Other.ExpCnt &&
- LgkmCnt <= Other.LgkmCnt;
+ LgkmCnt <= Other.LgkmCnt && VsCnt <= Other.VsCnt;
}
Waitcnt combined(const Waitcnt &Other) const {
return Waitcnt(std::min(VmCnt, Other.VmCnt), std::min(ExpCnt, Other.ExpCnt),
- std::min(LgkmCnt, Other.LgkmCnt));
+ std::min(LgkmCnt, Other.LgkmCnt),
+ std::min(VsCnt, Other.VsCnt));
}
};
@@ -332,7 +358,8 @@ unsigned decodeLgkmcnt(const IsaVersion
/// \p Vmcnt = \p Waitcnt[3:0] (pre-gfx9 only)
/// \p Vmcnt = \p Waitcnt[3:0] | \p Waitcnt[15:14] (gfx9+ only)
/// \p Expcnt = \p Waitcnt[6:4]
-/// \p Lgkmcnt = \p Waitcnt[11:8]
+/// \p Lgkmcnt = \p Waitcnt[11:8] (pre-gfx10 only)
+/// \p Lgkmcnt = \p Waitcnt[13:8] (gfx10+ only)
void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt,
unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt);
@@ -357,7 +384,8 @@ unsigned encodeLgkmcnt(const IsaVersion
/// Waitcnt[3:0] = \p Vmcnt (pre-gfx9 only)
/// Waitcnt[3:0] = \p Vmcnt[3:0] (gfx9+ only)
/// Waitcnt[6:4] = \p Expcnt
-/// Waitcnt[11:8] = \p Lgkmcnt
+/// Waitcnt[11:8] = \p Lgkmcnt (pre-gfx10 only)
+/// Waitcnt[13:8] = \p Lgkmcnt (gfx10+ only)
/// Waitcnt[15:14] = \p Vmcnt[5:4] (gfx9+ only)
///
/// \returns Waitcnt with encoded \p Vmcnt, \p Expcnt and \p Lgkmcnt for given
@@ -455,6 +483,8 @@ inline unsigned getOperandSize(const MCO
case AMDGPU::OPERAND_REG_INLINE_C_FP16:
case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
+ case AMDGPU::OPERAND_REG_IMM_V2INT16:
+ case AMDGPU::OPERAND_REG_IMM_V2FP16:
return 2;
default:
More information about the llvm-commits mailing list