[llvm] [AMDGPU][MC] Add GFX12 VIMAGE and VSAMPLE encodings (PR #74062)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Dec 1 03:20:49 PST 2023
llvmbot wrote:
@llvm/pr-subscribers-backend-amdgpu
Author: Mirko Brkušanin (mbrkusanin)
---
Patch is 380.41 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/74062.diff
20 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp (+3)
- (modified) llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp (+217-10)
- (modified) llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp (+22-5)
- (modified) llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h (+1)
- (modified) llvm/lib/Target/AMDGPU/GCNSubtarget.h (+3-1)
- (modified) llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp (+94)
- (modified) llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.h (+2)
- (modified) llvm/lib/Target/AMDGPU/MIMGInstructions.td (+503-256)
- (modified) llvm/lib/Target/AMDGPU/SIDefines.h (+50-8)
- (modified) llvm/lib/Target/AMDGPU/SIInstrFormats.td (+62-7)
- (modified) llvm/lib/Target/AMDGPU/SIInstrInfo.h (+26-2)
- (modified) llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp (+3-1)
- (modified) llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h (+1-1)
- (added) llvm/test/MC/AMDGPU/gfx12_asm_vimage.s (+944)
- (added) llvm/test/MC/AMDGPU/gfx12_asm_vimage_alias.s (+25)
- (added) llvm/test/MC/AMDGPU/gfx12_asm_vsample.s (+1216)
- (added) llvm/test/MC/AMDGPU/gfx12_err.s (+43)
- (modified) llvm/test/MC/Disassembler/AMDGPU/decode-err.txt (+8-3)
- (added) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vimage.txt (+944)
- (added) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vsample.txt (+1216)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index 4326f3c3fbe1ae7..3c9f9cfd834facf 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -1001,6 +1001,9 @@ GCNSubtarget::createFillMFMAShadowMutation(const TargetInstrInfo *TII) const {
}
unsigned GCNSubtarget::getNSAThreshold(const MachineFunction &MF) const {
+ if (getGeneration() >= AMDGPUSubtarget::GFX12)
+ return 0; // Not MIMG encoding.
+
if (NSAThreshold.getNumOccurrences() > 0)
return std::max(NSAThreshold.getValue(), 2u);
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index be74c627d213756..a4e582e1ffaa526 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -1460,6 +1460,14 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
return AMDGPU::isGFX11Plus(getSTI());
}
+ bool isGFX12() const {
+ return AMDGPU::isGFX12(getSTI());
+ }
+
+ bool isGFX12Plus() const {
+ return AMDGPU::isGFX12Plus(getSTI());
+ }
+
bool isGFX10_AEncoding() const { return AMDGPU::isGFX10_AEncoding(getSTI()); }
bool isGFX10_BEncoding() const {
@@ -1492,8 +1500,8 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
return getFeatureBits()[AMDGPU::FeaturePartialNSAEncoding];
}
- unsigned getNSAMaxSize() const {
- return AMDGPU::getNSAMaxSize(getSTI());
+ unsigned getNSAMaxSize(bool HasSampler = false) const {
+ return AMDGPU::getNSAMaxSize(getSTI(), HasSampler);
}
unsigned getMaxNumUserSGPRs() const {
@@ -1572,6 +1580,8 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
unsigned getCPolKind(StringRef Id, StringRef Mnemo, bool &Disabling) const;
ParseStatus parseCPol(OperandVector &Operands);
+ ParseStatus parseScope(OperandVector &Operands, int64_t &Scope);
+ ParseStatus parseTH(OperandVector &Operands, int64_t &TH);
ParseStatus parseStringWithPrefix(StringRef Prefix, StringRef &Value,
SMLoc &StringLoc);
@@ -1689,6 +1699,8 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
bool validateWaitCnt(const MCInst &Inst, const OperandVector &Operands);
bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
const SMLoc &IDLoc);
+ bool validateTHAndScopeBits(const MCInst &Inst, const OperandVector &Operands,
+ const unsigned CPol);
bool validateExeczVcczOperands(const OperandVector &Operands);
bool validateTFE(const MCInst &Inst, const OperandVector &Operands);
std::optional<StringRef> validateLdsDirect(const MCInst &Inst);
@@ -3600,13 +3612,16 @@ bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
return true;
}
+constexpr uint64_t MIMGFlags = SIInstrFlags::MIMG | SIInstrFlags::VIMAGE |
+ SIInstrFlags::VSAMPLE;
+
bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst,
const SMLoc &IDLoc) {
const unsigned Opc = Inst.getOpcode();
const MCInstrDesc &Desc = MII.get(Opc);
- if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
+ if ((Desc.TSFlags & MIMGFlags) == 0)
return true;
int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
@@ -3652,7 +3667,7 @@ bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst,
const unsigned Opc = Inst.getOpcode();
const MCInstrDesc &Desc = MII.get(Opc);
- if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus())
+ if ((Desc.TSFlags & MIMGFlags) == 0 || !isGFX10Plus())
return true;
const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
@@ -3660,7 +3675,9 @@ bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst,
const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
- int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
+ int RSrcOpName = Desc.TSFlags & SIInstrFlags::MIMG ? AMDGPU::OpName::srsrc
+ : AMDGPU::OpName::rsrc;
+ int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, RSrcOpName);
int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);
@@ -3668,7 +3685,7 @@ bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst,
assert(SrsrcIdx != -1);
assert(SrsrcIdx > VAddr0Idx);
- bool IsA16 = Inst.getOperand(A16Idx).getImm();
+ bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm());
if (BaseOpcode->BVH) {
if (IsA16 == BaseOpcode->A16)
return true;
@@ -3687,7 +3704,9 @@ bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst,
AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());
if (IsNSA) {
- if (hasPartialNSAEncoding() && ExpectedAddrSize > getNSAMaxSize()) {
+ if (hasPartialNSAEncoding() &&
+ ExpectedAddrSize >
+ getNSAMaxSize(Desc.TSFlags & SIInstrFlags::VSAMPLE)) {
int VAddrLastIdx = SrsrcIdx - 1;
unsigned VAddrLastSize =
AMDGPU::getRegOperandSize(getMRI(), Desc, VAddrLastIdx) / 4;
@@ -3717,7 +3736,7 @@ bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {
const unsigned Opc = Inst.getOpcode();
const MCInstrDesc &Desc = MII.get(Opc);
- if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
+ if ((Desc.TSFlags & MIMGFlags) == 0)
return true;
if (!Desc.mayLoad() || !Desc.mayStore())
return true; // Not atomic
@@ -3755,7 +3774,7 @@ bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
const unsigned Opc = Inst.getOpcode();
const MCInstrDesc &Desc = MII.get(Opc);
- if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
+ if ((Desc.TSFlags & MIMGFlags) == 0)
return true;
const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
@@ -3932,7 +3951,7 @@ bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
const unsigned Opc = Inst.getOpcode();
const MCInstrDesc &Desc = MII.get(Opc);
- if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
+ if ((Desc.TSFlags & MIMGFlags) == 0)
return true;
int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
@@ -4570,6 +4589,9 @@ bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
unsigned CPol = Inst.getOperand(CPolPos).getImm();
+ if (isGFX12Plus())
+ return validateTHAndScopeBits(Inst, Operands, CPol);
+
uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
if (TSFlags & SIInstrFlags::SMRD) {
if (CPol && (isSI() || isCI())) {
@@ -4621,6 +4643,61 @@ bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
return true;
}
+bool AMDGPUAsmParser::validateTHAndScopeBits(const MCInst &Inst,
+ const OperandVector &Operands,
+ const unsigned CPol) {
+ const unsigned TH = CPol & AMDGPU::CPol::TH;
+ const unsigned Scope = CPol & AMDGPU::CPol::SCOPE;
+
+ const unsigned Opcode = Inst.getOpcode();
+ const MCInstrDesc &TID = MII.get(Opcode);
+
+ auto PrintError = [&](StringRef Msg) {
+ SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
+ Error(S, Msg);
+ return false;
+ };
+
+ if ((TID.TSFlags & SIInstrFlags::IsAtomicRet) &&
+ (TID.TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF)) &&
+ (!(TH & AMDGPU::CPol::TH_ATOMIC_RETURN)))
+ return PrintError("instruction must use th:TH_ATOMIC_RETURN");
+
+ if (TH == 0)
+ return true;
+
+ if ((TID.TSFlags & SIInstrFlags::SMRD) &&
+ ((TH == AMDGPU::CPol::TH_NT_RT) ||
+ (TH == AMDGPU::CPol::TH_RT_NT) ||
+ (TH == AMDGPU::CPol::TH_NT_HT)))
+ return PrintError("invalid th value for SMEM instruction");
+
+ if (TH == AMDGPU::CPol::TH_BYPASS) {
+ if ((Scope != AMDGPU::CPol::SCOPE_SYS &&
+ CPol & AMDGPU::CPol::TH_REAL_BYPASS) ||
+ (Scope == AMDGPU::CPol::SCOPE_SYS &&
+ !(CPol & AMDGPU::CPol::TH_REAL_BYPASS)))
+ return PrintError("scope and th combination is not valid");
+ }
+
+ bool IsStore = TID.mayStore();
+ bool IsAtomic =
+ TID.TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet);
+
+ if (IsAtomic) {
+ if (!(CPol & AMDGPU::CPol::TH_TYPE_ATOMIC))
+ return PrintError("invalid th value for atomic instructions");
+ } else if (IsStore) {
+ if (!(CPol & AMDGPU::CPol::TH_TYPE_STORE))
+ return PrintError("invalid th value for store instructions");
+ } else {
+ if (!(CPol & AMDGPU::CPol::TH_TYPE_LOAD))
+ return PrintError("invalid th value for load instructions");
+ }
+
+ return true;
+}
+
bool AMDGPUAsmParser::validateExeczVcczOperands(const OperandVector &Operands) {
if (!isGFX11Plus())
return true;
@@ -6094,6 +6171,47 @@ unsigned AMDGPUAsmParser::getCPolKind(StringRef Id, StringRef Mnemo,
}
ParseStatus AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
+ if (isGFX12Plus()) {
+ SMLoc StringLoc = getLoc();
+
+ int64_t CPolVal = 0;
+ ParseStatus ResTH = ParseStatus::NoMatch;
+ ParseStatus ResScope = ParseStatus::NoMatch;
+
+ for (;;) {
+ if (ResTH.isNoMatch()) {
+ int64_t TH;
+ ResTH = parseTH(Operands, TH);
+ if (ResTH.isFailure())
+ return ResTH;
+ if (ResTH.isSuccess()) {
+ CPolVal |= TH;
+ continue;
+ }
+ }
+
+ if (ResScope.isNoMatch()) {
+ int64_t Scope;
+ ResScope = parseScope(Operands, Scope);
+ if (ResScope.isFailure())
+ return ResScope;
+ if (ResScope.isSuccess()) {
+ CPolVal |= Scope;
+ continue;
+ }
+ }
+
+ break;
+ }
+
+ if (ResTH.isNoMatch() && ResScope.isNoMatch())
+ return ParseStatus::NoMatch;
+
+ Operands.push_back(AMDGPUOperand::CreateImm(this, CPolVal, StringLoc,
+ AMDGPUOperand::ImmTyCPol));
+ return ParseStatus::Success;
+ }
+
StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
SMLoc OpLoc = getLoc();
unsigned Enabled = 0, Seen = 0;
@@ -6129,6 +6247,95 @@ ParseStatus AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
return ParseStatus::Success;
}
+ParseStatus AMDGPUAsmParser::parseScope(OperandVector &Operands,
+ int64_t &Scope) {
+ Scope = AMDGPU::CPol::SCOPE_CU; // default
+
+ StringRef Value;
+ SMLoc StringLoc;
+ ParseStatus Res;
+
+ Res = parseStringWithPrefix("scope", Value, StringLoc);
+ if (!Res.isSuccess())
+ return Res;
+
+ Scope = StringSwitch<int64_t>(Value)
+ .Case("SCOPE_CU", AMDGPU::CPol::SCOPE_CU)
+ .Case("SCOPE_SE", AMDGPU::CPol::SCOPE_SE)
+ .Case("SCOPE_DEV", AMDGPU::CPol::SCOPE_DEV)
+ .Case("SCOPE_SYS", AMDGPU::CPol::SCOPE_SYS)
+ .Default(0xffffffff);
+
+ if (Scope == 0xffffffff)
+ return Error(StringLoc, "invalid scope value");
+
+ return ParseStatus::Success;
+}
+
+ParseStatus AMDGPUAsmParser::parseTH(OperandVector &Operands, int64_t &TH) {
+ TH = AMDGPU::CPol::TH_RT; // default
+
+ StringRef Value;
+ SMLoc StringLoc;
+ ParseStatus Res = parseStringWithPrefix("th", Value, StringLoc);
+ if (!Res.isSuccess())
+ return Res;
+
+ if (Value == "TH_DEFAULT")
+ TH = AMDGPU::CPol::TH_RT;
+ else if (Value == "TH_STORE_LU" || Value == "TH_LOAD_RT_WB" ||
+ Value == "TH_LOAD_NT_WB") {
+ return Error(StringLoc, "invalid th value");
+ } else if (Value.startswith("TH_ATOMIC_")) {
+ Value = Value.drop_front(10);
+ TH = AMDGPU::CPol::TH_TYPE_ATOMIC;
+ } else if (Value.startswith("TH_LOAD_")) {
+ Value = Value.drop_front(8);
+ TH = AMDGPU::CPol::TH_TYPE_LOAD;
+ } else if (Value.startswith("TH_STORE_")) {
+ Value = Value.drop_front(9);
+ TH = AMDGPU::CPol::TH_TYPE_STORE;
+ } else {
+ return Error(StringLoc, "invalid th value");
+ }
+
+ if (Value == "BYPASS")
+ TH |= AMDGPU::CPol::TH_REAL_BYPASS;
+
+ if (TH != 0) {
+ if (TH & AMDGPU::CPol::TH_TYPE_ATOMIC)
+ TH |= StringSwitch<int64_t>(Value)
+ .Case("RETURN", AMDGPU::CPol::TH_ATOMIC_RETURN)
+ .Case("RT", AMDGPU::CPol::TH_RT)
+ .Case("RT_RETURN", AMDGPU::CPol::TH_ATOMIC_RETURN)
+ .Case("NT", AMDGPU::CPol::TH_ATOMIC_NT)
+ .Case("NT_RETURN", AMDGPU::CPol::TH_ATOMIC_NT |
+ AMDGPU::CPol::TH_ATOMIC_RETURN)
+ .Case("CASCADE_RT", AMDGPU::CPol::TH_ATOMIC_CASCADE)
+ .Case("CASCADE_NT", AMDGPU::CPol::TH_ATOMIC_CASCADE |
+ AMDGPU::CPol::TH_ATOMIC_NT)
+ .Default(0xffffffff);
+ else
+ TH |= StringSwitch<int64_t>(Value)
+ .Case("RT", AMDGPU::CPol::TH_RT)
+ .Case("NT", AMDGPU::CPol::TH_NT)
+ .Case("HT", AMDGPU::CPol::TH_HT)
+ .Case("LU", AMDGPU::CPol::TH_LU)
+ .Case("RT_WB", AMDGPU::CPol::TH_RT_WB)
+ .Case("NT_RT", AMDGPU::CPol::TH_NT_RT)
+ .Case("RT_NT", AMDGPU::CPol::TH_RT_NT)
+ .Case("NT_HT", AMDGPU::CPol::TH_NT_HT)
+ .Case("NT_WB", AMDGPU::CPol::TH_NT_WB)
+ .Case("BYPASS", AMDGPU::CPol::TH_BYPASS)
+ .Default(0xffffffff);
+ }
+
+ if (TH == 0xffffffff)
+ return Error(StringLoc, "invalid th value");
+
+ return ParseStatus::Success;
+}
+
static void addOptionalImmOperand(
MCInst& Inst, const OperandVector& Operands,
AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index 1b301ee5f49b216..6f7dd8cd947f5ef 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -507,6 +507,10 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
Res = tryDecodeInst(DecoderTableGFX1196, MI, DecW, Address, CS);
if (Res)
break;
+
+ Res = tryDecodeInst(DecoderTableGFX1296, MI, DecW, Address, CS);
+ if (Res)
+ break;
}
// Reinitialize Bytes
Bytes = Bytes_.slice(0, MaxInstBytesNum);
@@ -714,6 +718,10 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
Res = convertMIMGInst(MI);
}
+ if (Res && (MCII->get(MI.getOpcode()).TSFlags &
+ (SIInstrFlags::VIMAGE | SIInstrFlags::VSAMPLE)))
+ Res = convertMIMGInst(MI);
+
if (Res && (MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::EXP))
Res = convertEXPInst(MI);
@@ -914,6 +922,7 @@ DecodeStatus AMDGPUDisassembler::convertVOP3DPPInst(MCInst &MI) const {
// VADDR size. Consequently, decoded instructions always show address as if it
// has 1 dword, which could be not really so.
DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
+ auto TSFlags = MCII->get(MI.getOpcode()).TSFlags;
int VDstIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
AMDGPU::OpName::vdst);
@@ -922,8 +931,9 @@ DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
AMDGPU::OpName::vdata);
int VAddr0Idx =
AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
- int RsrcIdx =
- AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::srsrc);
+ int RsrcOpName = TSFlags & SIInstrFlags::MIMG ? AMDGPU::OpName::srsrc
+ : AMDGPU::OpName::rsrc;
+ int RsrcIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), RsrcOpName);
int DMaskIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
AMDGPU::OpName::dmask);
@@ -944,7 +954,8 @@ DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
}
bool IsAtomic = (VDstIdx != -1);
- bool IsGather4 = MCII->get(MI.getOpcode()).TSFlags & SIInstrFlags::Gather4;
+ bool IsGather4 = TSFlags & SIInstrFlags::Gather4;
+ bool IsVSample = TSFlags & SIInstrFlags::VSAMPLE;
bool IsNSA = false;
bool IsPartialNSA = false;
unsigned AddrSize = Info->VAddrDwords;
@@ -961,10 +972,13 @@ DecodeStatus AMDGPUDisassembler::convertMIMGInst(MCInst &MI) const {
AddrSize =
AMDGPU::getAddrSizeMIMGOp(BaseOpcode, Dim, IsA16, AMDGPU::hasG16(STI));
+ // VSAMPLE insts that do not use vaddr3 behave the same as NSA forms.
+ // VIMAGE insts other than BVH never use vaddr4.
IsNSA = Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA ||
- Info->MIMGEncoding == AMDGPU::MIMGEncGfx11NSA;
+ Info->MIMGEncoding == AMDGPU::MIMGEncGfx11NSA ||
+ Info->MIMGEncoding == AMDGPU::MIMGEncGfx12;
if (!IsNSA) {
- if (AddrSize > 12)
+ if (!IsVSample && AddrSize > 12)
AddrSize = 16;
} else {
if (AddrSize > Info->VAddrDwords) {
@@ -1713,6 +1727,9 @@ bool AMDGPUDisassembler::isGFX11Plus() const {
return AMDGPU::isGFX11Plus(STI);
}
+bool AMDGPUDisassembler::isGFX12Plus() const {
+ return AMDGPU::isGFX12Plus(STI);
+}
bool AMDGPUDisassembler::hasArchitectedFlatScratch() const {
return STI.hasFeature(AMDGPU::FeatureArchitectedFlatScratch);
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
index 91b73b593d61617..7e233dcb54ea160 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.h
@@ -264,6 +264,7 @@ class AMDGPUDisassembler : public MCDisassembler {
bool isGFX10Plus() const;
bool isGFX11() const;
bool isGFX11Plus() const;
+ bool isGFX12Plus() const;
bool hasArchitectedFlatScratch() const;
bool hasKernargPreload() const;
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index 0074d1632161d18..94b9e49b765a6fd 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -967,7 +967,9 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool hasPartialNSAEncoding() const { return HasPartialNSAEncoding; }
- unsigned getNSAMaxSize() const { return AMDGPU::getNSAMaxSize(*this); }
+ unsigned getNSAMaxSize(bool HasSampler = false) const {
+ return AMDGPU::getNSAMaxSize(*this, HasSampler);
+ }
bool hasGFX10_AEncoding() const {
return GFX10_AEncoding;
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
index 9459ee088dddee5..7ba015cdea2413d 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
@@ -168,6 +168,17 @@ void AMDGPUInstPrinter::printSMRDLiteralOffset(const MCInst *MI, unsigned OpNo,
void AMDGPUInstPrinter::printCPol(const MCInst *MI, unsigned OpNo,
const MCSubtargetInfo &STI, raw_ostream &O) {
auto Imm = MI->getOperand(OpNo).getImm();
+
+ if (AMDGPU::isGFX12Plus(STI)) {
+ const int64_t TH = Imm & CPol::TH;
+ const int64_t Scope = Imm & CPol::SCOPE;
+
+ printTH(MI, TH, Scope, O);
+ printScope(Scope, O);
+
+ return;
+ }
+
if (Imm & CPol::GLC)
O << ((AMDGPU::isGFX940(STI) &&
!(MII.get(MI->getOpcode()).TSFlags & SIInstrFlags::SMRD)) ? " sc0"
@@ -182,6 +193,89 @@ void AMDGPUInstPrinter::printCPol(const MCInst *MI, unsigned OpNo,
O << " /* unexpected cache policy bit */";
}
+void AMDGPUInstPrinter::printTH(const MCInst *MI, int64_t TH, int64_t Scope,
+ raw_ostream &O) {
+ // For th = 0 do not print this field
+ if (TH == 0)
+ return;
+
+ const unsigned Opcode = MI->getOpcode();
+ const MCInstrDesc &TID = MII.get(Opcode);
+ bool IsStore = TID.mayStore();
+ bool IsAtomic =
+ TID.TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet);
+
+ O << " th:";
+
+ if (IsAtomic) {
+ O << "TH_ATOMIC_";
+ if (TH & AMDGPU::CPol::TH_ATOMIC_CASCADE) {
+ if (Scope >= AMDGPU::CPol::SCOPE_DEV)
+ O << "CASCADE" << (TH...
[truncated]
``````````
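For readers skimming the diff: on GFX12 the legacy `glc`/`slc`/`dlc` cache-policy bits give way to explicit `th:` (temporal hint) and `scope:` modifiers, parsed by the new `parseTH`/`parseScope` helpers and accepted in either order by `parseCPol`. A minimal sketch of the resulting syntax follows; the modifier names come from the parser code above, but the register operands and `dmask`/`dim` values are illustrative placeholders, not copied from the PR's test files:

```
// Sampled load with a non-temporal hint at system scope.
image_sample v[0:3], [v4, v5], s[8:15], s[12:15] dmask:0xf dim:SQ_RSRC_IMG_2D th:TH_LOAD_NT scope:SCOPE_SYS

// th: and scope: may appear in either order; when both are omitted, the
// defaults (th:TH_RT, scope:SCOPE_CU) apply and printCPol prints nothing.
image_load v[0:3], [v4, v5], s[8:15] dmask:0xf dim:SQ_RSRC_IMG_2D scope:SCOPE_DEV th:TH_LOAD_HT
```

Printing is the inverse: the added `printTH`/`printCPol` logic reconstructs the modifier names from the CPol immediate, so the text round-trips through the assembler and disassembler.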
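`validateTHAndScopeBits` then rejects hints that do not match the instruction class. Hedged examples of lines that should draw a diagnostic, in the spirit of the added gfx12_err.s (error strings quoted from the validator above; the instructions and operands themselves are illustrative):

```
// "invalid th value for store instructions": stores need a TH_STORE_* hint.
image_store v[0:3], [v4, v5], s[8:15] dmask:0xf dim:SQ_RSRC_IMG_2D th:TH_LOAD_NT

// "scope and th combination is not valid": BYPASS is only meaningful at
// system scope, so it must be paired with scope:SCOPE_SYS.
image_load v[0:3], [v4, v5], s[8:15] dmask:0xf dim:SQ_RSRC_IMG_2D th:TH_LOAD_BYPASS scope:SCOPE_CU

// "instruction must use th:TH_ATOMIC_RETURN": a FLAT/MUBUF atomic that
// returns the previous value must say so in its temporal hint.
flat_atomic_add_u32 v0, v[2:3], v4 th:TH_ATOMIC_NT scope:SCOPE_DEV
```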
https://github.com/llvm/llvm-project/pull/74062