[llvm] MCExpr-ify AMDGPU PALMetadata (PR #93236)
Janek van Oirschot via llvm-commits
llvm-commits at lists.llvm.org
Fri May 31 06:29:52 PDT 2024
https://github.com/JanekvO updated https://github.com/llvm/llvm-project/pull/93236
>From f3656539152691294233ad20ca27d97b49d571cb Mon Sep 17 00:00:00 2001
From: Janek van Oirschot <janek.vanoirschot at amd.com>
Date: Thu, 23 May 2024 12:34:37 -0700
Subject: [PATCH 1/2] MCExpr-ify AMDGPU PALMetadata
---
llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp | 71 +++--
.../AMDGPU/Utils/AMDGPUDelayedMCExpr.cpp | 64 +++++
.../Target/AMDGPU/Utils/AMDGPUDelayedMCExpr.h | 39 +++
.../Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp | 113 ++++++++
.../Target/AMDGPU/Utils/AMDGPUPALMetadata.h | 24 ++
llvm/lib/Target/AMDGPU/Utils/CMakeLists.txt | 1 +
llvm/test/CodeGen/AMDGPU/amdpal-es.ll | 1 +
llvm/test/CodeGen/AMDGPU/amdpal-gs.ll | 1 +
llvm/test/CodeGen/AMDGPU/amdpal-hs.ll | 1 +
llvm/test/CodeGen/AMDGPU/amdpal-ls.ll | 1 +
llvm/test/CodeGen/AMDGPU/amdpal-vs.ll | 1 +
llvm/unittests/Target/AMDGPU/CMakeLists.txt | 1 +
llvm/unittests/Target/AMDGPU/PALMetadata.cpp | 245 ++++++++++++++++++
13 files changed, 543 insertions(+), 20 deletions(-)
create mode 100644 llvm/lib/Target/AMDGPU/Utils/AMDGPUDelayedMCExpr.cpp
create mode 100644 llvm/lib/Target/AMDGPU/Utils/AMDGPUDelayedMCExpr.h
create mode 100644 llvm/unittests/Target/AMDGPU/PALMetadata.cpp
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index cad4a3430327b..f4028adc84828 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -1194,6 +1194,30 @@ static void EmitPALMetadataCommon(AMDGPUPALMetadata *MD,
getLdsDwGranularity(ST) * sizeof(uint32_t)));
}
+static constexpr std::pair<unsigned, unsigned> getShiftMask(unsigned Value) {
+ unsigned Shift = 0;
+ unsigned Mask = 0;
+
+ Mask = ~Value;
+ for (; !(Mask & 1); Shift++, Mask >>= 1) {
+ }
+
+ return std::make_pair(Shift, Mask);
+}
+
+static const MCExpr *MaskShiftSet(const MCExpr *Val, uint32_t Mask,
+ uint32_t Shift, MCContext &Ctx) {
+ if (Mask) {
+ const MCExpr *MaskExpr = MCConstantExpr::create(Mask, Ctx);
+ Val = MCBinaryExpr::createAnd(Val, MaskExpr, Ctx);
+ }
+ if (Shift) {
+ const MCExpr *ShiftExpr = MCConstantExpr::create(Shift, Ctx);
+ Val = MCBinaryExpr::createShl(Val, ShiftExpr, Ctx);
+ }
+ return Val;
+}
+
// This is the equivalent of EmitProgramInfoSI above, but for when the OS type
// is AMDPAL. It stores each compute/SPI register setting and other PAL
// metadata items into the PALMD::Metadata, combining with any provided by the
@@ -1207,41 +1231,49 @@ void AMDGPUAsmPrinter::EmitPALMetadata(const MachineFunction &MF,
auto &Ctx = MF.getContext();
MD->setEntryPoint(CC, MF.getFunction().getName());
- MD->setNumUsedVgprs(
- CC, getMCExprValue(CurrentProgramInfo.NumVGPRsForWavesPerEU, Ctx));
+ MD->setNumUsedVgprs(CC, CurrentProgramInfo.NumVGPRsForWavesPerEU, Ctx);
// Only set AGPRs for supported devices
const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
if (STM.hasMAIInsts()) {
- MD->setNumUsedAgprs(CC, getMCExprValue(CurrentProgramInfo.NumAccVGPR, Ctx));
+ MD->setNumUsedAgprs(CC, CurrentProgramInfo.NumAccVGPR);
}
- MD->setNumUsedSgprs(
- CC, getMCExprValue(CurrentProgramInfo.NumSGPRsForWavesPerEU, Ctx));
+ MD->setNumUsedSgprs(CC, CurrentProgramInfo.NumSGPRsForWavesPerEU, Ctx);
if (MD->getPALMajorVersion() < 3) {
- MD->setRsrc1(CC, CurrentProgramInfo.getPGMRSrc1(CC, STM));
+ MD->setRsrc1(CC, CurrentProgramInfo.getPGMRSrc1(CC, STM, Ctx), Ctx);
if (AMDGPU::isCompute(CC)) {
- MD->setRsrc2(CC, CurrentProgramInfo.getComputePGMRSrc2());
+ MD->setRsrc2(CC, CurrentProgramInfo.getComputePGMRSrc2(Ctx), Ctx);
} else {
- if (getMCExprValue(CurrentProgramInfo.ScratchBlocks, Ctx) > 0)
- MD->setRsrc2(CC, S_00B84C_SCRATCH_EN(1));
+ const MCExpr *HasScratchBlocks =
+ MCBinaryExpr::createGT(CurrentProgramInfo.ScratchBlocks,
+ MCConstantExpr::create(0, Ctx), Ctx);
+ auto [Shift, Mask] = getShiftMask(C_00B84C_SCRATCH_EN);
+ MD->setRsrc2(CC, MaskShiftSet(HasScratchBlocks, Mask, Shift, Ctx), Ctx);
}
} else {
MD->setHwStage(CC, ".debug_mode", (bool)CurrentProgramInfo.DebugMode);
- MD->setHwStage(CC, ".scratch_en",
- (bool)getMCExprValue(CurrentProgramInfo.ScratchEnable, Ctx));
+ MD->setHwStage(CC, ".scratch_en", msgpack::Type::Boolean,
+ CurrentProgramInfo.ScratchEnable);
EmitPALMetadataCommon(MD, CurrentProgramInfo, CC, STM);
}
// ScratchSize is in bytes, 16 aligned.
MD->setScratchSize(
- CC, alignTo(getMCExprValue(CurrentProgramInfo.ScratchSize, Ctx), 16));
+ CC,
+ AMDGPUVariadicMCExpr::createAlignTo(CurrentProgramInfo.ScratchSize,
+ MCConstantExpr::create(16, Ctx), Ctx),
+ Ctx);
+
if (MF.getFunction().getCallingConv() == CallingConv::AMDGPU_PS) {
unsigned ExtraLDSSize = STM.getGeneration() >= AMDGPUSubtarget::GFX11
? divideCeil(CurrentProgramInfo.LDSBlocks, 2)
: CurrentProgramInfo.LDSBlocks;
if (MD->getPALMajorVersion() < 3) {
- MD->setRsrc2(CC, S_00B02C_EXTRA_LDS_SIZE(ExtraLDSSize));
+ MD->setRsrc2(
+ CC,
+ MCConstantExpr::create(S_00B02C_EXTRA_LDS_SIZE(ExtraLDSSize), Ctx),
+ Ctx);
MD->setSpiPsInputEna(MFI->getPSInputEnable());
MD->setSpiPsInputAddr(MFI->getPSInputAddr());
} else {
@@ -1288,20 +1320,19 @@ void AMDGPUAsmPrinter::emitPALFunctionMetadata(const MachineFunction &MF) {
if (MD->getPALMajorVersion() < 3) {
// Set compute registers
- MD->setRsrc1(CallingConv::AMDGPU_CS,
- CurrentProgramInfo.getPGMRSrc1(CallingConv::AMDGPU_CS, ST));
+ MD->setRsrc1(
+ CallingConv::AMDGPU_CS,
+ CurrentProgramInfo.getPGMRSrc1(CallingConv::AMDGPU_CS, ST, Ctx), Ctx);
MD->setRsrc2(CallingConv::AMDGPU_CS,
- CurrentProgramInfo.getComputePGMRSrc2());
+ CurrentProgramInfo.getComputePGMRSrc2(Ctx), Ctx);
} else {
EmitPALMetadataCommon(MD, CurrentProgramInfo, CallingConv::AMDGPU_CS, ST);
}
// Set optional info
MD->setFunctionLdsSize(FnName, CurrentProgramInfo.LDSSize);
- MD->setFunctionNumUsedVgprs(
- FnName, getMCExprValue(CurrentProgramInfo.NumVGPRsForWavesPerEU, Ctx));
- MD->setFunctionNumUsedSgprs(
- FnName, getMCExprValue(CurrentProgramInfo.NumSGPRsForWavesPerEU, Ctx));
+ MD->setFunctionNumUsedVgprs(FnName, CurrentProgramInfo.NumVGPRsForWavesPerEU);
+ MD->setFunctionNumUsedSgprs(FnName, CurrentProgramInfo.NumSGPRsForWavesPerEU);
}
// This is supposed to be log2(Size)
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUDelayedMCExpr.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUDelayedMCExpr.cpp
new file mode 100644
index 0000000000000..3955f557b9a25
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUDelayedMCExpr.cpp
@@ -0,0 +1,64 @@
+//===- AMDGPUDelayedMCExpr.cpp - Delayed MCExpr resolve ---------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUDelayedMCExpr.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCValue.h"
+
+using namespace llvm;
+
+static msgpack::DocNode getNode(msgpack::DocNode DN, msgpack::Type Type,
+ MCValue Val) {
+ msgpack::Document *Doc = DN.getDocument();
+ switch (Type) {
+ default:
+ return Doc->getEmptyNode();
+ case msgpack::Type::Int:
+ return Doc->getNode(static_cast<int64_t>(Val.getConstant()));
+ case msgpack::Type::UInt:
+ return Doc->getNode(static_cast<uint64_t>(Val.getConstant()));
+ case msgpack::Type::Boolean:
+ return Doc->getNode(static_cast<bool>(Val.getConstant()));
+ }
+}
+
+void DelayedMCExpr::AssignDocNode(msgpack::DocNode &DN, msgpack::Type Type,
+ const MCExpr *Expr) {
+ MCValue Res;
+ if (Expr->evaluateAsRelocatable(Res, nullptr, nullptr)) {
+ if (Res.isAbsolute()) {
+ DN = getNode(DN, Type, Res);
+ return;
+ }
+ }
+
+ DelayedExprs.push_back(DelayedExpr{DN, Type, Expr});
+}
+
+bool DelayedMCExpr::ResolveDelayedExpressions() {
+ bool Success;
+
+ while (!DelayedExprs.empty()) {
+ DelayedExpr DE = DelayedExprs.front();
+ MCValue Res;
+
+ Success = DE.Expr->evaluateAsRelocatable(Res, nullptr, nullptr);
+ Success &= Res.isAbsolute();
+ if (!Success)
+ return false;
+
+ DelayedExprs.pop_front();
+ DE.DN = getNode(DE.DN, DE.Type, Res);
+ }
+
+ return true;
+}
+
+void DelayedMCExpr::clear() { DelayedExprs.clear(); }
+
+bool DelayedMCExpr::empty() { return DelayedExprs.empty(); }
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUDelayedMCExpr.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUDelayedMCExpr.h
new file mode 100644
index 0000000000000..c546660a0d996
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUDelayedMCExpr.h
@@ -0,0 +1,39 @@
+//===- AMDGPUDelayedMCExpr.h - Delayed MCExpr resolve -----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUDELAYEDMCEXPR_H
+#define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUDELAYEDMCEXPR_H
+
+#include "llvm/BinaryFormat/MsgPackDocument.h"
+#include <deque>
+
+namespace llvm {
+class MCExpr;
+
+class DelayedMCExpr {
+ struct DelayedExpr {
+ msgpack::DocNode &DN;
+ msgpack::Type Type;
+ const MCExpr *Expr;
+ DelayedExpr(msgpack::DocNode &DN, msgpack::Type Type, const MCExpr *Expr)
+ : DN(DN), Type(Type), Expr(Expr) {}
+ };
+
+ std::deque<DelayedExpr> DelayedExprs;
+
+public:
+ bool ResolveDelayedExpressions();
+ void AssignDocNode(msgpack::DocNode &DN, msgpack::Type Type,
+ const MCExpr *Expr);
+ void clear();
+ bool empty();
+};
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPUDELAYEDMCEXPR_H
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp
index 0fa67c559cb29..4597dab142470 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.cpp
@@ -20,6 +20,7 @@
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Module.h"
+#include "llvm/MC/MCExpr.h"
#include "llvm/Support/AMDGPUMetadata.h"
#include "llvm/Support/EndianStream.h"
@@ -137,12 +138,22 @@ void AMDGPUPALMetadata::setRsrc1(CallingConv::ID CC, unsigned Val) {
setRegister(getRsrc1Reg(CC), Val);
}
+void AMDGPUPALMetadata::setRsrc1(CallingConv::ID CC, const MCExpr *Val,
+ MCContext &Ctx) {
+ setRegister(getRsrc1Reg(CC), Val, Ctx);
+}
+
// Set the rsrc2 register in the metadata for a particular shader stage.
// In fact this ORs the value into any previous setting of the register.
void AMDGPUPALMetadata::setRsrc2(CallingConv::ID CC, unsigned Val) {
setRegister(getRsrc1Reg(CC) + 1, Val);
}
+void AMDGPUPALMetadata::setRsrc2(CallingConv::ID CC, const MCExpr *Val,
+ MCContext &Ctx) {
+ setRegister(getRsrc1Reg(CC) + 1, Val, Ctx);
+}
+
// Set the SPI_PS_INPUT_ENA register in the metadata.
// In fact this ORs the value into any previous setting of the register.
void AMDGPUPALMetadata::setSpiPsInputEna(unsigned Val) {
@@ -182,6 +193,40 @@ void AMDGPUPALMetadata::setRegister(unsigned Reg, unsigned Val) {
N = N.getDocument()->getNode(Val);
}
+// Set a register in the metadata.
+// In fact this ORs the value into any previous setting of the register.
+void AMDGPUPALMetadata::setRegister(unsigned Reg, const MCExpr *Val,
+ MCContext &Ctx) {
+ if (!isLegacy()) {
+ // In the new MsgPack format, ignore register numbered >= 0x10000000. It
+ // is a PAL ABI pseudo-register in the old non-MsgPack format.
+ if (Reg >= 0x10000000)
+ return;
+ }
+ auto &N = getRegisters()[MsgPackDoc.getNode(Reg)];
+ bool RegSeenInREM = REM.find(Reg) != REM.end();
+
+ if (RegSeenInREM) {
+ Val = MCBinaryExpr::createOr(Val, REM[Reg], Ctx);
+ // This conditional may be redundant most of the time, but
+ // setRegister(unsigned, unsigned) could've been called while RegSeenInREM
+ // is true.
+ if (N.getKind() == msgpack::Type::UInt) {
+ const MCExpr *NExpr = MCConstantExpr::create(N.getUInt(), Ctx);
+ Val = MCBinaryExpr::createOr(Val, NExpr, Ctx);
+ }
+ REM[Reg] = Val;
+ } else if (N.getKind() == msgpack::Type::UInt) {
+ const MCExpr *NExpr = MCConstantExpr::create(N.getUInt(), Ctx);
+ Val = MCBinaryExpr::createOr(Val, NExpr, Ctx);
+ int64_t Unused;
+ if (!Val->evaluateAsAbsolute(Unused))
+ REM[Reg] = Val;
+ (void)Unused;
+ }
+ DelayedExprs.AssignDocNode(N, msgpack::Type::UInt, Val);
+}
+
// Set the entry point name for one shader.
void AMDGPUPALMetadata::setEntryPoint(unsigned CC, StringRef Name) {
if (isLegacy())
@@ -207,11 +252,29 @@ void AMDGPUPALMetadata::setNumUsedVgprs(CallingConv::ID CC, unsigned Val) {
getHwStage(CC)[".vgpr_count"] = MsgPackDoc.getNode(Val);
}
+void AMDGPUPALMetadata::setNumUsedVgprs(CallingConv::ID CC, const MCExpr *Val,
+ MCContext &Ctx) {
+ if (isLegacy()) {
+ // Old non-msgpack format.
+ unsigned NumUsedVgprsKey = getScratchSizeKey(CC) +
+ PALMD::Key::VS_NUM_USED_VGPRS -
+ PALMD::Key::VS_SCRATCH_SIZE;
+ setRegister(NumUsedVgprsKey, Val, Ctx);
+ return;
+ }
+ // Msgpack format.
+ setHwStage(CC, ".vgpr_count", msgpack::Type::UInt, Val);
+}
+
// Set the number of used agprs in the metadata.
void AMDGPUPALMetadata::setNumUsedAgprs(CallingConv::ID CC, unsigned Val) {
getHwStage(CC)[".agpr_count"] = Val;
}
+void AMDGPUPALMetadata::setNumUsedAgprs(unsigned CC, const MCExpr *Val) {
+ setHwStage(CC, ".agpr_count", msgpack::Type::UInt, Val);
+}
+
// Set the number of used sgprs in the metadata. This is an optional advisory
// record for logging etc; wave dispatch actually uses the rsrc1 register for
// the shader stage to determine the number of sgprs to allocate.
@@ -228,6 +291,20 @@ void AMDGPUPALMetadata::setNumUsedSgprs(CallingConv::ID CC, unsigned Val) {
getHwStage(CC)[".sgpr_count"] = MsgPackDoc.getNode(Val);
}
+void AMDGPUPALMetadata::setNumUsedSgprs(unsigned CC, const MCExpr *Val,
+ MCContext &Ctx) {
+ if (isLegacy()) {
+ // Old non-msgpack format.
+ unsigned NumUsedSgprsKey = getScratchSizeKey(CC) +
+ PALMD::Key::VS_NUM_USED_SGPRS -
+ PALMD::Key::VS_SCRATCH_SIZE;
+ setRegister(NumUsedSgprsKey, Val, Ctx);
+ return;
+ }
+ // Msgpack format.
+ setHwStage(CC, ".sgpr_count", msgpack::Type::UInt, Val);
+}
+
// Set the scratch size in the metadata.
void AMDGPUPALMetadata::setScratchSize(CallingConv::ID CC, unsigned Val) {
if (isLegacy()) {
@@ -239,6 +316,17 @@ void AMDGPUPALMetadata::setScratchSize(CallingConv::ID CC, unsigned Val) {
getHwStage(CC)[".scratch_memory_size"] = MsgPackDoc.getNode(Val);
}
+void AMDGPUPALMetadata::setScratchSize(unsigned CC, const MCExpr *Val,
+ MCContext &Ctx) {
+ if (isLegacy()) {
+ // Old non-msgpack format.
+ setRegister(getScratchSizeKey(CC), Val, Ctx);
+ return;
+ }
+ // Msgpack format.
+ setHwStage(CC, ".scratch_memory_size", msgpack::Type::UInt, Val);
+}
+
// Set the stack frame size of a function in the metadata.
void AMDGPUPALMetadata::setFunctionScratchSize(StringRef FnName, unsigned Val) {
auto Node = getShaderFunction(FnName);
@@ -259,6 +347,12 @@ void AMDGPUPALMetadata::setFunctionNumUsedVgprs(StringRef FnName,
Node[".vgpr_count"] = MsgPackDoc.getNode(Val);
}
+void AMDGPUPALMetadata::setFunctionNumUsedVgprs(StringRef FnName,
+ const MCExpr *Val) {
+ auto Node = getShaderFunction(FnName);
+ DelayedExprs.AssignDocNode(Node[".vgpr_count"], msgpack::Type::UInt, Val);
+}
+
// Set the number of used sgprs in the metadata.
void AMDGPUPALMetadata::setFunctionNumUsedSgprs(StringRef FnName,
unsigned Val) {
@@ -266,6 +360,12 @@ void AMDGPUPALMetadata::setFunctionNumUsedSgprs(StringRef FnName,
Node[".sgpr_count"] = MsgPackDoc.getNode(Val);
}
+void AMDGPUPALMetadata::setFunctionNumUsedSgprs(StringRef FnName,
+ const MCExpr *Val) {
+ auto Node = getShaderFunction(FnName);
+ DelayedExprs.AssignDocNode(Node[".sgpr_count"], msgpack::Type::UInt, Val);
+}
+
// Set the hardware register bit in PAL metadata to enable wave32 on the
// shader of the given calling convention.
void AMDGPUPALMetadata::setWave32(unsigned CC) {
@@ -662,6 +762,7 @@ void AMDGPUPALMetadata::toString(std::string &String) {
String.clear();
if (!BlobType)
return;
+ ResolvedAll = DelayedExprs.ResolveDelayedExpressions();
raw_string_ostream Stream(String);
if (isLegacy()) {
if (MsgPackDoc.getRoot().getKind() == msgpack::Type::Nil)
@@ -711,6 +812,7 @@ void AMDGPUPALMetadata::toString(std::string &String) {
// a .note record of the specified AMD type. Returns an empty blob if
// there is no PAL metadata.
void AMDGPUPALMetadata::toBlob(unsigned Type, std::string &Blob) {
+ ResolvedAll = DelayedExprs.ResolveDelayedExpressions();
if (Type == ELF::NT_AMD_PAL_METADATA)
toLegacyBlob(Blob);
else if (Type)
@@ -906,11 +1008,17 @@ void AMDGPUPALMetadata::setLegacy() {
// Erase all PAL metadata.
void AMDGPUPALMetadata::reset() {
MsgPackDoc.clear();
+ REM.clear();
+ DelayedExprs.clear();
Registers = MsgPackDoc.getEmptyNode();
HwStages = MsgPackDoc.getEmptyNode();
ShaderFunctions = MsgPackDoc.getEmptyNode();
}
+bool AMDGPUPALMetadata::resolvedAllMCExpr() {
+ return ResolvedAll && DelayedExprs.empty();
+}
+
unsigned AMDGPUPALMetadata::getPALVersion(unsigned idx) {
assert(idx < 2 &&
"illegal index to PAL version - should be 0 (major) or 1 (minor)");
@@ -942,6 +1050,11 @@ void AMDGPUPALMetadata::setHwStage(unsigned CC, StringRef field, bool Val) {
getHwStage(CC)[field] = Val;
}
+void AMDGPUPALMetadata::setHwStage(unsigned CC, StringRef field,
+ msgpack::Type Type, const MCExpr *Val) {
+ DelayedExprs.AssignDocNode(getHwStage(CC)[field], Type, Val);
+}
+
void AMDGPUPALMetadata::setComputeRegisters(StringRef field, unsigned Val) {
getComputeRegisters()[field] = Val;
}
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h
index 158f766d04854..1dcdd4b985142 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUPALMetadata.h
@@ -13,7 +13,10 @@
#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUPALMETADATA_H
#define LLVM_LIB_TARGET_AMDGPU_AMDGPUPALMETADATA_H
+#include "AMDGPUDelayedMCExpr.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/BinaryFormat/MsgPackDocument.h"
+#include "llvm/MC/MCContext.h"
namespace llvm {
@@ -21,6 +24,10 @@ class Module;
class StringRef;
class AMDGPUPALMetadata {
+public:
+ using RegisterExprMap = DenseMap<unsigned, const MCExpr *>;
+
+private:
unsigned BlobType = 0;
msgpack::Document MsgPackDoc;
msgpack::DocNode Registers;
@@ -32,6 +39,10 @@ class AMDGPUPALMetadata {
msgpack::DocNode ComputeRegisters;
msgpack::DocNode GraphicsRegisters;
+ DelayedMCExpr DelayedExprs;
+ RegisterExprMap REM;
+ bool ResolvedAll = true;
+
public:
// Read the amdgpu.pal.metadata supplied by the frontend, ready for
// per-function modification.
@@ -45,10 +56,12 @@ class AMDGPUPALMetadata {
// Set the rsrc1 register in the metadata for a particular shader stage.
// In fact this ORs the value into any previous setting of the register.
void setRsrc1(unsigned CC, unsigned Val);
+ void setRsrc1(unsigned CC, const MCExpr *Val, MCContext &Ctx);
// Set the rsrc2 register in the metadata for a particular shader stage.
// In fact this ORs the value into any previous setting of the register.
void setRsrc2(unsigned CC, unsigned Val);
+ void setRsrc2(unsigned CC, const MCExpr *Val, MCContext &Ctx);
// Set the SPI_PS_INPUT_ENA register in the metadata.
// In fact this ORs the value into any previous setting of the register.
@@ -64,6 +77,7 @@ class AMDGPUPALMetadata {
// Set a register in the metadata.
// In fact this ORs the value into any previous setting of the register.
void setRegister(unsigned Reg, unsigned Val);
+ void setRegister(unsigned Reg, const MCExpr *Val, MCContext &Ctx);
// Set the entry point name for one shader.
void setEntryPoint(unsigned CC, StringRef Name);
@@ -72,18 +86,22 @@ class AMDGPUPALMetadata {
// record for logging etc; wave dispatch actually uses the rsrc1 register for
// the shader stage to determine the number of vgprs to allocate.
void setNumUsedVgprs(unsigned CC, unsigned Val);
+ void setNumUsedVgprs(unsigned CC, const MCExpr *Val, MCContext &Ctx);
// Set the number of used agprs in the metadata. This is an optional advisory
// record for logging etc;
void setNumUsedAgprs(unsigned CC, unsigned Val);
+ void setNumUsedAgprs(unsigned CC, const MCExpr *Val);
// Set the number of used sgprs in the metadata. This is an optional advisory
// record for logging etc; wave dispatch actually uses the rsrc1 register for
// the shader stage to determine the number of sgprs to allocate.
void setNumUsedSgprs(unsigned CC, unsigned Val);
+ void setNumUsedSgprs(unsigned CC, const MCExpr *Val, MCContext &Ctx);
// Set the scratch size in the metadata.
void setScratchSize(unsigned CC, unsigned Val);
+ void setScratchSize(unsigned CC, const MCExpr *Val, MCContext &Ctx);
// Set the stack frame size of a function in the metadata.
void setFunctionScratchSize(StringRef FnName, unsigned Val);
@@ -97,11 +115,13 @@ class AMDGPUPALMetadata {
// record for logging etc; wave dispatch actually uses the rsrc1 register for
// the shader stage to determine the number of vgprs to allocate.
void setFunctionNumUsedVgprs(StringRef FnName, unsigned Val);
+ void setFunctionNumUsedVgprs(StringRef FnName, const MCExpr *Val);
// Set the number of used sgprs in the metadata. This is an optional advisory
// record for logging etc; wave dispatch actually uses the rsrc1 register for
// the shader stage to determine the number of sgprs to allocate.
void setFunctionNumUsedSgprs(StringRef FnName, unsigned Val);
+ void setFunctionNumUsedSgprs(StringRef FnName, const MCExpr *Val);
// Set the hardware register bit in PAL metadata to enable wave32 on the
// shader of the given calling convention.
@@ -138,6 +158,8 @@ class AMDGPUPALMetadata {
void setHwStage(unsigned CC, StringRef field, unsigned Val);
void setHwStage(unsigned CC, StringRef field, bool Val);
+ void setHwStage(unsigned CC, StringRef field, msgpack::Type Type,
+ const MCExpr *Val);
void setComputeRegisters(StringRef field, unsigned Val);
void setComputeRegisters(StringRef field, bool Val);
@@ -156,6 +178,8 @@ class AMDGPUPALMetadata {
// Erase all PAL metadata.
void reset();
+ bool resolvedAllMCExpr();
+
private:
// Return whether the blob type is legacy PAL metadata.
bool isLegacy() const;
diff --git a/llvm/lib/Target/AMDGPU/Utils/CMakeLists.txt b/llvm/lib/Target/AMDGPU/Utils/CMakeLists.txt
index 2f4ce8eaf1d60..09b8da9f5dd48 100644
--- a/llvm/lib/Target/AMDGPU/Utils/CMakeLists.txt
+++ b/llvm/lib/Target/AMDGPU/Utils/CMakeLists.txt
@@ -1,6 +1,7 @@
add_llvm_component_library(LLVMAMDGPUUtils
AMDGPUAsmUtils.cpp
AMDGPUBaseInfo.cpp
+ AMDGPUDelayedMCExpr.cpp
AMDGPUMemoryUtils.cpp
AMDGPUPALMetadata.cpp
AMDKernelCodeTUtils.cpp
diff --git a/llvm/test/CodeGen/AMDGPU/amdpal-es.ll b/llvm/test/CodeGen/AMDGPU/amdpal-es.ll
index 679e0858819eb..657fe80be04da 100644
--- a/llvm/test/CodeGen/AMDGPU/amdpal-es.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdpal-es.ll
@@ -11,6 +11,7 @@
; GCN-NEXT: .scratch_memory_size: 0
; GCN: .registers:
; GCN-NEXT: '0x2cca (SPI_SHADER_PGM_RSRC1_ES)': 0
+; GCN-NEXT: '0x2ccb (SPI_SHADER_PGM_RSRC2_ES)': 0
; GCN-NEXT: ...
; GCN-NEXT: .end_amdgpu_pal_metadata
define amdgpu_es half @es_amdpal(half %arg0) {
diff --git a/llvm/test/CodeGen/AMDGPU/amdpal-gs.ll b/llvm/test/CodeGen/AMDGPU/amdpal-gs.ll
index 75f7a1dc266d3..9f5eb3927c489 100644
--- a/llvm/test/CodeGen/AMDGPU/amdpal-gs.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdpal-gs.ll
@@ -12,6 +12,7 @@
; GCN-NEXT: .scratch_memory_size: 0
; GCN: .registers:
; GCN-NEXT: '0x2c8a (SPI_SHADER_PGM_RSRC1_GS)': 0
+; GCN-NEXT: '0x2c8b (SPI_SHADER_PGM_RSRC2_GS)': 0
; GCN-NEXT: ...
; GCN-NEXT: .end_amdgpu_pal_metadata
define amdgpu_gs half @gs_amdpal(half %arg0) {
diff --git a/llvm/test/CodeGen/AMDGPU/amdpal-hs.ll b/llvm/test/CodeGen/AMDGPU/amdpal-hs.ll
index c61578a967b62..7eacedf44d09d 100644
--- a/llvm/test/CodeGen/AMDGPU/amdpal-hs.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdpal-hs.ll
@@ -12,6 +12,7 @@
; GCN-NEXT: .scratch_memory_size: 0
; GCN: .registers:
; GCN-NEXT: '0x2d0a (SPI_SHADER_PGM_RSRC1_HS)': 0
+; GCN-NEXT: '0x2d0b (SPI_SHADER_PGM_RSRC2_HS)': 0
; GCN-NEXT: ...
; GCN-NEXT: .end_amdgpu_pal_metadata
define amdgpu_hs half @hs_amdpal(half %arg0) {
diff --git a/llvm/test/CodeGen/AMDGPU/amdpal-ls.ll b/llvm/test/CodeGen/AMDGPU/amdpal-ls.ll
index 8162c824dc2ce..973eb561a9a3d 100644
--- a/llvm/test/CodeGen/AMDGPU/amdpal-ls.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdpal-ls.ll
@@ -11,6 +11,7 @@
; GCN-NEXT: .scratch_memory_size: 0
; GCN: .registers:
; GCN-NEXT: '0x2d4a (SPI_SHADER_PGM_RSRC1_LS)': 0
+; GCN-NEXT: '0x2d4b (SPI_SHADER_PGM_RSRC2_LS)': 0
; GCN-NEXT: ...
; GCN-NEXT: .end_amdgpu_pal_metadata
define amdgpu_ls half @ls_amdpal(half %arg0) {
diff --git a/llvm/test/CodeGen/AMDGPU/amdpal-vs.ll b/llvm/test/CodeGen/AMDGPU/amdpal-vs.ll
index c300ba187740c..e554bb8980cec 100644
--- a/llvm/test/CodeGen/AMDGPU/amdpal-vs.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdpal-vs.ll
@@ -12,6 +12,7 @@
; GCN-NEXT: .scratch_memory_size: 0
; GCN: .registers:
; GCN-NEXT: '0x2c4a (SPI_SHADER_PGM_RSRC1_VS)': 0
+; GCN-NEXT: '0x2c4b (SPI_SHADER_PGM_RSRC2_VS)': 0
; GCN-NEXT: ...
; GCN-NEXT: .end_amdgpu_pal_metadata
define amdgpu_vs half @vs_amdpal(half %arg0) {
diff --git a/llvm/unittests/Target/AMDGPU/CMakeLists.txt b/llvm/unittests/Target/AMDGPU/CMakeLists.txt
index 2d7a47943df69..502aaaa90c07b 100644
--- a/llvm/unittests/Target/AMDGPU/CMakeLists.txt
+++ b/llvm/unittests/Target/AMDGPU/CMakeLists.txt
@@ -20,6 +20,7 @@ add_llvm_target_unittest(AMDGPUTests
AMDGPUUnitTests.cpp
DwarfRegMappings.cpp
ExecMayBeModifiedBeforeAnyUse.cpp
+ PALMetadata.cpp
)
set_property(TARGET AMDGPUTests PROPERTY FOLDER "Tests/UnitTests/TargetTests")
diff --git a/llvm/unittests/Target/AMDGPU/PALMetadata.cpp b/llvm/unittests/Target/AMDGPU/PALMetadata.cpp
new file mode 100644
index 0000000000000..f58a91c5df892
--- /dev/null
+++ b/llvm/unittests/Target/AMDGPU/PALMetadata.cpp
@@ -0,0 +1,245 @@
+//===- llvm/unittests/Target/AMDGPU/PALMetadata.cpp -----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "AMDGPUTargetMachine.h"
+#include "GCNSubtarget.h"
+#include "SIProgramInfo.h"
+#include "Utils/AMDGPUPALMetadata.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/MC/MCTargetOptions.h"
+#include "llvm/MC/TargetRegistry.h"
+#include "llvm/Support/TargetSelect.h"
+#include "llvm/Target/TargetMachine.h"
+#include "gtest/gtest.h"
+
+using namespace llvm;
+
+class PALMetadata : public testing::Test {
+protected:
+ std::unique_ptr<GCNTargetMachine> TM;
+ std::unique_ptr<LLVMContext> Ctx;
+ std::unique_ptr<GCNSubtarget> ST;
+ std::unique_ptr<MachineModuleInfo> MMI;
+ std::unique_ptr<MachineFunction> MF;
+ std::unique_ptr<Module> M;
+ AMDGPUPALMetadata MD;
+
+ static void SetUpTestSuite() {
+ LLVMInitializeAMDGPUTargetInfo();
+ LLVMInitializeAMDGPUTarget();
+ LLVMInitializeAMDGPUTargetMC();
+ }
+
+ PALMetadata() {
+ std::string Triple = "amdgcn--amdpal";
+ std::string CPU = "gfx1010";
+ std::string FS = "";
+
+ std::string Error;
+ const Target *TheTarget = TargetRegistry::lookupTarget(Triple, Error);
+ TargetOptions Options;
+
+ TM.reset(static_cast<GCNTargetMachine *>(TheTarget->createTargetMachine(
+ Triple, CPU, FS, Options, std::nullopt, std::nullopt)));
+
+ Ctx = std::make_unique<LLVMContext>();
+ M = std::make_unique<Module>("Module", *Ctx);
+ M->setDataLayout(TM->createDataLayout());
+ auto *FType = FunctionType::get(Type::getVoidTy(*Ctx), false);
+ auto *F = Function::Create(FType, GlobalValue::ExternalLinkage, "Test", *M);
+ MMI = std::make_unique<MachineModuleInfo>(TM.get());
+
+ ST = std::make_unique<GCNSubtarget>(TM->getTargetTriple(),
+ TM->getTargetCPU(),
+ TM->getTargetFeatureString(), *TM);
+
+ MF = std::make_unique<MachineFunction>(*F, *TM, *ST, 1, *MMI);
+ }
+};
+
+TEST_F(PALMetadata, ResourceRegisterSetORsResolvableUnknown) {
+ std::string yaml = "---\n"
+ "amdpal.pipelines:\n"
+ " - .hardware_stages:\n"
+ " .es:\n"
+ " .entry_point: Test\n"
+ " .scratch_memory_size: 0\n"
+ " .sgpr_count: 0x1\n"
+ " .vgpr_count: 0x1\n"
+ " .registers:\n"
+ " \'0x2c4a (SPI_SHADER_PGM_RSRC1_VS)\': 0x2f0000\n"
+ " \'0x2c4b (SPI_SHADER_PGM_RSRC2_VS)\': 0\n"
+ "...\n";
+
+ MCContext &MCCtx = MF->getContext();
+ auto CC = CallingConv::AMDGPU_VS;
+ MD.setFromString(yaml);
+ MD.setRsrc2(CC, MCConstantExpr::create(42, MCCtx), MCCtx);
+ MCSymbol *Sym = MCCtx.getOrCreateSymbol("Unknown");
+ MD.setRsrc2(CC, MCSymbolRefExpr::create(Sym, MCCtx), MCCtx);
+ EXPECT_FALSE(MD.resolvedAllMCExpr());
+
+ MD.setRsrc2(CC, MCConstantExpr::create(0xff00, MCCtx), MCCtx);
+ Sym->setVariableValue(MCConstantExpr::create(0xffff0000, MCCtx));
+ std::string Output;
+ MD.toString(Output);
+
+ EXPECT_TRUE(MD.resolvedAllMCExpr());
+
+ auto n = Output.find("\'0x2c4b (SPI_SHADER_PGM_RSRC2_VS)\': 0xffffff2a");
+ EXPECT_TRUE(n != std::string::npos);
+}
+
+TEST_F(PALMetadata, ResourceRegisterSetORsResolvableUnknowns) {
+ std::string yaml = "---\n"
+ "amdpal.pipelines:\n"
+ " - .hardware_stages:\n"
+ " .es:\n"
+ " .entry_point: Test\n"
+ " .scratch_memory_size: 0\n"
+ " .sgpr_count: 0x1\n"
+ " .vgpr_count: 0x1\n"
+ " .registers:\n"
+ " \'0x2c4a (SPI_SHADER_PGM_RSRC1_VS)\': 0x2f0000\n"
+ " \'0x2c4b (SPI_SHADER_PGM_RSRC2_VS)\': 0\n"
+ "...\n";
+
+ MCContext &MCCtx = MF->getContext();
+ auto CC = CallingConv::AMDGPU_VS;
+ MD.setFromString(yaml);
+ MCSymbol *SymOne = MCCtx.getOrCreateSymbol("UnknownOne");
+ MD.setRsrc2(CC, MCSymbolRefExpr::create(SymOne, MCCtx), MCCtx);
+
+ MD.setRsrc2(CC, MCConstantExpr::create(42, MCCtx), MCCtx);
+
+ MCSymbol *SymTwo = MCCtx.getOrCreateSymbol("UnknownTwo");
+ MD.setRsrc2(CC, MCSymbolRefExpr::create(SymTwo, MCCtx), MCCtx);
+ EXPECT_FALSE(MD.resolvedAllMCExpr());
+
+ SymOne->setVariableValue(MCConstantExpr::create(0xffff0000, MCCtx));
+ SymTwo->setVariableValue(MCConstantExpr::create(0x0000ff00, MCCtx));
+
+ std::string Output;
+ MD.toString(Output);
+
+ EXPECT_TRUE(MD.resolvedAllMCExpr());
+
+ auto n = Output.find("\'0x2c4b (SPI_SHADER_PGM_RSRC2_VS)\': 0xffffff2a");
+ EXPECT_TRUE(n != std::string::npos);
+}
+
+TEST_F(PALMetadata, ResourceRegisterSetORsPreset) {
+ std::string yaml = "---\n"
+ "amdpal.pipelines:\n"
+ " - .hardware_stages:\n"
+ " .es:\n"
+ " .entry_point: Test\n"
+ " .scratch_memory_size: 0\n"
+ " .sgpr_count: 0x1\n"
+ " .vgpr_count: 0x1\n"
+ " .registers:\n"
+ " \'0x2c4a (SPI_SHADER_PGM_RSRC1_VS)\': 0x2f0000\n"
+ " \'0x2c4b (SPI_SHADER_PGM_RSRC2_VS)\': 0x2a\n"
+ "...\n";
+
+ MCContext &MCCtx = MF->getContext();
+ auto CC = CallingConv::AMDGPU_VS;
+ MD.setFromString(yaml);
+ MCSymbol *Sym = MCCtx.getOrCreateSymbol("Unknown");
+ MD.setRsrc2(CC, MCSymbolRefExpr::create(Sym, MCCtx), MCCtx);
+ MD.setRsrc2(CC, MCConstantExpr::create(0xff00, MCCtx), MCCtx);
+ Sym->setVariableValue(MCConstantExpr::create(0xffff0000, MCCtx));
+ std::string Output;
+ MD.toString(Output);
+
+ auto n = Output.find("\'0x2c4b (SPI_SHADER_PGM_RSRC2_VS)\': 0xffffff2a");
+ EXPECT_TRUE(n != std::string::npos);
+}
+
+TEST_F(PALMetadata, ResourceRegisterSetORs) {
+ std::string yaml = "---\n"
+ "amdpal.pipelines:\n"
+ " - .hardware_stages:\n"
+ " .es:\n"
+ " .entry_point: Test\n"
+ " .scratch_memory_size: 0\n"
+ " .sgpr_count: 0x1\n"
+ " .vgpr_count: 0x1\n"
+ " .registers:\n"
+ " \'0x2c4a (SPI_SHADER_PGM_RSRC1_VS)\': 0x2f0000\n"
+ " \'0x2c4b (SPI_SHADER_PGM_RSRC2_VS)\': 0\n"
+ "...\n";
+
+ MCContext &MCCtx = MF->getContext();
+ auto CC = CallingConv::AMDGPU_VS;
+ MD.setFromString(yaml);
+ MCSymbol *Sym = MCCtx.getOrCreateSymbol("Unknown");
+ MD.setRsrc2(CC, MCSymbolRefExpr::create(Sym, MCCtx), MCCtx);
+ MD.setRsrc2(CC, 42);
+ MD.setRsrc2(CC, MCConstantExpr::create(0xff00, MCCtx), MCCtx);
+ Sym->setVariableValue(MCConstantExpr::create(0xffff0000, MCCtx));
+ std::string Output;
+ MD.toString(Output);
+
+ auto n = Output.find("\'0x2c4b (SPI_SHADER_PGM_RSRC2_VS)\': 0xffffff2a");
+ EXPECT_TRUE(n != std::string::npos);
+}
+
+TEST_F(PALMetadata, ResourceRegisterSetUnresolvedSym) {
+ std::string yaml = "---\n"
+ "amdpal.pipelines:\n"
+ " - .hardware_stages:\n"
+ " .es:\n"
+ " .entry_point: Test\n"
+ " .scratch_memory_size: 0\n"
+ " .sgpr_count: 0x1\n"
+ " .vgpr_count: 0x1\n"
+ " .registers:\n"
+ " \'0x2c4a (SPI_SHADER_PGM_RSRC1_VS)\': 0x2f0000\n"
+ " \'0x2c4b (SPI_SHADER_PGM_RSRC2_VS)\': 0\n"
+ "...\n";
+
+ MCContext &MCCtx = MF->getContext();
+ auto CC = CallingConv::AMDGPU_VS;
+ MD.setFromString(yaml);
+ MCSymbol *Sym = MCCtx.getOrCreateSymbol("Unknown");
+ MD.setRsrc2(CC, MCSymbolRefExpr::create(Sym, MCCtx), MCCtx);
+ MD.setRsrc2(CC, MCConstantExpr::create(0xff00, MCCtx), MCCtx);
+ std::string Output;
+
+ MD.toString(Output);
+ EXPECT_FALSE(MD.resolvedAllMCExpr());
+}
+
+TEST_F(PALMetadata, ResourceRegisterSetNoEmitUnresolved) {
+ std::string yaml = "---\n"
+ "amdpal.pipelines:\n"
+ " - .hardware_stages:\n"
+ " .es:\n"
+ " .entry_point: Test\n"
+ " .scratch_memory_size: 0\n"
+ " .sgpr_count: 0x1\n"
+ " .vgpr_count: 0x1\n"
+ " .registers:\n"
+ " \'0x2c4a (SPI_SHADER_PGM_RSRC1_VS)\': 0x2f0000\n"
+ " \'0x2c4b (SPI_SHADER_PGM_RSRC2_VS)\': 0\n"
+ "...\n";
+
+ MCContext &MCCtx = MF->getContext();
+ auto CC = CallingConv::AMDGPU_VS;
+ MD.setFromString(yaml);
+ MCSymbol *Sym = MCCtx.getOrCreateSymbol("Unknown");
+ MD.setRsrc2(CC, MCSymbolRefExpr::create(Sym, MCCtx), MCCtx);
+ MD.setRsrc2(CC, MCConstantExpr::create(0xff00, MCCtx), MCCtx);
+
+ EXPECT_FALSE(MD.resolvedAllMCExpr());
+}
>From b8234b2890631bec3348df120c9670e5fc5f89d4 Mon Sep 17 00:00:00 2001
From: Janek van Oirschot <janek.vanoirschot at amd.com>
Date: Fri, 31 May 2024 14:21:10 +0100
Subject: [PATCH 2/2] Move the SIDefines helper functions for MCExpr cases to
its own util header, add comments with explanations
---
llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp | 27 +------
.../AMDGPU/Utils/AMDKernelCodeTUtils.cpp | 46 ++---------
llvm/lib/Target/AMDGPU/Utils/SIDefinesUtils.h | 79 +++++++++++++++++++
3 files changed, 86 insertions(+), 66 deletions(-)
create mode 100644 llvm/lib/Target/AMDGPU/Utils/SIDefinesUtils.h
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index f4028adc84828..f0c7a5efb53a8 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -29,6 +29,7 @@
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "Utils/AMDKernelCodeTUtils.h"
+#include "Utils/SIDefinesUtils.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -1194,30 +1195,6 @@ static void EmitPALMetadataCommon(AMDGPUPALMetadata *MD,
getLdsDwGranularity(ST) * sizeof(uint32_t)));
}
-static constexpr std::pair<unsigned, unsigned> getShiftMask(unsigned Value) {
- unsigned Shift = 0;
- unsigned Mask = 0;
-
- Mask = ~Value;
- for (; !(Mask & 1); Shift++, Mask >>= 1) {
- }
-
- return std::make_pair(Shift, Mask);
-}
-
-static const MCExpr *MaskShiftSet(const MCExpr *Val, uint32_t Mask,
- uint32_t Shift, MCContext &Ctx) {
- if (Mask) {
- const MCExpr *MaskExpr = MCConstantExpr::create(Mask, Ctx);
- Val = MCBinaryExpr::createAnd(Val, MaskExpr, Ctx);
- }
- if (Shift) {
- const MCExpr *ShiftExpr = MCConstantExpr::create(Shift, Ctx);
- Val = MCBinaryExpr::createShl(Val, ShiftExpr, Ctx);
- }
- return Val;
-}
-
// This is the equivalent of EmitProgramInfoSI above, but for when the OS type
// is AMDPAL. It stores each compute/SPI register setting and other PAL
// metadata items into the PALMD::Metadata, combining with any provided by the
@@ -1249,7 +1226,7 @@ void AMDGPUAsmPrinter::EmitPALMetadata(const MachineFunction &MF,
MCBinaryExpr::createGT(CurrentProgramInfo.ScratchBlocks,
MCConstantExpr::create(0, Ctx), Ctx);
auto [Shift, Mask] = getShiftMask(C_00B84C_SCRATCH_EN);
- MD->setRsrc2(CC, MaskShiftSet(HasScratchBlocks, Mask, Shift, Ctx), Ctx);
+ MD->setRsrc2(CC, maskShiftSet(HasScratchBlocks, Mask, Shift, Ctx), Ctx);
}
} else {
MD->setHwStage(CC, ".debug_mode", (bool)CurrentProgramInfo.DebugMode);
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.cpp
index eaee1a2a97399..720d5a1853dbb 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDKernelCodeTUtils.cpp
@@ -14,6 +14,7 @@
#include "AMDKernelCodeT.h"
#include "SIDefines.h"
#include "Utils/AMDGPUBaseInfo.h"
+#include "Utils/SIDefinesUtils.h"
#include "llvm/ADT/IndexedMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/MC/MCContext.h"
@@ -220,43 +221,6 @@ static int get_amd_kernel_code_t_FieldIndex(StringRef name) {
return map.lookup(name) - 1; // returns -1 if not found
}
-static constexpr std::pair<unsigned, unsigned> getShiftMask(unsigned Value) {
- unsigned Shift = 0;
- unsigned Mask = 0;
-
- Mask = ~Value;
- for (; !(Mask & 1); Shift++, Mask >>= 1) {
- }
-
- return std::make_pair(Shift, Mask);
-}
-
-static const MCExpr *MaskShiftSet(const MCExpr *Val, uint32_t Mask,
- uint32_t Shift, MCContext &Ctx) {
- if (Mask) {
- const MCExpr *MaskExpr = MCConstantExpr::create(Mask, Ctx);
- Val = MCBinaryExpr::createAnd(Val, MaskExpr, Ctx);
- }
- if (Shift) {
- const MCExpr *ShiftExpr = MCConstantExpr::create(Shift, Ctx);
- Val = MCBinaryExpr::createShl(Val, ShiftExpr, Ctx);
- }
- return Val;
-}
-
-static const MCExpr *MaskShiftGet(const MCExpr *Val, uint32_t Mask,
- uint32_t Shift, MCContext &Ctx) {
- if (Shift) {
- const MCExpr *ShiftExpr = MCConstantExpr::create(Shift, Ctx);
- Val = MCBinaryExpr::createLShr(Val, ShiftExpr, Ctx);
- }
- if (Mask) {
- const MCExpr *MaskExpr = MCConstantExpr::create(Mask, Ctx);
- Val = MCBinaryExpr::createAnd(Val, MaskExpr, Ctx);
- }
- return Val;
-}
-
class PrintField {
public:
template <typename T, T AMDGPUMCKernelCodeT::*ptr,
@@ -305,10 +269,10 @@ static ArrayRef<PrintFx> getPrinterTable() {
const MCExpr *Value; \
if (PGMType == 0) { \
Value = \
- MaskShiftGet(C.compute_pgm_resource1_registers, Mask, Shift, Ctx); \
+ maskShiftGet(C.compute_pgm_resource1_registers, Mask, Shift, Ctx); \
} else { \
Value = \
- MaskShiftGet(C.compute_pgm_resource2_registers, Mask, Shift, Ctx); \
+ maskShiftGet(C.compute_pgm_resource2_registers, Mask, Shift, Ctx); \
} \
int64_t Val; \
if (Value->evaluateAsAbsolute(Val)) \
@@ -392,7 +356,7 @@ static ArrayRef<ParseFx> getParserTable() {
if (!parseExpr(MCParser, Value, Err)) \
return false; \
auto [Shift, Mask] = getShiftMask(Complement); \
- Value = MaskShiftSet(Value, Mask, Shift, Ctx); \
+ Value = maskShiftSet(Value, Mask, Shift, Ctx); \
const MCExpr *Compl = MCConstantExpr::create(Complement, Ctx); \
if (PGMType == 0) { \
C.compute_pgm_resource1_registers = MCBinaryExpr::createAnd( \
@@ -542,7 +506,7 @@ void AMDGPUMCKernelCodeT::EmitKernelCodeT(MCStreamer &OS, MCContext &Ctx) {
const MCExpr *CodeProps = MCConstantExpr::create(code_properties, Ctx);
CodeProps = MCBinaryExpr::createOr(
CodeProps,
- MaskShiftSet(is_dynamic_callstack,
+ maskShiftSet(is_dynamic_callstack,
(1 << AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK_WIDTH) - 1,
AMD_CODE_PROPERTY_IS_DYNAMIC_CALLSTACK_SHIFT, Ctx),
Ctx);
diff --git a/llvm/lib/Target/AMDGPU/Utils/SIDefinesUtils.h b/llvm/lib/Target/AMDGPU/Utils/SIDefinesUtils.h
new file mode 100644
index 0000000000000..6565618ac523e
--- /dev/null
+++ b/llvm/lib/Target/AMDGPU/Utils/SIDefinesUtils.h
@@ -0,0 +1,79 @@
+//===-- SIDefinesUtils.h - SI Helper Functions ------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+/// \file - utility functions for the SIDefines and its common uses.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_SIDEFINESUTILS_H
+#define LLVM_LIB_TARGET_AMDGPU_UTILS_SIDEFINESUTILS_H
+
+#include "llvm/MC/MCExpr.h"
+#include <utility>
+
+namespace llvm {
+class MCContext;
+namespace AMDGPU {
+
+/// Deduce the least significant bit aligned shift and mask values for a binary
+/// complement \p Value (as they're defined in SIDefines.h as C_*), returned as
+/// pair<shift, mask>. That is to say \p Value == ~(mask << shift). \p Value
+/// must contain at least one zero bit: for ~0u the loop never finds a set bit.
+/// For example, given C_00B848_FWD_PROGRESS (i.e., 0x7FFFFFFF) from
+/// SIDefines.h, this will return the pair as (31,1).
+constexpr inline std::pair<unsigned, unsigned> getShiftMask(unsigned Value) {
+ unsigned Shift = 0;
+ unsigned Mask = 0;
+
+ Mask = ~Value;
+ for (; !(Mask & 1); Shift++, Mask >>= 1) { // Strip trailing zero bits.
+ }
+
+ return std::make_pair(Shift, Mask);
+}
+
+/// Provided with the MCExpr * \p Val, uint32 \p Mask and \p Shift, will return
+/// the masked and left shifted, in said order of operations, MCExpr * created
+/// within the MCContext \p Ctx. A zero \p Mask or \p Shift skips that step, so
+/// with both zero \p Val is returned unchanged.
+/// For example, given MCExpr *Val, Mask == 0xf, Shift == 6 the returned MCExpr
+/// * will be the equivalent of (Val & 0xf) << 6
+inline const MCExpr *maskShiftSet(const MCExpr *Val, uint32_t Mask,
+ uint32_t Shift, MCContext &Ctx) {
+ if (Mask) {
+ const MCExpr *MaskExpr = MCConstantExpr::create(Mask, Ctx);
+ Val = MCBinaryExpr::createAnd(Val, MaskExpr, Ctx);
+ }
+ if (Shift) {
+ const MCExpr *ShiftExpr = MCConstantExpr::create(Shift, Ctx);
+ Val = MCBinaryExpr::createShl(Val, ShiftExpr, Ctx);
+ }
+ return Val;
+}
+
+/// Provided with the MCExpr * \p Val, uint32 \p Mask and \p Shift, will return
+/// the right shifted (logical) and masked, in said order of operations, MCExpr
+/// * created within the MCContext \p Ctx. A zero \p Shift or \p Mask skips that
+/// step, so with both zero \p Val is returned unchanged.
+/// For example, given MCExpr *Val, Mask == 0xf, Shift == 6 the returned MCExpr
+/// * will be the equivalent of (Val >> 6) & 0xf
+inline const MCExpr *maskShiftGet(const MCExpr *Val, uint32_t Mask,
+ uint32_t Shift, MCContext &Ctx) {
+ if (Shift) {
+ const MCExpr *ShiftExpr = MCConstantExpr::create(Shift, Ctx);
+ Val = MCBinaryExpr::createLShr(Val, ShiftExpr, Ctx);
+ }
+ if (Mask) {
+ const MCExpr *MaskExpr = MCConstantExpr::create(Mask, Ctx);
+ Val = MCBinaryExpr::createAnd(Val, MaskExpr, Ctx);
+ }
+ return Val;
+}
+
+} // end namespace AMDGPU
+} // end namespace llvm
+
+#endif // LLVM_LIB_TARGET_AMDGPU_UTILS_SIDEFINESUTILS_H
More information about the llvm-commits
mailing list