[PATCH] R600: Parse OpenCL metadata
Tom Stellard
tom at stellard.net
Mon Jun 23 08:52:19 PDT 2014
On Sun, Jun 22, 2014 at 06:03:13AM +0000, Matt Arsenault wrote:
> http://reviews.llvm.org/D4243
>
> Files:
> lib/Target/R600/AMDGPUAsmPrinter.cpp
> lib/Target/R600/AMDGPUMachineFunction.cpp
> lib/Target/R600/AMDGPUMachineFunction.h
> lib/Target/R600/SIDefines.h
> test/CodeGen/R600/reqd_work_group_size.ll
> Index: lib/Target/R600/AMDGPUAsmPrinter.cpp
> ===================================================================
> --- lib/Target/R600/AMDGPUAsmPrinter.cpp
> +++ lib/Target/R600/AMDGPUAsmPrinter.cpp
> @@ -78,6 +78,7 @@
> EmitFunctionBody();
>
> if (isVerbose()) {
> + const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
> const MCSectionELF *CommentSection
> = Context.getELFSection(".AMDGPU.csdata",
> ELF::SHT_PROGBITS, 0,
> @@ -92,6 +93,15 @@
> false);
> OutStreamer.emitRawComment(" NumVgprs: " + Twine(KernelInfo.NumVGPR),
> false);
> +
> + if (MFI->hasReqdWorkGroupSize()) {
> + OutStreamer.emitRawComment(" NumThreadX: " +
> + Twine(MFI->getReqdWorkGroupSize(0)), false);
> + OutStreamer.emitRawComment(" NumThreadY: " +
> + Twine(MFI->getReqdWorkGroupSize(1)), false);
> + OutStreamer.emitRawComment(" NumThreadZ: " +
> + Twine(MFI->getReqdWorkGroupSize(2)), false);
> + }
> } else {
> R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
> OutStreamer.emitRawComment(
> @@ -288,7 +298,7 @@
> const SIProgramInfo &KernelInfo) {
> const AMDGPUSubtarget &STM = TM.getSubtarget<AMDGPUSubtarget>();
>
> - SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
> + const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
> unsigned RsrcReg;
> switch (MFI->getShaderType()) {
> default: // Fall through
> @@ -316,7 +326,22 @@
> if (MFI->getShaderType() == ShaderType::COMPUTE) {
> OutStreamer.EmitIntValue(R_00B84C_COMPUTE_PGM_RSRC2, 4);
> OutStreamer.EmitIntValue(S_00B84C_LDS_SIZE(LDSBlocks), 4);
> +
> + if (MFI->hasReqdWorkGroupSize()) {
> + OutStreamer.EmitIntValue(R_00B81C_COMPUTE_NUM_THREAD_X, 4);
> + OutStreamer.EmitIntValue(
> + S_00B81C_NUM_THREAD_FULL(MFI->getReqdWorkGroupSize(0)), 4);
> +
> + OutStreamer.EmitIntValue(R_00B820_COMPUTE_NUM_THREAD_Y, 4);
> + OutStreamer.EmitIntValue(
> + S_00B820_NUM_THREAD_FULL(MFI->getReqdWorkGroupSize(1)), 4);
> +
> + OutStreamer.EmitIntValue(R_00B824_COMPUTE_NUM_THREAD_Z, 4);
> + OutStreamer.EmitIntValue(
> + S_00B824_NUM_THREAD_FULL(MFI->getReqdWorkGroupSize(2)), 4);
> + }
> }
> +
> if (MFI->getShaderType() == ShaderType::PIXEL) {
> OutStreamer.EmitIntValue(R_00B02C_SPI_SHADER_PGM_RSRC2_PS, 4);
> OutStreamer.EmitIntValue(S_00B02C_EXTRA_LDS_SIZE(LDSBlocks), 4);
> Index: lib/Target/R600/AMDGPUMachineFunction.cpp
> ===================================================================
> --- lib/Target/R600/AMDGPUMachineFunction.cpp
> +++ lib/Target/R600/AMDGPUMachineFunction.cpp
> @@ -1,7 +1,11 @@
> #include "AMDGPUMachineFunction.h"
> #include "AMDGPU.h"
> +#include "llvm/CodeGen/MachineModuleInfo.h"
> #include "llvm/IR/Attributes.h"
> +#include "llvm/IR/Constants.h"
> #include "llvm/IR/Function.h"
> +#include "llvm/IR/Module.h"
> +
> using namespace llvm;
>
> static const char *const ShaderTypeAttribute = "ShaderType";
> @@ -12,8 +16,14 @@
> AMDGPUMachineFunction::AMDGPUMachineFunction(const MachineFunction &MF) :
> MachineFunctionInfo(),
> ShaderType(ShaderType::COMPUTE),
> + IsKernel(false),
> + ReqdWorkGroupSize{0},
> + WorkGroupSizeHint{0},
> + LocalMemoryObjects(),
> LDSSize(0) {
> - AttributeSet Set = MF.getFunction()->getAttributes();
> + const Function *F = MF.getFunction();
> +
> + AttributeSet Set = F->getAttributes();
> Attribute A = Set.getAttribute(AttributeSet::FunctionIndex,
> ShaderTypeAttribute);
>
> @@ -22,4 +32,68 @@
> if (Str.getAsInteger(0, ShaderType))
> llvm_unreachable("Can't parse shader type!");
> }
> +
> + const MachineModuleInfo &MMI = MF.getMMI();
> + const Module *M = MMI.getModule();
> +
> + const NamedMDNode *Kernels = M->getNamedMetadata("opencl.kernels");
> + if (!Kernels)
> + return;
> +
> + for (const MDNode *K : Kernels->operands()) {
> + unsigned N = K->getNumOperands();
> + if (N == 0)
> + continue;
> +
> + // We expect the first operand to be the function.
> + const Value *First = K->getOperand(0);
> + if (First == F) {
> + IsKernel = true;
> + findOpenCLKernelAttributes(K);
> + break;
> + }
> + }
> +}
> +
> +static void parseWorkgroupSize(uint32_t Size[3], const MDNode *Node) {
> + unsigned N = Node->getNumOperands();
> +
> + for (unsigned I = 0; I < std::min(N - 1, 3u); ++I) {
> + const ConstantInt *C = dyn_cast<ConstantInt>(Node->getOperand(I + 1));
> + if (!C) {
> + // This is malformed, just give up.
> + Size[0] = 0;
> + Size[1] = 0;
> + Size[2] = 0;
> + return;
> + }
> +
> + Size[I] = C->getZExtValue();
> + }
> +}
> +
> +void AMDGPUMachineFunction::findOpenCLKernelAttributes(const MDNode *Node) {
> + for (unsigned I = 1, E = Node->getNumOperands(); I != E; ++I) {
> + const MDNode *Op = dyn_cast<MDNode>(Node->getOperand(I));
> + if (!Op)
> + continue;
> +
> + unsigned N = Op->getNumOperands();
> + if (N == 0)
> + continue;
> +
> + const MDString *NameNode = dyn_cast<MDString>(Op->getOperand(0));
> + if (!NameNode)
> + continue;
> +
> + StringRef Name = NameNode->getName();
> +
> + if (N == 4 && Name == "reqd_work_group_size")
> + parseWorkgroupSize(ReqdWorkGroupSize, Op);
> + else if (N == 4 && Name == "work_group_size_hint")
> + parseWorkgroupSize(WorkGroupSizeHint, Op);
> + else if (Name == "vec_type_hint") {
> + // TODO: Do we care about this at all?
> + }
> + }
Could you move the attribute parsing code to a util file so that it can also
be used by the AMDGPUPromoteAllocas pass?
-Tom
> }
> Index: lib/Target/R600/AMDGPUMachineFunction.h
> ===================================================================
> --- lib/Target/R600/AMDGPUMachineFunction.h
> +++ lib/Target/R600/AMDGPUMachineFunction.h
> @@ -19,8 +19,13 @@
> namespace llvm {
>
> class AMDGPUMachineFunction : public MachineFunctionInfo {
> - virtual void anchor();
> unsigned ShaderType;
> + bool IsKernel;
> + uint32_t ReqdWorkGroupSize[3];
> + uint32_t WorkGroupSizeHint[3];
> +
> + virtual void anchor();
> + void findOpenCLKernelAttributes(const MDNode *);
>
> public:
> AMDGPUMachineFunction(const MachineFunction &MF);
> @@ -33,6 +38,26 @@
> unsigned getShaderType() const {
> return ShaderType;
> }
> +
> + bool isKernel() const {
> + return IsKernel;
> + }
> +
> + uint32_t getReqdWorkGroupSize(unsigned I) const {
> + return ReqdWorkGroupSize[I];
> + }
> +
> + uint32_t getWorkGroupSizeHint(unsigned I) const {
> + return WorkGroupSizeHint[I];
> + }
> +
> + bool hasReqdWorkGroupSize() const {
> + return ReqdWorkGroupSize[0] != 0;
> + }
> +
> + uint32_t getReqdWorkGroupSizeFlat() const {
> + return ReqdWorkGroupSize[0] * ReqdWorkGroupSize[1] * ReqdWorkGroupSize[2];
> + }
> };
>
> }
> Index: lib/Target/R600/SIDefines.h
> ===================================================================
> --- lib/Target/R600/SIDefines.h
> +++ lib/Target/R600/SIDefines.h
> @@ -35,4 +35,33 @@
> #define S_00B84C_LDS_SIZE(x) (((x) & 0x1FF) << 15)
> #define R_0286CC_SPI_PS_INPUT_ENA 0x0286CC
>
> +
> +#define R_00B804_COMPUTE_DIM_X 0x00B804
> +#define R_00B808_COMPUTE_DIM_Y 0x00B808
> +#define R_00B80C_COMPUTE_DIM_Z 0x00B80C
> +#define R_00B810_COMPUTE_START_X 0x00B810
> +#define R_00B814_COMPUTE_START_Y 0x00B814
> +#define R_00B818_COMPUTE_START_Z 0x00B818
> +#define R_00B81C_COMPUTE_NUM_THREAD_X 0x00B81C
> +#define S_00B81C_NUM_THREAD_FULL(x) (((x) & 0xFFFF) << 0)
> +#define G_00B81C_NUM_THREAD_FULL(x) (((x) >> 0) & 0xFFFF)
> +#define C_00B81C_NUM_THREAD_FULL 0xFFFF0000
> +#define S_00B81C_NUM_THREAD_PARTIAL(x) (((x) & 0xFFFF) << 16)
> +#define G_00B81C_NUM_THREAD_PARTIAL(x) (((x) >> 16) & 0xFFFF)
> +#define C_00B81C_NUM_THREAD_PARTIAL 0x0000FFFF
> +#define R_00B820_COMPUTE_NUM_THREAD_Y 0x00B820
> +#define S_00B820_NUM_THREAD_FULL(x) (((x) & 0xFFFF) << 0)
> +#define G_00B820_NUM_THREAD_FULL(x) (((x) >> 0) & 0xFFFF)
> +#define C_00B820_NUM_THREAD_FULL 0xFFFF0000
> +#define S_00B820_NUM_THREAD_PARTIAL(x) (((x) & 0xFFFF) << 16)
> +#define G_00B820_NUM_THREAD_PARTIAL(x) (((x) >> 16) & 0xFFFF)
> +#define C_00B820_NUM_THREAD_PARTIAL 0x0000FFFF
> +#define R_00B824_COMPUTE_NUM_THREAD_Z 0x00B824
> +#define S_00B824_NUM_THREAD_FULL(x) (((x) & 0xFFFF) << 0)
> +#define G_00B824_NUM_THREAD_FULL(x) (((x) >> 0) & 0xFFFF)
> +#define C_00B824_NUM_THREAD_FULL 0xFFFF0000
> +#define S_00B824_NUM_THREAD_PARTIAL(x) (((x) & 0xFFFF) << 16)
> +#define G_00B824_NUM_THREAD_PARTIAL(x) (((x) >> 16) & 0xFFFF)
> +#define C_00B824_NUM_THREAD_PARTIAL 0x0000FFFF
> +
> #endif // SIDEFINES_H_
> Index: test/CodeGen/R600/reqd_work_group_size.ll
> ===================================================================
> --- /dev/null
> +++ test/CodeGen/R600/reqd_work_group_size.ll
> @@ -0,0 +1,17 @@
> +; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s
> +
> +; SI: NumThreadX: 32
> +; SI: NumThreadY: 2
> +; SI: NumThreadZ: 4
> +define void @has_reqd_work_group_size(i32 addrspace(1)* nocapture %out) #0 {
> +entry:
> + store i32 0, i32 addrspace(1)* %out, align 4
> + ret void
> +}
> +
> +attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
> +
> +!opencl.kernels = !{!0}
> +
> +!0 = metadata !{void (i32 addrspace(1)*)* @has_reqd_work_group_size, metadata !1}
> +!1 = metadata !{metadata !"reqd_work_group_size", i32 32, i32 2, i32 4}
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
More information about the llvm-commits
mailing list