[PATCH] R600: Parse OpenCL metadata
Matt Arsenault
Matthew.Arsenault at amd.com
Mon Jun 30 15:51:42 PDT 2014
On 06/23/2014 08:52 AM, Tom Stellard wrote:
> On Sun, Jun 22, 2014 at 06:03:13AM +0000, Matt Arsenault wrote:
>> http://reviews.llvm.org/D4243
>>
>> Files:
>> lib/Target/R600/AMDGPUAsmPrinter.cpp
>> lib/Target/R600/AMDGPUMachineFunction.cpp
>> lib/Target/R600/AMDGPUMachineFunction.h
>> lib/Target/R600/SIDefines.h
>> test/CodeGen/R600/reqd_work_group_size.ll
>> Index: lib/Target/R600/AMDGPUAsmPrinter.cpp
>> ===================================================================
>> --- lib/Target/R600/AMDGPUAsmPrinter.cpp
>> +++ lib/Target/R600/AMDGPUAsmPrinter.cpp
>> @@ -78,6 +78,7 @@
>> EmitFunctionBody();
>>
>> if (isVerbose()) {
>> + const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
>> const MCSectionELF *CommentSection
>> = Context.getELFSection(".AMDGPU.csdata",
>> ELF::SHT_PROGBITS, 0,
>> @@ -92,6 +93,15 @@
>> false);
>> OutStreamer.emitRawComment(" NumVgprs: " + Twine(KernelInfo.NumVGPR),
>> false);
>> +
>> + if (MFI->hasReqdWorkGroupSize()) {
>> + OutStreamer.emitRawComment(" NumThreadX: " +
>> + Twine(MFI->getReqdWorkGroupSize(0)), false);
>> + OutStreamer.emitRawComment(" NumThreadY: " +
>> + Twine(MFI->getReqdWorkGroupSize(1)), false);
>> + OutStreamer.emitRawComment(" NumThreadZ: " +
>> + Twine(MFI->getReqdWorkGroupSize(2)), false);
>> + }
>> } else {
>> R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
>> OutStreamer.emitRawComment(
>> @@ -288,7 +298,7 @@
>> const SIProgramInfo &KernelInfo) {
>> const AMDGPUSubtarget &STM = TM.getSubtarget<AMDGPUSubtarget>();
>>
>> - SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
>> + const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
>> unsigned RsrcReg;
>> switch (MFI->getShaderType()) {
>> default: // Fall through
>> @@ -316,7 +326,22 @@
>> if (MFI->getShaderType() == ShaderType::COMPUTE) {
>> OutStreamer.EmitIntValue(R_00B84C_COMPUTE_PGM_RSRC2, 4);
>> OutStreamer.EmitIntValue(S_00B84C_LDS_SIZE(LDSBlocks), 4);
>> +
>> + if (MFI->hasReqdWorkGroupSize()) {
>> + OutStreamer.EmitIntValue(R_00B81C_COMPUTE_NUM_THREAD_X, 4);
>> + OutStreamer.EmitIntValue(
>> + S_00B81C_NUM_THREAD_FULL(MFI->getReqdWorkGroupSize(0)), 4);
>> +
>> + OutStreamer.EmitIntValue(R_00B820_COMPUTE_NUM_THREAD_Y, 4);
>> + OutStreamer.EmitIntValue(
>> + S_00B820_NUM_THREAD_FULL(MFI->getReqdWorkGroupSize(1)), 4);
>> +
>> + OutStreamer.EmitIntValue(R_00B824_COMPUTE_NUM_THREAD_Z, 4);
>> + OutStreamer.EmitIntValue(
>> + S_00B824_NUM_THREAD_FULL(MFI->getReqdWorkGroupSize(2)), 4);
>> + }
>> }
>> +
>> if (MFI->getShaderType() == ShaderType::PIXEL) {
>> OutStreamer.EmitIntValue(R_00B02C_SPI_SHADER_PGM_RSRC2_PS, 4);
>> OutStreamer.EmitIntValue(S_00B02C_EXTRA_LDS_SIZE(LDSBlocks), 4);
>> Index: lib/Target/R600/AMDGPUMachineFunction.cpp
>> ===================================================================
>> --- lib/Target/R600/AMDGPUMachineFunction.cpp
>> +++ lib/Target/R600/AMDGPUMachineFunction.cpp
>> @@ -1,7 +1,11 @@
>> #include "AMDGPUMachineFunction.h"
>> #include "AMDGPU.h"
>> +#include "llvm/CodeGen/MachineModuleInfo.h"
>> #include "llvm/IR/Attributes.h"
>> +#include "llvm/IR/Constants.h"
>> #include "llvm/IR/Function.h"
>> +#include "llvm/IR/Module.h"
>> +
>> using namespace llvm;
>>
>> static const char *const ShaderTypeAttribute = "ShaderType";
>> @@ -12,8 +16,14 @@
>> AMDGPUMachineFunction::AMDGPUMachineFunction(const MachineFunction &MF) :
>> MachineFunctionInfo(),
>> ShaderType(ShaderType::COMPUTE),
>> + IsKernel(false),
>> + ReqdWorkGroupSize{0},
>> + WorkGroupSizeHint{0},
>> + LocalMemoryObjects(),
>> LDSSize(0) {
>> - AttributeSet Set = MF.getFunction()->getAttributes();
>> + const Function *F = MF.getFunction();
>> +
>> + AttributeSet Set = F->getAttributes();
>> Attribute A = Set.getAttribute(AttributeSet::FunctionIndex,
>> ShaderTypeAttribute);
>>
>> @@ -22,4 +32,68 @@
>> if (Str.getAsInteger(0, ShaderType))
>> llvm_unreachable("Can't parse shader type!");
>> }
>> +
>> + const MachineModuleInfo &MMI = MF.getMMI();
>> + const Module *M = MMI.getModule();
>> +
>> + const NamedMDNode *Kernels = M->getNamedMetadata("opencl.kernels");
>> + if (!Kernels)
>> + return;
>> +
>> + for (const MDNode *K : Kernels->operands()) {
>> + unsigned N = K->getNumOperands();
>> + if (N == 0)
>> + continue;
>> +
>> + // We expect the first operand to be the function.
>> + const Value *First = K->getOperand(0);
>> + if (First == F) {
>> + IsKernel = true;
>> + findOpenCLKernelAttributes(K);
>> + break;
>> + }
>> + }
>> +}
>> +
>> +static void parseWorkgroupSize(uint32_t Size[3], const MDNode *Node) {
>> + unsigned N = Node->getNumOperands();
>> +
>> + for (unsigned I = 0; I < std::min(N - 1, 3u); ++I) {
>> + const ConstantInt *C = dyn_cast<ConstantInt>(Node->getOperand(I + 1));
>> + if (!C) {
>> + // This is malformed, just give up.
>> + Size[0] = 0;
>> + Size[1] = 0;
>> + Size[2] = 0;
>> + return;
>> + }
>> +
>> + Size[I] = C->getZExtValue();
>> + }
>> +}
>> +
>> +void AMDGPUMachineFunction::findOpenCLKernelAttributes(const MDNode *Node) {
>> + for (unsigned I = 1, E = Node->getNumOperands(); I != E; ++I) {
>> + const MDNode *Op = dyn_cast<MDNode>(Node->getOperand(I));
>> + if (!Op)
>> + continue;
>> +
>> + unsigned N = Op->getNumOperands();
>> + if (N == 0)
>> + continue;
>> +
>> + const MDString *NameNode = dyn_cast<MDString>(Op->getOperand(0));
>> + if (!NameNode)
>> + continue;
>> +
>> + StringRef Name = NameNode->getName();
>> +
>> + if (N == 4 && Name == "reqd_work_group_size")
>> + parseWorkgroupSize(ReqdWorkGroupSize, Op);
>> + else if (N == 4 && Name == "work_group_size_hint")
>> + parseWorkgroupSize(WorkGroupSizeHint, Op);
>> + else if (Name == "vec_type_hint") {
>> + // TODO: Do we care about this at all?
>> + }
>> + }
> Could you move the attribute parsing code to a util file, so it can also be
> used by the AMDGPUPromoteAllocas pass.
>
> -Tom
I was considering making it an analysis pass. There are a variety of
places that might want to check for this metadata.
>> }
>> Index: lib/Target/R600/AMDGPUMachineFunction.h
>> ===================================================================
>> --- lib/Target/R600/AMDGPUMachineFunction.h
>> +++ lib/Target/R600/AMDGPUMachineFunction.h
>> @@ -19,8 +19,13 @@
>> namespace llvm {
>>
>> class AMDGPUMachineFunction : public MachineFunctionInfo {
>> - virtual void anchor();
>> unsigned ShaderType;
>> + bool IsKernel;
>> + uint32_t ReqdWorkGroupSize[3];
>> + uint32_t WorkGroupSizeHint[3];
>> +
>> + virtual void anchor();
>> + void findOpenCLKernelAttributes(const MDNode *);
>>
>> public:
>> AMDGPUMachineFunction(const MachineFunction &MF);
>> @@ -33,6 +38,26 @@
>> unsigned getShaderType() const {
>> return ShaderType;
>> }
>> +
>> + bool isKernel() const {
>> + return IsKernel;
>> + }
>> +
>> + uint32_t getReqdWorkGroupSize(unsigned I) const {
>> + return ReqdWorkGroupSize[I];
>> + }
>> +
>> + uint32_t getWorkGroupSizeHint(unsigned I) const {
>> + return WorkGroupSizeHint[I];
>> + }
>> +
>> + bool hasReqdWorkGroupSize() const {
>> + return ReqdWorkGroupSize[0] != 0;
>> + }
>> +
>> + uint32_t getReqdWorkGroupSizeFlat() const {
>> + return ReqdWorkGroupSize[0] * ReqdWorkGroupSize[1] * ReqdWorkGroupSize[2];
>> + }
>> };
>>
>> }
>> Index: lib/Target/R600/SIDefines.h
>> ===================================================================
>> --- lib/Target/R600/SIDefines.h
>> +++ lib/Target/R600/SIDefines.h
>> @@ -35,4 +35,33 @@
>> #define S_00B84C_LDS_SIZE(x) (((x) & 0x1FF) << 15)
>> #define R_0286CC_SPI_PS_INPUT_ENA 0x0286CC
>>
>> +
>> +#define R_00B804_COMPUTE_DIM_X 0x00B804
>> +#define R_00B808_COMPUTE_DIM_Y 0x00B808
>> +#define R_00B80C_COMPUTE_DIM_Z 0x00B80C
>> +#define R_00B810_COMPUTE_START_X 0x00B810
>> +#define R_00B814_COMPUTE_START_Y 0x00B814
>> +#define R_00B818_COMPUTE_START_Z 0x00B818
>> +#define R_00B81C_COMPUTE_NUM_THREAD_X 0x00B81C
>> +#define S_00B81C_NUM_THREAD_FULL(x) (((x) & 0xFFFF) << 0)
>> +#define G_00B81C_NUM_THREAD_FULL(x) (((x) >> 0) & 0xFFFF)
>> +#define C_00B81C_NUM_THREAD_FULL 0xFFFF0000
>> +#define S_00B81C_NUM_THREAD_PARTIAL(x) (((x) & 0xFFFF) << 16)
>> +#define G_00B81C_NUM_THREAD_PARTIAL(x) (((x) >> 16) & 0xFFFF)
>> +#define C_00B81C_NUM_THREAD_PARTIAL 0x0000FFFF
>> +#define R_00B820_COMPUTE_NUM_THREAD_Y 0x00B820
>> +#define S_00B820_NUM_THREAD_FULL(x) (((x) & 0xFFFF) << 0)
>> +#define G_00B820_NUM_THREAD_FULL(x) (((x) >> 0) & 0xFFFF)
>> +#define C_00B820_NUM_THREAD_FULL 0xFFFF0000
>> +#define S_00B820_NUM_THREAD_PARTIAL(x) (((x) & 0xFFFF) << 16)
>> +#define G_00B820_NUM_THREAD_PARTIAL(x) (((x) >> 16) & 0xFFFF)
>> +#define C_00B820_NUM_THREAD_PARTIAL 0x0000FFFF
>> +#define R_00B824_COMPUTE_NUM_THREAD_Z 0x00B824
>> +#define S_00B824_NUM_THREAD_FULL(x) (((x) & 0xFFFF) << 0)
>> +#define G_00B824_NUM_THREAD_FULL(x) (((x) >> 0) & 0xFFFF)
>> +#define C_00B824_NUM_THREAD_FULL 0xFFFF0000
>> +#define S_00B824_NUM_THREAD_PARTIAL(x) (((x) & 0xFFFF) << 16)
>> +#define G_00B824_NUM_THREAD_PARTIAL(x) (((x) >> 16) & 0xFFFF)
>> +#define C_00B824_NUM_THREAD_PARTIAL 0x0000FFFF
>> +
>> #endif // SIDEFINES_H_
>> Index: test/CodeGen/R600/reqd_work_group_size.ll
>> ===================================================================
>> --- /dev/null
>> +++ test/CodeGen/R600/reqd_work_group_size.ll
>> @@ -0,0 +1,17 @@
>> +; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s
>> +
>> +; SI: NumThreadX: 32
>> +; SI: NumThreadY: 2
>> +; SI: NumThreadZ: 4
>> +define void @has_reqd_work_group_size(i32 addrspace(1)* nocapture %out) #0 {
>> +entry:
>> + store i32 0, i32 addrspace(1)* %out, align 4
>> + ret void
>> +}
>> +
>> +attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
>> +
>> +!opencl.kernels = !{!0}
>> +
>> +!0 = metadata !{void (i32 addrspace(1)*)* @has_reqd_work_group_size, metadata !1}
>> +!1 = metadata !{metadata !"reqd_work_group_size", i32 32, i32 2, i32 4}
>> _______________________________________________
>> llvm-commits mailing list
>> llvm-commits at cs.uiuc.edu
>> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
More information about the llvm-commits
mailing list