[PATCH] R600: Parse OpenCL metadata

Mon Jun 23 08:52:19 PDT 2014

On Sun, Jun 22, 2014 at 06:03:13AM +0000, Matt Arsenault wrote:
> http://reviews.llvm.org/D4243
> 
> Files:
>   lib/Target/R600/AMDGPUAsmPrinter.cpp
>   lib/Target/R600/AMDGPUMachineFunction.cpp
>   lib/Target/R600/AMDGPUMachineFunction.h
>   lib/Target/R600/SIDefines.h
>   test/CodeGen/R600/reqd_work_group_size.ll

> Index: lib/Target/R600/AMDGPUAsmPrinter.cpp
> ===================================================================
> --- lib/Target/R600/AMDGPUAsmPrinter.cpp
> +++ lib/Target/R600/AMDGPUAsmPrinter.cpp
> @@ -78,6 +78,7 @@
>    EmitFunctionBody();
>  
>    if (isVerbose()) {
> +    const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
>      const MCSectionELF *CommentSection
>        = Context.getELFSection(".AMDGPU.csdata",
>                                ELF::SHT_PROGBITS, 0,
> @@ -92,6 +93,15 @@
>                                   false);
>        OutStreamer.emitRawComment(" NumVgprs: " + Twine(KernelInfo.NumVGPR),
>                                   false);
> +
> +      if (MFI->hasReqdWorkGroupSize()) {
> +        OutStreamer.emitRawComment(" NumThreadX: " +
> +                                   Twine(MFI->getReqdWorkGroupSize(0)), false);
> +        OutStreamer.emitRawComment(" NumThreadY: " +
> +                                   Twine(MFI->getReqdWorkGroupSize(1)), false);
> +        OutStreamer.emitRawComment(" NumThreadZ: " +
> +                                   Twine(MFI->getReqdWorkGroupSize(2)), false);
> +      }
>      } else {
>        R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
>        OutStreamer.emitRawComment(
> @@ -288,7 +298,7 @@
>                                           const SIProgramInfo &KernelInfo) {
>    const AMDGPUSubtarget &STM = TM.getSubtarget<AMDGPUSubtarget>();
>  
> -  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
> +  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
>    unsigned RsrcReg;
>    switch (MFI->getShaderType()) {
>    default: // Fall through
> @@ -316,7 +326,22 @@
>    if (MFI->getShaderType() == ShaderType::COMPUTE) {
>      OutStreamer.EmitIntValue(R_00B84C_COMPUTE_PGM_RSRC2, 4);
>      OutStreamer.EmitIntValue(S_00B84C_LDS_SIZE(LDSBlocks), 4);
> +
> +    if (MFI->hasReqdWorkGroupSize()) {
> +      OutStreamer.EmitIntValue(R_00B81C_COMPUTE_NUM_THREAD_X, 4);
> +      OutStreamer.EmitIntValue(
> +        S_00B81C_NUM_THREAD_FULL(MFI->getReqdWorkGroupSize(0)), 4);
> +
> +      OutStreamer.EmitIntValue(R_00B820_COMPUTE_NUM_THREAD_Y, 4);
> +      OutStreamer.EmitIntValue(
> +        S_00B820_NUM_THREAD_FULL(MFI->getReqdWorkGroupSize(1)), 4);
> +
> +      OutStreamer.EmitIntValue(R_00B824_COMPUTE_NUM_THREAD_Z, 4);
> +      OutStreamer.EmitIntValue(
> +        S_00B824_NUM_THREAD_FULL(MFI->getReqdWorkGroupSize(2)), 4);
> +    }
>    }
> +
>    if (MFI->getShaderType() == ShaderType::PIXEL) {
>      OutStreamer.EmitIntValue(R_00B02C_SPI_SHADER_PGM_RSRC2_PS, 4);
>      OutStreamer.EmitIntValue(S_00B02C_EXTRA_LDS_SIZE(LDSBlocks), 4);
> Index: lib/Target/R600/AMDGPUMachineFunction.cpp
> ===================================================================
> --- lib/Target/R600/AMDGPUMachineFunction.cpp
> +++ lib/Target/R600/AMDGPUMachineFunction.cpp
> @@ -1,7 +1,11 @@
>  #include "AMDGPUMachineFunction.h"
>  #include "AMDGPU.h"
> +#include "llvm/CodeGen/MachineModuleInfo.h"
>  #include "llvm/IR/Attributes.h"
> +#include "llvm/IR/Constants.h"
>  #include "llvm/IR/Function.h"
> +#include "llvm/IR/Module.h"
> +
>  using namespace llvm;
>  
>  static const char *const ShaderTypeAttribute = "ShaderType";
> @@ -12,8 +16,14 @@
>  AMDGPUMachineFunction::AMDGPUMachineFunction(const MachineFunction &MF) :
>    MachineFunctionInfo(),
>    ShaderType(ShaderType::COMPUTE),
> +  IsKernel(false),
> +  ReqdWorkGroupSize{0},
> +  WorkGroupSizeHint{0},
> +  LocalMemoryObjects(),
>    LDSSize(0) {
> -  AttributeSet Set = MF.getFunction()->getAttributes();
> +  const Function *F = MF.getFunction();
> +
> +  AttributeSet Set = F->getAttributes();
>    Attribute A = Set.getAttribute(AttributeSet::FunctionIndex,
>                                   ShaderTypeAttribute);
>  
> @@ -22,4 +32,68 @@
>      if (Str.getAsInteger(0, ShaderType))
>        llvm_unreachable("Can't parse shader type!");
>    }
> +
> +  const MachineModuleInfo &MMI = MF.getMMI();
> +  const Module *M = MMI.getModule();
> +
> +  const NamedMDNode *Kernels = M->getNamedMetadata("opencl.kernels");
> +  if (!Kernels)
> +    return;
> +
> +  for (const MDNode *K : Kernels->operands()) {
> +    unsigned N = K->getNumOperands();
> +    if (N == 0)
> +      continue;
> +
> +    // We expect the first operand to be the function.
> +    const Value *First = K->getOperand(0);
> +    if (First == F) {
> +      IsKernel = true;
> +      findOpenCLKernelAttributes(K);
> +      break;
> +    }
> +  }
> +}
> +
> +static void parseWorkgroupSize(uint32_t Size[3], const MDNode *Node) {
> +  unsigned N = Node->getNumOperands();
> +
> +  for (unsigned I = 0; I < std::min(N - 1, 3u); ++I) {
> +    const ConstantInt *C = dyn_cast<ConstantInt>(Node->getOperand(I + 1));
> +    if (!C) {
> +      // This is malformed, just give up.
> +      Size[0] = 0;
> +      Size[1] = 0;
> +      Size[2] = 0;
> +      return;
> +    }
> +
> +    Size[I] = C->getZExtValue();
> +  }
> +}
> +
> +void AMDGPUMachineFunction::findOpenCLKernelAttributes(const MDNode *Node) {
> +  for (unsigned I = 1, E = Node->getNumOperands(); I != E; ++I) {
> +    const MDNode *Op = dyn_cast<MDNode>(Node->getOperand(I));
> +    if (!Op)
> +      continue;
> +
> +    unsigned N = Op->getNumOperands();
> +    if (N == 0)
> +      continue;
> +
> +    const MDString *NameNode = dyn_cast<MDString>(Op->getOperand(0));
> +    if (!NameNode)
> +      continue;
> +
> +    StringRef Name = NameNode->getName();
> +
> +    if (N == 4 && Name == "reqd_work_group_size")
> +      parseWorkgroupSize(ReqdWorkGroupSize, Op);
> +    else if (N == 4 && Name == "work_group_size_hint")
> +      parseWorkgroupSize(WorkGroupSizeHint, Op);
> +    else if (Name == "vec_type_hint") {
> +      // TODO: Do we care about this at all?
> +    }
> +  }

Could you move the attribute parsing code to a util file, so it can also be
used by the AMDGPUPromoteAllocas pass?

-Tom

>  }
> Index: lib/Target/R600/AMDGPUMachineFunction.h
> ===================================================================
> --- lib/Target/R600/AMDGPUMachineFunction.h
> +++ lib/Target/R600/AMDGPUMachineFunction.h
> @@ -19,8 +19,13 @@
>  namespace llvm {
>  
>  class AMDGPUMachineFunction : public MachineFunctionInfo {
> -  virtual void anchor();
>    unsigned ShaderType;
> +  bool IsKernel;
> +  uint32_t ReqdWorkGroupSize[3];
> +  uint32_t WorkGroupSizeHint[3];
> +
> +  virtual void anchor();
> +  void findOpenCLKernelAttributes(const MDNode *);
>  
>  public:
>    AMDGPUMachineFunction(const MachineFunction &MF);
> @@ -33,6 +38,26 @@
>    unsigned getShaderType() const {
>      return ShaderType;
>    }
> +
> +  bool isKernel() const {
> +    return IsKernel;
> +  }
> +
> +  uint32_t getReqdWorkGroupSize(unsigned I) const {
> +    return ReqdWorkGroupSize[I];
> +  }
> +
> +  uint32_t getWorkGroupSizeHint(unsigned I) const {
> +    return WorkGroupSizeHint[I];
> +  }
> +
> +  bool hasReqdWorkGroupSize() const {
> +    return ReqdWorkGroupSize[0] != 0;
> +  }
> +
> +  uint32_t getReqdWorkGroupSizeFlat() const {
> +    return ReqdWorkGroupSize[0] * ReqdWorkGroupSize[1] * ReqdWorkGroupSize[2];
> +  }
>  };
>  
>  }
> Index: lib/Target/R600/SIDefines.h
> ===================================================================
> --- lib/Target/R600/SIDefines.h
> +++ lib/Target/R600/SIDefines.h
> @@ -35,4 +35,33 @@
>  #define   S_00B84C_LDS_SIZE(x)                                        (((x) & 0x1FF) << 15)
>  #define R_0286CC_SPI_PS_INPUT_ENA                                       0x0286CC
>  
> +
> +#define R_00B804_COMPUTE_DIM_X                                          0x00B804
> +#define R_00B808_COMPUTE_DIM_Y                                          0x00B808
> +#define R_00B80C_COMPUTE_DIM_Z                                          0x00B80C
> +#define R_00B810_COMPUTE_START_X                                        0x00B810
> +#define R_00B814_COMPUTE_START_Y                                        0x00B814
> +#define R_00B818_COMPUTE_START_Z                                        0x00B818
> +#define R_00B81C_COMPUTE_NUM_THREAD_X                                   0x00B81C
> +#define   S_00B81C_NUM_THREAD_FULL(x)                                 (((x) & 0xFFFF) << 0)
> +#define   G_00B81C_NUM_THREAD_FULL(x)                                 (((x) >> 0) & 0xFFFF)
> +#define   C_00B81C_NUM_THREAD_FULL                                    0xFFFF0000
> +#define   S_00B81C_NUM_THREAD_PARTIAL(x)                              (((x) & 0xFFFF) << 16)
> +#define   G_00B81C_NUM_THREAD_PARTIAL(x)                              (((x) >> 16) & 0xFFFF)
> +#define   C_00B81C_NUM_THREAD_PARTIAL                                 0x0000FFFF
> +#define R_00B820_COMPUTE_NUM_THREAD_Y                                   0x00B820
> +#define   S_00B820_NUM_THREAD_FULL(x)                                 (((x) & 0xFFFF) << 0)
> +#define   G_00B820_NUM_THREAD_FULL(x)                                 (((x) >> 0) & 0xFFFF)
> +#define   C_00B820_NUM_THREAD_FULL                                    0xFFFF0000
> +#define   S_00B820_NUM_THREAD_PARTIAL(x)                              (((x) & 0xFFFF) << 16)
> +#define   G_00B820_NUM_THREAD_PARTIAL(x)                              (((x) >> 16) & 0xFFFF)
> +#define   C_00B820_NUM_THREAD_PARTIAL                                 0x0000FFFF
> +#define R_00B824_COMPUTE_NUM_THREAD_Z                                   0x00B824
> +#define   S_00B824_NUM_THREAD_FULL(x)                                 (((x) & 0xFFFF) << 0)
> +#define   G_00B824_NUM_THREAD_FULL(x)                                 (((x) >> 0) & 0xFFFF)
> +#define   C_00B824_NUM_THREAD_FULL                                    0xFFFF0000
> +#define   S_00B824_NUM_THREAD_PARTIAL(x)                              (((x) & 0xFFFF) << 16)
> +#define   G_00B824_NUM_THREAD_PARTIAL(x)                              (((x) >> 16) & 0xFFFF)
> +#define   C_00B824_NUM_THREAD_PARTIAL                                 0x0000FFFF
> +
>  #endif // SIDEFINES_H_
> Index: test/CodeGen/R600/reqd_work_group_size.ll
> ===================================================================
> --- /dev/null
> +++ test/CodeGen/R600/reqd_work_group_size.ll
> @@ -0,0 +1,17 @@
> +; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI %s
> +
> +; SI: NumThreadX: 32
> +; SI: NumThreadY: 2
> +; SI: NumThreadZ: 4
> +define void @has_reqd_work_group_size(i32 addrspace(1)* nocapture %out) #0 {
> +entry:
> +  store i32 0, i32 addrspace(1)* %out, align 4
> +  ret void
> +}
> +
> +attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
> +
> +!opencl.kernels = !{!0}
> +
> +!0 = metadata !{void (i32 addrspace(1)*)* @has_reqd_work_group_size, metadata !1}
> +!1 = metadata !{metadata !"reqd_work_group_size", i32 32, i32 2, i32 4}

> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits