[PATCH] Add attributes for AMDGPU register limits.

Thu Dec 4 10:02:42 PST 2014

> On Dec 4, 2014, at 11:10 AM, Aaron Ballman <aaron at aaronballman.com> wrote:
> 
> On Wed, Dec 3, 2014 at 4:39 PM, Matt Arsenault
> <Matthew.Arsenault at amd.com> wrote:
>> Address review comments. Don't bother emitting the attribute if it's 0
>> 
>> http://reviews.llvm.org/D6288
>> 
>> Files:
>>  include/clang/Basic/Attr.td
>>  include/clang/Basic/AttrDocs.td
>>  include/clang/Basic/DiagnosticSemaKinds.td
>>  include/clang/Sema/AttributeList.h
>>  lib/CodeGen/TargetInfo.cpp
>>  lib/Sema/SemaDeclAttr.cpp
>>  test/CodeGenOpenCL/amdgpu-num-gpr-attr.cl
>>  test/SemaOpenCL/amdgpu-num-register-attrs.cl
> 
> Some comments and questions below.
> 
>> Index: include/clang/Basic/Attr.td
>> ===================================================================
>> --- include/clang/Basic/Attr.td
>> +++ include/clang/Basic/Attr.td
>> @@ -115,6 +115,10 @@
>> def FunctionLike : SubsetSubject<DeclBase,
>>                                   [{S->getFunctionType(false) != NULL}]>;
>> 
>> +def KernelFunction : SubsetSubject<Function, [{
>> +  S->hasAttr<OpenCLKernelAttr>()
>> +}]>;
> 
> Still not quite in love with this -- CUDA has kernel functions as
> well, so the name is misleading.

Had this before but apparently didn’t save the file. They’re called global functions in CUDA, so it might be slightly confusing.

> 
>> +
>> // HasFunctionProto is a more strict version of FunctionLike, so it should
>> // never be specified in a Subjects list along with FunctionLike (due to the
>> // inclusive nature of subject testing).
>> @@ -243,6 +247,7 @@
>>   let OSes = ["Win32"];
>> }
>> def TargetMips : TargetArch<["mips", "mipsel"]>;
>> +def TargetAMDGPU : TargetArch<["r600"]>;
>> 
>> class Attr {
>>   // The various ways in which an attribute can be spelled in source
>> @@ -885,6 +890,22 @@
>>   let Documentation = [Undocumented];
>> }
>> 
>> +def AMDGPUNumVGPR : InheritableAttr, TargetSpecificAttr<TargetAMDGPU> {
>> +  let Spellings = [GNU<"amdgpu_num_vgpr">];
>> +  let Args = [UnsignedArgument<"NumVGPR">];
>> +  let Documentation = [AMDGPUNumVGPRDocs];
>> +  let Subjects = SubjectList<[KernelFunction], ErrorDiag,
>> +                             "ExpectedKernelFunction">;
>> +}
>> +
>> +def AMDGPUNumSGPR : InheritableAttr, TargetSpecificAttr<TargetAMDGPU> {
>> +  let Spellings = [GNU<"amdgpu_num_sgpr">];
>> +  let Args = [UnsignedArgument<"NumSGPR">];
>> +  let Documentation = [AMDGPUNumSGPRDocs];
>> +  let Subjects = SubjectList<[KernelFunction], ErrorDiag,
>> +                              "ExpectedKernelFunction">;
>> +}
>> +
>> def NoSplitStack : InheritableAttr {
>>   let Spellings = [GCC<"no_split_stack">];
>>   let Subjects = SubjectList<[Function], ErrorDiag>;
>> Index: include/clang/Basic/AttrDocs.td
>> ===================================================================
>> --- include/clang/Basic/AttrDocs.td
>> +++ include/clang/Basic/AttrDocs.td
>> @@ -673,6 +673,64 @@
>>   }];
>> }
>> 
>> +def DocCatAMDGPURegisterAttributes :
>> +  DocumentationCategory<"AMD GPU Register Attributes"> {
>> +  let Content = [{
>> +Clang supports attributes for controlling register usage on AMD GPU
>> +targets. These attributes may be attached to a kernel function
>> +definition and is an optimization hint to the backend for the maximum
>> +number of registers to use. This is useful in cases where register
>> +limited occupancy is known to be an important factor for the
>> +performance for the kernel.
>> +
>> +The semantics are as follows:
>> +
>> +- The backend will attempt to limit the number of used registers to
>> +  the specified value, but the exact number used is not
>> +  guaranteed. The number used may be rounded up to satisfy the
>> +  allocation requirements or ABI constraints of the subtarget. For
>> +  example, on Southern Islands VGPRs may only be allocated in
>> +  increments of 4, so requesting a limit of 39 VGPRs will really
>> +  attempt to use up to 40. Requesting more registers than the
>> +  subtarget supports will truncate to the maximum allowed The backend
> 
> Missing a period between "allowed" and "The".
> 
>> +  may also use fewer registers than requested whenever possible.
>> +
>> +- 0 implies the default no limit on register usage.
>> +
>> +- Ignored on older subtargets.
> 
> What's an "older" subtarget? Or is this left purposefully vague so
> that current subtargets may become "older" in the future and behavior
> changes?
> 
>> +}];
>> +}
>> +
>> +
>> +def AMDGPUNumVGPRDocs : Documentation {
>> +  let Category = DocCatAMDGPURegisterAttributes;
>> +  let Content = [{
>> +Clang supports the
>> +``__attribute__((amdgpu_num_vgpr(<num_registers>)))`` attribute on AMD
>> +Southern Islands GPUs and later for controlling the number of vector
>> +registers. A typical value would be between 4 and 256 in increments
>> +of 4.
>> +}];
>> +}
>> +
>> +def AMDGPUNumSGPRDocs : Documentation {
>> +  let Category = DocCatAMDGPURegisterAttributes;
>> +  let Content = [{
>> +
>> +Clang supports the
>> +``__attribute__((amdgpu_num_sgpr(<num_registers>)))`` attribute on AMD
>> +Southern Islands GPUs and later for controlling the number of scalar
>> +registers. A typical value would be between 8 and 104 in increments of
>> +8.
>> +
>> +Due to common instruction constraints, an additional 2-4 SGPRs are
>> +typically required for internal use depending on features used. This
>> +value is a hint for the total number of SGPRs to use, and not the
>> +number of user SGPRs, so no special consideration needs to be given
>> +for these.
>> +}];
>> +}
>> +
>> def DocCatCallingConvs : DocumentationCategory<"Calling Conventions"> {
>>   let Content = [{
>> Clang supports several different calling conventions, depending on the target
>> Index: include/clang/Basic/DiagnosticSemaKinds.td
>> ===================================================================
>> --- include/clang/Basic/DiagnosticSemaKinds.td
>> +++ include/clang/Basic/DiagnosticSemaKinds.td
>> @@ -2244,7 +2244,7 @@
>>   "Objective-C instance methods|init methods of interface or class extension declarations|"
>>   "variables, functions and classes|Objective-C protocols|"
>>   "functions and global variables|structs or typedefs|"
>> -  "interface or protocol declarations}1">,
>> +  "interface or protocol declarations|kernel functions}1">,
>>   InGroup<IgnoredAttributes>;
>> def err_attribute_wrong_decl_type : Error<warn_attribute_wrong_decl_type.Text>;
>> def warn_type_attribute_wrong_type : Warning<
>> Index: include/clang/Sema/AttributeList.h
>> ===================================================================
>> --- include/clang/Sema/AttributeList.h
>> +++ include/clang/Sema/AttributeList.h
>> @@ -844,7 +844,8 @@
>>   ExpectedObjectiveCProtocol,
>>   ExpectedFunctionGlobalVarMethodOrProperty,
>>   ExpectedStructOrTypedef,
>> -  ExpectedObjectiveCInterfaceOrProtocol
>> +  ExpectedObjectiveCInterfaceOrProtocol,
>> +  ExpectedKernelFunction
>> };
>> 
>> }  // end namespace clang
>> Index: lib/CodeGen/TargetInfo.cpp
>> ===================================================================
>> --- lib/CodeGen/TargetInfo.cpp
>> +++ lib/CodeGen/TargetInfo.cpp
>> @@ -20,6 +20,7 @@
>> #include "clang/AST/RecordLayout.h"
>> #include "clang/CodeGen/CGFunctionInfo.h"
>> #include "clang/Frontend/CodeGenOptions.h"
>> +#include "llvm/ADT/StringExtras.h"
>> #include "llvm/ADT/Triple.h"
>> #include "llvm/IR/DataLayout.h"
>> #include "llvm/IR/Type.h"
>> @@ -6082,6 +6083,45 @@
>>   return AddrTyped;
>> }
>> 
>> +//===----------------------------------------------------------------------===//
>> +// AMDGPU ABI Implementation
>> +//===----------------------------------------------------------------------===//
>> +
>> +namespace {
>> +
>> +class AMDGPUTargetCodeGenInfo : public TargetCodeGenInfo {
>> +public:
>> +  AMDGPUTargetCodeGenInfo(CodeGenTypes &CGT)
>> +    : TargetCodeGenInfo(new DefaultABIInfo(CGT)) {}
>> +  void SetTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
>> +                           CodeGen::CodeGenModule &M) const override;
>> +};
>> +
>> +}
>> +
>> +void AMDGPUTargetCodeGenInfo::SetTargetAttributes(
>> +  const Decl *D,
>> +  llvm::GlobalValue *GV,
>> +  CodeGen::CodeGenModule &M) const {
>> +  const FunctionDecl *FD = dyn_cast<FunctionDecl>(D);
>> +  if (!FD)
>> +    return;
>> +
>> +  if (const auto Attr = FD->getAttr<AMDGPUNumVGPRAttr>()) {
>> +    llvm::Function *F = cast<llvm::Function>(GV);
>> +    uint32_t NumVGPR = Attr->getNumVGPR();
>> +    if (NumVGPR != 0)
>> +      F->addFnAttr("amdgpu_num_vgpr", llvm::utostr(NumVGPR));
>> +  }
>> +
>> +  if (const auto Attr = FD->getAttr<AMDGPUNumSGPRAttr>()) {
>> +    llvm::Function *F = cast<llvm::Function>(GV);
>> +    unsigned NumSGPR = Attr->getNumSGPR();
>> +    if (NumSGPR != 0)
>> +      F->addFnAttr("amdgpu_num_sgpr", llvm::utostr(NumSGPR));
>> +  }
>> +}
>> +
>> 
>> //===----------------------------------------------------------------------===//
>> // SPARC v9 ABI Implementation.
>> @@ -7143,6 +7183,8 @@
>>   }
>>   case llvm::Triple::hexagon:
>>     return *(TheTargetCodeGenInfo = new HexagonTargetCodeGenInfo(Types));
>> +  case llvm::Triple::r600:
>> +    return *(TheTargetCodeGenInfo = new AMDGPUTargetCodeGenInfo(Types));
>>   case llvm::Triple::sparcv9:
>>     return *(TheTargetCodeGenInfo = new SparcV9TargetCodeGenInfo(Types));
>>   case llvm::Triple::xcore:
>> Index: lib/Sema/SemaDeclAttr.cpp
>> ===================================================================
>> --- lib/Sema/SemaDeclAttr.cpp
>> +++ lib/Sema/SemaDeclAttr.cpp
>> @@ -3940,6 +3940,42 @@
>>     handleARMInterruptAttr(S, D, Attr);
>> }
>> 
>> +static int32_t handleAMDGPUNumRegAttrCommon(Sema &S, Decl *D,
>> +                                            const AttributeList &Attr) {
>> +  Expr *NumRegsExpr = static_cast<Expr *>(Attr.getArgAsExpr(0));
>> +
>> +  uint32_t NumRegs = -1;
>> +  if (!checkUInt32Argument(S, Attr, NumRegsExpr, NumRegs))
>> +    return -1;
>> +
>> +  D->addAttr(UsedAttr::CreateImplicit(S.Context));
> 
> Why is this automatically adding a Used attribute? (I know the
> original code was as well, but I'm not certain I understand it.)

I think I just copied this blindly from the interrupt attribute. I removed it and the helper in the new version

> 
>> +  return NumRegs;
> 
> Implicit conversion from unsigned to signed. The return value being
> signed also causes implicit conversions from signed to unsigned in the
> caller when creating the attributes. Why not return a bool and return
> the number of registers by reference?
> 
>> +}
>> +
>> +static void handleAMDGPUNumVGPRAttr(Sema &S, Decl *D,
>> +                                    const AttributeList &Attr) {
>> +  int32_t NumRegs = handleAMDGPUNumRegAttrCommon(S, D, Attr);
>> +  if (NumRegs == -1)
>> +    return;
>> +
>> +  D->addAttr(::new (S.Context)
>> +             AMDGPUNumVGPRAttr(Attr.getLoc(), S.Context,
>> +                               NumRegs,
>> +                               Attr.getAttributeSpellingListIndex()));
>> +}
>> +
>> +static void handleAMDGPUNumSGPRAttr(Sema &S, Decl *D,
>> +                                    const AttributeList &Attr) {
>> +  int32_t NumRegs = handleAMDGPUNumRegAttrCommon(S, D, Attr);
>> +  if (NumRegs == -1)
>> +    return;
>> +
>> +  D->addAttr(::new (S.Context)
>> +             AMDGPUNumSGPRAttr(Attr.getLoc(), S.Context,
>> +                               NumRegs,
>> +                               Attr.getAttributeSpellingListIndex()));
>> +}
>> +
>> static void handleX86ForceAlignArgPointerAttr(Sema &S, Decl *D,
>>                                               const AttributeList& Attr) {
>>   // If we try to apply it to a function pointer, don't warn, but don't
>> @@ -4247,6 +4283,12 @@
>>   case AttributeList::AT_NoMips16:
>>     handleSimpleAttribute<NoMips16Attr>(S, D, Attr);
>>     break;
>> +  case AttributeList::AT_AMDGPUNumVGPR:
>> +    handleAMDGPUNumVGPRAttr(S, D, Attr);
>> +    break;
>> +  case AttributeList::AT_AMDGPUNumSGPR:
>> +    handleAMDGPUNumSGPRAttr(S, D, Attr);
>> +    break;
>>   case AttributeList::AT_IBAction:
>>     handleSimpleAttribute<IBActionAttr>(S, D, Attr);
>>     break;
>> Index: test/CodeGenOpenCL/amdgpu-num-gpr-attr.cl
>> ===================================================================
>> --- /dev/null
>> +++ test/CodeGenOpenCL/amdgpu-num-gpr-attr.cl
>> @@ -0,0 +1,41 @@
>> +// RUN: %clang_cc1 -triple r600-- -target-cpu tahiti -O0 -emit-llvm -o - %s | FileCheck %s
>> +
>> +__attribute__((amdgpu_num_vgpr(64)))
>> +kernel void test_num_vgpr64() {
>> +// CHECK: define void @test_num_vgpr64() [[ATTR_VGPR64:#[0-9]+]]
>> +}
>> +
>> +__attribute__((amdgpu_num_sgpr(32)))
>> +kernel void test_num_sgpr32() {
>> +// CHECK: define void @test_num_sgpr32() [[ATTR_SGPR32:#[0-9]+]]
>> +}
>> +
>> +__attribute__((amdgpu_num_vgpr(64), amdgpu_num_sgpr(32)))
>> +kernel void test_num_vgpr64_sgpr32() {
>> +// CHECK: define void @test_num_vgpr64_sgpr32() [[ATTR_VGPR64_SGPR32:#[0-9]+]]
>> +
>> +}
>> +
>> +__attribute__((amdgpu_num_sgpr(20), amdgpu_num_vgpr(40)))
>> +kernel void test_num_sgpr20_vgpr40() {
>> +// CHECK: define void @test_num_sgpr20_vgpr40() [[ATTR_SGPR20_VGPR40:#[0-9]+]]
>> +}
>> +
>> +__attribute__((amdgpu_num_vgpr(0)))
>> +kernel void test_num_vgpr0() {
>> +}
>> +
>> +__attribute__((amdgpu_num_sgpr(0)))
>> +kernel void test_num_sgpr0() {
>> +}
>> +
>> +__attribute__((amdgpu_num_vgpr(0), amdgpu_num_sgpr(0)))
>> +kernel void test_num_vgpr0_sgpr0() {
>> +}
>> +
>> +// CHECK-DAG-NOT: "amdgpu_num_vgpr"="0"
>> +// CHECK-DAG-NOT: "amdgpu_num_sgpr"="0"
>> +// CHECK-DAG: attributes [[ATTR_VGPR64]] = { nounwind "amdgpu_num_vgpr"="64"
>> +// CHECK-DAG: attributes [[ATTR_SGPR32]] = { nounwind "amdgpu_num_sgpr"="32"
>> +// CHECK-DAG: attributes [[ATTR_VGPR64_SGPR32]] = { nounwind "amdgpu_num_sgpr"="32" "amdgpu_num_vgpr"="64"
>> +// CHECK-DAG: attributes [[ATTR_SGPR20_VGPR40]] = { nounwind "amdgpu_num_sgpr"="20" "amdgpu_num_vgpr"="40"
>> Index: test/SemaOpenCL/amdgpu-num-register-attrs.cl
>> ===================================================================
>> --- /dev/null
>> +++ test/SemaOpenCL/amdgpu-num-register-attrs.cl
>> @@ -0,0 +1,34 @@
>> +// RUN: %clang_cc1 -triple r600-- -verify -fsyntax-only %s
>> +
>> +typedef __attribute__((amdgpu_num_vgpr(128))) struct FooStruct { // expected-error {{'amdgpu_num_vgpr' attribute only applies to kernel functions}}
>> +  int x;
>> +  float y;
>> +} FooStruct;
>> +
>> +
>> +__attribute__((amdgpu_num_vgpr("ABC"))) kernel void foo2() {} // expected-error {{'amdgpu_num_vgpr' attribute requires an integer constant}}
>> +__attribute__((amdgpu_num_sgpr("ABC"))) kernel void foo3() {} // expected-error {{'amdgpu_num_sgpr' attribute requires an integer constant}}
>> +
>> +
>> +__attribute__((amdgpu_num_vgpr(40))) void foo4() {} // expected-error {{'amdgpu_num_vgpr' attribute only applies to kernel functions}}
>> +__attribute__((amdgpu_num_sgpr(64))) void foo5() {} // expected-error {{'amdgpu_num_sgpr' attribute only applies to kernel functions}}
>> +
>> +__attribute__((amdgpu_num_vgpr(40))) kernel void foo7() {}
>> +__attribute__((amdgpu_num_sgpr(64))) kernel void foo8() {}
>> +__attribute__((amdgpu_num_vgpr(40), amdgpu_num_sgpr(64))) kernel void foo9() {}
>> +
>> +// Check 0 VGPR is accepted.
>> +__attribute__((amdgpu_num_vgpr(0))) kernel void foo10() {}
>> +
>> +// Check 0 SGPR is accepted.
>> +__attribute__((amdgpu_num_sgpr(0))) kernel void foo11() {}
>> +
>> +// Check both 0 SGPR and VGPR is accepted.
>> +__attribute__((amdgpu_num_vgpr(0), amdgpu_num_sgpr(0))) kernel void foo12() {}
>> +
>> +// Too large VGPR value.
>> +__attribute__((amdgpu_num_vgpr(4294967296))) kernel void foo13() {} // expected-error {{integer constant expression evaluates to value 4294967296 that cannot be represented in a 32-bit unsigned integer type}}
>> +
>> +__attribute__((amdgpu_num_sgpr(4294967296))) kernel void foo14() {} // expected-error {{integer constant expression evaluates to value 4294967296 that cannot be represented in a 32-bit unsigned integer type}}
>> +
>> +__attribute__((amdgpu_num_sgpr(4294967296), amdgpu_num_vgpr(4294967296))) kernel void foo15() {} // expected-error 2 {{integer constant expression evaluates to value 4294967296 that cannot be represented in a 32-bit unsigned integer type}}
>> 
> 
> Thanks!
> 
> ~Aaron
> _______________________________________________
> cfe-commits mailing list
> cfe-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits