r223384 - Add attributes for AMDGPU register limits.

Matt Arsenault Matthew.Arsenault at amd.com
Thu Dec 4 12:38:18 PST 2014


Author: arsenm
Date: Thu Dec  4 14:38:18 2014
New Revision: 223384

URL: http://llvm.org/viewvc/llvm-project?rev=223384&view=rev
Log:
Add attributes for AMDGPU register limits.

This is a performance hint that can be applied to kernels
to attempt to limit the number of used registers.

Added:
    cfe/trunk/test/CodeGenOpenCL/amdgpu-num-gpr-attr.cl
    cfe/trunk/test/SemaCUDA/amdgpu-num-gpr-attr.cu
    cfe/trunk/test/SemaOpenCL/amdgpu-num-register-attrs.cl
Modified:
    cfe/trunk/include/clang/Basic/Attr.td
    cfe/trunk/include/clang/Basic/AttrDocs.td
    cfe/trunk/include/clang/Basic/DiagnosticSemaKinds.td
    cfe/trunk/include/clang/Sema/AttributeList.h
    cfe/trunk/lib/CodeGen/TargetInfo.cpp
    cfe/trunk/lib/Sema/SemaDeclAttr.cpp

Modified: cfe/trunk/include/clang/Basic/Attr.td
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/Attr.td?rev=223384&r1=223383&r2=223384&view=diff
==============================================================================
--- cfe/trunk/include/clang/Basic/Attr.td (original)
+++ cfe/trunk/include/clang/Basic/Attr.td Thu Dec  4 14:38:18 2014
@@ -115,6 +115,10 @@ def DeclBase : AttrSubject;
 def FunctionLike : SubsetSubject<DeclBase,
                                   [{S->getFunctionType(false) != NULL}]>;
 
+def OpenCLKernelFunction : SubsetSubject<Function, [{
+  S->hasAttr<OpenCLKernelAttr>()
+}]>;
+
 // HasFunctionProto is a more strict version of FunctionLike, so it should
 // never be specified in a Subjects list along with FunctionLike (due to the
 // inclusive nature of subject testing).
@@ -885,6 +889,34 @@ def NoMips16 : InheritableAttr, TargetSp
   let Documentation = [Undocumented];
 }
 
+// This is not a TargetSpecificAttr so that is silently accepted and
+// ignored on other targets as encouraged by the OpenCL spec.
+//
+// See OpenCL 1.2 6.11.5: "It is our intention that a particular
+// implementation of OpenCL be free to ignore all attributes and the
+// resulting executable binary will produce the same result."
+//
+// However, only AMD GPU targets will emit the corresponding IR
+// attribute.
+//
+// FIXME: This provides a sub-optimal error message if you attempt to
+// use this in CUDA, since CUDA does not use the same terminology.
+def AMDGPUNumVGPR : InheritableAttr {
+  let Spellings = [GNU<"amdgpu_num_vgpr">];
+  let Args = [UnsignedArgument<"NumVGPR">];
+  let Documentation = [AMDGPUNumVGPRDocs];
+  let Subjects = SubjectList<[OpenCLKernelFunction], ErrorDiag,
+                             "ExpectedKernelFunction">;
+}
+
+def AMDGPUNumSGPR : InheritableAttr {
+  let Spellings = [GNU<"amdgpu_num_sgpr">];
+  let Args = [UnsignedArgument<"NumSGPR">];
+  let Documentation = [AMDGPUNumSGPRDocs];
+  let Subjects = SubjectList<[OpenCLKernelFunction], ErrorDiag,
+                              "ExpectedKernelFunction">;
+}
+
 def NoSplitStack : InheritableAttr {
   let Spellings = [GCC<"no_split_stack">];
   let Subjects = SubjectList<[Function], ErrorDiag>;

Modified: cfe/trunk/include/clang/Basic/AttrDocs.td
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/AttrDocs.td?rev=223384&r1=223383&r2=223384&view=diff
==============================================================================
--- cfe/trunk/include/clang/Basic/AttrDocs.td (original)
+++ cfe/trunk/include/clang/Basic/AttrDocs.td Thu Dec  4 14:38:18 2014
@@ -673,6 +673,65 @@ The semantics are as follows:
   }];
 }
 
+def DocCatAMDGPURegisterAttributes :
+  DocumentationCategory<"AMD GPU Register Attributes"> {
+  let Content = [{
+Clang supports attributes for controlling register usage on AMD GPU
+targets. These attributes may be attached to a kernel function
+definition and is an optimization hint to the backend for the maximum
+number of registers to use. This is useful in cases where register
+limited occupancy is known to be an important factor for the
+performance for the kernel.
+
+The semantics are as follows:
+
+- The backend will attempt to limit the number of used registers to
+  the specified value, but the exact number used is not
+  guaranteed. The number used may be rounded up to satisfy the
+  allocation requirements or ABI constraints of the subtarget. For
+  example, on Southern Islands VGPRs may only be allocated in
+  increments of 4, so requesting a limit of 39 VGPRs will really
+  attempt to use up to 40. Requesting more registers than the
+  subtarget supports will truncate to the maximum allowed. The backend
+  may also use fewer registers than requested whenever possible.
+
+- 0 implies the default no limit on register usage.
+
+- Ignored on older VLIW subtargets which did not have separate scalar
+and vector registers, R600 through Northern Islands.
+}];
+}
+
+
+def AMDGPUNumVGPRDocs : Documentation {
+  let Category = DocCatAMDGPURegisterAttributes;
+  let Content = [{
+Clang supports the
+``__attribute__((amdgpu_num_vgpr(<num_registers>)))`` attribute on AMD
+Southern Islands GPUs and later for controlling the number of vector
+registers. A typical value would be between 4 and 256 in increments
+of 4.
+}];
+}
+
+def AMDGPUNumSGPRDocs : Documentation {
+  let Category = DocCatAMDGPURegisterAttributes;
+  let Content = [{
+
+Clang supports the
+``__attribute__((amdgpu_num_sgpr(<num_registers>)))`` attribute on AMD
+Southern Islands GPUs and later for controlling the number of scalar
+registers. A typical value would be between 8 and 104 in increments of
+8.
+
+Due to common instruction constraints, an additional 2-4 SGPRs are
+typically required for internal use depending on features used. This
+value is a hint for the total number of SGPRs to use, and not the
+number of user SGPRs, so no special consideration needs to be given
+for these.
+}];
+}
+
 def DocCatCallingConvs : DocumentationCategory<"Calling Conventions"> {
   let Content = [{
 Clang supports several different calling conventions, depending on the target

Modified: cfe/trunk/include/clang/Basic/DiagnosticSemaKinds.td
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/DiagnosticSemaKinds.td?rev=223384&r1=223383&r2=223384&view=diff
==============================================================================
--- cfe/trunk/include/clang/Basic/DiagnosticSemaKinds.td (original)
+++ cfe/trunk/include/clang/Basic/DiagnosticSemaKinds.td Thu Dec  4 14:38:18 2014
@@ -2244,7 +2244,7 @@ def warn_attribute_wrong_decl_type : War
   "Objective-C instance methods|init methods of interface or class extension declarations|"
   "variables, functions and classes|Objective-C protocols|"
   "functions and global variables|structs or typedefs|"
-  "interface or protocol declarations}1">,
+  "interface or protocol declarations|kernel functions}1">,
   InGroup<IgnoredAttributes>;
 def err_attribute_wrong_decl_type : Error<warn_attribute_wrong_decl_type.Text>;
 def warn_type_attribute_wrong_type : Warning<

Modified: cfe/trunk/include/clang/Sema/AttributeList.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Sema/AttributeList.h?rev=223384&r1=223383&r2=223384&view=diff
==============================================================================
--- cfe/trunk/include/clang/Sema/AttributeList.h (original)
+++ cfe/trunk/include/clang/Sema/AttributeList.h Thu Dec  4 14:38:18 2014
@@ -844,7 +844,8 @@ enum AttributeDeclKind {
   ExpectedObjectiveCProtocol,
   ExpectedFunctionGlobalVarMethodOrProperty,
   ExpectedStructOrTypedef,
-  ExpectedObjectiveCInterfaceOrProtocol
+  ExpectedObjectiveCInterfaceOrProtocol,
+  ExpectedKernelFunction
 };
 
 }  // end namespace clang

Modified: cfe/trunk/lib/CodeGen/TargetInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/TargetInfo.cpp?rev=223384&r1=223383&r2=223384&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/TargetInfo.cpp (original)
+++ cfe/trunk/lib/CodeGen/TargetInfo.cpp Thu Dec  4 14:38:18 2014
@@ -20,6 +20,7 @@
 #include "clang/AST/RecordLayout.h"
 #include "clang/CodeGen/CGFunctionInfo.h"
 #include "clang/Frontend/CodeGenOptions.h"
+#include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/Triple.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/Type.h"
@@ -6082,6 +6083,45 @@ llvm::Value *HexagonABIInfo::EmitVAArg(l
   return AddrTyped;
 }
 
+//===----------------------------------------------------------------------===//
+// AMDGPU ABI Implementation
+//===----------------------------------------------------------------------===//
+
+namespace {
+
+class AMDGPUTargetCodeGenInfo : public TargetCodeGenInfo {
+public:
+  AMDGPUTargetCodeGenInfo(CodeGenTypes &CGT)
+    : TargetCodeGenInfo(new DefaultABIInfo(CGT)) {}
+  void SetTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
+                           CodeGen::CodeGenModule &M) const override;
+};
+
+}
+
+void AMDGPUTargetCodeGenInfo::SetTargetAttributes(
+  const Decl *D,
+  llvm::GlobalValue *GV,
+  CodeGen::CodeGenModule &M) const {
+  const FunctionDecl *FD = dyn_cast<FunctionDecl>(D);
+  if (!FD)
+    return;
+
+  if (const auto Attr = FD->getAttr<AMDGPUNumVGPRAttr>()) {
+    llvm::Function *F = cast<llvm::Function>(GV);
+    uint32_t NumVGPR = Attr->getNumVGPR();
+    if (NumVGPR != 0)
+      F->addFnAttr("amdgpu_num_vgpr", llvm::utostr(NumVGPR));
+  }
+
+  if (const auto Attr = FD->getAttr<AMDGPUNumSGPRAttr>()) {
+    llvm::Function *F = cast<llvm::Function>(GV);
+    unsigned NumSGPR = Attr->getNumSGPR();
+    if (NumSGPR != 0)
+      F->addFnAttr("amdgpu_num_sgpr", llvm::utostr(NumSGPR));
+  }
+}
+
 
 //===----------------------------------------------------------------------===//
 // SPARC v9 ABI Implementation.
@@ -7143,6 +7183,8 @@ const TargetCodeGenInfo &CodeGenModule::
   }
   case llvm::Triple::hexagon:
     return *(TheTargetCodeGenInfo = new HexagonTargetCodeGenInfo(Types));
+  case llvm::Triple::r600:
+    return *(TheTargetCodeGenInfo = new AMDGPUTargetCodeGenInfo(Types));
   case llvm::Triple::sparcv9:
     return *(TheTargetCodeGenInfo = new SparcV9TargetCodeGenInfo(Types));
   case llvm::Triple::xcore:

Modified: cfe/trunk/lib/Sema/SemaDeclAttr.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaDeclAttr.cpp?rev=223384&r1=223383&r2=223384&view=diff
==============================================================================
--- cfe/trunk/lib/Sema/SemaDeclAttr.cpp (original)
+++ cfe/trunk/lib/Sema/SemaDeclAttr.cpp Thu Dec  4 14:38:18 2014
@@ -3940,6 +3940,32 @@ static void handleInterruptAttr(Sema &S,
     handleARMInterruptAttr(S, D, Attr);
 }
 
+static void handleAMDGPUNumVGPRAttr(Sema &S, Decl *D,
+                                    const AttributeList &Attr) {
+  uint32_t NumRegs;
+  Expr *NumRegsExpr = static_cast<Expr *>(Attr.getArgAsExpr(0));
+  if (!checkUInt32Argument(S, Attr, NumRegsExpr, NumRegs))
+    return;
+
+  D->addAttr(::new (S.Context)
+             AMDGPUNumVGPRAttr(Attr.getLoc(), S.Context,
+                               NumRegs,
+                               Attr.getAttributeSpellingListIndex()));
+}
+
+static void handleAMDGPUNumSGPRAttr(Sema &S, Decl *D,
+                                    const AttributeList &Attr) {
+  uint32_t NumRegs;
+  Expr *NumRegsExpr = static_cast<Expr *>(Attr.getArgAsExpr(0));
+  if (!checkUInt32Argument(S, Attr, NumRegsExpr, NumRegs))
+    return;
+
+  D->addAttr(::new (S.Context)
+             AMDGPUNumSGPRAttr(Attr.getLoc(), S.Context,
+                               NumRegs,
+                               Attr.getAttributeSpellingListIndex()));
+}
+
 static void handleX86ForceAlignArgPointerAttr(Sema &S, Decl *D,
                                               const AttributeList& Attr) {
   // If we try to apply it to a function pointer, don't warn, but don't
@@ -4247,6 +4273,12 @@ static void ProcessDeclAttribute(Sema &S
   case AttributeList::AT_NoMips16:
     handleSimpleAttribute<NoMips16Attr>(S, D, Attr);
     break;
+  case AttributeList::AT_AMDGPUNumVGPR:
+    handleAMDGPUNumVGPRAttr(S, D, Attr);
+    break;
+  case AttributeList::AT_AMDGPUNumSGPR:
+    handleAMDGPUNumSGPRAttr(S, D, Attr);
+    break;
   case AttributeList::AT_IBAction:
     handleSimpleAttribute<IBActionAttr>(S, D, Attr);
     break;

Added: cfe/trunk/test/CodeGenOpenCL/amdgpu-num-gpr-attr.cl
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenOpenCL/amdgpu-num-gpr-attr.cl?rev=223384&view=auto
==============================================================================
--- cfe/trunk/test/CodeGenOpenCL/amdgpu-num-gpr-attr.cl (added)
+++ cfe/trunk/test/CodeGenOpenCL/amdgpu-num-gpr-attr.cl Thu Dec  4 14:38:18 2014
@@ -0,0 +1,48 @@
+// RUN: %clang_cc1 -triple r600-- -target-cpu tahiti -O0 -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O0 -emit-llvm -verify -o - %s | FileCheck -check-prefix=X86 %s
+
+// Make sure this is silently accepted on other targets.
+
+__attribute__((amdgpu_num_vgpr(64))) // expected-no-diagnostics
+kernel void test_num_vgpr64() {
+// CHECK: define void @test_num_vgpr64() [[ATTR_VGPR64:#[0-9]+]]
+}
+
+__attribute__((amdgpu_num_sgpr(32))) // expected-no-diagnostics
+kernel void test_num_sgpr32() {
+// CHECK: define void @test_num_sgpr32() [[ATTR_SGPR32:#[0-9]+]]
+}
+
+__attribute__((amdgpu_num_vgpr(64), amdgpu_num_sgpr(32))) // expected-no-diagnostics
+kernel void test_num_vgpr64_sgpr32() {
+// CHECK: define void @test_num_vgpr64_sgpr32() [[ATTR_VGPR64_SGPR32:#[0-9]+]]
+
+}
+
+__attribute__((amdgpu_num_sgpr(20), amdgpu_num_vgpr(40))) // expected-no-diagnostics
+kernel void test_num_sgpr20_vgpr40() {
+// CHECK: define void @test_num_sgpr20_vgpr40() [[ATTR_SGPR20_VGPR40:#[0-9]+]]
+}
+
+__attribute__((amdgpu_num_vgpr(0))) // expected-no-diagnostics
+kernel void test_num_vgpr0() {
+}
+
+__attribute__((amdgpu_num_sgpr(0))) // expected-no-diagnostics
+kernel void test_num_sgpr0() {
+}
+
+__attribute__((amdgpu_num_vgpr(0), amdgpu_num_sgpr(0))) // expected-no-diagnostics
+kernel void test_num_vgpr0_sgpr0() {
+}
+
+
+// X86-NOT: "amdgpu_num_vgpr"
+// X86-NOT: "amdgpu_num_sgpr"
+
+// CHECK-DAG-NOT: "amdgpu_num_vgpr"="0"
+// CHECK-DAG-NOT: "amdgpu_num_sgpr"="0"
+// CHECK-DAG: attributes [[ATTR_VGPR64]] = { nounwind "amdgpu_num_vgpr"="64"
+// CHECK-DAG: attributes [[ATTR_SGPR32]] = { nounwind "amdgpu_num_sgpr"="32"
+// CHECK-DAG: attributes [[ATTR_VGPR64_SGPR32]] = { nounwind "amdgpu_num_sgpr"="32" "amdgpu_num_vgpr"="64"
+// CHECK-DAG: attributes [[ATTR_SGPR20_VGPR40]] = { nounwind "amdgpu_num_sgpr"="20" "amdgpu_num_vgpr"="40"

Added: cfe/trunk/test/SemaCUDA/amdgpu-num-gpr-attr.cu
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/SemaCUDA/amdgpu-num-gpr-attr.cu?rev=223384&view=auto
==============================================================================
--- cfe/trunk/test/SemaCUDA/amdgpu-num-gpr-attr.cu (added)
+++ cfe/trunk/test/SemaCUDA/amdgpu-num-gpr-attr.cu Thu Dec  4 14:38:18 2014
@@ -0,0 +1,14 @@
+// RUN: %clang_cc1 -fsyntax-only -verify %s
+
+#include "Inputs/cuda.h"
+
+__attribute__((amdgpu_num_vgpr(64))) // expected-error {{'amdgpu_num_vgpr' attribute only applies to kernel functions}}
+__global__ void test_num_vgpr() { }
+
+__attribute__((amdgpu_num_sgpr(32))) // expected-error {{'amdgpu_num_sgpr' attribute only applies to kernel functions}}
+__global__ void test_num_sgpr() { }
+
+// expected-error at +2 {{'amdgpu_num_sgpr' attribute only applies to kernel functions}}
+// expected-error at +1 {{'amdgpu_num_vgpr' attribute only applies to kernel functions}}
+__attribute__((amdgpu_num_sgpr(32), amdgpu_num_vgpr(64)))
+__global__ void test_num_vgpr_num_sgpr() { }

Added: cfe/trunk/test/SemaOpenCL/amdgpu-num-register-attrs.cl
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/SemaOpenCL/amdgpu-num-register-attrs.cl?rev=223384&view=auto
==============================================================================
--- cfe/trunk/test/SemaOpenCL/amdgpu-num-register-attrs.cl (added)
+++ cfe/trunk/test/SemaOpenCL/amdgpu-num-register-attrs.cl Thu Dec  4 14:38:18 2014
@@ -0,0 +1,34 @@
+// RUN: %clang_cc1 -triple r600-- -verify -fsyntax-only %s
+
+typedef __attribute__((amdgpu_num_vgpr(128))) struct FooStruct { // expected-error {{'amdgpu_num_vgpr' attribute only applies to kernel functions}}
+  int x;
+  float y;
+} FooStruct;
+
+
+__attribute__((amdgpu_num_vgpr("ABC"))) kernel void foo2() {} // expected-error {{'amdgpu_num_vgpr' attribute requires an integer constant}}
+__attribute__((amdgpu_num_sgpr("ABC"))) kernel void foo3() {} // expected-error {{'amdgpu_num_sgpr' attribute requires an integer constant}}
+
+
+__attribute__((amdgpu_num_vgpr(40))) void foo4() {} // expected-error {{'amdgpu_num_vgpr' attribute only applies to kernel functions}}
+__attribute__((amdgpu_num_sgpr(64))) void foo5() {} // expected-error {{'amdgpu_num_sgpr' attribute only applies to kernel functions}}
+
+__attribute__((amdgpu_num_vgpr(40))) kernel void foo7() {}
+__attribute__((amdgpu_num_sgpr(64))) kernel void foo8() {}
+__attribute__((amdgpu_num_vgpr(40), amdgpu_num_sgpr(64))) kernel void foo9() {}
+
+// Check 0 VGPR is accepted.
+__attribute__((amdgpu_num_vgpr(0))) kernel void foo10() {}
+
+// Check 0 SGPR is accepted.
+__attribute__((amdgpu_num_sgpr(0))) kernel void foo11() {}
+
+// Check both 0 SGPR and VGPR is accepted.
+__attribute__((amdgpu_num_vgpr(0), amdgpu_num_sgpr(0))) kernel void foo12() {}
+
+// Too large VGPR value.
+__attribute__((amdgpu_num_vgpr(4294967296))) kernel void foo13() {} // expected-error {{integer constant expression evaluates to value 4294967296 that cannot be represented in a 32-bit unsigned integer type}}
+
+__attribute__((amdgpu_num_sgpr(4294967296))) kernel void foo14() {} // expected-error {{integer constant expression evaluates to value 4294967296 that cannot be represented in a 32-bit unsigned integer type}}
+
+__attribute__((amdgpu_num_sgpr(4294967296), amdgpu_num_vgpr(4294967296))) kernel void foo15() {} // expected-error 2 {{integer constant expression evaluates to value 4294967296 that cannot be represented in a 32-bit unsigned integer type}}





More information about the cfe-commits mailing list