r282371 - [AMDGPU] Expose flat work group size, register and wave control attributes
Konstantin Zhuravlyov via cfe-commits
cfe-commits at lists.llvm.org
Sun Sep 25 18:02:57 PDT 2016
Author: kzhuravl
Date: Sun Sep 25 20:02:57 2016
New Revision: 282371
URL: http://llvm.org/viewvc/llvm-project?rev=282371&view=rev
Log:
[AMDGPU] Expose flat work group size, register and wave control attributes
__attribute__((amdgpu_flat_work_group_size(<min>, <max>))) - request minimum and maximum flat work group size
__attribute__((amdgpu_waves_per_eu(<min>[, <max>]))) - request minimum and/or maximum waves per execution unit
Differential Revision: https://reviews.llvm.org/D24513
Added:
cfe/trunk/test/CodeGenOpenCL/amdgpu-attrs.cl
cfe/trunk/test/SemaCUDA/amdgpu-attrs.cu
cfe/trunk/test/SemaOpenCL/amdgpu-attrs.cl
Removed:
cfe/trunk/test/CodeGenOpenCL/amdgpu-num-gpr-attr.cl
cfe/trunk/test/SemaCUDA/amdgpu-num-gpr-attr.cu
cfe/trunk/test/SemaOpenCL/amdgpu-num-register-attrs.cl
Modified:
cfe/trunk/include/clang/Basic/Attr.td
cfe/trunk/include/clang/Basic/AttrDocs.td
cfe/trunk/include/clang/Basic/DiagnosticSemaKinds.td
cfe/trunk/lib/CodeGen/TargetInfo.cpp
cfe/trunk/lib/Sema/SemaDeclAttr.cpp
Modified: cfe/trunk/include/clang/Basic/Attr.td
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/Attr.td?rev=282371&r1=282370&r2=282371&view=diff
==============================================================================
--- cfe/trunk/include/clang/Basic/Attr.td (original)
+++ cfe/trunk/include/clang/Basic/Attr.td Sun Sep 25 20:02:57 2016
@@ -1050,24 +1050,37 @@ def NoMips16 : InheritableAttr, TargetSp
//
// FIXME: This provides a sub-optimal error message if you attempt to
// use this in CUDA, since CUDA does not use the same terminology.
-def AMDGPUNumVGPR : InheritableAttr {
- let Spellings = [GNU<"amdgpu_num_vgpr">];
- let Args = [UnsignedArgument<"NumVGPR">];
- let Documentation = [AMDGPUNumVGPRDocs];
-
-// FIXME: This should be for OpenCLKernelFunction, but is not to
+//
+// FIXME: SubjectList should be for OpenCLKernelFunction, but is not to
// workaround needing to see kernel attribute before others to know if
// this should be rejected on non-kernels.
- let Subjects = SubjectList<[Function], ErrorDiag,
- "ExpectedKernelFunction">;
+
+def AMDGPUFlatWorkGroupSize : InheritableAttr {
+ let Spellings = [GNU<"amdgpu_flat_work_group_size">];
+ let Args = [UnsignedArgument<"Min">, UnsignedArgument<"Max">];
+ let Documentation = [AMDGPUFlatWorkGroupSizeDocs];
+ let Subjects = SubjectList<[Function], ErrorDiag, "ExpectedKernelFunction">;
+}
+
+def AMDGPUWavesPerEU : InheritableAttr {
+ let Spellings = [GNU<"amdgpu_waves_per_eu">];
+ let Args = [UnsignedArgument<"Min">, UnsignedArgument<"Max", 1>];
+ let Documentation = [AMDGPUWavesPerEUDocs];
+ let Subjects = SubjectList<[Function], ErrorDiag, "ExpectedKernelFunction">;
}
def AMDGPUNumSGPR : InheritableAttr {
let Spellings = [GNU<"amdgpu_num_sgpr">];
let Args = [UnsignedArgument<"NumSGPR">];
- let Documentation = [AMDGPUNumSGPRDocs];
- let Subjects = SubjectList<[Function], ErrorDiag,
- "ExpectedKernelFunction">;
+ let Documentation = [AMDGPUNumSGPRNumVGPRDocs];
+ let Subjects = SubjectList<[Function], ErrorDiag, "ExpectedKernelFunction">;
+}
+
+def AMDGPUNumVGPR : InheritableAttr {
+ let Spellings = [GNU<"amdgpu_num_vgpr">];
+ let Args = [UnsignedArgument<"NumVGPR">];
+ let Documentation = [AMDGPUNumSGPRNumVGPRDocs];
+ let Subjects = SubjectList<[Function], ErrorDiag, "ExpectedKernelFunction">;
}
def NoSplitStack : InheritableAttr {
Modified: cfe/trunk/include/clang/Basic/AttrDocs.td
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/AttrDocs.td?rev=282371&r1=282370&r2=282371&view=diff
==============================================================================
--- cfe/trunk/include/clang/Basic/AttrDocs.td (original)
+++ cfe/trunk/include/clang/Basic/AttrDocs.td Sun Sep 25 20:02:57 2016
@@ -889,12 +889,12 @@ variable, a function or method, a functi
enumerator, a non-static data member, or a label.
.. code-block: c++
- #include <cassert>
-
- [[maybe_unused]] void f([[maybe_unused]] bool thing1,
- [[maybe_unused]] bool thing2) {
- [[maybe_unused]] bool b = thing1 && thing2;
- assert(b);
+ #include <cassert>
+
+ [[maybe_unused]] void f([[maybe_unused]] bool thing1,
+ [[maybe_unused]] bool thing2) {
+ [[maybe_unused]] bool b = thing1 && thing2;
+ assert(b);
}
}];
}
@@ -911,15 +911,15 @@ potentially-evaluated discarded-value ex
`void`.
.. code-block: c++
- struct [[nodiscard]] error_info { /*...*/ };
- error_info enable_missile_safety_mode();
-
- void launch_missiles();
- void test_missiles() {
- enable_missile_safety_mode(); // diagnoses
- launch_missiles();
- }
- error_info &foo();
+ struct [[nodiscard]] error_info { /*...*/ };
+ error_info enable_missile_safety_mode();
+
+ void launch_missiles();
+ void test_missiles() {
+ enable_missile_safety_mode(); // diagnoses
+ launch_missiles();
+ }
+ error_info &foo();
void f() { foo(); } // Does not diagnose, error_info is a reference.
}];
}
@@ -1076,64 +1076,110 @@ the front end.
}];
}
-def DocCatAMDGPURegisterAttributes :
- DocumentationCategory<"AMD GPU Register Attributes"> {
- let Content = [{
-Clang supports attributes for controlling register usage on AMD GPU
-targets. These attributes may be attached to a kernel function
-definition and is an optimization hint to the backend for the maximum
-number of registers to use. This is useful in cases where register
-limited occupancy is known to be an important factor for the
-performance for the kernel.
-
-The semantics are as follows:
-
-- The backend will attempt to limit the number of used registers to
- the specified value, but the exact number used is not
- guaranteed. The number used may be rounded up to satisfy the
- allocation requirements or ABI constraints of the subtarget. For
- example, on Southern Islands VGPRs may only be allocated in
- increments of 4, so requesting a limit of 39 VGPRs will really
- attempt to use up to 40. Requesting more registers than the
- subtarget supports will truncate to the maximum allowed. The backend
- may also use fewer registers than requested whenever possible.
-
-- 0 implies the default no limit on register usage.
+def DocCatAMDGPUAttributes : DocumentationCategory<"AMD GPU Attributes">;
-- Ignored on older VLIW subtargets which did not have separate scalar
- and vector registers, R600 through Northern Islands.
+def AMDGPUFlatWorkGroupSizeDocs : Documentation {
+ let Category = DocCatAMDGPUAttributes;
+ let Content = [{
+The flat work-group size is the number of work-items in the work-group size
+specified when the kernel is dispatched. It is the product of the sizes of the
+x, y, and z dimension of the work-group.
-}];
+Clang supports the
+``__attribute__((amdgpu_flat_work_group_size(<min>, <max>)))`` attribute for the
+AMDGPU target. This attribute may be attached to a kernel function definition
+and is an optimization hint.
+
+``<min>`` parameter specifies the minimum flat work-group size, and ``<max>``
+parameter specifies the maximum flat work-group size (must be greater than
+``<min>``) to which all dispatches of the kernel will conform. Passing ``0, 0``
+as ``<min>, <max>`` implies the default behavior (``128, 256``).
+
+If specified, the AMDGPU target backend might be able to produce better machine
+code for barriers and perform scratch promotion by estimating available group
+segment size.
+
+An error will be given if:
+ - Specified values violate subtarget specifications;
+ - Specified values are not compatible with values provided through other
+ attributes.
+ }];
}
-
-def AMDGPUNumVGPRDocs : Documentation {
- let Category = DocCatAMDGPURegisterAttributes;
+def AMDGPUWavesPerEUDocs : Documentation {
+ let Category = DocCatAMDGPUAttributes;
let Content = [{
-Clang supports the
-``__attribute__((amdgpu_num_vgpr(<num_registers>)))`` attribute on AMD
-Southern Islands GPUs and later for controlling the number of vector
-registers. A typical value would be between 4 and 256 in increments
-of 4.
-}];
+A compute unit (CU) is responsible for executing the wavefronts of a work-group.
+It is composed of one or more execution units (EU), which are responsible for
+executing the wavefronts. An EU can have enough resources to maintain the state
+of more than one executing wavefront. This allows an EU to hide latency by
+switching between wavefronts in a similar way to symmetric multithreading on a
+CPU. In order to allow the state for multiple wavefronts to fit on an EU, the
+resources used by a single wavefront have to be limited. For example, the number
+of SGPRs and VGPRs. Limiting such resources can allow greater latency hiding,
+but can result in having to spill some register state to memory.
+
+Clang supports the ``__attribute__((amdgpu_waves_per_eu(<min>[, <max>])))``
+attribute for the AMDGPU target. This attribute may be attached to a kernel
+function definition and is an optimization hint.
+
+``<min>`` parameter specifies the requested minimum number of waves per EU, and
+*optional* ``<max>`` parameter specifies the requested maximum number of waves
+per EU (must be greater than ``<min>`` if specified). If ``<max>`` is omitted,
+then there is no restriction on the maximum number of waves per EU other than
+the one dictated by the hardware for which the kernel is compiled. Passing
+``0, 0`` as ``<min>, <max>`` implies the default behavior (no limits).
+
+If specified, this attribute allows an advanced developer to tune the number of
+wavefronts that are capable of fitting within the resources of an EU. The AMDGPU
+target backend can use this information to limit resources, such as number of
+SGPRs, number of VGPRs, size of available group and private memory segments, in
+such a way that guarantees that at least ``<min>`` wavefronts and at most
+``<max>`` wavefronts are able to fit within the resources of an EU. Requesting
+more wavefronts can hide memory latency but limits available registers which
+can result in spilling. Requesting fewer wavefronts can help reduce cache
+thrashing, but can reduce memory latency hiding.
+
+This attribute controls the machine code generated by the AMDGPU target backend
+to ensure it is capable of meeting the requested values. However, when the
+kernel is executed, there may be other reasons that prevent meeting the request,
+for example, there may be wavefronts from other kernels executing on the EU.
+
+An error will be given if:
+ - Specified values violate subtarget specifications;
+ - Specified values are not compatible with values provided through other
+ attributes;
+ - The AMDGPU target backend is unable to create machine code that can meet the
+ request.
+ }];
}
-def AMDGPUNumSGPRDocs : Documentation {
- let Category = DocCatAMDGPURegisterAttributes;
+def AMDGPUNumSGPRNumVGPRDocs : Documentation {
+ let Category = DocCatAMDGPUAttributes;
let Content = [{
-
-Clang supports the
-``__attribute__((amdgpu_num_sgpr(<num_registers>)))`` attribute on AMD
-Southern Islands GPUs and later for controlling the number of scalar
-registers. A typical value would be between 8 and 104 in increments of
-8.
-
-Due to common instruction constraints, an additional 2-4 SGPRs are
-typically required for internal use depending on features used. This
-value is a hint for the total number of SGPRs to use, and not the
-number of user SGPRs, so no special consideration needs to be given
-for these.
-}];
+Clang supports the ``__attribute__((amdgpu_num_sgpr(<num_sgpr>)))`` and
+``__attribute__((amdgpu_num_vgpr(<num_vgpr>)))`` attributes for the AMDGPU
+target. These attributes may be attached to a kernel function definition and are
+an optimization hint.
+
+If these attributes are specified, then the AMDGPU target backend will attempt
+to limit the number of SGPRs and/or VGPRs used to the specified value(s). The
+number of used SGPRs and/or VGPRs may further be rounded up to satisfy the
+allocation requirements or constraints of the subtarget. Passing ``0`` as
+``num_sgpr`` and/or ``num_vgpr`` implies the default behavior (no limits).
+
+These attributes can be used to test the AMDGPU target backend. It is
+recommended that the ``amdgpu_waves_per_eu`` attribute be used to control
+resources such as SGPRs and VGPRs since it is aware of the limits for different
+subtargets.
+
+An error will be given if:
+ - Specified values violate subtarget specifications;
+ - Specified values are not compatible with values provided through other
+ attributes;
+ - The AMDGPU target backend is unable to create machine code that can meet the
+ request.
+ }];
}
def DocCatCallingConvs : DocumentationCategory<"Calling Conventions"> {
Modified: cfe/trunk/include/clang/Basic/DiagnosticSemaKinds.td
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/DiagnosticSemaKinds.td?rev=282371&r1=282370&r2=282371&view=diff
==============================================================================
--- cfe/trunk/include/clang/Basic/DiagnosticSemaKinds.td (original)
+++ cfe/trunk/include/clang/Basic/DiagnosticSemaKinds.td Sun Sep 25 20:02:57 2016
@@ -2382,6 +2382,9 @@ def err_swift_abi_parameter_wrong_type :
"'%0' parameter must have pointer%select{| to unqualified pointer}1 type; "
"type here is %2">;
+def err_attribute_argument_invalid : Error<
+ "%0 attribute argument is invalid: %select{max must be 0 since min is 0|"
+ "min must not be greater than max}1">;
def err_attribute_argument_is_zero : Error<
"%0 attribute must be greater than 0">;
def warn_attribute_argument_n_negative : Warning<
Modified: cfe/trunk/lib/CodeGen/TargetInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/TargetInfo.cpp?rev=282371&r1=282370&r2=282371&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/TargetInfo.cpp (original)
+++ cfe/trunk/lib/CodeGen/TargetInfo.cpp Sun Sep 25 20:02:57 2016
@@ -6946,25 +6946,55 @@ public:
static void appendOpenCLVersionMD (CodeGen::CodeGenModule &CGM);
void AMDGPUTargetCodeGenInfo::setTargetAttributes(
- const Decl *D,
- llvm::GlobalValue *GV,
- CodeGen::CodeGenModule &M) const {
+ const Decl *D,
+ llvm::GlobalValue *GV,
+ CodeGen::CodeGenModule &M) const {
const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D);
if (!FD)
return;
- if (const auto Attr = FD->getAttr<AMDGPUNumVGPRAttr>()) {
- llvm::Function *F = cast<llvm::Function>(GV);
- uint32_t NumVGPR = Attr->getNumVGPR();
- if (NumVGPR != 0)
- F->addFnAttr("amdgpu_num_vgpr", llvm::utostr(NumVGPR));
+ llvm::Function *F = cast<llvm::Function>(GV);
+
+ if (const auto *Attr = FD->getAttr<AMDGPUFlatWorkGroupSizeAttr>()) {
+ unsigned Min = Attr->getMin();
+ unsigned Max = Attr->getMax();
+
+ if (Min != 0) {
+ assert(Min <= Max && "Min must be less than or equal Max");
+
+ std::string AttrVal = llvm::utostr(Min) + "," + llvm::utostr(Max);
+ F->addFnAttr("amdgpu-flat-work-group-size", AttrVal);
+ } else
+ assert(Max == 0 && "Max must be zero");
}
- if (const auto Attr = FD->getAttr<AMDGPUNumSGPRAttr>()) {
- llvm::Function *F = cast<llvm::Function>(GV);
+ if (const auto *Attr = FD->getAttr<AMDGPUWavesPerEUAttr>()) {
+ unsigned Min = Attr->getMin();
+ unsigned Max = Attr->getMax();
+
+ if (Min != 0) {
+ assert((Max == 0 || Min <= Max) && "Min must be less than or equal Max");
+
+ std::string AttrVal = llvm::utostr(Min);
+ if (Max != 0)
+ AttrVal = AttrVal + "," + llvm::utostr(Max);
+ F->addFnAttr("amdgpu-waves-per-eu", AttrVal);
+ } else
+ assert(Max == 0 && "Max must be zero");
+ }
+
+ if (const auto *Attr = FD->getAttr<AMDGPUNumSGPRAttr>()) {
unsigned NumSGPR = Attr->getNumSGPR();
+
if (NumSGPR != 0)
- F->addFnAttr("amdgpu_num_sgpr", llvm::utostr(NumSGPR));
+ F->addFnAttr("amdgpu-num-sgpr", llvm::utostr(NumSGPR));
+ }
+
+ if (const auto *Attr = FD->getAttr<AMDGPUNumVGPRAttr>()) {
+ uint32_t NumVGPR = Attr->getNumVGPR();
+
+ if (NumVGPR != 0)
+ F->addFnAttr("amdgpu-num-vgpr", llvm::utostr(NumVGPR));
}
appendOpenCLVersionMD(M);
Modified: cfe/trunk/lib/Sema/SemaDeclAttr.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaDeclAttr.cpp?rev=282371&r1=282370&r2=282371&view=diff
==============================================================================
--- cfe/trunk/lib/Sema/SemaDeclAttr.cpp (original)
+++ cfe/trunk/lib/Sema/SemaDeclAttr.cpp Sun Sep 25 20:02:57 2016
@@ -4941,29 +4941,85 @@ static void handleInterruptAttr(Sema &S,
}
}
-static void handleAMDGPUNumVGPRAttr(Sema &S, Decl *D,
+static void handleAMDGPUFlatWorkGroupSizeAttr(Sema &S, Decl *D,
+ const AttributeList &Attr) {
+ uint32_t Min = 0;
+ Expr *MinExpr = Attr.getArgAsExpr(0);
+ if (!checkUInt32Argument(S, Attr, MinExpr, Min))
+ return;
+
+ uint32_t Max = 0;
+ Expr *MaxExpr = Attr.getArgAsExpr(1);
+ if (!checkUInt32Argument(S, Attr, MaxExpr, Max))
+ return;
+
+ if (Min == 0 && Max != 0) {
+ S.Diag(Attr.getLoc(), diag::err_attribute_argument_invalid)
+ << Attr.getName() << 0;
+ return;
+ }
+ if (Min > Max) {
+ S.Diag(Attr.getLoc(), diag::err_attribute_argument_invalid)
+ << Attr.getName() << 1;
+ return;
+ }
+
+ D->addAttr(::new (S.Context)
+ AMDGPUFlatWorkGroupSizeAttr(Attr.getLoc(), S.Context, Min, Max,
+ Attr.getAttributeSpellingListIndex()));
+}
+
+static void handleAMDGPUWavesPerEUAttr(Sema &S, Decl *D,
+ const AttributeList &Attr) {
+ uint32_t Min = 0;
+ Expr *MinExpr = Attr.getArgAsExpr(0);
+ if (!checkUInt32Argument(S, Attr, MinExpr, Min))
+ return;
+
+ uint32_t Max = 0;
+ if (Attr.getNumArgs() == 2) {
+ Expr *MaxExpr = Attr.getArgAsExpr(1);
+ if (!checkUInt32Argument(S, Attr, MaxExpr, Max))
+ return;
+ }
+
+ if (Min == 0 && Max != 0) {
+ S.Diag(Attr.getLoc(), diag::err_attribute_argument_invalid)
+ << Attr.getName() << 0;
+ return;
+ }
+ if (Max != 0 && Min > Max) {
+ S.Diag(Attr.getLoc(), diag::err_attribute_argument_invalid)
+ << Attr.getName() << 1;
+ return;
+ }
+
+ D->addAttr(::new (S.Context)
+ AMDGPUWavesPerEUAttr(Attr.getLoc(), S.Context, Min, Max,
+ Attr.getAttributeSpellingListIndex()));
+}
+
+static void handleAMDGPUNumSGPRAttr(Sema &S, Decl *D,
const AttributeList &Attr) {
- uint32_t NumRegs;
- Expr *NumRegsExpr = static_cast<Expr *>(Attr.getArgAsExpr(0));
- if (!checkUInt32Argument(S, Attr, NumRegsExpr, NumRegs))
+ uint32_t NumSGPR = 0;
+ Expr *NumSGPRExpr = Attr.getArgAsExpr(0);
+ if (!checkUInt32Argument(S, Attr, NumSGPRExpr, NumSGPR))
return;
D->addAttr(::new (S.Context)
- AMDGPUNumVGPRAttr(Attr.getLoc(), S.Context,
- NumRegs,
+ AMDGPUNumSGPRAttr(Attr.getLoc(), S.Context, NumSGPR,
Attr.getAttributeSpellingListIndex()));
}
-static void handleAMDGPUNumSGPRAttr(Sema &S, Decl *D,
+static void handleAMDGPUNumVGPRAttr(Sema &S, Decl *D,
const AttributeList &Attr) {
- uint32_t NumRegs;
- Expr *NumRegsExpr = static_cast<Expr *>(Attr.getArgAsExpr(0));
- if (!checkUInt32Argument(S, Attr, NumRegsExpr, NumRegs))
+ uint32_t NumVGPR = 0;
+ Expr *NumVGPRExpr = Attr.getArgAsExpr(0);
+ if (!checkUInt32Argument(S, Attr, NumVGPRExpr, NumVGPR))
return;
D->addAttr(::new (S.Context)
- AMDGPUNumSGPRAttr(Attr.getLoc(), S.Context,
- NumRegs,
+ AMDGPUNumVGPRAttr(Attr.getLoc(), S.Context, NumVGPR,
Attr.getAttributeSpellingListIndex()));
}
@@ -5417,12 +5473,18 @@ static void ProcessDeclAttribute(Sema &S
case AttributeList::AT_NoMips16:
handleSimpleAttribute<NoMips16Attr>(S, D, Attr);
break;
- case AttributeList::AT_AMDGPUNumVGPR:
- handleAMDGPUNumVGPRAttr(S, D, Attr);
+ case AttributeList::AT_AMDGPUFlatWorkGroupSize:
+ handleAMDGPUFlatWorkGroupSizeAttr(S, D, Attr);
+ break;
+ case AttributeList::AT_AMDGPUWavesPerEU:
+ handleAMDGPUWavesPerEUAttr(S, D, Attr);
break;
case AttributeList::AT_AMDGPUNumSGPR:
handleAMDGPUNumSGPRAttr(S, D, Attr);
break;
+ case AttributeList::AT_AMDGPUNumVGPR:
+ handleAMDGPUNumVGPRAttr(S, D, Attr);
+ break;
case AttributeList::AT_IBAction:
handleSimpleAttribute<IBActionAttr>(S, D, Attr);
break;
@@ -5974,7 +6036,11 @@ void Sema::ProcessDeclAttributeList(Scop
} else if (Attr *A = D->getAttr<VecTypeHintAttr>()) {
Diag(D->getLocation(), diag::err_opencl_kernel_attr) << A;
D->setInvalidDecl();
- } else if (Attr *A = D->getAttr<AMDGPUNumVGPRAttr>()) {
+ } else if (Attr *A = D->getAttr<AMDGPUFlatWorkGroupSizeAttr>()) {
+ Diag(D->getLocation(), diag::err_attribute_wrong_decl_type)
+ << A << ExpectedKernelFunction;
+ D->setInvalidDecl();
+ } else if (Attr *A = D->getAttr<AMDGPUWavesPerEUAttr>()) {
Diag(D->getLocation(), diag::err_attribute_wrong_decl_type)
<< A << ExpectedKernelFunction;
D->setInvalidDecl();
@@ -5982,6 +6048,10 @@ void Sema::ProcessDeclAttributeList(Scop
Diag(D->getLocation(), diag::err_attribute_wrong_decl_type)
<< A << ExpectedKernelFunction;
D->setInvalidDecl();
+ } else if (Attr *A = D->getAttr<AMDGPUNumVGPRAttr>()) {
+ Diag(D->getLocation(), diag::err_attribute_wrong_decl_type)
+ << A << ExpectedKernelFunction;
+ D->setInvalidDecl();
}
}
}
Added: cfe/trunk/test/CodeGenOpenCL/amdgpu-attrs.cl
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenOpenCL/amdgpu-attrs.cl?rev=282371&view=auto
==============================================================================
--- cfe/trunk/test/CodeGenOpenCL/amdgpu-attrs.cl (added)
+++ cfe/trunk/test/CodeGenOpenCL/amdgpu-attrs.cl Sun Sep 25 20:02:57 2016
@@ -0,0 +1,166 @@
+// RUN: %clang_cc1 -triple amdgcn-- -target-cpu tahiti -O0 -emit-llvm -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O0 -emit-llvm -verify -o - %s | FileCheck -check-prefix=X86 %s
+
+__attribute__((amdgpu_flat_work_group_size(0, 0))) // expected-no-diagnostics
+kernel void flat_work_group_size_0_0() {}
+__attribute__((amdgpu_waves_per_eu(0))) // expected-no-diagnostics
+kernel void waves_per_eu_0() {}
+__attribute__((amdgpu_waves_per_eu(0, 0))) // expected-no-diagnostics
+kernel void waves_per_eu_0_0() {}
+__attribute__((amdgpu_num_sgpr(0))) // expected-no-diagnostics
+kernel void num_sgpr0() {}
+__attribute__((amdgpu_num_vgpr(0))) // expected-no-diagnostics
+kernel void num_vgpr0() {}
+
+__attribute__((amdgpu_flat_work_group_size(0, 0), amdgpu_waves_per_eu(0))) // expected-no-diagnostics
+kernel void flat_work_group_size_0_0_waves_per_eu_0() {}
+__attribute__((amdgpu_flat_work_group_size(0, 0), amdgpu_waves_per_eu(0, 0))) // expected-no-diagnostics
+kernel void flat_work_group_size_0_0_waves_per_eu_0_0() {}
+__attribute__((amdgpu_flat_work_group_size(0, 0), amdgpu_num_sgpr(0))) // expected-no-diagnostics
+kernel void flat_work_group_size_0_0_num_sgpr_0() {}
+__attribute__((amdgpu_flat_work_group_size(0, 0), amdgpu_num_vgpr(0))) // expected-no-diagnostics
+kernel void flat_work_group_size_0_0_num_vgpr_0() {}
+__attribute__((amdgpu_waves_per_eu(0), amdgpu_num_sgpr(0))) // expected-no-diagnostics
+kernel void waves_per_eu_0_num_sgpr_0() {}
+__attribute__((amdgpu_waves_per_eu(0), amdgpu_num_vgpr(0))) // expected-no-diagnostics
+kernel void waves_per_eu_0_num_vgpr_0() {}
+__attribute__((amdgpu_waves_per_eu(0, 0), amdgpu_num_sgpr(0))) // expected-no-diagnostics
+kernel void waves_per_eu_0_0_num_sgpr_0() {}
+__attribute__((amdgpu_waves_per_eu(0, 0), amdgpu_num_vgpr(0))) // expected-no-diagnostics
+kernel void waves_per_eu_0_0_num_vgpr_0() {}
+__attribute__((amdgpu_num_sgpr(0), amdgpu_num_vgpr(0))) // expected-no-diagnostics
+kernel void num_sgpr_0_num_vgpr_0() {}
+
+__attribute__((amdgpu_flat_work_group_size(0, 0), amdgpu_waves_per_eu(0), amdgpu_num_sgpr(0))) // expected-no-diagnostics
+kernel void flat_work_group_size_0_0_waves_per_eu_0_num_sgpr_0() {}
+__attribute__((amdgpu_flat_work_group_size(0, 0), amdgpu_waves_per_eu(0), amdgpu_num_vgpr(0))) // expected-no-diagnostics
+kernel void flat_work_group_size_0_0_waves_per_eu_0_num_vgpr_0() {}
+__attribute__((amdgpu_flat_work_group_size(0, 0), amdgpu_waves_per_eu(0, 0), amdgpu_num_sgpr(0))) // expected-no-diagnostics
+kernel void flat_work_group_size_0_0_waves_per_eu_0_0_num_sgpr_0() {}
+__attribute__((amdgpu_flat_work_group_size(0, 0), amdgpu_waves_per_eu(0, 0), amdgpu_num_vgpr(0))) // expected-no-diagnostics
+kernel void flat_work_group_size_0_0_waves_per_eu_0_0_num_vgpr_0() {}
+
+__attribute__((amdgpu_flat_work_group_size(0, 0), amdgpu_waves_per_eu(0), amdgpu_num_sgpr(0), amdgpu_num_vgpr(0))) // expected-no-diagnostics
+kernel void flat_work_group_size_0_0_waves_per_eu_0_num_sgpr_0_num_vgpr_0() {}
+__attribute__((amdgpu_flat_work_group_size(0, 0), amdgpu_waves_per_eu(0, 0), amdgpu_num_sgpr(0), amdgpu_num_vgpr(0))) // expected-no-diagnostics
+kernel void flat_work_group_size_0_0_waves_per_eu_0_0_num_sgpr_0_num_vgpr_0() {}
+
+__attribute__((amdgpu_flat_work_group_size(32, 64))) // expected-no-diagnostics
+kernel void flat_work_group_size_32_64() {
+// CHECK: define amdgpu_kernel void @flat_work_group_size_32_64() [[FLAT_WORK_GROUP_SIZE_32_64:#[0-9]+]]
+}
+__attribute__((amdgpu_waves_per_eu(2))) // expected-no-diagnostics
+kernel void waves_per_eu_2() {
+// CHECK: define amdgpu_kernel void @waves_per_eu_2() [[WAVES_PER_EU_2:#[0-9]+]]
+}
+__attribute__((amdgpu_waves_per_eu(2, 4))) // expected-no-diagnostics
+kernel void waves_per_eu_2_4() {
+// CHECK: define amdgpu_kernel void @waves_per_eu_2_4() [[WAVES_PER_EU_2_4:#[0-9]+]]
+}
+__attribute__((amdgpu_num_sgpr(32))) // expected-no-diagnostics
+kernel void num_sgpr_32() {
+// CHECK: define amdgpu_kernel void @num_sgpr_32() [[NUM_SGPR_32:#[0-9]+]]
+}
+__attribute__((amdgpu_num_vgpr(64))) // expected-no-diagnostics
+kernel void num_vgpr_64() {
+// CHECK: define amdgpu_kernel void @num_vgpr_64() [[NUM_VGPR_64:#[0-9]+]]
+}
+
+__attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_waves_per_eu(2))) // expected-no-diagnostics
+kernel void flat_work_group_size_32_64_waves_per_eu_2() {
+// CHECK: define amdgpu_kernel void @flat_work_group_size_32_64_waves_per_eu_2() [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2:#[0-9]+]]
+}
+__attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_waves_per_eu(2, 4))) // expected-no-diagnostics
+kernel void flat_work_group_size_32_64_waves_per_eu_2_4() {
+// CHECK: define amdgpu_kernel void @flat_work_group_size_32_64_waves_per_eu_2_4() [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4:#[0-9]+]]
+}
+__attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_num_sgpr(32))) // expected-no-diagnostics
+kernel void flat_work_group_size_32_64_num_sgpr_32() {
+// CHECK: define amdgpu_kernel void @flat_work_group_size_32_64_num_sgpr_32() [[FLAT_WORK_GROUP_SIZE_32_64_NUM_SGPR_32:#[0-9]+]]
+}
+__attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_num_vgpr(64))) // expected-no-diagnostics
+kernel void flat_work_group_size_32_64_num_vgpr_64() {
+// CHECK: define amdgpu_kernel void @flat_work_group_size_32_64_num_vgpr_64() [[FLAT_WORK_GROUP_SIZE_32_64_NUM_VGPR_64:#[0-9]+]]
+}
+__attribute__((amdgpu_waves_per_eu(2), amdgpu_num_sgpr(32))) // expected-no-diagnostics
+kernel void waves_per_eu_2_num_sgpr_32() {
+// CHECK: define amdgpu_kernel void @waves_per_eu_2_num_sgpr_32() [[WAVES_PER_EU_2_NUM_SGPR_32:#[0-9]+]]
+}
+__attribute__((amdgpu_waves_per_eu(2), amdgpu_num_vgpr(64))) // expected-no-diagnostics
+kernel void waves_per_eu_2_num_vgpr_64() {
+// CHECK: define amdgpu_kernel void @waves_per_eu_2_num_vgpr_64() [[WAVES_PER_EU_2_NUM_VGPR_64:#[0-9]+]]
+}
+__attribute__((amdgpu_waves_per_eu(2, 4), amdgpu_num_sgpr(32))) // expected-no-diagnostics
+kernel void waves_per_eu_2_4_num_sgpr_32() {
+// CHECK: define amdgpu_kernel void @waves_per_eu_2_4_num_sgpr_32() [[WAVES_PER_EU_2_4_NUM_SGPR_32:#[0-9]+]]
+}
+__attribute__((amdgpu_waves_per_eu(2, 4), amdgpu_num_vgpr(64))) // expected-no-diagnostics
+kernel void waves_per_eu_2_4_num_vgpr_64() {
+// CHECK: define amdgpu_kernel void @waves_per_eu_2_4_num_vgpr_64() [[WAVES_PER_EU_2_4_NUM_VGPR_64:#[0-9]+]]
+}
+__attribute__((amdgpu_num_sgpr(32), amdgpu_num_vgpr(64))) // expected-no-diagnostics
+kernel void num_sgpr_32_num_vgpr_64() {
+// CHECK: define amdgpu_kernel void @num_sgpr_32_num_vgpr_64() [[NUM_SGPR_32_NUM_VGPR_64:#[0-9]+]]
+}
+
+__attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_waves_per_eu(2), amdgpu_num_sgpr(32)))
+kernel void flat_work_group_size_32_64_waves_per_eu_2_num_sgpr_32() {
+// CHECK: define amdgpu_kernel void @flat_work_group_size_32_64_waves_per_eu_2_num_sgpr_32() [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_NUM_SGPR_32:#[0-9]+]]
+}
+__attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_waves_per_eu(2), amdgpu_num_vgpr(64)))
+kernel void flat_work_group_size_32_64_waves_per_eu_2_num_vgpr_64() {
+// CHECK: define amdgpu_kernel void @flat_work_group_size_32_64_waves_per_eu_2_num_vgpr_64() [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_NUM_VGPR_64:#[0-9]+]]
+}
+__attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_waves_per_eu(2, 4), amdgpu_num_sgpr(32)))
+kernel void flat_work_group_size_32_64_waves_per_eu_2_4_num_sgpr_32() {
+// CHECK: define amdgpu_kernel void @flat_work_group_size_32_64_waves_per_eu_2_4_num_sgpr_32() [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4_NUM_SGPR_32:#[0-9]+]]
+}
+__attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_waves_per_eu(2, 4), amdgpu_num_vgpr(64)))
+kernel void flat_work_group_size_32_64_waves_per_eu_2_4_num_vgpr_64() {
+// CHECK: define amdgpu_kernel void @flat_work_group_size_32_64_waves_per_eu_2_4_num_vgpr_64() [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4_NUM_VGPR_64:#[0-9]+]]
+}
+
+__attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_waves_per_eu(2), amdgpu_num_sgpr(32), amdgpu_num_vgpr(64))) // expected-no-diagnostics
+kernel void flat_work_group_size_32_64_waves_per_eu_2_num_sgpr_32_num_vgpr_64() {
+// CHECK: define amdgpu_kernel void @flat_work_group_size_32_64_waves_per_eu_2_num_sgpr_32_num_vgpr_64() [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_NUM_SGPR_32_NUM_VGPR_64:#[0-9]+]]
+}
+__attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_waves_per_eu(2, 4), amdgpu_num_sgpr(32), amdgpu_num_vgpr(64))) // expected-no-diagnostics
+kernel void flat_work_group_size_32_64_waves_per_eu_2_4_num_sgpr_32_num_vgpr_64() {
+// CHECK: define amdgpu_kernel void @flat_work_group_size_32_64_waves_per_eu_2_4_num_sgpr_32_num_vgpr_64() [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4_NUM_SGPR_32_NUM_VGPR_64:#[0-9]+]]
+}
+
+// Make sure this is silently accepted on other targets.
+// X86-NOT: "amdgpu-flat-work-group-size"
+// X86-NOT: "amdgpu-waves-per-eu"
+// X86-NOT: "amdgpu-num-vgpr"
+// X86-NOT: "amdgpu-num-sgpr"
+
+// CHECK-NOT: "amdgpu-flat-work-group-size"="0,0"
+// CHECK-NOT: "amdgpu-waves-per-eu"="0"
+// CHECK-NOT: "amdgpu-waves-per-eu"="0,0"
+// CHECK-NOT: "amdgpu-num-sgpr"="0"
+// CHECK-NOT: "amdgpu-num-vgpr"="0"
+
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64]] = { nounwind "amdgpu-flat-work-group-size"="32,64"
+// CHECK-DAG: attributes [[WAVES_PER_EU_2]] = { nounwind "amdgpu-waves-per-eu"="2"
+// CHECK-DAG: attributes [[WAVES_PER_EU_2_4]] = { nounwind "amdgpu-waves-per-eu"="2,4"
+// CHECK-DAG: attributes [[NUM_SGPR_32]] = { nounwind "amdgpu-num-sgpr"="32"
+// CHECK-DAG: attributes [[NUM_VGPR_64]] = { nounwind "amdgpu-num-vgpr"="64"
+
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2]] = { nounwind "amdgpu-flat-work-group-size"="32,64" "amdgpu-waves-per-eu"="2"
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4]] = { nounwind "amdgpu-flat-work-group-size"="32,64" "amdgpu-waves-per-eu"="2,4"
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_NUM_SGPR_32]] = { nounwind "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-sgpr"="32"
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_NUM_VGPR_64]] = { nounwind "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-vgpr"="64"
+// CHECK-DAG: attributes [[WAVES_PER_EU_2_NUM_SGPR_32]] = { nounwind "amdgpu-num-sgpr"="32" "amdgpu-waves-per-eu"="2"
+// CHECK-DAG: attributes [[WAVES_PER_EU_2_NUM_VGPR_64]] = { nounwind "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2"
+// CHECK-DAG: attributes [[WAVES_PER_EU_2_4_NUM_SGPR_32]] = { nounwind "amdgpu-num-sgpr"="32" "amdgpu-waves-per-eu"="2,4"
+// CHECK-DAG: attributes [[WAVES_PER_EU_2_4_NUM_VGPR_64]] = { nounwind "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2,4"
+// CHECK-DAG: attributes [[NUM_SGPR_32_NUM_VGPR_64]] = { nounwind "amdgpu-num-sgpr"="32" "amdgpu-num-vgpr"="64"
+
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_NUM_SGPR_32]] = { nounwind "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-sgpr"="32" "amdgpu-waves-per-eu"="2"
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_NUM_VGPR_64]] = { nounwind "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2"
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4_NUM_SGPR_32]] = { nounwind "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-sgpr"="32" "amdgpu-waves-per-eu"="2,4"
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4_NUM_VGPR_64]] = { nounwind "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2,4"
+
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_NUM_SGPR_32_NUM_VGPR_64]] = { nounwind "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-sgpr"="32" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2"
+// CHECK-DAG: attributes [[FLAT_WORK_GROUP_SIZE_32_64_WAVES_PER_EU_2_4_NUM_SGPR_32_NUM_VGPR_64]] = { nounwind "amdgpu-flat-work-group-size"="32,64" "amdgpu-num-sgpr"="32" "amdgpu-num-vgpr"="64" "amdgpu-waves-per-eu"="2,4"
Removed: cfe/trunk/test/CodeGenOpenCL/amdgpu-num-gpr-attr.cl
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenOpenCL/amdgpu-num-gpr-attr.cl?rev=282370&view=auto
==============================================================================
--- cfe/trunk/test/CodeGenOpenCL/amdgpu-num-gpr-attr.cl (original)
+++ cfe/trunk/test/CodeGenOpenCL/amdgpu-num-gpr-attr.cl (removed)
@@ -1,48 +0,0 @@
-// RUN: %clang_cc1 -triple amdgcn-- -target-cpu tahiti -O0 -emit-llvm -o - %s | FileCheck %s
-// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -O0 -emit-llvm -verify -o - %s | FileCheck -check-prefix=X86 %s
-
-// Make sure this is silently accepted on other targets.
-
-__attribute__((amdgpu_num_vgpr(64))) // expected-no-diagnostics
-kernel void test_num_vgpr64() {
-// CHECK: define amdgpu_kernel void @test_num_vgpr64() [[ATTR_VGPR64:#[0-9]+]]
-}
-
-__attribute__((amdgpu_num_sgpr(32))) // expected-no-diagnostics
-kernel void test_num_sgpr32() {
-// CHECK: define amdgpu_kernel void @test_num_sgpr32() [[ATTR_SGPR32:#[0-9]+]]
-}
-
-__attribute__((amdgpu_num_vgpr(64), amdgpu_num_sgpr(32))) // expected-no-diagnostics
-kernel void test_num_vgpr64_sgpr32() {
-// CHECK: define amdgpu_kernel void @test_num_vgpr64_sgpr32() [[ATTR_VGPR64_SGPR32:#[0-9]+]]
-
-}
-
-__attribute__((amdgpu_num_sgpr(20), amdgpu_num_vgpr(40))) // expected-no-diagnostics
-kernel void test_num_sgpr20_vgpr40() {
-// CHECK: define amdgpu_kernel void @test_num_sgpr20_vgpr40() [[ATTR_SGPR20_VGPR40:#[0-9]+]]
-}
-
-__attribute__((amdgpu_num_vgpr(0))) // expected-no-diagnostics
-kernel void test_num_vgpr0() {
-}
-
-__attribute__((amdgpu_num_sgpr(0))) // expected-no-diagnostics
-kernel void test_num_sgpr0() {
-}
-
-__attribute__((amdgpu_num_vgpr(0), amdgpu_num_sgpr(0))) // expected-no-diagnostics
-kernel void test_num_vgpr0_sgpr0() {
-}
-
-
-// X86-NOT: "amdgpu_num_vgpr"
-// X86-NOT: "amdgpu_num_sgpr"
-
-// CHECK-NOT: "amdgpu_num_vgpr"="0"
-// CHECK-NOT: "amdgpu_num_sgpr"="0"
-// CHECK-DAG: attributes [[ATTR_VGPR64]] = { nounwind "amdgpu_num_vgpr"="64"
-// CHECK-DAG: attributes [[ATTR_SGPR32]] = { nounwind "amdgpu_num_sgpr"="32"
-// CHECK-DAG: attributes [[ATTR_VGPR64_SGPR32]] = { nounwind "amdgpu_num_sgpr"="32" "amdgpu_num_vgpr"="64"
-// CHECK-DAG: attributes [[ATTR_SGPR20_VGPR40]] = { nounwind "amdgpu_num_sgpr"="20" "amdgpu_num_vgpr"="40"
Added: cfe/trunk/test/SemaCUDA/amdgpu-attrs.cu
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/SemaCUDA/amdgpu-attrs.cu?rev=282371&view=auto
==============================================================================
--- cfe/trunk/test/SemaCUDA/amdgpu-attrs.cu (added)
+++ cfe/trunk/test/SemaCUDA/amdgpu-attrs.cu Sun Sep 25 20:02:57 2016
@@ -0,0 +1,110 @@
+// RUN: %clang_cc1 -fsyntax-only -verify %s
+
+#include "Inputs/cuda.h"
+
+
+// expected-error at +2 {{'amdgpu_flat_work_group_size' attribute only applies to kernel functions}}
+__attribute__((amdgpu_flat_work_group_size(32, 64)))
+__global__ void flat_work_group_size_32_64() {}
+
+// expected-error at +2 {{'amdgpu_waves_per_eu' attribute only applies to kernel functions}}
+__attribute__((amdgpu_waves_per_eu(2)))
+__global__ void waves_per_eu_2() {}
+
+// expected-error at +2 {{'amdgpu_waves_per_eu' attribute only applies to kernel functions}}
+__attribute__((amdgpu_waves_per_eu(2, 4)))
+__global__ void waves_per_eu_2_4() {}
+
+// expected-error at +2 {{'amdgpu_num_sgpr' attribute only applies to kernel functions}}
+__attribute__((amdgpu_num_sgpr(32)))
+__global__ void num_sgpr_32() {}
+
+// expected-error at +2 {{'amdgpu_num_vgpr' attribute only applies to kernel functions}}
+__attribute__((amdgpu_num_vgpr(64)))
+__global__ void num_vgpr_64() {}
+
+
+// expected-error at +3 {{'amdgpu_flat_work_group_size' attribute only applies to kernel functions}}
+// fixme-expected-error at +2 {{'amdgpu_waves_per_eu' attribute only applies to kernel functions}}
+__attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_waves_per_eu(2)))
+__global__ void flat_work_group_size_32_64_waves_per_eu_2() {}
+
+// expected-error at +3 {{'amdgpu_flat_work_group_size' attribute only applies to kernel functions}}
+// fixme-expected-error at +2 {{'amdgpu_waves_per_eu' attribute only applies to kernel functions}}
+__attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_waves_per_eu(2, 4)))
+__global__ void flat_work_group_size_32_64_waves_per_eu_2_4() {}
+
+// expected-error at +3 {{'amdgpu_flat_work_group_size' attribute only applies to kernel functions}}
+// fixme-expected-error at +2 {{'amdgpu_num_sgpr' attribute only applies to kernel functions}}
+__attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_num_sgpr(32)))
+__global__ void flat_work_group_size_32_64_num_sgpr_32() {}
+
+// expected-error at +3 {{'amdgpu_flat_work_group_size' attribute only applies to kernel functions}}
+// fixme-expected-error at +2 {{'amdgpu_num_vgpr' attribute only applies to kernel functions}}
+__attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_num_vgpr(64)))
+__global__ void flat_work_group_size_32_64_num_vgpr_64() {}
+
+// expected-error at +3 {{'amdgpu_waves_per_eu' attribute only applies to kernel functions}}
+// fixme-expected-error at +2 {{'amdgpu_num_sgpr' attribute only applies to kernel functions}}
+__attribute__((amdgpu_waves_per_eu(2), amdgpu_num_sgpr(32)))
+__global__ void waves_per_eu_2_num_sgpr_32() {}
+
+// expected-error at +3 {{'amdgpu_waves_per_eu' attribute only applies to kernel functions}}
+// fixme-expected-error at +2 {{'amdgpu_num_vgpr' attribute only applies to kernel functions}}
+__attribute__((amdgpu_waves_per_eu(2), amdgpu_num_vgpr(64)))
+__global__ void waves_per_eu_2_num_vgpr_64() {}
+
+// expected-error at +3 {{'amdgpu_waves_per_eu' attribute only applies to kernel functions}}
+// fixme-expected-error at +2 {{'amdgpu_num_sgpr' attribute only applies to kernel functions}}
+__attribute__((amdgpu_waves_per_eu(2, 4), amdgpu_num_sgpr(32)))
+__global__ void waves_per_eu_2_4_num_sgpr_32() {}
+
+// expected-error at +3 {{'amdgpu_waves_per_eu' attribute only applies to kernel functions}}
+// fixme-expected-error at +2 {{'amdgpu_num_vgpr' attribute only applies to kernel functions}}
+__attribute__((amdgpu_waves_per_eu(2, 4), amdgpu_num_vgpr(64)))
+__global__ void waves_per_eu_2_4_num_vgpr_64() {}
+
+// expected-error at +3 {{'amdgpu_num_sgpr' attribute only applies to kernel functions}}
+// fixme-expected-error at +2 {{'amdgpu_num_vgpr' attribute only applies to kernel functions}}
+__attribute__((amdgpu_num_sgpr(32), amdgpu_num_vgpr(64)))
+__global__ void num_sgpr_32_num_vgpr_64() {}
+
+
+// expected-error at +4 {{'amdgpu_flat_work_group_size' attribute only applies to kernel functions}}
+// fixme-expected-error at +3 {{'amdgpu_waves_per_eu' attribute only applies to kernel functions}}
+// fixme-expected-error at +2 {{'amdgpu_num_sgpr' attribute only applies to kernel functions}}
+__attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_waves_per_eu(2), amdgpu_num_sgpr(32)))
+__global__ void flat_work_group_size_32_64_waves_per_eu_2_num_sgpr_32() {}
+
+// expected-error at +4 {{'amdgpu_flat_work_group_size' attribute only applies to kernel functions}}
+// fixme-expected-error at +3 {{'amdgpu_waves_per_eu' attribute only applies to kernel functions}}
+// fixme-expected-error at +2 {{'amdgpu_num_vgpr' attribute only applies to kernel functions}}
+__attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_waves_per_eu(2), amdgpu_num_vgpr(64)))
+__global__ void flat_work_group_size_32_64_waves_per_eu_2_num_vgpr_64() {}
+
+// expected-error at +4 {{'amdgpu_flat_work_group_size' attribute only applies to kernel functions}}
+// fixme-expected-error at +3 {{'amdgpu_waves_per_eu' attribute only applies to kernel functions}}
+// fixme-expected-error at +2 {{'amdgpu_num_sgpr' attribute only applies to kernel functions}}
+__attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_waves_per_eu(2, 4), amdgpu_num_sgpr(32)))
+__global__ void flat_work_group_size_32_64_waves_per_eu_2_4_num_sgpr_32() {}
+
+// expected-error at +4 {{'amdgpu_flat_work_group_size' attribute only applies to kernel functions}}
+// fixme-expected-error at +3 {{'amdgpu_waves_per_eu' attribute only applies to kernel functions}}
+// fixme-expected-error at +2 {{'amdgpu_num_vgpr' attribute only applies to kernel functions}}
+__attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_waves_per_eu(2, 4), amdgpu_num_vgpr(64)))
+__global__ void flat_work_group_size_32_64_waves_per_eu_2_4_num_vgpr_64() {}
+
+
+// expected-error at +5 {{'amdgpu_flat_work_group_size' attribute only applies to kernel functions}}
+// fixme-expected-error at +4 {{'amdgpu_waves_per_eu' attribute only applies to kernel functions}}
+// fixme-expected-error at +3 {{'amdgpu_num_sgpr' attribute only applies to kernel functions}}
+// fixme-expected-error at +2 {{'amdgpu_num_vgpr' attribute only applies to kernel functions}}
+__attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_waves_per_eu(2), amdgpu_num_sgpr(32), amdgpu_num_vgpr(64)))
+__global__ void flat_work_group_size_32_64_waves_per_eu_2_num_sgpr_32_num_vgpr_64() {}
+
+// expected-error at +5 {{'amdgpu_flat_work_group_size' attribute only applies to kernel functions}}
+// fixme-expected-error at +4 {{'amdgpu_waves_per_eu' attribute only applies to kernel functions}}
+// fixme-expected-error at +3 {{'amdgpu_num_sgpr' attribute only applies to kernel functions}}
+// fixme-expected-error at +2 {{'amdgpu_num_vgpr' attribute only applies to kernel functions}}
+__attribute__((amdgpu_flat_work_group_size(32, 64), amdgpu_waves_per_eu(2, 4), amdgpu_num_sgpr(32), amdgpu_num_vgpr(64)))
+__global__ void flat_work_group_size_32_64_waves_per_eu_2_4_num_sgpr_32_num_vgpr_64() {}
Removed: cfe/trunk/test/SemaCUDA/amdgpu-num-gpr-attr.cu
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/SemaCUDA/amdgpu-num-gpr-attr.cu?rev=282370&view=auto
==============================================================================
--- cfe/trunk/test/SemaCUDA/amdgpu-num-gpr-attr.cu (original)
+++ cfe/trunk/test/SemaCUDA/amdgpu-num-gpr-attr.cu (removed)
@@ -1,14 +0,0 @@
-// RUN: %clang_cc1 -fsyntax-only -verify %s
-
-#include "Inputs/cuda.h"
-
-__attribute__((amdgpu_num_vgpr(64)))
-__global__ void test_num_vgpr() { } // expected-error {{'amdgpu_num_vgpr' attribute only applies to kernel functions}}
-
-__attribute__((amdgpu_num_sgpr(32)))
-__global__ void test_num_sgpr() { } // expected-error {{'amdgpu_num_sgpr' attribute only applies to kernel functions}}
-
-// fixme-expected-error at +3 {{'amdgpu_num_sgpr' attribute only applies to kernel functions}}
-// expected-error at +2 {{'amdgpu_num_vgpr' attribute only applies to kernel functions}}
-__attribute__((amdgpu_num_sgpr(32), amdgpu_num_vgpr(64)))
-__global__ void test_num_vgpr_num_sgpr() { }
Added: cfe/trunk/test/SemaOpenCL/amdgpu-attrs.cl
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/SemaOpenCL/amdgpu-attrs.cl?rev=282371&view=auto
==============================================================================
--- cfe/trunk/test/SemaOpenCL/amdgpu-attrs.cl (added)
+++ cfe/trunk/test/SemaOpenCL/amdgpu-attrs.cl Sun Sep 25 20:02:57 2016
@@ -0,0 +1,66 @@
+// RUN: %clang_cc1 -triple amdgcn-- -verify -fsyntax-only %s
+
+typedef __attribute__((amdgpu_flat_work_group_size(32, 64))) struct struct_flat_work_group_size_32_64 { // expected-error {{'amdgpu_flat_work_group_size' attribute only applies to kernel functions}}
+ int x;
+ float y;
+} struct_flat_work_group_size_32_64;
+typedef __attribute__((amdgpu_waves_per_eu(2))) struct struct_waves_per_eu_2 { // expected-error {{'amdgpu_waves_per_eu' attribute only applies to kernel functions}}
+ int x;
+ float y;
+} struct_waves_per_eu_2;
+typedef __attribute__((amdgpu_waves_per_eu(2, 4))) struct struct_waves_per_eu_2_4 { // expected-error {{'amdgpu_waves_per_eu' attribute only applies to kernel functions}}
+ int x;
+ float y;
+} struct_waves_per_eu_2_4;
+typedef __attribute__((amdgpu_num_sgpr(32))) struct struct_num_sgpr_32 { // expected-error {{'amdgpu_num_sgpr' attribute only applies to kernel functions}}
+ int x;
+ float y;
+} struct_num_sgpr_32;
+typedef __attribute__((amdgpu_num_vgpr(64))) struct struct_num_vgpr_64 { // expected-error {{'amdgpu_num_vgpr' attribute only applies to kernel functions}}
+ int x;
+ float y;
+} struct_num_vgpr_64;
+
+__attribute__((amdgpu_flat_work_group_size(32, 64))) void func_flat_work_group_size_32_64() {} // expected-error {{'amdgpu_flat_work_group_size' attribute only applies to kernel functions}}
+__attribute__((amdgpu_waves_per_eu(2))) void func_waves_per_eu_2() {} // expected-error {{'amdgpu_waves_per_eu' attribute only applies to kernel functions}}
+__attribute__((amdgpu_waves_per_eu(2, 4))) void func_waves_per_eu_2_4() {} // expected-error {{'amdgpu_waves_per_eu' attribute only applies to kernel functions}}
+__attribute__((amdgpu_num_sgpr(32))) void func_num_sgpr_32() {} // expected-error {{'amdgpu_num_sgpr' attribute only applies to kernel functions}}
+__attribute__((amdgpu_num_vgpr(64))) void func_num_vgpr_64() {} // expected-error {{'amdgpu_num_vgpr' attribute only applies to kernel functions}}
+
+__attribute__((amdgpu_flat_work_group_size("ABC", "ABC"))) kernel void kernel_flat_work_group_size_ABC_ABC() {} // expected-error {{'amdgpu_flat_work_group_size' attribute requires an integer constant}}
+__attribute__((amdgpu_flat_work_group_size(32, "ABC"))) kernel void kernel_flat_work_group_size_32_ABC() {} // expected-error {{'amdgpu_flat_work_group_size' attribute requires an integer constant}}
+__attribute__((amdgpu_flat_work_group_size("ABC", 64))) kernel void kernel_flat_work_group_size_ABC_64() {} // expected-error {{'amdgpu_flat_work_group_size' attribute requires an integer constant}}
+__attribute__((amdgpu_waves_per_eu("ABC"))) kernel void kernel_waves_per_eu_ABC() {} // expected-error {{'amdgpu_waves_per_eu' attribute requires an integer constant}}
+__attribute__((amdgpu_waves_per_eu(2, "ABC"))) kernel void kernel_waves_per_eu_2_ABC() {} // expected-error {{'amdgpu_waves_per_eu' attribute requires an integer constant}}
+__attribute__((amdgpu_waves_per_eu("ABC", 4))) kernel void kernel_waves_per_eu_ABC_4() {} // expected-error {{'amdgpu_waves_per_eu' attribute requires an integer constant}}
+__attribute__((amdgpu_num_sgpr("ABC"))) kernel void kernel_num_sgpr_ABC() {} // expected-error {{'amdgpu_num_sgpr' attribute requires an integer constant}}
+__attribute__((amdgpu_num_vgpr("ABC"))) kernel void kernel_num_vgpr_ABC() {} // expected-error {{'amdgpu_num_vgpr' attribute requires an integer constant}}
+
+__attribute__((amdgpu_flat_work_group_size(4294967296, 4294967296))) kernel void kernel_flat_work_group_size_L_L() {} // expected-error {{integer constant expression evaluates to value 4294967296 that cannot be represented in a 32-bit unsigned integer type}}
+__attribute__((amdgpu_flat_work_group_size(32, 4294967296))) kernel void kernel_flat_work_group_size_32_L() {} // expected-error {{integer constant expression evaluates to value 4294967296 that cannot be represented in a 32-bit unsigned integer type}}
+__attribute__((amdgpu_flat_work_group_size(4294967296, 64))) kernel void kernel_flat_work_group_size_L_64() {} // expected-error {{integer constant expression evaluates to value 4294967296 that cannot be represented in a 32-bit unsigned integer type}}
+__attribute__((amdgpu_waves_per_eu(4294967296))) kernel void kernel_waves_per_eu_L() {} // expected-error {{integer constant expression evaluates to value 4294967296 that cannot be represented in a 32-bit unsigned integer type}}
+__attribute__((amdgpu_waves_per_eu(2, 4294967296))) kernel void kernel_waves_per_eu_2_L() {} // expected-error {{integer constant expression evaluates to value 4294967296 that cannot be represented in a 32-bit unsigned integer type}}
+__attribute__((amdgpu_waves_per_eu(4294967296, 4))) kernel void kernel_waves_per_eu_L_4() {} // expected-error {{integer constant expression evaluates to value 4294967296 that cannot be represented in a 32-bit unsigned integer type}}
+__attribute__((amdgpu_num_sgpr(4294967296))) kernel void kernel_num_sgpr_L() {} // expected-error {{integer constant expression evaluates to value 4294967296 that cannot be represented in a 32-bit unsigned integer type}}
+__attribute__((amdgpu_num_vgpr(4294967296))) kernel void kernel_num_vgpr_L() {} // expected-error {{integer constant expression evaluates to value 4294967296 that cannot be represented in a 32-bit unsigned integer type}}
+
+__attribute__((amdgpu_flat_work_group_size(0, 64))) kernel void kernel_flat_work_group_size_0_64() {} // expected-error {{'amdgpu_flat_work_group_size' attribute argument is invalid: max must be 0 since min is 0}}
+__attribute__((amdgpu_waves_per_eu(0, 4))) kernel void kernel_waves_per_eu_0_4() {} // expected-error {{'amdgpu_waves_per_eu' attribute argument is invalid: max must be 0 since min is 0}}
+
+__attribute__((amdgpu_flat_work_group_size(64, 32))) kernel void kernel_flat_work_group_size_64_32() {} // expected-error {{'amdgpu_flat_work_group_size' attribute argument is invalid: min must not be greater than max}}
+__attribute__((amdgpu_waves_per_eu(4, 2))) kernel void kernel_waves_per_eu_4_2() {} // expected-error {{'amdgpu_waves_per_eu' attribute argument is invalid: min must not be greater than max}}
+
+__attribute__((amdgpu_waves_per_eu(2, 4, 8))) kernel void kernel_waves_per_eu_2_4_8() {} // expected-error {{'amdgpu_waves_per_eu' attribute takes no more than 2 arguments}}
+
+__attribute__((amdgpu_flat_work_group_size(0, 0))) kernel void kernel_flat_work_group_size_0_0() {}
+__attribute__((amdgpu_waves_per_eu(0))) kernel void kernel_waves_per_eu_0() {}
+__attribute__((amdgpu_waves_per_eu(0, 0))) kernel void kernel_waves_per_eu_0_0() {}
+__attribute__((amdgpu_num_sgpr(0))) kernel void kernel_num_sgpr_0() {}
+__attribute__((amdgpu_num_vgpr(0))) kernel void kernel_num_vgpr_0() {}
+
+kernel __attribute__((amdgpu_flat_work_group_size(32, 64))) void kernel_flat_work_group_size_32_64() {}
+kernel __attribute__((amdgpu_waves_per_eu(2))) void kernel_waves_per_eu_2() {}
+kernel __attribute__((amdgpu_waves_per_eu(2, 4))) void kernel_waves_per_eu_2_4() {}
+kernel __attribute__((amdgpu_num_sgpr(32))) void kernel_num_sgpr_32() {}
+kernel __attribute__((amdgpu_num_vgpr(64))) void kernel_num_vgpr_64() {}
Removed: cfe/trunk/test/SemaOpenCL/amdgpu-num-register-attrs.cl
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/SemaOpenCL/amdgpu-num-register-attrs.cl?rev=282370&view=auto
==============================================================================
--- cfe/trunk/test/SemaOpenCL/amdgpu-num-register-attrs.cl (original)
+++ cfe/trunk/test/SemaOpenCL/amdgpu-num-register-attrs.cl (removed)
@@ -1,40 +0,0 @@
-// RUN: %clang_cc1 -triple r600-- -verify -fsyntax-only %s
-
-typedef __attribute__((amdgpu_num_vgpr(128))) struct FooStruct { // expected-error {{'amdgpu_num_vgpr' attribute only applies to kernel functions}}
- int x;
- float y;
-} FooStruct;
-
-
-__attribute__((amdgpu_num_vgpr("ABC"))) kernel void foo2() {} // expected-error {{'amdgpu_num_vgpr' attribute requires an integer constant}}
-__attribute__((amdgpu_num_sgpr("ABC"))) kernel void foo3() {} // expected-error {{'amdgpu_num_sgpr' attribute requires an integer constant}}
-
-
-__attribute__((amdgpu_num_vgpr(40))) void foo4() {} // expected-error {{'amdgpu_num_vgpr' attribute only applies to kernel functions}}
-__attribute__((amdgpu_num_sgpr(64))) void foo5() {} // expected-error {{'amdgpu_num_sgpr' attribute only applies to kernel functions}}
-
-__attribute__((amdgpu_num_vgpr(40))) kernel void foo7() {}
-__attribute__((amdgpu_num_sgpr(64))) kernel void foo8() {}
-__attribute__((amdgpu_num_vgpr(40), amdgpu_num_sgpr(64))) kernel void foo9() {}
-
-// Check 0 VGPR is accepted.
-__attribute__((amdgpu_num_vgpr(0))) kernel void foo10() {}
-
-// Check 0 SGPR is accepted.
-__attribute__((amdgpu_num_sgpr(0))) kernel void foo11() {}
-
-// Check both 0 SGPR and VGPR is accepted.
-__attribute__((amdgpu_num_vgpr(0), amdgpu_num_sgpr(0))) kernel void foo12() {}
-
-// Too large VGPR value.
-__attribute__((amdgpu_num_vgpr(4294967296))) kernel void foo13() {} // expected-error {{integer constant expression evaluates to value 4294967296 that cannot be represented in a 32-bit unsigned integer type}}
-
-__attribute__((amdgpu_num_sgpr(4294967296))) kernel void foo14() {} // expected-error {{integer constant expression evaluates to value 4294967296 that cannot be represented in a 32-bit unsigned integer type}}
-
-__attribute__((amdgpu_num_sgpr(4294967296), amdgpu_num_vgpr(4294967296))) kernel void foo15() {} // expected-error 2 {{integer constant expression evaluates to value 4294967296 that cannot be represented in a 32-bit unsigned integer type}}
-
-
-// Make sure it is accepted with kernel keyword before the attribute.
-kernel __attribute__((amdgpu_num_vgpr(40))) void foo16() {}
-
-kernel __attribute__((amdgpu_num_sgpr(40))) void foo17() {}
More information about the cfe-commits
mailing list