[PATCH] [CUDA] Allow using integral non-type template parameters as launch_bounds attribute arguments.
Aaron Ballman
aaron at aaronballman.com
Mon Apr 13 09:47:40 PDT 2015
On Fri, Apr 10, 2015 at 6:22 PM, Artem Belevich <tra at google.com> wrote:
> Hi rsmith, eliben,
>
> Allow using integral non-type template parameters as launch_bounds attribute arguments.
>
> - Changed CUDALaunchBounds arguments from integers to Expr* so they can
> be saved in AST for instantiation, if needed.
> - Added support for template instantiation of launch_bounds attrubute.
> - Moved evaluation of launch_bounds arguments to NVPTXTargetCodeGenInfo::
> SetTargetAttributes() where it can be done after template instantiation.
> - Amended test cases.
>
> http://reviews.llvm.org/D8985
>
> Files:
> include/clang/Basic/Attr.td
> lib/CodeGen/TargetInfo.cpp
> lib/Sema/SemaDeclAttr.cpp
> lib/Sema/SemaTemplateInstantiateDecl.cpp
> test/CodeGenCUDA/launch-bounds.cu
> test/SemaCUDA/launch_bounds.cu
>
> EMAIL PREFERENCES
> http://reviews.llvm.org/settings/panel/emailpreferences/
> Index: include/clang/Basic/Attr.td
> ===================================================================
> --- include/clang/Basic/Attr.td
> +++ include/clang/Basic/Attr.td
> @@ -581,7 +581,7 @@
>
> def CUDALaunchBounds : InheritableAttr {
> let Spellings = [GNU<"launch_bounds">];
> - let Args = [IntArgument<"MaxThreads">, DefaultIntArgument<"MinBlocks", 0>];
> + let Args = [ExprArgument<"MaxThreads">, ExprArgument<"MinBlocks", 1>];
On the wish list for someday: a ConstIntExprArgument that does
boilerplate for you. I don't expect that for your patch, but boy, it
sure would be nice. ;-)
> let LangOpts = [CUDA];
> let Subjects = SubjectList<[ObjCMethod, FunctionLike], WarnDiag,
> "ExpectedFunctionOrMethod">;
> Index: lib/CodeGen/TargetInfo.cpp
> ===================================================================
> --- lib/CodeGen/TargetInfo.cpp
> +++ lib/CodeGen/TargetInfo.cpp
> @@ -5079,16 +5079,24 @@
> }
> if (FD->hasAttr<CUDALaunchBoundsAttr>()) {
> // Create !{<func-ref>, metadata !"maxntidx", i32 <val>} node
> - addNVVMMetadata(F, "maxntidx",
> - FD->getAttr<CUDALaunchBoundsAttr>()->getMaxThreads());
> - // min blocks is a default argument for CUDALaunchBoundsAttr, so getting a
> - // zero value from getMinBlocks either means it was not specified in
> - // __launch_bounds__ or the user specified a 0 value. In both cases, we
> + CUDALaunchBoundsAttr *Attr = FD->getAttr<CUDALaunchBoundsAttr>();
Push this up into the if block instead of checking hasAttr and then
calling getAttr.
> + llvm::APSInt MaxThreads;
> + if (Attr->getMaxThreads()->EvaluateAsInt(MaxThreads, M.getContext()))
> + addNVVMMetadata(F, "maxntidx", MaxThreads.getExtValue());
Are a negative number of max threads allowed? I noticed that -1 seems
to get passed through unhindered.
> + else
> + llvm_unreachable("launch_bounds arg 1 evaluation failed.");
> +
> + // min blocks is a default argument for CUDALaunchBoundsAttr. If it was
> + // not specified in __launch_bounds__ or the user specified a 0 value, we
> // don't have to add a PTX directive.
> - int MinCTASM = FD->getAttr<CUDALaunchBoundsAttr>()->getMinBlocks();
> - if (MinCTASM > 0) {
> - // Create !{<func-ref>, metadata !"minctasm", i32 <val>} node
> - addNVVMMetadata(F, "minctasm", MinCTASM);
> + if (Attr->getMinBlocks()) {
> + llvm::APSInt MinBlocks;
> + if (Attr->getMinBlocks()->EvaluateAsInt(MinBlocks, M.getContext())) {
> + if (MinBlocks > 0)
> + // Create !{<func-ref>, metadata !"minctasm", i32 <val>} node
> + addNVVMMetadata(F, "minctasm", MinBlocks.getExtValue());
Silently disallowing negative or zero values is unkind; that should
really be a warning with a test case.
> + } else
> + llvm_unreachable("launch_bounds arg 2 evaluation failed.");
> }
> }
> }
> Index: lib/Sema/SemaDeclAttr.cpp
> ===================================================================
> --- lib/Sema/SemaDeclAttr.cpp
> +++ lib/Sema/SemaDeclAttr.cpp
> @@ -3457,20 +3457,38 @@
> return false;
> }
>
> +static bool isAcceptableLaunchBoundsArgument(Sema &S, Expr *E) {
> + return E->getType()->isIntegerType() &&
> + !E->containsUnexpandedParameterPack() &&
> + (E->isInstantiationDependent() || E->isEvaluatable(S.Context));
> +}
> +
> static void handleLaunchBoundsAttr(Sema &S, Decl *D,
> const AttributeList &Attr) {
> - uint32_t MaxThreads, MinBlocks = 0;
> - if (!checkUInt32Argument(S, Attr, Attr.getArgAsExpr(0), MaxThreads, 1))
> + if (!checkAttributeAtLeastNumArgs(S, Attr, 1) ||
> + !checkAttributeAtMostNumArgs(S, Attr, 2))
> return;
> - if (Attr.getNumArgs() > 1 && !checkUInt32Argument(S, Attr,
> - Attr.getArgAsExpr(1),
> - MinBlocks, 2))
One thing the old code did that the new code does not is test the
validity of the value (for instance, that it's 32-bit instead of
64-bit) and diagnose.
> +
> + Expr *MaxThreads = Attr.getArgAsExpr(0);
> + if (!isAcceptableLaunchBoundsArgument(S, MaxThreads)) {
> + S.Diag(Attr.getLoc(), diag::err_attribute_argument_n_type)
> + << Attr.getName() << 0 << AANT_ArgumentIntegerConstant
> + << MaxThreads->getSourceRange();
> + return;
> + }
> +
> + Expr *MinBlocks = Attr.getNumArgs() > 1 ? Attr.getArgAsExpr(1) : nullptr;
> + if (MinBlocks && !isAcceptableLaunchBoundsArgument(S, MinBlocks)) {
> + S.Diag(Attr.getLoc(), diag::err_attribute_argument_n_type)
> + << Attr.getName() << 1 << AANT_ArgumentIntegerConstant
> + << MinBlocks->getSourceRange();
> return;
> + }
>
> - D->addAttr(::new (S.Context)
> - CUDALaunchBoundsAttr(Attr.getRange(), S.Context,
> - MaxThreads, MinBlocks,
> - Attr.getAttributeSpellingListIndex()));
> + D->addAttr(::new (S.Context) CUDALaunchBoundsAttr(
> + Attr.getRange(), S.Context, MaxThreads, MinBlocks,
> + Attr.getAttributeSpellingListIndex()));
> + return;
> }
>
> static void handleArgumentWithTypeTagAttr(Sema &S, Decl *D,
> Index: lib/Sema/SemaTemplateInstantiateDecl.cpp
> ===================================================================
> --- lib/Sema/SemaTemplateInstantiateDecl.cpp
> +++ lib/Sema/SemaTemplateInstantiateDecl.cpp
> @@ -202,6 +202,37 @@
> New->addAttr(EIA);
> }
>
> +static void instantiateDependentCUDALaunchBoundsAttr(
> + Sema &S, const MultiLevelTemplateArgumentList &TemplateArgs,
> + const CUDALaunchBoundsAttr *A, Decl *New) {
> +
> + // LaunchBounds arguments are constant expressions
> + EnterExpressionEvaluationContext Unevaluated(S, Sema::ConstantEvaluated);
> + ExprResult MaxThreadsResult = S.SubstExpr(A->getMaxThreads(), TemplateArgs);
> + if (MaxThreadsResult.isInvalid()) {
> + S.Diag(A->getLocation(), diag::err_attribute_argument_n_type)
> + << A->getSpelling() << 0 << AANT_ArgumentIntegerConstant
> + << A->getMaxThreads()->getSourceRange();
You should be able to just pass A instead of callign getSpelling on it
(this will also properly quote things).
> + return;
> + }
> +
> + Expr *MinBlocksExpr = A->getMinBlocks();
> + if (MinBlocksExpr) {
> + ExprResult MinBlocksResult = S.SubstExpr(A->getMinBlocks(), TemplateArgs);
> + if (MinBlocksResult.isInvalid()) {
> + S.Diag(A->getLocation(), diag::err_attribute_argument_n_type)
> + << A->getSpelling() << 1 << AANT_ArgumentIntegerConstant
> + << A->getMinBlocks()->getSourceRange();
Same here as above.
> + return;
> + }
> + MinBlocksExpr = MinBlocksResult.getAs<Expr>();
> + }
> +
> + New->addAttr(::new (S.Context) CUDALaunchBoundsAttr(
> + A->getRange(), S.Context, MaxThreadsResult.getAs<Expr>(), MinBlocksExpr,
> + A->getSpellingListIndex()));
> +}
> +
> void Sema::InstantiateAttrs(const MultiLevelTemplateArgumentList &TemplateArgs,
> const Decl *Tmpl, Decl *New,
> LateInstantiatedAttrVec *LateAttrs,
> @@ -233,6 +264,13 @@
> continue;
> }
>
> + const CUDALaunchBoundsAttr *CUDALaunchBounds =
> + dyn_cast<CUDALaunchBoundsAttr>(TmplAttr);
> + if (CUDALaunchBounds) {
Fold the dyn_cast into the if statement, use auto.
> + instantiateDependentCUDALaunchBoundsAttr(*this, TemplateArgs,
> + CUDALaunchBounds, New);
> + continue;
> + }
> // Existing DLL attribute on the instantiation takes precedence.
> if (TmplAttr->getKind() == attr::DLLExport ||
> TmplAttr->getKind() == attr::DLLImport) {
> Index: test/CodeGenCUDA/launch-bounds.cu
> ===================================================================
> --- test/CodeGenCUDA/launch-bounds.cu
> +++ test/CodeGenCUDA/launch-bounds.cu
> @@ -28,3 +28,23 @@
> }
>
> // CHECK: !{{[0-9]+}} = !{void ()* @Kernel2, !"maxntidx", i32 256}
> +
> +template <int max_threads_per_block>
> +__global__ void
> +__launch_bounds__(max_threads_per_block)
> +Kernel3()
> +{
> +}
> +
> +template void Kernel3<MAX_THREADS_PER_BLOCK>();
> +// CHECK: !{{[0-9]+}} = !{void ()* @{{.*}}Kernel3{{.*}}, !"maxntidx", i32 256}
> +
> +template <int max_threads_per_block, int min_blocks_per_mp>
> +__global__ void
> +__launch_bounds__(max_threads_per_block, min_blocks_per_mp)
> +Kernel4()
> +{
> +}
> +template void Kernel4<MAX_THREADS_PER_BLOCK, MIN_BLOCKS_PER_MP>();
> +// CHECK: !{{[0-9]+}} = !{void ()* @{{.*}}Kernel4{{.*}}, !"maxntidx", i32 256}
> +// CHECK: !{{[0-9]+}} = !{void ()* @{{.*}}Kernel4{{.*}}, !"minctasm", i32 2}
> Index: test/SemaCUDA/launch_bounds.cu
> ===================================================================
> --- test/SemaCUDA/launch_bounds.cu
> +++ test/SemaCUDA/launch_bounds.cu
> @@ -9,3 +9,9 @@
> __launch_bounds__() void Test4(void); // expected-error {{'launch_bounds' attribute takes at least 1 argument}}
>
> int Test5 __launch_bounds__(128, 7); // expected-warning {{'launch_bounds' attribute only applies to functions and methods}}
> +
> +template <int a, int b> __launch_bounds__(a, b) void Test6(void) {}
> +template void Test6<128,7>(void);
> +
> +template <int a> __launch_bounds__(a) void Test7(void) {}
> +template void Test7<128>(void);
>
You're missing tests that use actual expressions. It would also be
good to capture some tests with expressions you would expect to fail.
~Aaron
More information about the cfe-commits
mailing list