[clang] Reland "[CUDA][HIP] Fix overloading resolution in global var init" (PR #65606)
Yaxun Liu via cfe-commits
cfe-commits at lists.llvm.org
Thu Sep 7 18:37:36 PDT 2023
https://github.com/yxsamliu updated https://github.com/llvm/llvm-project/pull/65606:
>From a998f1291e63eff53bed12bb0a369f3b636c4ae0 Mon Sep 17 00:00:00 2001
From: "Yaxun (Sam) Liu" <yaxun.liu at amd.com>
Date: Thu, 31 Aug 2023 18:02:56 -0400
Subject: [PATCH] Reland "[CUDA][HIP] Fix overloading resolution in global var
init"
https://reviews.llvm.org/D158247 caused regressions for HIP on Windows
and was reverted.
A reduced test case is:
```
typedef void (__stdcall* funcTy)();
void invoke(funcTy f);
static void __stdcall callee() noexcept {
}
void foo() {
invoke(callee);
}
```
It is due to clang missing handling host/device attributes for calling convention
at a few places
This patch fixes that.
---
.gitignore | 1 +
clang/include/clang/Sema/Sema.h | 46 +++++++++---
clang/lib/Parse/ParseDecl.cpp | 1 +
clang/lib/Sema/SemaCUDA.cpp | 24 ++++++-
clang/lib/Sema/SemaDeclAttr.cpp | 9 ++-
clang/lib/Sema/SemaOverload.cpp | 45 ++++++------
clang/lib/Sema/SemaType.cpp | 60 +++++++++-------
clang/test/CodeGenCUDA/global-initializers.cu | 51 +++++++++++++
.../SemaCUDA/amdgpu-windows-vectorcall.cu | 1 +
clang/test/SemaCUDA/function-overload.cu | 6 ++
.../test/SemaCUDA/global-initializers-host.cu | 32 ---------
clang/test/SemaCUDA/global-initializers.cu | 72 +++++++++++++++++++
clang/test/SemaCUDA/windows-calling-conv.cu | 17 +++++
13 files changed, 272 insertions(+), 93 deletions(-)
create mode 100644 clang/test/CodeGenCUDA/global-initializers.cu
delete mode 100644 clang/test/SemaCUDA/global-initializers-host.cu
create mode 100644 clang/test/SemaCUDA/global-initializers.cu
create mode 100644 clang/test/SemaCUDA/windows-calling-conv.cu
diff --git a/.gitignore b/.gitignore
index 20c4f52cd37860e..8021a3eb8919765 100644
--- a/.gitignore
+++ b/.gitignore
@@ -70,3 +70,4 @@ pythonenv*
/clang/utils/analyzer/projects/*/RefScanBuildResults
# automodapi puts generated documentation files here.
/lldb/docs/python_api/
+/Debug/
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index 5fdca93c66ab5cd..1bb096c667e39c3 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -1012,6 +1012,14 @@ class Sema final {
}
} DelayedDiagnostics;
+ enum CUDAFunctionTarget {
+ CFT_Device,
+ CFT_Global,
+ CFT_Host,
+ CFT_HostDevice,
+ CFT_InvalidTarget
+ };
+
/// A RAII object to temporarily push a declaration context.
class ContextRAII {
private:
@@ -4753,8 +4761,13 @@ class Sema final {
bool isValidPointerAttrType(QualType T, bool RefOkay = false);
bool CheckRegparmAttr(const ParsedAttr &attr, unsigned &value);
+
+ /// Check validaty of calling convention attribute \p attr. If \p FD
+ /// is not null pointer, use \p FD to determine the CUDA/HIP host/device
+ /// target. Otherwise, it is specified by \p CFT.
bool CheckCallingConvAttr(const ParsedAttr &attr, CallingConv &CC,
- const FunctionDecl *FD = nullptr);
+ const FunctionDecl *FD = nullptr,
+ CUDAFunctionTarget CFT = CFT_InvalidTarget);
bool CheckAttrTarget(const ParsedAttr &CurrAttr);
bool CheckAttrNoArgs(const ParsedAttr &CurrAttr);
bool checkStringLiteralArgumentAttr(const AttributeCommonInfo &CI,
@@ -13266,14 +13279,6 @@ class Sema final {
void checkTypeSupport(QualType Ty, SourceLocation Loc,
ValueDecl *D = nullptr);
- enum CUDAFunctionTarget {
- CFT_Device,
- CFT_Global,
- CFT_Host,
- CFT_HostDevice,
- CFT_InvalidTarget
- };
-
/// Determines whether the given function is a CUDA device/host/kernel/etc.
/// function.
///
@@ -13292,6 +13297,29 @@ class Sema final {
/// Determines whether the given variable is emitted on host or device side.
CUDAVariableTarget IdentifyCUDATarget(const VarDecl *D);
+ /// Defines kinds of CUDA global host/device context where a function may be
+ /// called.
+ enum CUDATargetContextKind {
+ CTCK_Unknown, /// Unknown context
+ CTCK_InitGlobalVar, /// Function called during global variable
+ /// initialization
+ };
+
+ /// Define the current global CUDA host/device context where a function may be
+ /// called. Only used when a function is called outside of any functions.
+ struct CUDATargetContext {
+ CUDAFunctionTarget Target = CFT_HostDevice;
+ CUDATargetContextKind Kind = CTCK_Unknown;
+ Decl *D = nullptr;
+ } CurCUDATargetCtx;
+
+ struct CUDATargetContextRAII {
+ Sema &S;
+ CUDATargetContext SavedCtx;
+ CUDATargetContextRAII(Sema &S_, CUDATargetContextKind K, Decl *D);
+ ~CUDATargetContextRAII() { S.CurCUDATargetCtx = SavedCtx; }
+ };
+
/// Gets the CUDA target for the current context.
CUDAFunctionTarget CurrentCUDATarget() {
return IdentifyCUDATarget(dyn_cast<FunctionDecl>(CurContext));
diff --git a/clang/lib/Parse/ParseDecl.cpp b/clang/lib/Parse/ParseDecl.cpp
index 4a9f2caf654713e..7c27a02ee4af625 100644
--- a/clang/lib/Parse/ParseDecl.cpp
+++ b/clang/lib/Parse/ParseDecl.cpp
@@ -2571,6 +2571,7 @@ Decl *Parser::ParseDeclarationAfterDeclaratorAndAttributes(
}
}
+ Sema::CUDATargetContextRAII X(Actions, Sema::CTCK_InitGlobalVar, ThisDecl);
switch (TheInitKind) {
// Parse declarator '=' initializer.
case InitKind::Equal: {
diff --git a/clang/lib/Sema/SemaCUDA.cpp b/clang/lib/Sema/SemaCUDA.cpp
index cfea6493ced7d26..88f5484575db17a 100644
--- a/clang/lib/Sema/SemaCUDA.cpp
+++ b/clang/lib/Sema/SemaCUDA.cpp
@@ -105,19 +105,37 @@ Sema::IdentifyCUDATarget(const ParsedAttributesView &Attrs) {
}
template <typename A>
-static bool hasAttr(const FunctionDecl *D, bool IgnoreImplicitAttr) {
+static bool hasAttr(const Decl *D, bool IgnoreImplicitAttr) {
return D->hasAttrs() && llvm::any_of(D->getAttrs(), [&](Attr *Attribute) {
return isa<A>(Attribute) &&
!(IgnoreImplicitAttr && Attribute->isImplicit());
});
}
+Sema::CUDATargetContextRAII::CUDATargetContextRAII(Sema &S_,
+ CUDATargetContextKind K,
+ Decl *D)
+ : S(S_) {
+ SavedCtx = S.CurCUDATargetCtx;
+ assert(K == CTCK_InitGlobalVar);
+ auto *VD = dyn_cast_or_null<VarDecl>(D);
+ if (VD && VD->hasGlobalStorage() && !VD->isStaticLocal()) {
+ auto Target = CFT_Host;
+ if ((hasAttr<CUDADeviceAttr>(VD, /*IgnoreImplicit=*/true) &&
+ !hasAttr<CUDAHostAttr>(VD, /*IgnoreImplicit=*/true)) ||
+ hasAttr<CUDASharedAttr>(VD, /*IgnoreImplicit=*/true) ||
+ hasAttr<CUDAConstantAttr>(VD, /*IgnoreImplicit=*/true))
+ Target = CFT_Device;
+ S.CurCUDATargetCtx = {Target, K, VD};
+ }
+}
+
/// IdentifyCUDATarget - Determine the CUDA compilation target for this function
Sema::CUDAFunctionTarget Sema::IdentifyCUDATarget(const FunctionDecl *D,
bool IgnoreImplicitHDAttr) {
- // Code that lives outside a function is run on the host.
+ // Code that lives outside a function gets the target from CurCUDATargetCtx.
if (D == nullptr)
- return CFT_Host;
+ return CurCUDATargetCtx.Target;
if (D->hasAttr<CUDAInvalidTargetAttr>())
return CFT_InvalidTarget;
diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp
index 59e0f3e83cfdd80..cc98713241395ec 100644
--- a/clang/lib/Sema/SemaDeclAttr.cpp
+++ b/clang/lib/Sema/SemaDeclAttr.cpp
@@ -5132,7 +5132,8 @@ static void handleCallConvAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
// Diagnostic is emitted elsewhere: here we store the (valid) AL
// in the Decl node for syntactic reasoning, e.g., pretty-printing.
CallingConv CC;
- if (S.CheckCallingConvAttr(AL, CC, /*FD*/nullptr))
+ if (S.CheckCallingConvAttr(AL, CC, /*FD*/ nullptr,
+ S.IdentifyCUDATarget(dyn_cast<FunctionDecl>(D))))
return;
if (!isa<ObjCMethodDecl>(D)) {
@@ -5317,7 +5318,8 @@ static void handleNoRandomizeLayoutAttr(Sema &S, Decl *D,
}
bool Sema::CheckCallingConvAttr(const ParsedAttr &Attrs, CallingConv &CC,
- const FunctionDecl *FD) {
+ const FunctionDecl *FD,
+ CUDAFunctionTarget CFT) {
if (Attrs.isInvalid())
return true;
@@ -5416,7 +5418,8 @@ bool Sema::CheckCallingConvAttr(const ParsedAttr &Attrs, CallingConv &CC,
// on their host/device attributes.
if (LangOpts.CUDA) {
auto *Aux = Context.getAuxTargetInfo();
- auto CudaTarget = IdentifyCUDATarget(FD);
+ assert(FD || CFT != CFT_InvalidTarget);
+ auto CudaTarget = FD ? IdentifyCUDATarget(FD) : CFT;
bool CheckHost = false, CheckDevice = false;
switch (CudaTarget) {
case CFT_HostDevice:
diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp
index fb62dee4aa58eae..d69b339306f0060 100644
--- a/clang/lib/Sema/SemaOverload.cpp
+++ b/clang/lib/Sema/SemaOverload.cpp
@@ -6697,17 +6697,19 @@ void Sema::AddOverloadCandidate(
}
// (CUDA B.1): Check for invalid calls between targets.
- if (getLangOpts().CUDA)
- if (const FunctionDecl *Caller = getCurFunctionDecl(/*AllowLambda=*/true))
- // Skip the check for callers that are implicit members, because in this
- // case we may not yet know what the member's target is; the target is
- // inferred for the member automatically, based on the bases and fields of
- // the class.
- if (!Caller->isImplicit() && !IsAllowedCUDACall(Caller, Function)) {
- Candidate.Viable = false;
- Candidate.FailureKind = ovl_fail_bad_target;
- return;
- }
+ if (getLangOpts().CUDA) {
+ const FunctionDecl *Caller = getCurFunctionDecl(/*AllowLambda=*/true);
+ // Skip the check for callers that are implicit members, because in this
+ // case we may not yet know what the member's target is; the target is
+ // inferred for the member automatically, based on the bases and fields of
+ // the class.
+ if (!(Caller && Caller->isImplicit()) &&
+ !IsAllowedCUDACall(Caller, Function)) {
+ Candidate.Viable = false;
+ Candidate.FailureKind = ovl_fail_bad_target;
+ return;
+ }
+ }
if (Function->getTrailingRequiresClause()) {
ConstraintSatisfaction Satisfaction;
@@ -7219,12 +7221,11 @@ Sema::AddMethodCandidate(CXXMethodDecl *Method, DeclAccessPair FoundDecl,
// (CUDA B.1): Check for invalid calls between targets.
if (getLangOpts().CUDA)
- if (const FunctionDecl *Caller = getCurFunctionDecl(/*AllowLambda=*/true))
- if (!IsAllowedCUDACall(Caller, Method)) {
- Candidate.Viable = false;
- Candidate.FailureKind = ovl_fail_bad_target;
- return;
- }
+ if (!IsAllowedCUDACall(getCurFunctionDecl(/*AllowLambda=*/true), Method)) {
+ Candidate.Viable = false;
+ Candidate.FailureKind = ovl_fail_bad_target;
+ return;
+ }
if (Method->getTrailingRequiresClause()) {
ConstraintSatisfaction Satisfaction;
@@ -12495,10 +12496,12 @@ class AddressOfFunctionResolver {
return false;
if (FunctionDecl *FunDecl = dyn_cast<FunctionDecl>(Fn)) {
- if (S.getLangOpts().CUDA)
- if (FunctionDecl *Caller = S.getCurFunctionDecl(/*AllowLambda=*/true))
- if (!Caller->isImplicit() && !S.IsAllowedCUDACall(Caller, FunDecl))
- return false;
+ if (S.getLangOpts().CUDA) {
+ FunctionDecl *Caller = S.getCurFunctionDecl(/*AllowLambda=*/true);
+ if (!(Caller && Caller->isImplicit()) &&
+ !S.IsAllowedCUDACall(Caller, FunDecl))
+ return false;
+ }
if (FunDecl->isMultiVersion()) {
const auto *TA = FunDecl->getAttr<TargetAttr>();
if (TA && !TA->isDefaultVersion())
diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp
index 94d170af867193c..ffd29446b4f2edd 100644
--- a/clang/lib/Sema/SemaType.cpp
+++ b/clang/lib/Sema/SemaType.cpp
@@ -366,12 +366,14 @@ enum TypeAttrLocation {
TAL_DeclName
};
-static void processTypeAttrs(TypeProcessingState &state, QualType &type,
- TypeAttrLocation TAL,
- const ParsedAttributesView &attrs);
+static void
+processTypeAttrs(TypeProcessingState &state, QualType &type,
+ TypeAttrLocation TAL, const ParsedAttributesView &attrs,
+ Sema::CUDAFunctionTarget CFT = Sema::CFT_HostDevice);
static bool handleFunctionTypeAttr(TypeProcessingState &state, ParsedAttr &attr,
- QualType &type);
+ QualType &type,
+ Sema::CUDAFunctionTarget CFT);
static bool handleMSPointerTypeQualifierAttr(TypeProcessingState &state,
ParsedAttr &attr, QualType &type);
@@ -617,7 +619,8 @@ static void distributeFunctionTypeAttr(TypeProcessingState &state,
/// distributed, false if no location was found.
static bool distributeFunctionTypeAttrToInnermost(
TypeProcessingState &state, ParsedAttr &attr,
- ParsedAttributesView &attrList, QualType &declSpecType) {
+ ParsedAttributesView &attrList, QualType &declSpecType,
+ Sema::CUDAFunctionTarget CFT) {
Declarator &declarator = state.getDeclarator();
// Put it on the innermost function chunk, if there is one.
@@ -629,19 +632,20 @@ static bool distributeFunctionTypeAttrToInnermost(
return true;
}
- return handleFunctionTypeAttr(state, attr, declSpecType);
+ return handleFunctionTypeAttr(state, attr, declSpecType, CFT);
}
/// A function type attribute was written in the decl spec. Try to
/// apply it somewhere.
-static void distributeFunctionTypeAttrFromDeclSpec(TypeProcessingState &state,
- ParsedAttr &attr,
- QualType &declSpecType) {
+static void
+distributeFunctionTypeAttrFromDeclSpec(TypeProcessingState &state,
+ ParsedAttr &attr, QualType &declSpecType,
+ Sema::CUDAFunctionTarget CFT) {
state.saveDeclSpecAttrs();
// Try to distribute to the innermost.
if (distributeFunctionTypeAttrToInnermost(
- state, attr, state.getCurrentAttributes(), declSpecType))
+ state, attr, state.getCurrentAttributes(), declSpecType, CFT))
return;
// If that failed, diagnose the bad attribute when the declarator is
@@ -653,14 +657,14 @@ static void distributeFunctionTypeAttrFromDeclSpec(TypeProcessingState &state,
/// Try to apply it somewhere.
/// `Attrs` is the attribute list containing the declaration (either of the
/// declarator or the declaration).
-static void distributeFunctionTypeAttrFromDeclarator(TypeProcessingState &state,
- ParsedAttr &attr,
- QualType &declSpecType) {
+static void distributeFunctionTypeAttrFromDeclarator(
+ TypeProcessingState &state, ParsedAttr &attr, QualType &declSpecType,
+ Sema::CUDAFunctionTarget CFT) {
Declarator &declarator = state.getDeclarator();
// Try to distribute to the innermost.
if (distributeFunctionTypeAttrToInnermost(
- state, attr, declarator.getAttributes(), declSpecType))
+ state, attr, declarator.getAttributes(), declSpecType, CFT))
return;
// If that failed, diagnose the bad attribute when the declarator is
@@ -682,7 +686,8 @@ static void distributeFunctionTypeAttrFromDeclarator(TypeProcessingState &state,
/// `Attrs` is the attribute list containing the declaration (either of the
/// declarator or the declaration).
static void distributeTypeAttrsFromDeclarator(TypeProcessingState &state,
- QualType &declSpecType) {
+ QualType &declSpecType,
+ Sema::CUDAFunctionTarget CFT) {
// The called functions in this loop actually remove things from the current
// list, so iterating over the existing list isn't possible. Instead, make a
// non-owning copy and iterate over that.
@@ -699,7 +704,7 @@ static void distributeTypeAttrsFromDeclarator(TypeProcessingState &state,
break;
FUNCTION_TYPE_ATTRS_CASELIST:
- distributeFunctionTypeAttrFromDeclarator(state, attr, declSpecType);
+ distributeFunctionTypeAttrFromDeclarator(state, attr, declSpecType, CFT);
break;
MS_TYPE_ATTRS_CASELIST:
@@ -3544,7 +3549,8 @@ static QualType GetDeclSpecTypeForDeclarator(TypeProcessingState &state,
// Note: We don't need to distribute declaration attributes (i.e.
// D.getDeclarationAttributes()) because those are always C++11 attributes,
// and those don't get distributed.
- distributeTypeAttrsFromDeclarator(state, T);
+ distributeTypeAttrsFromDeclarator(
+ state, T, SemaRef.IdentifyCUDATarget(D.getAttributes()));
// Find the deduced type in this type. Look in the trailing return type if we
// have one, otherwise in the DeclSpec type.
@@ -4055,7 +4061,8 @@ static CallingConv getCCForDeclaratorChunk(
// function type. We'll diagnose the failure to apply them in
// handleFunctionTypeAttr.
CallingConv CC;
- if (!S.CheckCallingConvAttr(AL, CC) &&
+ if (!S.CheckCallingConvAttr(AL, CC, /*FunctionDecl=*/nullptr,
+ S.IdentifyCUDATarget(D.getAttributes())) &&
(!FTI.isVariadic || supportsVariadicCall(CC))) {
return CC;
}
@@ -5727,7 +5734,8 @@ static TypeSourceInfo *GetFullTypeForDeclarator(TypeProcessingState &state,
}
// See if there are any attributes on this declarator chunk.
- processTypeAttrs(state, T, TAL_DeclChunk, DeclType.getAttrs());
+ processTypeAttrs(state, T, TAL_DeclChunk, DeclType.getAttrs(),
+ S.IdentifyCUDATarget(D.getAttributes()));
if (DeclType.Kind != DeclaratorChunk::Paren) {
if (ExpectNoDerefChunk && !IsNoDerefableChunk(DeclType))
@@ -7801,7 +7809,8 @@ static bool checkMutualExclusion(TypeProcessingState &state,
/// Process an individual function attribute. Returns true to
/// indicate that the attribute was handled, false if it wasn't.
static bool handleFunctionTypeAttr(TypeProcessingState &state, ParsedAttr &attr,
- QualType &type) {
+ QualType &type,
+ Sema::CUDAFunctionTarget CFT) {
Sema &S = state.getSema();
FunctionTypeUnwrapper unwrapped(S, type);
@@ -8032,7 +8041,7 @@ static bool handleFunctionTypeAttr(TypeProcessingState &state, ParsedAttr &attr,
// Otherwise, a calling convention.
CallingConv CC;
- if (S.CheckCallingConvAttr(attr, CC))
+ if (S.CheckCallingConvAttr(attr, CC, /*FunctionDecl=*/nullptr, CFT))
return true;
const FunctionType *fn = unwrapped.get();
@@ -8584,7 +8593,8 @@ static void HandleLifetimeBoundAttr(TypeProcessingState &State,
static void processTypeAttrs(TypeProcessingState &state, QualType &type,
TypeAttrLocation TAL,
- const ParsedAttributesView &attrs) {
+ const ParsedAttributesView &attrs,
+ Sema::CUDAFunctionTarget CFT) {
state.setParsedNoDeref(false);
if (attrs.empty())
@@ -8826,7 +8836,7 @@ static void processTypeAttrs(TypeProcessingState &state, QualType &type,
// appertain to and hence should not use the "distribution" logic below.
if (attr.isStandardAttributeSyntax() ||
attr.isRegularKeywordAttribute()) {
- if (!handleFunctionTypeAttr(state, attr, type)) {
+ if (!handleFunctionTypeAttr(state, attr, type, CFT)) {
diagnoseBadTypeAttribute(state.getSema(), attr, type);
attr.setInvalid();
}
@@ -8836,10 +8846,10 @@ static void processTypeAttrs(TypeProcessingState &state, QualType &type,
// Never process function type attributes as part of the
// declaration-specifiers.
if (TAL == TAL_DeclSpec)
- distributeFunctionTypeAttrFromDeclSpec(state, attr, type);
+ distributeFunctionTypeAttrFromDeclSpec(state, attr, type, CFT);
// Otherwise, handle the possible delays.
- else if (!handleFunctionTypeAttr(state, attr, type))
+ else if (!handleFunctionTypeAttr(state, attr, type, CFT))
distributeFunctionTypeAttr(state, attr, type);
break;
case ParsedAttr::AT_AcquireHandle: {
diff --git a/clang/test/CodeGenCUDA/global-initializers.cu b/clang/test/CodeGenCUDA/global-initializers.cu
new file mode 100644
index 000000000000000..821260e9c7466ff
--- /dev/null
+++ b/clang/test/CodeGenCUDA/global-initializers.cu
@@ -0,0 +1,51 @@
+// RUN: %clang_cc1 %s -triple x86_64-linux-unknown -emit-llvm -o - \
+// RUN: | FileCheck -check-prefix=HOST %s
+// RUN: %clang_cc1 %s -fcuda-is-device \
+// RUN: -emit-llvm -o - -triple nvptx64 \
+// RUN: -aux-triple x86_64-unknown-linux-gnu | FileCheck \
+// RUN: -check-prefix=DEV %s
+
+#include "Inputs/cuda.h"
+
+// Check host/device-based overloding resolution in global variable initializer.
+double pow(double, double) { return 1.0; }
+
+__device__ double pow(double, int) { return 2.0; }
+
+// HOST-DAG: call {{.*}}double @_Z3powdd(double noundef 1.000000e+00, double noundef 1.000000e+00)
+double X = pow(1.0, 1);
+
+constexpr double cpow(double, double) { return 11.0; }
+
+constexpr __device__ double cpow(double, int) { return 12.0; }
+
+// HOST-DAG: @CX = global double 1.100000e+01
+double CX = cpow(11.0, 1);
+
+// DEV-DAG: @CY = addrspace(1) externally_initialized global double 1.200000e+01
+__device__ double CY = cpow(12.0, 1);
+
+struct A {
+ double pow(double, double) { return 3.0; }
+
+ __device__ double pow(double, int) { return 4.0; }
+};
+
+A a;
+
+// HOST-DAG: call {{.*}}double @_ZN1A3powEdd(ptr {{.*}}@a, double noundef 3.000000e+00, double noundef 1.000000e+00)
+double AX = a.pow(3.0, 1);
+
+struct CA {
+ constexpr double cpow(double, double) const { return 13.0; }
+
+ constexpr __device__ double cpow(double, int) const { return 14.0; }
+};
+
+const CA ca;
+
+// HOST-DAG: @CAX = global double 1.300000e+01
+double CAX = ca.cpow(13.0, 1);
+
+// DEV-DAG: @CAY = addrspace(1) externally_initialized global double 1.400000e+01
+__device__ double CAY = ca.cpow(14.0, 1);
diff --git a/clang/test/SemaCUDA/amdgpu-windows-vectorcall.cu b/clang/test/SemaCUDA/amdgpu-windows-vectorcall.cu
index 7636572f69833c4..7ef8a94750b4c47 100644
--- a/clang/test/SemaCUDA/amdgpu-windows-vectorcall.cu
+++ b/clang/test/SemaCUDA/amdgpu-windows-vectorcall.cu
@@ -1,4 +1,5 @@
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -aux-triple x86_64-pc-windows-msvc -fms-compatibility -fcuda-is-device -fsyntax-only -verify %s
+// RUN: %clang_cc1 -triple x86_64-pc-windows-msvc -fms-compatibility -fsyntax-only -verify %s
__cdecl void hostf1();
__vectorcall void (*hostf2)() = hostf1; // expected-error {{cannot initialize a variable of type 'void ((*))() __attribute__((vectorcall))' with an lvalue of type 'void () __attribute__((cdecl))'}}
diff --git a/clang/test/SemaCUDA/function-overload.cu b/clang/test/SemaCUDA/function-overload.cu
index 822e259968206c7..163648cd9a87af8 100644
--- a/clang/test/SemaCUDA/function-overload.cu
+++ b/clang/test/SemaCUDA/function-overload.cu
@@ -222,7 +222,13 @@ __host__ __device__ void hostdevicef() {
// Test for address of overloaded function resolution in the global context.
HostFnPtr fp_h = h;
HostFnPtr fp_ch = ch;
+#if defined (__CUDA_ARCH__)
+__device__
+#endif
CurrentFnPtr fp_dh = dh;
+#if defined (__CUDA_ARCH__)
+__device__
+#endif
CurrentFnPtr fp_cdh = cdh;
GlobalFnPtr fp_g = g;
diff --git a/clang/test/SemaCUDA/global-initializers-host.cu b/clang/test/SemaCUDA/global-initializers-host.cu
deleted file mode 100644
index 810c6b9777860b0..000000000000000
--- a/clang/test/SemaCUDA/global-initializers-host.cu
+++ /dev/null
@@ -1,32 +0,0 @@
-// RUN: %clang_cc1 %s --std=c++11 -triple x86_64-linux-unknown -fsyntax-only -o - -verify
-
-#include "Inputs/cuda.h"
-
-// Check that we get an error if we try to call a __device__ function from a
-// module initializer.
-
-struct S {
- __device__ S() {}
- // expected-note at -1 {{'S' declared here}}
-};
-
-S s;
-// expected-error at -1 {{reference to __device__ function 'S' in global initializer}}
-
-struct T {
- __host__ __device__ T() {}
-};
-T t; // No error, this is OK.
-
-struct U {
- __host__ U() {}
- __device__ U(int) {}
- // expected-note at -1 {{'U' declared here}}
-};
-U u(42);
-// expected-error at -1 {{reference to __device__ function 'U' in global initializer}}
-
-__device__ int device_fn() { return 42; }
-// expected-note at -1 {{'device_fn' declared here}}
-int n = device_fn();
-// expected-error at -1 {{reference to __device__ function 'device_fn' in global initializer}}
diff --git a/clang/test/SemaCUDA/global-initializers.cu b/clang/test/SemaCUDA/global-initializers.cu
new file mode 100644
index 000000000000000..29e386134a3ddc3
--- /dev/null
+++ b/clang/test/SemaCUDA/global-initializers.cu
@@ -0,0 +1,72 @@
+// RUN: %clang_cc1 %s -triple x86_64-linux-unknown -fsyntax-only -o - -verify
+// RUN: %clang_cc1 %s -fcuda-is-device -triple nvptx -fsyntax-only -o - -verify
+
+#include "Inputs/cuda.h"
+
+// Check that we get an error if we try to call a __device__ function from a
+// module initializer.
+
+struct S {
+ // expected-note at -1 {{candidate constructor (the implicit copy constructor) not viable: requires 1 argument, but 0 were provided}}
+ // expected-note at -2 {{candidate constructor (the implicit move constructor) not viable: requires 1 argument, but 0 were provided}}
+ __device__ S() {}
+ // expected-note at -1 {{candidate constructor not viable: call to __device__ function from __host__ function}}
+};
+
+S s;
+// expected-error at -1 {{no matching constructor for initialization of 'S'}}
+
+struct T {
+ __host__ __device__ T() {}
+};
+T t; // No error, this is OK.
+
+struct U {
+ // expected-note at -1 {{candidate constructor (the implicit copy constructor) not viable: no known conversion from 'int' to 'const U' for 1st argument}}
+ // expected-note at -2 {{candidate constructor (the implicit move constructor) not viable: no known conversion from 'int' to 'U' for 1st argument}}
+ __host__ U() {}
+ // expected-note at -1 {{candidate constructor not viable: requires 0 arguments, but 1 was provided}}
+ __device__ U(int) {}
+ // expected-note at -1 {{candidate constructor not viable: call to __device__ function from __host__ function}}
+};
+U u(42);
+// expected-error at -1 {{no matching constructor for initialization of 'U'}}
+
+__device__ int device_fn() { return 42; }
+// expected-note at -1 {{candidate function not viable: call to __device__ function from __host__ function}}
+int n = device_fn();
+// expected-error at -1 {{no matching function for call to 'device_fn'}}
+
+// Check host/device-based overloding resolution in global variable initializer.
+double pow(double, double);
+
+__device__ double pow(double, int);
+
+double X = pow(1.0, 1);
+__device__ double Y = pow(2.0, 2); // expected-error{{dynamic initialization is not supported for __device__, __constant__, __shared__, and __managed__ variables}}
+
+constexpr double cpow(double, double) { return 1.0; }
+
+constexpr __device__ double cpow(double, int) { return 2.0; }
+
+const double CX = cpow(1.0, 1);
+const __device__ double CY = cpow(2.0, 2);
+
+struct A {
+ double pow(double, double);
+
+ __device__ double pow(double, int);
+
+ constexpr double cpow(double, double) const { return 1.0; }
+
+ constexpr __device__ double cpow(double, int) const { return 1.0; }
+
+};
+
+A a;
+double AX = a.pow(1.0, 1);
+__device__ double AY = a.pow(2.0, 2); // expected-error{{dynamic initialization is not supported for __device__, __constant__, __shared__, and __managed__ variables}}
+
+const A ca;
+const double CAX = ca.cpow(1.0, 1);
+const __device__ double CAY = ca.cpow(2.0, 2);
diff --git a/clang/test/SemaCUDA/windows-calling-conv.cu b/clang/test/SemaCUDA/windows-calling-conv.cu
new file mode 100644
index 000000000000000..0786f65bddb8e82
--- /dev/null
+++ b/clang/test/SemaCUDA/windows-calling-conv.cu
@@ -0,0 +1,17 @@
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -aux-triple \
+// RUN: x86_64-pc-windows-msvc -fms-compatibility -fcuda-is-device \
+// RUN: -fsyntax-only -verify -x hip %s
+// RUN: %clang_cc1 -triple x86_64-pc-windows-msvc -fms-compatibility \
+// RUN: -fsyntax-only -verify -x hip %s
+
+// expected-no-diagnostics
+
+typedef void (__stdcall* funcTy)();
+void invoke(funcTy f);
+
+static void __stdcall callee() noexcept {
+}
+
+void foo() {
+ invoke(callee);
+}
More information about the cfe-commits
mailing list