[clang] [clang] Introduce `SemaCUDA` (PR #88559)
via cfe-commits
cfe-commits at lists.llvm.org
Fri Apr 12 11:55:42 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-clang
Author: Vlad Serebrennikov (Endilll)
<details>
<summary>Changes</summary>
This patch moves CUDA-related `Sema` function into new `SemaCUDA` class, following the recent example of SYCL, OpenACC, and HLSL. This is a part of the effort to split Sema. Additional context can be found in https://github.com/llvm/llvm-project/pull/82217, https://github.com/llvm/llvm-project/pull/84184, https://github.com/llvm/llvm-project/pull/87634.
---
Patch is 108.32 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/88559.diff
24 Files Affected:
- (modified) clang/include/clang/Basic/Cuda.h (+8)
- (modified) clang/include/clang/Sema/Sema.h (+14-290)
- (modified) clang/include/clang/Sema/SemaBase.h (+1-1)
- (added) clang/include/clang/Sema/SemaCUDA.h (+304)
- (modified) clang/include/clang/Serialization/ASTReader.h (+1-1)
- (modified) clang/lib/Parse/ParseDecl.cpp (+3-1)
- (modified) clang/lib/Parse/ParseExpr.cpp (+3-4)
- (modified) clang/lib/Parse/ParsePragma.cpp (+3-2)
- (modified) clang/lib/Sema/Sema.cpp (+10-8)
- (modified) clang/lib/Sema/SemaBase.cpp (+3-2)
- (modified) clang/lib/Sema/SemaCUDA.cpp (+143-134)
- (modified) clang/lib/Sema/SemaDecl.cpp (+10-9)
- (modified) clang/lib/Sema/SemaDeclAttr.cpp (+7-5)
- (modified) clang/lib/Sema/SemaDeclCXX.cpp (+11-10)
- (modified) clang/lib/Sema/SemaExpr.cpp (+9-8)
- (modified) clang/lib/Sema/SemaExprCXX.cpp (+14-13)
- (modified) clang/lib/Sema/SemaLambda.cpp (+5-4)
- (modified) clang/lib/Sema/SemaOverload.cpp (+31-29)
- (modified) clang/lib/Sema/SemaStmt.cpp (+3-2)
- (modified) clang/lib/Sema/SemaTemplate.cpp (+8-7)
- (modified) clang/lib/Sema/SemaTemplateInstantiateDecl.cpp (+2-1)
- (modified) clang/lib/Sema/SemaType.cpp (+5-4)
- (modified) clang/lib/Serialization/ASTReader.cpp (+3-2)
- (modified) clang/lib/Serialization/ASTWriter.cpp (+3-2)
``````````diff
diff --git a/clang/include/clang/Basic/Cuda.h b/clang/include/clang/Basic/Cuda.h
index 3e77a74c7c0092..acc6bb6581d857 100644
--- a/clang/include/clang/Basic/Cuda.h
+++ b/clang/include/clang/Basic/Cuda.h
@@ -126,6 +126,14 @@ enum class CudaArch {
HIPDefault = CudaArch::GFX906,
};
+enum class CUDAFunctionTarget {
+ Device,
+ Global,
+ Host,
+ HostDevice,
+ InvalidTarget
+};
+
static inline bool IsNVIDIAGpuArch(CudaArch A) {
return A >= CudaArch::SM_20 && A < CudaArch::GFX600;
}
diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h
index 00888b7f7a738e..6b9789334811ec 100644
--- a/clang/include/clang/Sema/Sema.h
+++ b/clang/include/clang/Sema/Sema.h
@@ -38,6 +38,7 @@
#include "clang/AST/TypeOrdering.h"
#include "clang/Basic/BitmaskEnum.h"
#include "clang/Basic/Builtins.h"
+#include "clang/Basic/Cuda.h"
#include "clang/Basic/DarwinSDKInfo.h"
#include "clang/Basic/ExpressionTraits.h"
#include "clang/Basic/Module.h"
@@ -183,6 +184,7 @@ class Preprocessor;
class PseudoDestructorTypeStorage;
class PseudoObjectExpr;
class QualType;
+class SemaCUDA;
class SemaHLSL;
class SemaOpenACC;
class SemaSYCL;
@@ -435,14 +437,6 @@ enum class CXXSpecialMemberKind {
Invalid
};
-enum class CUDAFunctionTarget {
- Device,
- Global,
- Host,
- HostDevice,
- InvalidTarget
-};
-
/// Sema - This implements semantic analysis and AST building for C.
/// \nosubgrouping
class Sema final : public SemaBase {
@@ -486,8 +480,7 @@ class Sema final : public SemaBase {
// 35. Code Completion (SemaCodeComplete.cpp)
// 36. FixIt Helpers (SemaFixItUtils.cpp)
// 37. Name Lookup for RISC-V Vector Intrinsic (SemaRISCVVectorLookup.cpp)
- // 38. CUDA (SemaCUDA.cpp)
- // 39. OpenMP Directives and Clauses (SemaOpenMP.cpp)
+ // 38. OpenMP Directives and Clauses (SemaOpenMP.cpp)
/// \name Semantic Analysis
/// Implementations are in Sema.cpp
@@ -981,9 +974,19 @@ class Sema final : public SemaBase {
return DelayedDiagnostics.push(pool);
}
+ /// Diagnostics that are emitted only if we discover that the given function
+ /// must be codegen'ed. Because handling these correctly adds overhead to
+ /// compilation, this is currently only enabled for CUDA compilations.
+ SemaDiagnosticBuilder::DeferredDiagnosticsType DeviceDeferredDiags;
+
/// CurContext - This is the current declaration context of parsing.
DeclContext *CurContext;
+ SemaCUDA &CUDA() {
+ assert(CUDAPtr);
+ return *CUDAPtr;
+ }
+
SemaHLSL &HLSL() {
assert(HLSLPtr);
return *HLSLPtr;
@@ -1029,6 +1032,7 @@ class Sema final : public SemaBase {
mutable IdentifierInfo *Ident_super;
+ std::unique_ptr<SemaCUDA> CUDAPtr;
std::unique_ptr<SemaHLSL> HLSLPtr;
std::unique_ptr<SemaOpenACC> OpenACCPtr;
std::unique_ptr<SemaSYCL> SYCLPtr;
@@ -12908,258 +12912,6 @@ class Sema final : public SemaBase {
//
//
- /// \name CUDA
- /// Implementations are in SemaCUDA.cpp
- ///@{
-
-public:
- /// Increments our count of the number of times we've seen a pragma forcing
- /// functions to be __host__ __device__. So long as this count is greater
- /// than zero, all functions encountered will be __host__ __device__.
- void PushForceCUDAHostDevice();
-
- /// Decrements our count of the number of times we've seen a pragma forcing
- /// functions to be __host__ __device__. Returns false if the count is 0
- /// before incrementing, so you can emit an error.
- bool PopForceCUDAHostDevice();
-
- ExprResult ActOnCUDAExecConfigExpr(Scope *S, SourceLocation LLLLoc,
- MultiExprArg ExecConfig,
- SourceLocation GGGLoc);
-
- /// Diagnostics that are emitted only if we discover that the given function
- /// must be codegen'ed. Because handling these correctly adds overhead to
- /// compilation, this is currently only enabled for CUDA compilations.
- SemaDiagnosticBuilder::DeferredDiagnosticsType DeviceDeferredDiags;
-
- /// A pair of a canonical FunctionDecl and a SourceLocation. When used as the
- /// key in a hashtable, both the FD and location are hashed.
- struct FunctionDeclAndLoc {
- CanonicalDeclPtr<const FunctionDecl> FD;
- SourceLocation Loc;
- };
-
- /// FunctionDecls and SourceLocations for which CheckCUDACall has emitted a
- /// (maybe deferred) "bad call" diagnostic. We use this to avoid emitting the
- /// same deferred diag twice.
- llvm::DenseSet<FunctionDeclAndLoc> LocsWithCUDACallDiags;
-
- /// An inverse call graph, mapping known-emitted functions to one of their
- /// known-emitted callers (plus the location of the call).
- ///
- /// Functions that we can tell a priori must be emitted aren't added to this
- /// map.
- llvm::DenseMap</* Callee = */ CanonicalDeclPtr<const FunctionDecl>,
- /* Caller = */ FunctionDeclAndLoc>
- DeviceKnownEmittedFns;
-
- /// Creates a SemaDiagnosticBuilder that emits the diagnostic if the current
- /// context is "used as device code".
- ///
- /// - If CurContext is a __host__ function, does not emit any diagnostics
- /// unless \p EmitOnBothSides is true.
- /// - If CurContext is a __device__ or __global__ function, emits the
- /// diagnostics immediately.
- /// - If CurContext is a __host__ __device__ function and we are compiling for
- /// the device, creates a diagnostic which is emitted if and when we realize
- /// that the function will be codegen'ed.
- ///
- /// Example usage:
- ///
- /// // Variable-length arrays are not allowed in CUDA device code.
- /// if (CUDADiagIfDeviceCode(Loc, diag::err_cuda_vla)
- /// << llvm::to_underlying(CurrentCUDATarget()))
- /// return ExprError();
- /// // Otherwise, continue parsing as normal.
- SemaDiagnosticBuilder CUDADiagIfDeviceCode(SourceLocation Loc,
- unsigned DiagID);
-
- /// Creates a SemaDiagnosticBuilder that emits the diagnostic if the current
- /// context is "used as host code".
- ///
- /// Same as CUDADiagIfDeviceCode, with "host" and "device" switched.
- SemaDiagnosticBuilder CUDADiagIfHostCode(SourceLocation Loc, unsigned DiagID);
-
- /// Determines whether the given function is a CUDA device/host/kernel/etc.
- /// function.
- ///
- /// Use this rather than examining the function's attributes yourself -- you
- /// will get it wrong. Returns CUDAFunctionTarget::Host if D is null.
- CUDAFunctionTarget IdentifyCUDATarget(const FunctionDecl *D,
- bool IgnoreImplicitHDAttr = false);
- CUDAFunctionTarget IdentifyCUDATarget(const ParsedAttributesView &Attrs);
-
- enum CUDAVariableTarget {
- CVT_Device, /// Emitted on device side with a shadow variable on host side
- CVT_Host, /// Emitted on host side only
- CVT_Both, /// Emitted on both sides with different addresses
- CVT_Unified, /// Emitted as a unified address, e.g. managed variables
- };
- /// Determines whether the given variable is emitted on host or device side.
- CUDAVariableTarget IdentifyCUDATarget(const VarDecl *D);
-
- /// Defines kinds of CUDA global host/device context where a function may be
- /// called.
- enum CUDATargetContextKind {
- CTCK_Unknown, /// Unknown context
- CTCK_InitGlobalVar, /// Function called during global variable
- /// initialization
- };
-
- /// Define the current global CUDA host/device context where a function may be
- /// called. Only used when a function is called outside of any functions.
- struct CUDATargetContext {
- CUDAFunctionTarget Target = CUDAFunctionTarget::HostDevice;
- CUDATargetContextKind Kind = CTCK_Unknown;
- Decl *D = nullptr;
- } CurCUDATargetCtx;
-
- struct CUDATargetContextRAII {
- Sema &S;
- CUDATargetContext SavedCtx;
- CUDATargetContextRAII(Sema &S_, CUDATargetContextKind K, Decl *D);
- ~CUDATargetContextRAII() { S.CurCUDATargetCtx = SavedCtx; }
- };
-
- /// Gets the CUDA target for the current context.
- CUDAFunctionTarget CurrentCUDATarget() {
- return IdentifyCUDATarget(dyn_cast<FunctionDecl>(CurContext));
- }
-
- static bool isCUDAImplicitHostDeviceFunction(const FunctionDecl *D);
-
- // CUDA function call preference. Must be ordered numerically from
- // worst to best.
- enum CUDAFunctionPreference {
- CFP_Never, // Invalid caller/callee combination.
- CFP_WrongSide, // Calls from host-device to host or device
- // function that do not match current compilation
- // mode.
- CFP_HostDevice, // Any calls to host/device functions.
- CFP_SameSide, // Calls from host-device to host or device
- // function matching current compilation mode.
- CFP_Native, // host-to-host or device-to-device calls.
- };
-
- /// Identifies relative preference of a given Caller/Callee
- /// combination, based on their host/device attributes.
- /// \param Caller function which needs address of \p Callee.
- /// nullptr in case of global context.
- /// \param Callee target function
- ///
- /// \returns preference value for particular Caller/Callee combination.
- CUDAFunctionPreference IdentifyCUDAPreference(const FunctionDecl *Caller,
- const FunctionDecl *Callee);
-
- /// Determines whether Caller may invoke Callee, based on their CUDA
- /// host/device attributes. Returns false if the call is not allowed.
- ///
- /// Note: Will return true for CFP_WrongSide calls. These may appear in
- /// semantically correct CUDA programs, but only if they're never codegen'ed.
- bool IsAllowedCUDACall(const FunctionDecl *Caller,
- const FunctionDecl *Callee) {
- return IdentifyCUDAPreference(Caller, Callee) != CFP_Never;
- }
-
- /// May add implicit CUDAHostAttr and CUDADeviceAttr attributes to FD,
- /// depending on FD and the current compilation settings.
- void maybeAddCUDAHostDeviceAttrs(FunctionDecl *FD,
- const LookupResult &Previous);
-
- /// May add implicit CUDAConstantAttr attribute to VD, depending on VD
- /// and current compilation settings.
- void MaybeAddCUDAConstantAttr(VarDecl *VD);
-
- /// Check whether we're allowed to call Callee from the current context.
- ///
- /// - If the call is never allowed in a semantically-correct program
- /// (CFP_Never), emits an error and returns false.
- ///
- /// - If the call is allowed in semantically-correct programs, but only if
- /// it's never codegen'ed (CFP_WrongSide), creates a deferred diagnostic to
- /// be emitted if and when the caller is codegen'ed, and returns true.
- ///
- /// Will only create deferred diagnostics for a given SourceLocation once,
- /// so you can safely call this multiple times without generating duplicate
- /// deferred errors.
- ///
- /// - Otherwise, returns true without emitting any diagnostics.
- bool CheckCUDACall(SourceLocation Loc, FunctionDecl *Callee);
-
- void CUDACheckLambdaCapture(CXXMethodDecl *D, const sema::Capture &Capture);
-
- /// Set __device__ or __host__ __device__ attributes on the given lambda
- /// operator() method.
- ///
- /// CUDA lambdas by default is host device function unless it has explicit
- /// host or device attribute.
- void CUDASetLambdaAttrs(CXXMethodDecl *Method);
-
- /// Record \p FD if it is a CUDA/HIP implicit host device function used on
- /// device side in device compilation.
- void CUDARecordImplicitHostDeviceFuncUsedByDevice(const FunctionDecl *FD);
-
- /// Finds a function in \p Matches with highest calling priority
- /// from \p Caller context and erases all functions with lower
- /// calling priority.
- void EraseUnwantedCUDAMatches(
- const FunctionDecl *Caller,
- SmallVectorImpl<std::pair<DeclAccessPair, FunctionDecl *>> &Matches);
-
- /// Given a implicit special member, infer its CUDA target from the
- /// calls it needs to make to underlying base/field special members.
- /// \param ClassDecl the class for which the member is being created.
- /// \param CSM the kind of special member.
- /// \param MemberDecl the special member itself.
- /// \param ConstRHS true if this is a copy operation with a const object on
- /// its RHS.
- /// \param Diagnose true if this call should emit diagnostics.
- /// \return true if there was an error inferring.
- /// The result of this call is implicit CUDA target attribute(s) attached to
- /// the member declaration.
- bool inferCUDATargetForImplicitSpecialMember(CXXRecordDecl *ClassDecl,
- CXXSpecialMemberKind CSM,
- CXXMethodDecl *MemberDecl,
- bool ConstRHS, bool Diagnose);
-
- /// \return true if \p CD can be considered empty according to CUDA
- /// (E.2.3.1 in CUDA 7.5 Programming guide).
- bool isEmptyCudaConstructor(SourceLocation Loc, CXXConstructorDecl *CD);
- bool isEmptyCudaDestructor(SourceLocation Loc, CXXDestructorDecl *CD);
-
- // \brief Checks that initializers of \p Var satisfy CUDA restrictions. In
- // case of error emits appropriate diagnostic and invalidates \p Var.
- //
- // \details CUDA allows only empty constructors as initializers for global
- // variables (see E.2.3.1, CUDA 7.5). The same restriction also applies to all
- // __shared__ variables whether they are local or not (they all are implicitly
- // static in CUDA). One exception is that CUDA allows constant initializers
- // for __constant__ and __device__ variables.
- void checkAllowedCUDAInitializer(VarDecl *VD);
-
- /// Check whether NewFD is a valid overload for CUDA. Emits
- /// diagnostics and invalidates NewFD if not.
- void checkCUDATargetOverload(FunctionDecl *NewFD,
- const LookupResult &Previous);
- /// Copies target attributes from the template TD to the function FD.
- void inheritCUDATargetAttrs(FunctionDecl *FD, const FunctionTemplateDecl &TD);
-
- /// Returns the name of the launch configuration function. This is the name
- /// of the function that will be called to configure kernel call, with the
- /// parameters specified via <<<>>>.
- std::string getCudaConfigureFuncName() const;
-
-private:
- unsigned ForceCUDAHostDeviceDepth = 0;
-
- ///@}
-
- //
- //
- // -------------------------------------------------------------------------
- //
- //
-
/// \name OpenMP Directives and Clauses
/// Implementations are in SemaOpenMP.cpp
///@{
@@ -14546,32 +14298,4 @@ std::unique_ptr<sema::RISCVIntrinsicManager>
CreateRISCVIntrinsicManager(Sema &S);
} // end namespace clang
-namespace llvm {
-// Hash a FunctionDeclAndLoc by looking at both its FunctionDecl and its
-// SourceLocation.
-template <> struct DenseMapInfo<clang::Sema::FunctionDeclAndLoc> {
- using FunctionDeclAndLoc = clang::Sema::FunctionDeclAndLoc;
- using FDBaseInfo =
- DenseMapInfo<clang::CanonicalDeclPtr<const clang::FunctionDecl>>;
-
- static FunctionDeclAndLoc getEmptyKey() {
- return {FDBaseInfo::getEmptyKey(), clang::SourceLocation()};
- }
-
- static FunctionDeclAndLoc getTombstoneKey() {
- return {FDBaseInfo::getTombstoneKey(), clang::SourceLocation()};
- }
-
- static unsigned getHashValue(const FunctionDeclAndLoc &FDL) {
- return hash_combine(FDBaseInfo::getHashValue(FDL.FD),
- FDL.Loc.getHashValue());
- }
-
- static bool isEqual(const FunctionDeclAndLoc &LHS,
- const FunctionDeclAndLoc &RHS) {
- return LHS.FD == RHS.FD && LHS.Loc == RHS.Loc;
- }
-};
-} // namespace llvm
-
#endif
diff --git a/clang/include/clang/Sema/SemaBase.h b/clang/include/clang/Sema/SemaBase.h
index ff718022fca03c..3220f71dd797ed 100644
--- a/clang/include/clang/Sema/SemaBase.h
+++ b/clang/include/clang/Sema/SemaBase.h
@@ -146,7 +146,7 @@ class SemaBase {
/// if (SemaDiagnosticBuilder(...) << foo << bar)
/// return ExprError();
///
- /// But see CUDADiagIfDeviceCode() and CUDADiagIfHostCode() -- you probably
+ /// But see DiagIfDeviceCode() and DiagIfHostCode() -- you probably
/// want to use these instead of creating a SemaDiagnosticBuilder yourself.
operator bool() const { return isImmediate(); }
diff --git a/clang/include/clang/Sema/SemaCUDA.h b/clang/include/clang/Sema/SemaCUDA.h
new file mode 100644
index 00000000000000..71cde5a49f6b1a
--- /dev/null
+++ b/clang/include/clang/Sema/SemaCUDA.h
@@ -0,0 +1,304 @@
+//===----- SemaCUDA.h ----- Semantic Analysis for CUDA constructs ---------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file declares semantic analysis for CUDA constructs.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_SEMA_SEMACUDA_H
+#define LLVM_CLANG_SEMA_SEMACUDA_H
+
+#include "clang/AST/Decl.h"
+#include "clang/AST/DeclCXX.h"
+#include "clang/AST/Redeclarable.h"
+#include "clang/Basic/Cuda.h"
+#include "clang/Basic/SourceLocation.h"
+#include "clang/Sema/Lookup.h"
+#include "clang/Sema/Ownership.h"
+#include "clang/Sema/ParsedAttr.h"
+#include "clang/Sema/Scope.h"
+#include "clang/Sema/ScopeInfo.h"
+#include "clang/Sema/SemaBase.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallVector.h"
+#include <string>
+
+namespace clang {
+
+enum class CUDAFunctionTarget;
+
+class SemaCUDA : public SemaBase {
+public:
+ SemaCUDA(Sema &S);
+
+ /// Increments our count of the number of times we've seen a pragma forcing
+ /// functions to be __host__ __device__. So long as this count is greater
+ /// than zero, all functions encountered will be __host__ __device__.
+ void PushForceHostDevice();
+
+ /// Decrements our count of the number of times we've seen a pragma forcing
+ /// functions to be __host__ __device__. Returns false if the count is 0
+ /// before incrementing, so you can emit an error.
+ bool PopForceHostDevice();
+
+ ExprResult ActOnExecConfigExpr(Scope *S, SourceLocation LLLLoc,
+ MultiExprArg ExecConfig,
+ SourceLocation GGGLoc);
+
+ /// A pair of a canonical FunctionDecl and a SourceLocation. When used as the
+ /// key in a hashtable, both the FD and location are hashed.
+ struct FunctionDeclAndLoc {
+ CanonicalDeclPtr<const FunctionDecl> FD;
+ SourceLocation Loc;
+ };
+
+ /// FunctionDecls and SourceLocations for which CheckCall has emitted a
+ /// (maybe deferred) "bad call" diagnostic. We use this to avoid emitting the
+ /// same deferred diag twice.
+ llvm::DenseSet<FunctionDeclAndLoc> LocsWithCUDACallDiags;
+
+ /// An inverse call graph, mapping known-emitted functions to one of their
+ /// known-emitted callers (plus the location of the call).
+ ///
+ /// Functions that we can tell a priori must be emitted aren't added to this
+ /// map.
+ llvm::DenseMap</* Callee = */ CanonicalDeclPtr<const FunctionDecl>,
+ /* Caller = */ FunctionDeclAndLoc>
+ DeviceKnownEmittedFns;
+
+ /// Creates a SemaDiagnosticBuilder that emits the diagnostic if the current
+ /// context is "used as device code".
+ ///
+ /// - If CurContext is a __host__ function, does not emit any diagnostics
+ /// unless \p EmitOnBothSides is true.
+ /// - If CurContext is a __device__ or __global__ function, emits the
+ /// diagnostics immediately.
+ /// - If CurContext is a __host__ __device__ function and we are compiling for
+ /// the device, creates a diagnostic which is emitted if and when we realize
+ /// that the function will be codegen'ed.
+ ///
+ /// Example usage:
+ ///
+ /// // Variable-length arrays are not allowed in CUDA device code.
+ /// if (DiagIfDeviceCode(Loc, diag::err_cuda_vla) << CurrentTarget())
+ /// return ExprError();
+ /// // Otherwise, continue parsing as normal.
+ SemaDiagnosticBuilder DiagIfDeviceCode(SourceLocation Loc, unsigned DiagID);
+
+ /// Creates a SemaDiagnosticBuilder that emits the diagnostic if the current
+ /// context is "used as host code".
+ ///
+ /// Same as DiagIfDeviceCode, with "host" and "device" swi...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/88559
More information about the cfe-commits
mailing list