[clang] [libc] [libcxx] [llvm] Revert "[NVPTX] Make ctor/dtor lowering always enabled in NVPTX" (PR #126610)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Feb 10 14:21:00 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-clang
Author: Florian Mayer (fmayer)
<details>
<summary>Changes</summary>
Reverts llvm/llvm-project#<!-- -->126544
This broke the build on sanitizer buildbots: https://lab.llvm.org/buildbot/#/builders/66/builds/9811
---
Full diff: https://github.com/llvm/llvm-project/pull/126610.diff
12 Files Affected:
- (modified) clang/include/clang/Basic/DiagnosticSemaKinds.td (-2)
- (modified) clang/lib/Driver/ToolChains/Cuda.cpp (+15-4)
- (modified) clang/lib/Driver/ToolChains/Cuda.h (+5-2)
- (modified) clang/lib/Sema/SemaDeclAttr.cpp (-9)
- (modified) clang/test/Driver/cuda-cross-compiling.c (+13)
- (modified) clang/test/SemaCUDA/device-var-init.cu (-9)
- (modified) libc/cmake/modules/LLVMLibCTestRules.cmake (+3-1)
- (modified) libcxx/test/configs/nvptx-libc++-shared.cfg.in (+2)
- (modified) llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp (+21)
- (added) llvm/test/CodeGen/NVPTX/global-ctor.ll (+9)
- (added) llvm/test/CodeGen/NVPTX/global-dtor.ll (+9)
- (modified) llvm/test/CodeGen/NVPTX/lower-ctor-dtor.ll (+1-1)
``````````diff
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index cf390724b07a484..bcae9e9f3009387 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -9193,8 +9193,6 @@ def err_cuda_device_exceptions : Error<
def err_dynamic_var_init : Error<
"dynamic initialization is not supported for "
"__device__, __constant__, __shared__, and __managed__ variables">;
-def err_cuda_ctor_dtor_attrs
- : Error<"CUDA does not support global %0 for __device__ functions">;
def err_shared_var_init : Error<
"initialization is not supported for __shared__ variables">;
def err_cuda_vla : Error<
diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp
index d6487d4bc274de4..c7d5893085080fb 100644
--- a/clang/lib/Driver/ToolChains/Cuda.cpp
+++ b/clang/lib/Driver/ToolChains/Cuda.cpp
@@ -639,6 +639,9 @@ void NVPTX::Linker::ConstructJob(Compilation &C, const JobAction &JA,
CmdArgs.push_back(
Args.MakeArgString("--plugin-opt=-mattr=" + llvm::join(Features, ",")));
+ // Enable ctor / dtor lowering for the direct / freestanding NVPTX target.
+ CmdArgs.append({"-mllvm", "--nvptx-lower-global-ctor-dtor"});
+
// Add paths for the default clang library path.
SmallString<256> DefaultLibPath =
llvm::sys::path::parent_path(TC.getDriver().Dir);
@@ -723,8 +726,9 @@ void NVPTX::getNVPTXTargetFeatures(const Driver &D, const llvm::Triple &Triple,
/// toolchain.
NVPTXToolChain::NVPTXToolChain(const Driver &D, const llvm::Triple &Triple,
const llvm::Triple &HostTriple,
- const ArgList &Args)
- : ToolChain(D, Triple, Args), CudaInstallation(D, HostTriple, Args) {
+ const ArgList &Args, bool Freestanding = false)
+ : ToolChain(D, Triple, Args), CudaInstallation(D, HostTriple, Args),
+ Freestanding(Freestanding) {
if (CudaInstallation.isValid())
getProgramPaths().push_back(std::string(CudaInstallation.getBinPath()));
// Lookup binaries into the driver directory, this is used to
@@ -736,7 +740,8 @@ NVPTXToolChain::NVPTXToolChain(const Driver &D, const llvm::Triple &Triple,
/// system's default triple if not provided.
NVPTXToolChain::NVPTXToolChain(const Driver &D, const llvm::Triple &Triple,
const ArgList &Args)
- : NVPTXToolChain(D, Triple, llvm::Triple(LLVM_HOST_TRIPLE), Args) {}
+ : NVPTXToolChain(D, Triple, llvm::Triple(LLVM_HOST_TRIPLE), Args,
+ /*Freestanding=*/true) {}
llvm::opt::DerivedArgList *
NVPTXToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
@@ -777,7 +782,13 @@ NVPTXToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
void NVPTXToolChain::addClangTargetOptions(
const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args,
- Action::OffloadKind DeviceOffloadingKind) const {}
+ Action::OffloadKind DeviceOffloadingKind) const {
+ // If we are compiling with a standalone NVPTX toolchain we want to try to
+ // mimic a standard environment as much as possible. So we enable lowering
+ // ctor / dtor functions to global symbols that can be registered.
+ if (Freestanding && !getDriver().isUsingLTO())
+ CC1Args.append({"-mllvm", "--nvptx-lower-global-ctor-dtor"});
+}
bool NVPTXToolChain::supportsDebugInfoOption(const llvm::opt::Arg *A) const {
const Option &O = A->getOption();
diff --git a/clang/lib/Driver/ToolChains/Cuda.h b/clang/lib/Driver/ToolChains/Cuda.h
index 259eda6ebcadfb4..c2219ec47cfa979 100644
--- a/clang/lib/Driver/ToolChains/Cuda.h
+++ b/clang/lib/Driver/ToolChains/Cuda.h
@@ -132,8 +132,8 @@ namespace toolchains {
class LLVM_LIBRARY_VISIBILITY NVPTXToolChain : public ToolChain {
public:
NVPTXToolChain(const Driver &D, const llvm::Triple &Triple,
- const llvm::Triple &HostTriple,
- const llvm::opt::ArgList &Args);
+ const llvm::Triple &HostTriple, const llvm::opt::ArgList &Args,
+ bool Freestanding);
NVPTXToolChain(const Driver &D, const llvm::Triple &Triple,
const llvm::opt::ArgList &Args);
@@ -179,6 +179,9 @@ class LLVM_LIBRARY_VISIBILITY NVPTXToolChain : public ToolChain {
protected:
Tool *buildAssembler() const override; // ptxas.
Tool *buildLinker() const override; // nvlink.
+
+private:
+ bool Freestanding = false;
};
class LLVM_LIBRARY_VISIBILITY CudaToolChain : public NVPTXToolChain {
diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp
index 527db176cf8dd60..f351663c6824e36 100644
--- a/clang/lib/Sema/SemaDeclAttr.cpp
+++ b/clang/lib/Sema/SemaDeclAttr.cpp
@@ -7484,15 +7484,6 @@ void Sema::ProcessDeclAttributeList(
}
}
- // Do not permit 'constructor' or 'destructor' attributes on __device__ code.
- if (getLangOpts().CUDAIsDevice && D->hasAttr<CUDADeviceAttr>() &&
- (D->hasAttr<ConstructorAttr>() || D->hasAttr<DestructorAttr>()) &&
- !getLangOpts().GPUAllowDeviceInit) {
- Diag(D->getLocation(), diag::err_cuda_ctor_dtor_attrs)
- << (D->hasAttr<ConstructorAttr>() ? "constructors" : "destructors");
- D->setInvalidDecl();
- }
-
// Do this check after processing D's attributes because the attribute
// objc_method_family can change whether the given method is in the init
// family, and it can be applied after objc_designated_initializer. This is a
diff --git a/clang/test/Driver/cuda-cross-compiling.c b/clang/test/Driver/cuda-cross-compiling.c
index 1df231ecb447946..7817e462c47be91 100644
--- a/clang/test/Driver/cuda-cross-compiling.c
+++ b/clang/test/Driver/cuda-cross-compiling.c
@@ -57,6 +57,19 @@
// LINK: clang-nvlink-wrapper{{.*}}"-o" "a.out" "-arch" "sm_61"{{.*}}[[CUBIN:.+]].o
+//
+// Test to ensure that we enable handling global constructors in a freestanding
+// Nvidia compilation.
+//
+// RUN: %clang -target nvptx64-nvidia-cuda -march=sm_70 %s -### 2>&1 \
+// RUN: | FileCheck -check-prefix=LOWERING %s
+// RUN: %clang -target nvptx64-nvidia-cuda -march=sm_70 -flto -c %s -### 2>&1 \
+// RUN: | FileCheck -check-prefix=LOWERING-LTO %s
+
+// LOWERING: -cc1" "-triple" "nvptx64-nvidia-cuda" {{.*}} "-mllvm" "--nvptx-lower-global-ctor-dtor"
+// LOWERING: clang-nvlink-wrapper{{.*}} "-mllvm" "--nvptx-lower-global-ctor-dtor"
+// LOWERING-LTO-NOT: "--nvptx-lower-global-ctor-dtor"
+
//
// Test passing arguments directly to nvlink.
//
diff --git a/clang/test/SemaCUDA/device-var-init.cu b/clang/test/SemaCUDA/device-var-init.cu
index a9e3557c20ebf1a..1555d151c2590af 100644
--- a/clang/test/SemaCUDA/device-var-init.cu
+++ b/clang/test/SemaCUDA/device-var-init.cu
@@ -485,12 +485,3 @@ void instantiate() {
bar<NontrivialInitializer><<<1, 1>>>();
// expected-note at -1 {{in instantiation of function template specialization 'bar<NontrivialInitializer>' requested here}}
}
-
-__device__ void *ptr1 = nullptr;
-__device__ void *ptr2 = ptr1;
-// expected-error at -1 {{dynamic initialization is not supported for __device__, __constant__, __shared__, and __managed__ variables}}
-
-__device__ [[gnu::constructor(101)]] void ctor() {}
-// expected-error at -1 {{CUDA does not support global constructors for __device__ functions}}
-__device__ [[gnu::destructor(101)]] void dtor() {}
-// expected-error at -1 {{CUDA does not support global destructors for __device__ functions}}
diff --git a/libc/cmake/modules/LLVMLibCTestRules.cmake b/libc/cmake/modules/LLVMLibCTestRules.cmake
index f33db5826537bd2..ffbdb40cd5091fa 100644
--- a/libc/cmake/modules/LLVMLibCTestRules.cmake
+++ b/libc/cmake/modules/LLVMLibCTestRules.cmake
@@ -560,12 +560,14 @@ function(add_integration_test test_name)
if(LIBC_TARGET_ARCHITECTURE_IS_AMDGPU)
target_link_options(${fq_build_target_name} PRIVATE
${LIBC_COMPILE_OPTIONS_DEFAULT} ${INTEGRATION_TEST_COMPILE_OPTIONS}
- -Wno-multi-gpu -mcpu=${LIBC_GPU_TARGET_ARCHITECTURE} -flto -nostdlib -static
+ -Wno-multi-gpu -mcpu=${LIBC_GPU_TARGET_ARCHITECTURE} -flto
+ "-Wl,-mllvm,-amdgpu-lower-global-ctor-dtor=0" -nostdlib -static
"-Wl,-mllvm,-amdhsa-code-object-version=${LIBC_GPU_CODE_OBJECT_VERSION}")
elseif(LIBC_TARGET_ARCHITECTURE_IS_NVPTX)
target_link_options(${fq_build_target_name} PRIVATE
${LIBC_COMPILE_OPTIONS_DEFAULT} ${INTEGRATION_TEST_COMPILE_OPTIONS}
"-Wl,--suppress-stack-size-warning" -Wno-multi-gpu
+ "-Wl,-mllvm,-nvptx-lower-global-ctor-dtor=1"
"-Wl,-mllvm,-nvptx-emit-init-fini-kernel"
-march=${LIBC_GPU_TARGET_ARCHITECTURE} -nostdlib -static
"--cuda-path=${LIBC_CUDA_ROOT}")
diff --git a/libcxx/test/configs/nvptx-libc++-shared.cfg.in b/libcxx/test/configs/nvptx-libc++-shared.cfg.in
index e07ed35da4d5ad5..9a3ca9c8da95093 100644
--- a/libcxx/test/configs/nvptx-libc++-shared.cfg.in
+++ b/libcxx/test/configs/nvptx-libc++-shared.cfg.in
@@ -10,6 +10,8 @@ config.substitutions.append(('%{link_flags}',
'-nostdlib++ -startfiles -stdlib '
'-L %{lib-dir} -lc++ -lc++abi '
'-Wl,--suppress-stack-size-warning '
+ '-Wl,-mllvm,-nvptx-lower-global-ctor-dtor=1 '
+ '-Wl,-mllvm,-nvptx-emit-init-fini-kernel'
))
config.substitutions.append(('%{exec}',
'%{executor} --no-parallelism'
diff --git a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
index 68a0f4cb0ade9e9..ad1433821036be6 100644
--- a/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp
@@ -91,6 +91,11 @@
using namespace llvm;
+static cl::opt<bool>
+ LowerCtorDtor("nvptx-lower-global-ctor-dtor",
+ cl::desc("Lower GPU ctor / dtors to globals on the device."),
+ cl::init(false), cl::Hidden);
+
#define DEPOTNAME "__local_depot"
/// DiscoverDependentGlobals - Return a set of GlobalVariables on which \p V
@@ -789,6 +794,22 @@ bool NVPTXAsmPrinter::doInitialization(Module &M) {
if (M.alias_size() && (STI.getPTXVersion() < 63 || STI.getSmVersion() < 30))
report_fatal_error(".alias requires PTX version >= 6.3 and sm_30");
+ // OpenMP supports NVPTX global constructors and destructors.
+ bool IsOpenMP = M.getModuleFlag("openmp") != nullptr;
+
+ if (!isEmptyXXStructor(M.getNamedGlobal("llvm.global_ctors")) &&
+ !LowerCtorDtor && !IsOpenMP) {
+ report_fatal_error(
+ "Module has a nontrivial global ctor, which NVPTX does not support.");
+ return true; // error
+ }
+ if (!isEmptyXXStructor(M.getNamedGlobal("llvm.global_dtors")) &&
+ !LowerCtorDtor && !IsOpenMP) {
+ report_fatal_error(
+ "Module has a nontrivial global dtor, which NVPTX does not support.");
+ return true; // error
+ }
+
// We need to call the parent's one explicitly.
bool Result = AsmPrinter::doInitialization(M);
diff --git a/llvm/test/CodeGen/NVPTX/global-ctor.ll b/llvm/test/CodeGen/NVPTX/global-ctor.ll
new file mode 100644
index 000000000000000..6a833128206cec3
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/global-ctor.ll
@@ -0,0 +1,9 @@
+; RUN: not --crash llc < %s -mtriple=nvptx -mcpu=sm_20 2>&1 | FileCheck %s
+
+; Check that llc dies when given a nonempty global ctor.
+ at llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 65535, ptr @foo, ptr null }]
+
+; CHECK: ERROR: Module has a nontrivial global ctor
+define internal void @foo() {
+ ret void
+}
diff --git a/llvm/test/CodeGen/NVPTX/global-dtor.ll b/llvm/test/CodeGen/NVPTX/global-dtor.ll
new file mode 100644
index 000000000000000..f385d620bba3607
--- /dev/null
+++ b/llvm/test/CodeGen/NVPTX/global-dtor.ll
@@ -0,0 +1,9 @@
+; RUN: not --crash llc < %s -mtriple=nvptx -mcpu=sm_20 2>&1 | FileCheck %s
+
+; Check that llc dies when given a nonempty global dtor.
+ at llvm.global_dtors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 65535, ptr @foo, ptr null }]
+
+; CHECK: ERROR: Module has a nontrivial global dtor
+define internal void @foo() {
+ ret void
+}
diff --git a/llvm/test/CodeGen/NVPTX/lower-ctor-dtor.ll b/llvm/test/CodeGen/NVPTX/lower-ctor-dtor.ll
index 60b3d70840af591..4ee1ca3ad4b1f0a 100644
--- a/llvm/test/CodeGen/NVPTX/lower-ctor-dtor.ll
+++ b/llvm/test/CodeGen/NVPTX/lower-ctor-dtor.ll
@@ -8,7 +8,7 @@
; Make sure we get the same result if we run multiple times
; RUN: opt -S -mtriple=nvptx64-- -passes=nvptx-lower-ctor-dtor,nvptx-lower-ctor-dtor < %s | FileCheck %s
-; RUN: llc -mtriple=nvptx64-amd-amdhsa -mcpu=sm_70 -filetype=asm -o - < %s | FileCheck %s -check-prefix=VISIBILITY
+; RUN: llc -nvptx-lower-global-ctor-dtor -mtriple=nvptx64-amd-amdhsa -mcpu=sm_70 -filetype=asm -o - < %s | FileCheck %s -check-prefix=VISIBILITY
@llvm.global_ctors = appending addrspace(1) global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @foo, ptr null }]
@llvm.global_dtors = appending addrspace(1) global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @bar, ptr null }]
``````````
</details>
https://github.com/llvm/llvm-project/pull/126610
More information about the llvm-commits
mailing list