[clang] [OpenMP][clang][HIP][CUDA] fix weak alias emit on device compilation (PR #164326)
via cfe-commits
cfe-commits at lists.llvm.org
Mon Oct 20 14:56:02 PDT 2025
https://github.com/Jason-VanBeusekom created https://github.com/llvm/llvm-project/pull/164326
This PR adds checks for when emitting weak aliases in: `void CodeGenModule::EmitGlobal(GlobalDecl GD)`, before for device compilation for OpenMP, HIP and Cuda, clang would look for the aliasee even if it was never marked for device compilation.
For OpenMP the following case now works:
> Failed before when compiling with device, ie: `clang -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa`
> ```
> int __Two(void) { return 2; }
> int Two(void) __attribute__ ((weak, alias("__Two")));
> ```
For HIP / Cuda:
>
> ```
> int __HostFunc(void) { return 42; }
> int HostFunc(void) __attribute__ ((weak, alias("__HostFunc")));
> ```
For HIP:
>Failed before on HIP, Cuda fails due to: `NVPTX aliasee must not be '.weak'` error
> ```
> __device__ int __One(void) { return 2; }
> __device__ int One(void) __attribute__ ((weak, alias("__One")));
> ```
Included are Codegen LIT tests for the above cases, and also cases for weak alias cases that currently work in clang.
Fixes https://github.com/llvm/llvm-project/issues/117369
>From 26b90e0098e62949cf8341cddcb69f3faf7000db Mon Sep 17 00:00:00 2001
From: Jason Van Beusekom <jason.van-beusekom at hpe.com>
Date: Mon, 20 Oct 2025 14:16:10 -0500
Subject: [PATCH] [OpenMP][clang][HIP][CUDA] fix weak alias emit on device
compilation when aliasee is no declared on device Add checks to skip the
emitting of an alias on the device when the aliasee is not declared on the
device. This change effects OpenMP, Hip and Cuda.
---
clang/lib/CodeGen/CodeGenModule.cpp | 34 ++++++++-
clang/test/CodeGenCUDA/cuda_weak_alias.cu | 36 +++++++++
clang/test/CodeGenHIP/hip_weak_alias.cpp | 63 ++++++++++++++++
clang/test/OpenMP/amdgcn_weak_alias.c | 90 +++++++++++++++++++++++
clang/test/OpenMP/nvptx_weak_alias.c | 34 +++++++++
5 files changed, 256 insertions(+), 1 deletion(-)
create mode 100644 clang/test/CodeGenCUDA/cuda_weak_alias.cu
create mode 100644 clang/test/CodeGenHIP/hip_weak_alias.cpp
create mode 100644 clang/test/OpenMP/amdgcn_weak_alias.c
create mode 100644 clang/test/OpenMP/nvptx_weak_alias.c
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index c5eb14e329315..ac0de5a221ec7 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -4065,8 +4065,40 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) {
// If this is an alias definition (which otherwise looks like a declaration)
// emit it now.
- if (Global->hasAttr<AliasAttr>())
+ if (Global->hasAttr<AliasAttr>()) {
+ if (LangOpts.OpenMPIsTargetDevice || LangOpts.CUDA) {
+ const auto *AA = Global->getAttr<AliasAttr>();
+ assert(AA && "Not an alias?");
+ GlobalDecl AliaseeGD;
+ if (!lookupRepresentativeDecl(AA->getAliasee(), AliaseeGD)) {
+ if (LangOpts.CUDA)
+ // Failed to find aliasee on device side, skip emitting
+ return;
+ } else {
+ const auto *AliaseeDecl = dyn_cast<ValueDecl>(AliaseeGD.getDecl());
+ if (LangOpts.OpenMPIsTargetDevice) {
+ if (!AliaseeDecl ||
+ !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
+ AliaseeDecl))
+ // Not a target declaration, skip emitting
+ return;
+ } else {
+ // HIP/CUDA
+ const bool HasDeviceAttr = Global->hasAttr<CUDADeviceAttr>();
+ const bool AliaseeHasDeviceAttr =
+ AliaseeDecl && AliaseeDecl->hasAttr<CUDADeviceAttr>();
+ if (LangOpts.CUDAIsDevice) {
+ if (!HasDeviceAttr || !AliaseeHasDeviceAttr)
+ return;
+ } else if (HasDeviceAttr && AliaseeHasDeviceAttr) {
+ // Alias is only on device side, skip emitting on host side
+ return;
+ }
+ }
+ }
+ }
return EmitAliasDefinition(GD);
+ }
// IFunc like an alias whose value is resolved at runtime by calling resolver.
if (Global->hasAttr<IFuncAttr>())
diff --git a/clang/test/CodeGenCUDA/cuda_weak_alias.cu b/clang/test/CodeGenCUDA/cuda_weak_alias.cu
new file mode 100644
index 0000000000000..fda0ed7e5d74b
--- /dev/null
+++ b/clang/test/CodeGenCUDA/cuda_weak_alias.cu
@@ -0,0 +1,36 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals all --version 6
+// REQUIRES: nvptx-registered-target
+// RUN: %clang_cc1 -x cuda -triple x86_64-unknown-linux-gnu -aux-triple nvptx64-nvidia-cuda -emit-llvm %s -o - | FileCheck %s --check-prefix=HOST
+
+extern "C" {
+
+//.
+// HOST: @HostFunc = weak alias i32 (), ptr @__HostFunc
+//.
+// HOST-LABEL: define dso_local i32 @__HostFunc(
+// HOST-SAME: ) #[[ATTR0:[0-9]+]] {
+// HOST-NEXT: [[ENTRY:.*:]]
+// HOST-NEXT: ret i32 42
+//
+int __HostFunc(void) { return 42; }
+int HostFunc(void) __attribute__ ((weak, alias("__HostFunc")));
+
+}
+
+// HOST-LABEL: define dso_local noundef i32 @main(
+// HOST-SAME: ) #[[ATTR1:[0-9]+]] {
+// HOST-NEXT: [[ENTRY:.*:]]
+// HOST-NEXT: [[RETVAL:%.*]] = alloca i32, align 4
+// HOST-NEXT: store i32 0, ptr [[RETVAL]], align 4
+// HOST-NEXT: ret i32 0
+//
+int main() {
+ return 0;
+}
+//.
+// HOST: attributes #[[ATTR0]] = { mustprogress noinline nounwind optnone "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" }
+// HOST: attributes #[[ATTR1]] = { mustprogress noinline norecurse nounwind optnone "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" }
+//.
+// HOST: [[META0:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
+// HOST: [[META1:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
+//.
diff --git a/clang/test/CodeGenHIP/hip_weak_alias.cpp b/clang/test/CodeGenHIP/hip_weak_alias.cpp
new file mode 100644
index 0000000000000..6a57ce1ab74c7
--- /dev/null
+++ b/clang/test/CodeGenHIP/hip_weak_alias.cpp
@@ -0,0 +1,63 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals all --version 6
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -x hip -triple x86_64-unknown-linux-gnu -aux-triple amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-host.bc
+// RUN: %clang_cc1 -x hip -triple x86_64-unknown-linux-gnu -aux-triple amdgcn-amd-amdhsa -emit-llvm %s -o - | FileCheck %s --check-prefix=HOST
+// RUN: %clang_cc1 -x hip -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-linux-gnu -emit-llvm %s -fcuda-is-device -o - | FileCheck %s --check-prefix=DEVICE
+
+#define __device__ __attribute__((device))
+
+extern "C" {
+
+//.
+// HOST: @__hip_cuid_ = global i8 0
+// HOST: @llvm.compiler.used = appending global [1 x ptr] [ptr @__hip_cuid_], section "llvm.metadata"
+// HOST: @HostFunc = weak alias i32 (), ptr @__HostFunc
+//.
+// DEVICE: @__hip_cuid_ = addrspace(1) global i8 0
+// DEVICE: @llvm.compiler.used = appending addrspace(1) global [1 x ptr] [ptr addrspacecast (ptr addrspace(1) @__hip_cuid_ to ptr)], section "llvm.metadata"
+// DEVICE: @One = weak alias i32 (), ptr @__One
+//.
+// HOST-LABEL: define dso_local i32 @__HostFunc(
+// HOST-SAME: ) #[[ATTR0:[0-9]+]] {
+// HOST-NEXT: [[ENTRY:.*:]]
+// HOST-NEXT: ret i32 42
+//
+int __HostFunc(void) { return 42; }
+int HostFunc(void) __attribute__ ((weak, alias("__HostFunc")));
+
+// DEVICE-LABEL: define dso_local i32 @__One(
+// DEVICE-SAME: ) #[[ATTR0:[0-9]+]] {
+// DEVICE-NEXT: [[ENTRY:.*:]]
+// DEVICE-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
+// DEVICE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// DEVICE-NEXT: ret i32 2
+//
+__device__ int __One(void) { return 2; }
+__device__ int One(void) __attribute__ ((weak, alias("__One")));
+
+}
+
+// HOST-LABEL: define dso_local noundef i32 @main(
+// HOST-SAME: ) #[[ATTR1:[0-9]+]] {
+// HOST-NEXT: [[ENTRY:.*:]]
+// HOST-NEXT: [[RETVAL:%.*]] = alloca i32, align 4
+// HOST-NEXT: store i32 0, ptr [[RETVAL]], align 4
+// HOST-NEXT: ret i32 0
+//
+int main() {
+ return 0;
+}
+//.
+// HOST: attributes #[[ATTR0]] = { mustprogress noinline nounwind optnone "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" }
+// HOST: attributes #[[ATTR1]] = { mustprogress noinline norecurse nounwind optnone "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" }
+//.
+// DEVICE: attributes #[[ATTR0]] = { convergent mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+//.
+// HOST: [[META0:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
+// HOST: [[META1:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
+//.
+// DEVICE: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 600}
+// DEVICE: [[META1:![0-9]+]] = !{i32 1, !"amdgpu_printf_kind", !"hostcall"}
+// DEVICE: [[META2:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
+// DEVICE: [[META3:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
+//.
diff --git a/clang/test/OpenMP/amdgcn_weak_alias.c b/clang/test/OpenMP/amdgcn_weak_alias.c
new file mode 100644
index 0000000000000..bf8645bef6d78
--- /dev/null
+++ b/clang/test/OpenMP/amdgcn_weak_alias.c
@@ -0,0 +1,90 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals all --version 6
+// REQUIRES: amdgpu-registered-target
+
+// RUN: %clang_cc1 -fopenmp -x c -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -fopenmp -x c -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -o - | FileCheck %s --check-prefix=HOST
+// RUN: %clang_cc1 -fopenmp -x c -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=DEVICE
+
+//.
+// HOST: @One = weak alias i32 (), ptr @__One
+// HOST: @Two = weak alias i32 (), ptr @__Two
+// HOST: @Three = weak alias i32 (), ptr @__Three
+//.
+// DEVICE: @__omp_rtl_debug_kind = weak_odr hidden addrspace(1) constant i32 0
+// DEVICE: @__omp_rtl_assume_teams_oversubscription = weak_odr hidden addrspace(1) constant i32 0
+// DEVICE: @__omp_rtl_assume_threads_oversubscription = weak_odr hidden addrspace(1) constant i32 0
+// DEVICE: @__omp_rtl_assume_no_thread_state = weak_odr hidden addrspace(1) constant i32 0
+// DEVICE: @__omp_rtl_assume_no_nested_parallelism = weak_odr hidden addrspace(1) constant i32 0
+// DEVICE: @Two = weak hidden alias i32 (), ptr @__Two
+// DEVICE: @Three = weak hidden alias i32 (), ptr @__Three
+// DEVICE: @Three.1 = weak hidden alias i32 (), ptr @__Three
+//.
+// HOST-LABEL: define dso_local i32 @__One(
+// HOST-SAME: ) #[[ATTR0:[0-9]+]] {
+// HOST-NEXT: [[ENTRY:.*:]]
+// HOST-NEXT: ret i32 1
+//
+int __One(void) { return 1; }
+int One(void) __attribute__ ((weak, alias("__One")));
+
+#pragma omp declare target
+// HOST-LABEL: define dso_local i32 @__Two(
+// HOST-SAME: ) #[[ATTR0]] {
+// HOST-NEXT: [[ENTRY:.*:]]
+// HOST-NEXT: ret i32 2
+//
+// DEVICE-LABEL: define hidden i32 @__Two(
+// DEVICE-SAME: ) #[[ATTR0:[0-9]+]] {
+// DEVICE-NEXT: [[ENTRY:.*:]]
+// DEVICE-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
+// DEVICE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// DEVICE-NEXT: ret i32 2
+//
+int __Two(void) { return 2; }
+int Two(void) __attribute__ ((weak, alias("__Two")));
+#pragma omp end declare target
+
+#pragma omp declare target
+// HOST-LABEL: define dso_local i32 @__Three(
+// HOST-SAME: ) #[[ATTR0]] {
+// HOST-NEXT: [[ENTRY:.*:]]
+// HOST-NEXT: ret i32 3
+//
+// DEVICE-LABEL: define hidden i32 @__Three(
+// DEVICE-SAME: ) #[[ATTR0]] {
+// DEVICE-NEXT: [[ENTRY:.*:]]
+// DEVICE-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
+// DEVICE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// DEVICE-NEXT: ret i32 3
+//
+int __Three(void) { return 3; }
+#pragma omp end declare target
+int Three(void) __attribute__ ((weak, alias("__Three")));
+
+
+// HOST-LABEL: define dso_local i32 @main(
+// HOST-SAME: ) #[[ATTR0]] {
+// HOST-NEXT: [[ENTRY:.*:]]
+// HOST-NEXT: [[RETVAL:%.*]] = alloca i32, align 4
+// HOST-NEXT: store i32 0, ptr [[RETVAL]], align 4
+// HOST-NEXT: ret i32 0
+//
+int main(){
+ return 0;
+}
+
+//.
+// HOST: attributes #[[ATTR0]] = { noinline nounwind optnone "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" }
+//.
+// DEVICE: attributes #[[ATTR0]] = { convergent noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+//.
+// HOST: [[META0:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
+// HOST: [[META1:![0-9]+]] = !{i32 7, !"openmp", i32 51}
+// HOST: [[META2:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
+//.
+// DEVICE: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 600}
+// DEVICE: [[META1:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
+// DEVICE: [[META2:![0-9]+]] = !{i32 7, !"openmp", i32 51}
+// DEVICE: [[META3:![0-9]+]] = !{i32 7, !"openmp-device", i32 51}
+// DEVICE: [[META4:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
+//.
diff --git a/clang/test/OpenMP/nvptx_weak_alias.c b/clang/test/OpenMP/nvptx_weak_alias.c
new file mode 100644
index 0000000000000..695bd7d0b8af9
--- /dev/null
+++ b/clang/test/OpenMP/nvptx_weak_alias.c
@@ -0,0 +1,34 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals all --version 6
+// REQUIRES: nvptx-registered-target
+
+// RUN: %clang_cc1 -fopenmp -x c -triple x86_64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -o - | FileCheck %s
+
+//.
+// CHECK: @One = weak alias i32 (), ptr @__One
+//.
+// CHECK-LABEL: define dso_local i32 @__One(
+// CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: ret i32 1
+//
+int __One(void) { return 1; }
+int One(void) __attribute__ ((weak, alias("__One")));
+
+
+// CHECK-LABEL: define dso_local i32 @main(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca i32, align 4
+// CHECK-NEXT: store i32 0, ptr [[RETVAL]], align 4
+// CHECK-NEXT: ret i32 0
+//
+int main(){
+ return 0;
+}
+//.
+// CHECK: attributes #[[ATTR0]] = { noinline nounwind optnone "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" }
+//.
+// CHECK: [[META0:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
+// CHECK: [[META1:![0-9]+]] = !{i32 7, !"openmp", i32 51}
+// CHECK: [[META2:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
+//.
More information about the cfe-commits
mailing list