[clang] [OpenMP][clang][HIP][CUDA] fix weak alias emit on device compilation (PR #164326)
via cfe-commits
cfe-commits at lists.llvm.org
Mon Oct 20 14:57:06 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-clang-codegen
@llvm/pr-subscribers-clang
Author: None (Jason-VanBeusekom)
<details>
<summary>Changes</summary>
This PR adds checks for when emitting weak aliases in: `void CodeGenModule::EmitGlobal(GlobalDecl GD)`, before for device compilation for OpenMP, HIP and Cuda, clang would look for the aliasee even if it was never marked for device compilation.
For OpenMP the following case now works:
> Failed before when compiling with device, ie: `clang -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa`
> ```
> int __Two(void) { return 2; }
> int Two(void) __attribute__ ((weak, alias("__Two")));
> ```
For HIP / Cuda:
>
> ```
> int __HostFunc(void) { return 42; }
> int HostFunc(void) __attribute__ ((weak, alias("__HostFunc")));
> ```
For HIP:
>Failed before on HIP, Cuda fails due to: `NVPTX aliasee must not be '.weak'` error
> ```
> __device__ int __One(void) { return 2; }
> __device__ int One(void) __attribute__ ((weak, alias("__One")));
> ```
Included are Codegen LIT tests for the above cases, and also cases for weak alias cases that currently work in clang.
Fixes https://github.com/llvm/llvm-project/issues/117369
---
Full diff: https://github.com/llvm/llvm-project/pull/164326.diff
5 Files Affected:
- (modified) clang/lib/CodeGen/CodeGenModule.cpp (+33-1)
- (added) clang/test/CodeGenCUDA/cuda_weak_alias.cu (+36)
- (added) clang/test/CodeGenHIP/hip_weak_alias.cpp (+63)
- (added) clang/test/OpenMP/amdgcn_weak_alias.c (+90)
- (added) clang/test/OpenMP/nvptx_weak_alias.c (+34)
``````````diff
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index c5eb14e329315..ac0de5a221ec7 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -4065,8 +4065,40 @@ void CodeGenModule::EmitGlobal(GlobalDecl GD) {
// If this is an alias definition (which otherwise looks like a declaration)
// emit it now.
- if (Global->hasAttr<AliasAttr>())
+ if (Global->hasAttr<AliasAttr>()) {
+ if (LangOpts.OpenMPIsTargetDevice || LangOpts.CUDA) {
+ const auto *AA = Global->getAttr<AliasAttr>();
+ assert(AA && "Not an alias?");
+ GlobalDecl AliaseeGD;
+ if (!lookupRepresentativeDecl(AA->getAliasee(), AliaseeGD)) {
+ if (LangOpts.CUDA)
+ // Failed to find aliasee on device side, skip emitting
+ return;
+ } else {
+ const auto *AliaseeDecl = dyn_cast<ValueDecl>(AliaseeGD.getDecl());
+ if (LangOpts.OpenMPIsTargetDevice) {
+ if (!AliaseeDecl ||
+ !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
+ AliaseeDecl))
+ // Not a target declaration, skip emitting
+ return;
+ } else {
+ // HIP/CUDA
+ const bool HasDeviceAttr = Global->hasAttr<CUDADeviceAttr>();
+ const bool AliaseeHasDeviceAttr =
+ AliaseeDecl && AliaseeDecl->hasAttr<CUDADeviceAttr>();
+ if (LangOpts.CUDAIsDevice) {
+ if (!HasDeviceAttr || !AliaseeHasDeviceAttr)
+ return;
+ } else if (HasDeviceAttr && AliaseeHasDeviceAttr) {
+ // Alias is only on device side, skip emitting on host side
+ return;
+ }
+ }
+ }
+ }
return EmitAliasDefinition(GD);
+ }
// IFunc like an alias whose value is resolved at runtime by calling resolver.
if (Global->hasAttr<IFuncAttr>())
diff --git a/clang/test/CodeGenCUDA/cuda_weak_alias.cu b/clang/test/CodeGenCUDA/cuda_weak_alias.cu
new file mode 100644
index 0000000000000..fda0ed7e5d74b
--- /dev/null
+++ b/clang/test/CodeGenCUDA/cuda_weak_alias.cu
@@ -0,0 +1,36 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals all --version 6
+// REQUIRES: nvptx-registered-target
+// RUN: %clang_cc1 -x cuda -triple x86_64-unknown-linux-gnu -aux-triple nvptx64-nvidia-cuda -emit-llvm %s -o - | FileCheck %s --check-prefix=HOST
+
+extern "C" {
+
+//.
+// HOST: @HostFunc = weak alias i32 (), ptr @__HostFunc
+//.
+// HOST-LABEL: define dso_local i32 @__HostFunc(
+// HOST-SAME: ) #[[ATTR0:[0-9]+]] {
+// HOST-NEXT: [[ENTRY:.*:]]
+// HOST-NEXT: ret i32 42
+//
+int __HostFunc(void) { return 42; }
+int HostFunc(void) __attribute__ ((weak, alias("__HostFunc")));
+
+}
+
+// HOST-LABEL: define dso_local noundef i32 @main(
+// HOST-SAME: ) #[[ATTR1:[0-9]+]] {
+// HOST-NEXT: [[ENTRY:.*:]]
+// HOST-NEXT: [[RETVAL:%.*]] = alloca i32, align 4
+// HOST-NEXT: store i32 0, ptr [[RETVAL]], align 4
+// HOST-NEXT: ret i32 0
+//
+int main() {
+ return 0;
+}
+//.
+// HOST: attributes #[[ATTR0]] = { mustprogress noinline nounwind optnone "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" }
+// HOST: attributes #[[ATTR1]] = { mustprogress noinline norecurse nounwind optnone "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" }
+//.
+// HOST: [[META0:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
+// HOST: [[META1:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
+//.
diff --git a/clang/test/CodeGenHIP/hip_weak_alias.cpp b/clang/test/CodeGenHIP/hip_weak_alias.cpp
new file mode 100644
index 0000000000000..6a57ce1ab74c7
--- /dev/null
+++ b/clang/test/CodeGenHIP/hip_weak_alias.cpp
@@ -0,0 +1,63 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals all --version 6
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -x hip -triple x86_64-unknown-linux-gnu -aux-triple amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-host.bc
+// RUN: %clang_cc1 -x hip -triple x86_64-unknown-linux-gnu -aux-triple amdgcn-amd-amdhsa -emit-llvm %s -o - | FileCheck %s --check-prefix=HOST
+// RUN: %clang_cc1 -x hip -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-linux-gnu -emit-llvm %s -fcuda-is-device -o - | FileCheck %s --check-prefix=DEVICE
+
+#define __device__ __attribute__((device))
+
+extern "C" {
+
+//.
+// HOST: @__hip_cuid_ = global i8 0
+// HOST: @llvm.compiler.used = appending global [1 x ptr] [ptr @__hip_cuid_], section "llvm.metadata"
+// HOST: @HostFunc = weak alias i32 (), ptr @__HostFunc
+//.
+// DEVICE: @__hip_cuid_ = addrspace(1) global i8 0
+// DEVICE: @llvm.compiler.used = appending addrspace(1) global [1 x ptr] [ptr addrspacecast (ptr addrspace(1) @__hip_cuid_ to ptr)], section "llvm.metadata"
+// DEVICE: @One = weak alias i32 (), ptr @__One
+//.
+// HOST-LABEL: define dso_local i32 @__HostFunc(
+// HOST-SAME: ) #[[ATTR0:[0-9]+]] {
+// HOST-NEXT: [[ENTRY:.*:]]
+// HOST-NEXT: ret i32 42
+//
+int __HostFunc(void) { return 42; }
+int HostFunc(void) __attribute__ ((weak, alias("__HostFunc")));
+
+// DEVICE-LABEL: define dso_local i32 @__One(
+// DEVICE-SAME: ) #[[ATTR0:[0-9]+]] {
+// DEVICE-NEXT: [[ENTRY:.*:]]
+// DEVICE-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
+// DEVICE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// DEVICE-NEXT: ret i32 2
+//
+__device__ int __One(void) { return 2; }
+__device__ int One(void) __attribute__ ((weak, alias("__One")));
+
+}
+
+// HOST-LABEL: define dso_local noundef i32 @main(
+// HOST-SAME: ) #[[ATTR1:[0-9]+]] {
+// HOST-NEXT: [[ENTRY:.*:]]
+// HOST-NEXT: [[RETVAL:%.*]] = alloca i32, align 4
+// HOST-NEXT: store i32 0, ptr [[RETVAL]], align 4
+// HOST-NEXT: ret i32 0
+//
+int main() {
+ return 0;
+}
+//.
+// HOST: attributes #[[ATTR0]] = { mustprogress noinline nounwind optnone "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" }
+// HOST: attributes #[[ATTR1]] = { mustprogress noinline norecurse nounwind optnone "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" }
+//.
+// DEVICE: attributes #[[ATTR0]] = { convergent mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+//.
+// HOST: [[META0:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
+// HOST: [[META1:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
+//.
+// DEVICE: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 600}
+// DEVICE: [[META1:![0-9]+]] = !{i32 1, !"amdgpu_printf_kind", !"hostcall"}
+// DEVICE: [[META2:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
+// DEVICE: [[META3:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
+//.
diff --git a/clang/test/OpenMP/amdgcn_weak_alias.c b/clang/test/OpenMP/amdgcn_weak_alias.c
new file mode 100644
index 0000000000000..bf8645bef6d78
--- /dev/null
+++ b/clang/test/OpenMP/amdgcn_weak_alias.c
@@ -0,0 +1,90 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals all --version 6
+// REQUIRES: amdgpu-registered-target
+
+// RUN: %clang_cc1 -fopenmp -x c -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -fopenmp -x c -triple x86_64-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -o - | FileCheck %s --check-prefix=HOST
+// RUN: %clang_cc1 -fopenmp -x c -triple amdgcn-amd-amdhsa -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-target-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=DEVICE
+
+//.
+// HOST: @One = weak alias i32 (), ptr @__One
+// HOST: @Two = weak alias i32 (), ptr @__Two
+// HOST: @Three = weak alias i32 (), ptr @__Three
+//.
+// DEVICE: @__omp_rtl_debug_kind = weak_odr hidden addrspace(1) constant i32 0
+// DEVICE: @__omp_rtl_assume_teams_oversubscription = weak_odr hidden addrspace(1) constant i32 0
+// DEVICE: @__omp_rtl_assume_threads_oversubscription = weak_odr hidden addrspace(1) constant i32 0
+// DEVICE: @__omp_rtl_assume_no_thread_state = weak_odr hidden addrspace(1) constant i32 0
+// DEVICE: @__omp_rtl_assume_no_nested_parallelism = weak_odr hidden addrspace(1) constant i32 0
+// DEVICE: @Two = weak hidden alias i32 (), ptr @__Two
+// DEVICE: @Three = weak hidden alias i32 (), ptr @__Three
+// DEVICE: @Three.1 = weak hidden alias i32 (), ptr @__Three
+//.
+// HOST-LABEL: define dso_local i32 @__One(
+// HOST-SAME: ) #[[ATTR0:[0-9]+]] {
+// HOST-NEXT: [[ENTRY:.*:]]
+// HOST-NEXT: ret i32 1
+//
+int __One(void) { return 1; }
+int One(void) __attribute__ ((weak, alias("__One")));
+
+#pragma omp declare target
+// HOST-LABEL: define dso_local i32 @__Two(
+// HOST-SAME: ) #[[ATTR0]] {
+// HOST-NEXT: [[ENTRY:.*:]]
+// HOST-NEXT: ret i32 2
+//
+// DEVICE-LABEL: define hidden i32 @__Two(
+// DEVICE-SAME: ) #[[ATTR0:[0-9]+]] {
+// DEVICE-NEXT: [[ENTRY:.*:]]
+// DEVICE-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
+// DEVICE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// DEVICE-NEXT: ret i32 2
+//
+int __Two(void) { return 2; }
+int Two(void) __attribute__ ((weak, alias("__Two")));
+#pragma omp end declare target
+
+#pragma omp declare target
+// HOST-LABEL: define dso_local i32 @__Three(
+// HOST-SAME: ) #[[ATTR0]] {
+// HOST-NEXT: [[ENTRY:.*:]]
+// HOST-NEXT: ret i32 3
+//
+// DEVICE-LABEL: define hidden i32 @__Three(
+// DEVICE-SAME: ) #[[ATTR0]] {
+// DEVICE-NEXT: [[ENTRY:.*:]]
+// DEVICE-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
+// DEVICE-NEXT: [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
+// DEVICE-NEXT: ret i32 3
+//
+int __Three(void) { return 3; }
+#pragma omp end declare target
+int Three(void) __attribute__ ((weak, alias("__Three")));
+
+
+// HOST-LABEL: define dso_local i32 @main(
+// HOST-SAME: ) #[[ATTR0]] {
+// HOST-NEXT: [[ENTRY:.*:]]
+// HOST-NEXT: [[RETVAL:%.*]] = alloca i32, align 4
+// HOST-NEXT: store i32 0, ptr [[RETVAL]], align 4
+// HOST-NEXT: ret i32 0
+//
+int main(){
+ return 0;
+}
+
+//.
+// HOST: attributes #[[ATTR0]] = { noinline nounwind optnone "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" }
+//.
+// DEVICE: attributes #[[ATTR0]] = { convergent noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+//.
+// HOST: [[META0:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
+// HOST: [[META1:![0-9]+]] = !{i32 7, !"openmp", i32 51}
+// HOST: [[META2:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
+//.
+// DEVICE: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 600}
+// DEVICE: [[META1:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
+// DEVICE: [[META2:![0-9]+]] = !{i32 7, !"openmp", i32 51}
+// DEVICE: [[META3:![0-9]+]] = !{i32 7, !"openmp-device", i32 51}
+// DEVICE: [[META4:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
+//.
diff --git a/clang/test/OpenMP/nvptx_weak_alias.c b/clang/test/OpenMP/nvptx_weak_alias.c
new file mode 100644
index 0000000000000..695bd7d0b8af9
--- /dev/null
+++ b/clang/test/OpenMP/nvptx_weak_alias.c
@@ -0,0 +1,34 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals all --version 6
+// REQUIRES: nvptx-registered-target
+
+// RUN: %clang_cc1 -fopenmp -x c -triple x86_64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -o - | FileCheck %s
+
+//.
+// CHECK: @One = weak alias i32 (), ptr @__One
+//.
+// CHECK-LABEL: define dso_local i32 @__One(
+// CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: ret i32 1
+//
+int __One(void) { return 1; }
+int One(void) __attribute__ ((weak, alias("__One")));
+
+
+// CHECK-LABEL: define dso_local i32 @main(
+// CHECK-SAME: ) #[[ATTR0]] {
+// CHECK-NEXT: [[ENTRY:.*:]]
+// CHECK-NEXT: [[RETVAL:%.*]] = alloca i32, align 4
+// CHECK-NEXT: store i32 0, ptr [[RETVAL]], align 4
+// CHECK-NEXT: ret i32 0
+//
+int main(){
+ return 0;
+}
+//.
+// CHECK: attributes #[[ATTR0]] = { noinline nounwind optnone "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+cx8,+mmx,+sse,+sse2,+x87" }
+//.
+// CHECK: [[META0:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
+// CHECK: [[META1:![0-9]+]] = !{i32 7, !"openmp", i32 51}
+// CHECK: [[META2:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
+//.
``````````
</details>
https://github.com/llvm/llvm-project/pull/164326
More information about the cfe-commits
mailing list