[clang] clang/AMDGPU: Do not emit __oclc_ABI_version references with environment (PR #184868)
Matt Arsenault via cfe-commits
cfe-commits at lists.llvm.org
Thu Mar 5 12:01:37 PST 2026
https://github.com/arsenm updated https://github.com/llvm/llvm-project/pull/184868
>From 47173669a868b5263486482df979fb69f242f0d4 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Thu, 5 Mar 2026 20:54:57 +0100
Subject: [PATCH 1/2] clang/AMDGPU: Do not emit __oclc_ABI_version references
with environment
Assume a sufficiently new code object version if the environment is set to
something indicating we should have a real library.
---
clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp | 5 ++
clang/test/CodeGen/amdgpu-abi-version.c | 59 ++++++++++++++-------
2 files changed, 44 insertions(+), 20 deletions(-)
diff --git a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
index bff1ed3d2ec19..159327408fbd0 100644
--- a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
@@ -89,6 +89,11 @@ Value *EmitAMDGPUWorkGroupSize(CodeGenFunction &CGF, unsigned Index) {
auto Cov = CGF.getTarget().getTargetOpts().CodeObjectVersion;
+ // Do not emit __oclc_ABI_version references with non-empt environment.
+ if (Cov == CodeObjectVersionKind::COV_None &&
+ CGF.getTarget().getTriple().hasEnvironment())
+ Cov = CodeObjectVersionKind::COV_6;
+
if (Cov == CodeObjectVersionKind::COV_None) {
StringRef Name = "__oclc_ABI_version";
auto *ABIVersionC = CGF.CGM.getModule().getNamedGlobal(Name);
diff --git a/clang/test/CodeGen/amdgpu-abi-version.c b/clang/test/CodeGen/amdgpu-abi-version.c
index c8bc7d0f04561..9b7011f36f523 100644
--- a/clang/test/CodeGen/amdgpu-abi-version.c
+++ b/clang/test/CodeGen/amdgpu-abi-version.c
@@ -1,29 +1,48 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals all --version 5
-// RUN: %clang_cc1 -cc1 -triple amdgcn-amd-amdhsa -emit-llvm -mcode-object-version=none %s -o - | FileCheck %s
+// RUN: %clang_cc1 -cc1 -triple amdgcn-amd-amdhsa -emit-llvm -mcode-object-version=none %s -o - | FileCheck -check-prefixes=CHECK,LLVM %s
+// RUN: %clang_cc1 -cc1 -triple amdgcn-amd-amdhsa-llvm -emit-llvm -mcode-object-version=none %s -o - | FileCheck -check-prefixes=CHECK,LLVMENV %s
//.
-// CHECK: @__oclc_ABI_version = external addrspace(4) global i32
+// LLVM: @__oclc_ABI_version = external addrspace(4) global i32
//.
-// CHECK-LABEL: define dso_local i32 @foo(
-// CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
-// CHECK-NEXT: [[ENTRY:.*:]]
-// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(4) @__oclc_ABI_version, align 4
-// CHECK-NEXT: [[TMP1:%.*]] = icmp sge i32 [[TMP0]], 500
-// CHECK-NEXT: [[TMP2:%.*]] = call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
-// CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr addrspace(4) [[TMP2]], i32 12
-// CHECK-NEXT: [[TMP4:%.*]] = call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
-// CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr addrspace(4) [[TMP4]], i32 4
-// CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP1]], ptr addrspace(4) [[TMP3]], ptr addrspace(4) [[TMP5]]
-// CHECK-NEXT: [[TMP7:%.*]] = load i16, ptr addrspace(4) [[TMP6]], align 2, !range [[RNG1:![0-9]+]], !invariant.load [[META2:![0-9]+]], !noundef [[META2]]
-// CHECK-NEXT: [[CONV:%.*]] = zext i16 [[TMP7]] to i32
-// CHECK-NEXT: ret i32 [[CONV]]
+// LLVM-LABEL: define dso_local i32 @foo(
+// LLVM-SAME: ) #[[ATTR0:[0-9]+]] {
+// LLVM-NEXT: [[ENTRY:.*:]]
+// LLVM-NEXT: [[TMP0:%.*]] = load i32, ptr addrspace(4) @__oclc_ABI_version, align 4
+// LLVM-NEXT: [[TMP1:%.*]] = icmp sge i32 [[TMP0]], 500
+// LLVM-NEXT: [[TMP2:%.*]] = call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
+// LLVM-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr addrspace(4) [[TMP2]], i32 12
+// LLVM-NEXT: [[TMP4:%.*]] = call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
+// LLVM-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr addrspace(4) [[TMP4]], i32 4
+// LLVM-NEXT: [[TMP6:%.*]] = select i1 [[TMP1]], ptr addrspace(4) [[TMP3]], ptr addrspace(4) [[TMP5]]
+// LLVM-NEXT: [[TMP7:%.*]] = load i16, ptr addrspace(4) [[TMP6]], align 2, !range [[RNG1:![0-9]+]], !invariant.load [[META2:![0-9]+]], !noundef [[META2]]
+// LLVM-NEXT: [[CONV:%.*]] = zext i16 [[TMP7]] to i32
+// LLVM-NEXT: ret i32 [[CONV]]
+//
+// LLVMENV-LABEL: define dso_local i32 @foo(
+// LLVMENV-SAME: ) #[[ATTR0:[0-9]+]] {
+// LLVMENV-NEXT: [[ENTRY:.*:]]
+// LLVMENV-NEXT: [[TMP0:%.*]] = call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
+// LLVMENV-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr addrspace(4) [[TMP0]], i32 12
+// LLVMENV-NEXT: [[TMP2:%.*]] = load i16, ptr addrspace(4) [[TMP1]], align 2, !range [[RNG1:![0-9]+]], !invariant.load [[META2:![0-9]+]], !noundef [[META2]]
+// LLVMENV-NEXT: [[CONV:%.*]] = zext i16 [[TMP2]] to i32
+// LLVMENV-NEXT: ret i32 [[CONV]]
//
int foo() { return __builtin_amdgcn_workgroup_size_x(); }
//.
-// CHECK: attributes #[[ATTR0]] = { convergent noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
-// CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+// LLVM: attributes #[[ATTR0]] = { convergent noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+// LLVM: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+//.
+// LLVMENV: attributes #[[ATTR0]] = { convergent noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+// LLVMENV: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+//.
+// LLVM: [[META0:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
+// LLVM: [[RNG1]] = !{i16 1, i16 1025}
+// LLVM: [[META2]] = !{}
//.
-// CHECK: [[META0:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
-// CHECK: [[RNG1]] = !{i16 1, i16 1025}
-// CHECK: [[META2]] = !{}
+// LLVMENV: [[META0:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
+// LLVMENV: [[RNG1]] = !{i16 1, i16 1025}
+// LLVMENV: [[META2]] = !{}
//.
+//// NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+// CHECK: {{.*}}
>From f8087adb55d61c5e9ebbd587f97693428a9f2520 Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault at amd.com>
Date: Thu, 5 Mar 2026 21:01:29 +0100
Subject: [PATCH 2/2] typo
---
clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
index 159327408fbd0..72d5cb8040119 100644
--- a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
@@ -89,7 +89,7 @@ Value *EmitAMDGPUWorkGroupSize(CodeGenFunction &CGF, unsigned Index) {
auto Cov = CGF.getTarget().getTargetOpts().CodeObjectVersion;
- // Do not emit __oclc_ABI_version references with non-empt environment.
+ // Do not emit __oclc_ABI_version references with non-empty environment.
if (Cov == CodeObjectVersionKind::COV_None &&
CGF.getTarget().getTriple().hasEnvironment())
Cov = CodeObjectVersionKind::COV_6;
More information about the cfe-commits
mailing list