[clang] [compiler-rt] [libc] [libcxx] [llvm] [Clang][AMDGPU] Remove special handling for COV4 libraries (PR #132870)

Joseph Huber via llvm-commits llvm-commits at lists.llvm.org
Tue Mar 25 06:59:18 PDT 2025


https://github.com/jhuber6 updated https://github.com/llvm/llvm-project/pull/132870

>From 09c8fbf137fc975029fa9dee89af0d52ac2876b5 Mon Sep 17 00:00:00 2001
From: Joseph Huber <huberjn at outlook.com>
Date: Mon, 24 Mar 2025 22:36:04 -0500
Subject: [PATCH] [Clang][AMDGPU] Remove special handling for COV4 libraries

Summary:
When we were first porting to COV5, this lead to some ABI issues due to
a change in how we looked up the work group size. Bitcode libraries
relied on the builtins to emit code, but this was changed between
versions. This prevented the bitcode libraries, like OpenMP or libc,
from being used for both COV4 and COV5. The solution was to have this
'none' functionality which effectively emitted code that branched off of
a global to resolve to either version.

This isn't a great solution because it forced every TU to have this
variable in it. The patch in
https://github.com/llvm/llvm-project/pull/131033 removed support for
COV4 from OpenMP, which was the only consumer of this functionality.
Other users like HIP and OpenCL did not use this because they linked the
ROCm Device Library directly which has its own handling (The name was
borrowed from it after all).

So, now that we don't need to worry about backward compatibility with
COV4, we can remove this special handling. Users can still emit COV4
code, this simply removes the special handling used to make the OpenMP
device runtime bitcode version agnostic.
---
 clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp   |   3 -
 clang/lib/CodeGen/Targets/AMDGPU.cpp          |  36 -----
 clang/test/CodeGen/amdgpu-abi-version.c       |  15 +-
 clang/test/CodeGen/amdgpu-address-spaces.cpp  |   8 +-
 .../amdgpu-code-object-version-linking.cu     | 133 ------------------
 .../test/CodeGenCUDA/amdgpu-workgroup-size.cu |  34 -----
 .../CodeGenCXX/dynamic-cast-address-space.cpp |   1 -
 clang/test/CodeGenHIP/default-attributes.hip  |   1 -
 .../CodeGenOpenCL/amdgpu-enqueue-kernel.cl    |   1 -
 .../amdgcn_target_global_constructor.cpp      |   1 -
 compiler-rt/cmake/builtin-config-ix.cmake     |   1 -
 compiler-rt/lib/builtins/CMakeLists.txt       |   6 -
 .../modules/LLVMLibCCompileOptionRules.cmake  |   2 -
 libcxx/cmake/caches/AMDGPU.cmake              |   6 +-
 offload/DeviceRTL/CMakeLists.txt              |   2 +-
 15 files changed, 22 insertions(+), 228 deletions(-)
 delete mode 100644 clang/test/CodeGenCUDA/amdgpu-code-object-version-linking.cu

diff --git a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
index f94917c905081..e33f556774d46 100644
--- a/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
@@ -65,9 +65,6 @@ Value *EmitAMDGPUImplicitArgPtr(CodeGenFunction &CGF) {
 /// COV_NONE : Emit code to load a global variable "__oclc_ABI_version"
 ///            and use its value for COV_4 or COV_5+ approach. It is used for
 ///            compiling device libraries in an ABI-agnostic way.
-///
-/// Note: "__oclc_ABI_version" is supposed to be emitted and intialized by
-///       clang during compilation of user code.
 Value *EmitAMDGPUWorkGroupSize(CodeGenFunction &CGF, unsigned Index) {
   llvm::LoadInst *LD;
 
diff --git a/clang/lib/CodeGen/Targets/AMDGPU.cpp b/clang/lib/CodeGen/Targets/AMDGPU.cpp
index e3c8243cbb0b5..db2a2c5740646 100644
--- a/clang/lib/CodeGen/Targets/AMDGPU.cpp
+++ b/clang/lib/CodeGen/Targets/AMDGPU.cpp
@@ -305,8 +305,6 @@ class AMDGPUTargetCodeGenInfo : public TargetCodeGenInfo {
   void setFunctionDeclAttributes(const FunctionDecl *FD, llvm::Function *F,
                                  CodeGenModule &CGM) const;
 
-  void emitTargetGlobals(CodeGen::CodeGenModule &CGM) const override;
-
   void setTargetAttributes(const Decl *D, llvm::GlobalValue *GV,
                            CodeGen::CodeGenModule &M) const override;
   unsigned getOpenCLKernelCallingConv() const override;
@@ -414,40 +412,6 @@ void AMDGPUTargetCodeGenInfo::setFunctionDeclAttributes(
   }
 }
 
-/// Emits control constants used to change per-architecture behaviour in the
-/// AMDGPU ROCm device libraries.
-void AMDGPUTargetCodeGenInfo::emitTargetGlobals(
-    CodeGen::CodeGenModule &CGM) const {
-  StringRef Name = "__oclc_ABI_version";
-  llvm::GlobalVariable *OriginalGV = CGM.getModule().getNamedGlobal(Name);
-  if (OriginalGV && !llvm::GlobalVariable::isExternalLinkage(OriginalGV->getLinkage()))
-    return;
-
-  if (CGM.getTarget().getTargetOpts().CodeObjectVersion ==
-      llvm::CodeObjectVersionKind::COV_None)
-    return;
-
-  auto *Type = llvm::IntegerType::getIntNTy(CGM.getModule().getContext(), 32);
-  llvm::Constant *COV = llvm::ConstantInt::get(
-      Type, CGM.getTarget().getTargetOpts().CodeObjectVersion);
-
-  // It needs to be constant weak_odr without externally_initialized so that
-  // the load instuction can be eliminated by the IPSCCP.
-  auto *GV = new llvm::GlobalVariable(
-      CGM.getModule(), Type, true, llvm::GlobalValue::WeakODRLinkage, COV, Name,
-      nullptr, llvm::GlobalValue::ThreadLocalMode::NotThreadLocal,
-      CGM.getContext().getTargetAddressSpace(LangAS::opencl_constant));
-  GV->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Local);
-  GV->setVisibility(llvm::GlobalValue::VisibilityTypes::HiddenVisibility);
-
-  // Replace any external references to this variable with the new global.
-  if (OriginalGV) {
-    OriginalGV->replaceAllUsesWith(GV);
-    GV->takeName(OriginalGV);
-    OriginalGV->eraseFromParent();
-  }
-}
-
 void AMDGPUTargetCodeGenInfo::setTargetAttributes(
     const Decl *D, llvm::GlobalValue *GV, CodeGen::CodeGenModule &M) const {
   if (requiresAMDGPUProtectedVisibility(D, GV)) {
diff --git a/clang/test/CodeGen/amdgpu-abi-version.c b/clang/test/CodeGen/amdgpu-abi-version.c
index 4e5ad87655f23..b9c1de0521b95 100644
--- a/clang/test/CodeGen/amdgpu-abi-version.c
+++ b/clang/test/CodeGen/amdgpu-abi-version.c
@@ -1,4 +1,4 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals --version 3
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals all --version 5
 // RUN: %clang_cc1 -cc1 -triple amdgcn-amd-amdhsa -emit-llvm -mcode-object-version=none %s -o - | FileCheck %s
 
 //.
@@ -6,7 +6,7 @@
 //.
 // CHECK-LABEL: define dso_local i32 @foo(
 // CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
-// CHECK-NEXT:  entry:
+// CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
 // CHECK-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[RETVAL]] to ptr
 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr addrspace(4) @__oclc_ABI_version, align 4
@@ -16,8 +16,17 @@
 // CHECK-NEXT:    [[TMP4:%.*]] = call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
 // CHECK-NEXT:    [[TMP5:%.*]] = getelementptr i8, ptr addrspace(4) [[TMP4]], i32 4
 // CHECK-NEXT:    [[TMP6:%.*]] = select i1 [[TMP1]], ptr addrspace(4) [[TMP3]], ptr addrspace(4) [[TMP5]]
-// CHECK-NEXT:    [[TMP7:%.*]] = load i16, ptr addrspace(4) [[TMP6]], align 2, !range [[RNG2:![0-9]+]], !invariant.load !3, !noundef !3
+// CHECK-NEXT:    [[TMP7:%.*]] = load i16, ptr addrspace(4) [[TMP6]], align 2, !range [[RNG2:![0-9]+]], !invariant.load [[META3:![0-9]+]], !noundef [[META3]]
 // CHECK-NEXT:    [[CONV:%.*]] = zext i16 [[TMP7]] to i32
 // CHECK-NEXT:    ret i32 [[CONV]]
 //
 int foo() { return __builtin_amdgcn_workgroup_size_x(); }
+//.
+// CHECK: attributes #[[ATTR0]] = { convergent noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+// CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+//.
+// CHECK: [[META0:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
+// CHECK: [[META1:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
+// CHECK: [[RNG2]] = !{i16 1, i16 1025}
+// CHECK: [[META3]] = !{}
+//.
diff --git a/clang/test/CodeGen/amdgpu-address-spaces.cpp b/clang/test/CodeGen/amdgpu-address-spaces.cpp
index b121b559f58dc..1d8668a7f0917 100644
--- a/clang/test/CodeGen/amdgpu-address-spaces.cpp
+++ b/clang/test/CodeGen/amdgpu-address-spaces.cpp
@@ -29,7 +29,6 @@ int [[clang::address_space(999)]] bbb = 1234;
 // CHECK: @u = addrspace(5) global i32 undef, align 4
 // CHECK: @aaa = addrspace(6) global i32 1000, align 4
 // CHECK: @bbb = addrspace(999) global i32 1234, align 4
-// CHECK: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 600
 //.
 // CHECK-LABEL: define dso_local amdgpu_kernel void @foo(
 // CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
@@ -60,3 +59,10 @@ extern "C" [[clang::amdgpu_kernel]] void foo() {
   aaa = 0;
   bbb = 0;
 }
+//.
+// CHECK: attributes #[[ATTR0]] = { convergent mustprogress noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" }
+//.
+// CHECK: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 600}
+// CHECK: [[META1:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
+// CHECK: [[META2:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
+//.
diff --git a/clang/test/CodeGenCUDA/amdgpu-code-object-version-linking.cu b/clang/test/CodeGenCUDA/amdgpu-code-object-version-linking.cu
deleted file mode 100644
index cb467886c016c..0000000000000
--- a/clang/test/CodeGenCUDA/amdgpu-code-object-version-linking.cu
+++ /dev/null
@@ -1,133 +0,0 @@
-// RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm-bc \
-// RUN:   -mcode-object-version=4 -DUSER -x hip -o %t_4.bc %s
-
-// RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm-bc \
-// RUN:   -mcode-object-version=5 -DUSER -x hip -o %t_5.bc %s
-
-// RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm-bc \
-// RUN:   -mcode-object-version=6 -DUSER -x hip -o %t_6.bc %s
-
-// RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm-bc \
-// RUN:   -mcode-object-version=none -DDEVICELIB -x hip -o %t_0.bc %s
-
-// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -emit-llvm -O3 \
-// RUN:   %t_4.bc -mlink-builtin-bitcode %t_0.bc -o - |\
-// RUN:   FileCheck -check-prefix=LINKED4 %s
-
-// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -emit-llvm -O3 \
-// RUN:   %t_5.bc -mlink-builtin-bitcode %t_0.bc -o - |\
-// RUN:   FileCheck -check-prefix=LINKED5 %s
-
-// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -emit-llvm -O3 \
-// RUN:   %t_6.bc -mlink-builtin-bitcode %t_0.bc -o - |\
-// RUN:   FileCheck -check-prefix=LINKED6 %s
-
-#include "Inputs/cuda.h"
-
-// LINKED4: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 400
-// LINKED4-LABEL: bar
-// LINKED4-NOT: load i32, ptr addrspacecast (ptr addrspace(4) @__oclc_ABI_version to ptr), align {{.*}}
-// LINKED4-NOT: icmp sge i32 %{{.*}}, 500
-// LINKED4: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
-// LINKED4: [[GEP_5_X:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 12
-// LINKED4: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
-// LINKED4: [[GEP_4_X:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 4
-// LINKED4: select i1 false, ptr addrspace(4) [[GEP_5_X]], ptr addrspace(4) [[GEP_4_X]]
-// LINKED4: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef
-
-// LINKED4-NOT: load i32, ptr addrspacecast (ptr addrspace(4) @__oclc_ABI_version to ptr), align {{.*}}
-// LINKED4-NOT: icmp sge i32 %{{.*}}, 500
-// LINKED4: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
-// LINKED4: [[GEP_5_Y:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 14
-// LINKED4: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
-// LINKED4: [[GEP_4_Y:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 6
-// LINKED4: select i1 false, ptr addrspace(4) [[GEP_5_Y]], ptr addrspace(4) [[GEP_4_Y]]
-// LINKED4: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef
-
-// LINKED4-NOT: load i32, ptr addrspacecast (ptr addrspace(4) @__oclc_ABI_version to ptr), align {{.*}}
-// LINKED4-NOT: icmp sge i32 %{{.*}}, 500
-// LINKED4: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
-// LINKED4: [[GEP_5_Z:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 16
-// LINKED4: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
-// LINKED4: [[GEP_4_Z:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 8
-// LINKED4: select i1 false, ptr addrspace(4) [[GEP_5_Z]], ptr addrspace(4) [[GEP_4_Z]]
-// LINKED4: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef
-// LINKED4: "amdhsa_code_object_version", i32 400
-
-// LINKED5: __oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 500
-// LINKED5-LABEL: bar
-// LINKED5-NOT: load i32, ptr addrspacecast (ptr addrspace(4) @__oclc_ABI_version to ptr), align {{.*}}
-// LINKED5-NOT: icmp sge i32 %{{.*}}, 500
-// LINKED5: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
-// LINKED5: [[GEP_5_X:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 12
-// LINKED5: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
-// LINKED5: [[GEP_4_X:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 4
-// LINKED5: select i1 true, ptr addrspace(4) [[GEP_5_X]], ptr addrspace(4) [[GEP_4_X]]
-// LINKED5: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef
-
-// LINKED5-NOT: load i32, ptr addrspacecast (ptr addrspace(4) @__oclc_ABI_version to ptr), align {{.*}}
-// LINKED5-NOT: icmp sge i32 %{{.*}}, 500
-// LINKED5: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
-// LINKED5: [[GEP_5_Y:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 14
-// LINKED5: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
-// LINKED5: [[GEP_4_Y:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 6
-// LINKED5: select i1 true, ptr addrspace(4) [[GEP_5_Y]], ptr addrspace(4) [[GEP_4_Y]]
-// LINKED5: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef
-
-// LINKED5-NOT: load i32, ptr addrspacecast (ptr addrspace(4) @__oclc_ABI_version to ptr), align {{.*}}
-// LINKED5-NOT: icmp sge i32 %{{.*}}, 500
-// LINKED5: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
-// LINKED5: [[GEP_5_Z:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 16
-// LINKED5: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
-// LINKED5: [[GEP_4_Z:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 8
-// LINKED5: select i1 true, ptr addrspace(4) [[GEP_5_Z]], ptr addrspace(4) [[GEP_4_Z]]
-// LINKED5: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef
-// LINKED5: "amdhsa_code_object_version", i32 500
-
-// LINKED6: __oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 600
-// LINKED6-LABEL: bar
-// LINKED6-NOT: load i32, ptr addrspacecast (ptr addrspace(4) @__oclc_ABI_version to ptr), align {{.*}}
-// LINKED6-NOT: icmp sge i32 %{{.*}}, 500
-// LINKED6: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
-// LINKED6: [[GEP_5_X:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 12
-// LINKED6: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
-// LINKED6: [[GEP_4_X:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 4
-// LINKED6: select i1 true, ptr addrspace(4) [[GEP_5_X]], ptr addrspace(4) [[GEP_4_X]]
-// LINKED6: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef
-
-// LINKED6-NOT: load i32, ptr addrspacecast (ptr addrspace(4) @__oclc_ABI_version to ptr), align {{.*}}
-// LINKED6-NOT: icmp sge i32 %{{.*}}, 500
-// LINKED6: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
-// LINKED6: [[GEP_5_Y:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 14
-// LINKED6: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
-// LINKED6: [[GEP_4_Y:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 6
-// LINKED6: select i1 true, ptr addrspace(4) [[GEP_5_Y]], ptr addrspace(4) [[GEP_4_Y]]
-// LINKED6: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef
-
-// LINKED6-NOT: load i32, ptr addrspacecast (ptr addrspace(4) @__oclc_ABI_version to ptr), align {{.*}}
-// LINKED6-NOT: icmp sge i32 %{{.*}}, 500
-// LINKED6: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
-// LINKED6: [[GEP_5_Z:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 16
-// LINKED6: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
-// LINKED6: [[GEP_4_Z:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 8
-// LINKED6: select i1 true, ptr addrspace(4) [[GEP_5_Z]], ptr addrspace(4) [[GEP_4_Z]]
-// LINKED6: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef
-// LINKED6: "amdhsa_code_object_version", i32 600
-
-#ifdef DEVICELIB
-__device__ void bar(int *x, int *y, int *z)
-{
-  *x = __builtin_amdgcn_workgroup_size_x();
-  *y = __builtin_amdgcn_workgroup_size_y();
-  *z = __builtin_amdgcn_workgroup_size_z();
-}
-#endif
-
-#ifdef USER
-__device__ void bar(int *x, int *y, int *z);
-__device__ void foo()
-{
-  int *x, *y, *z;
-  bar(x, y, z);
-}
-#endif
diff --git a/clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu b/clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu
index f42b69f492ff8..2d3730cdfc8c9 100644
--- a/clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu
+++ b/clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu
@@ -2,7 +2,6 @@
 // RUN:     -fcuda-is-device -mcode-object-version=4 -emit-llvm -o - -x hip %s \
 // RUN:     | FileCheck -check-prefix=PRECOV5 %s
 
-
 // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa \
 // RUN:     -fcuda-is-device -emit-llvm -o - -x hip %s \
 // RUN:     | FileCheck -check-prefix=COV5 %s
@@ -11,10 +10,6 @@
 // RUN:     -fcuda-is-device -mcode-object-version=6 -emit-llvm -o - -x hip %s \
 // RUN:     | FileCheck -check-prefix=COV5 %s
 
-// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa \
-// RUN:     -fcuda-is-device -mcode-object-version=none -emit-llvm -o - -x hip %s \
-// RUN:     | FileCheck -check-prefix=COVNONE %s
-
 #include "Inputs/cuda.h"
 
 // PRECOV5-LABEL: test_get_workgroup_size
@@ -35,35 +30,6 @@
 // COV5: getelementptr i8, ptr addrspace(4) %{{.*}}, i32 16
 // COV5: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef
 
-
-// COVNONE-LABEL: test_get_workgroup_size
-// COVNONE: load i32, ptr addrspace(4) @__oclc_ABI_version
-// COVNONE: [[ABI5_X:%.*]] = icmp sge i32 %{{.*}}, 500
-// COVNONE: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
-// COVNONE: [[GEP_5_X:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 12
-// COVNONE: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
-// COVNONE: [[GEP_4_X:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 4
-// COVNONE: select i1 [[ABI5_X]], ptr addrspace(4) [[GEP_5_X]], ptr addrspace(4) [[GEP_4_X]]
-// COVNONE: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef
-
-// COVNONE: load i32, ptr addrspace(4) @__oclc_ABI_version
-// COVNONE: [[ABI5_Y:%.*]] = icmp sge i32 %{{.*}}, 500
-// COVNONE: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
-// COVNONE: [[GEP_5_Y:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 14
-// COVNONE: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
-// COVNONE: [[GEP_4_Y:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 6
-// COVNONE: select i1 [[ABI5_Y]], ptr addrspace(4) [[GEP_5_Y]], ptr addrspace(4) [[GEP_4_Y]]
-// COVNONE: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef
-
-// COVNONE: load i32, ptr addrspace(4) @__oclc_ABI_version
-// COVNONE: [[ABI5_Z:%.*]] = icmp sge i32 %{{.*}}, 500
-// COVNONE: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
-// COVNONE: [[GEP_5_Z:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 16
-// COVNONE: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
-// COVNONE: [[GEP_4_Z:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 8
-// COVNONE: select i1 [[ABI5_Z]], ptr addrspace(4) [[GEP_5_Z]], ptr addrspace(4) [[GEP_4_Z]]
-// COVNONE: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef
-
 __device__ void test_get_workgroup_size(int d, int *out)
 {
   switch (d) {
diff --git a/clang/test/CodeGenCXX/dynamic-cast-address-space.cpp b/clang/test/CodeGenCXX/dynamic-cast-address-space.cpp
index 5d49cc0544b9c..8d50c71feb990 100644
--- a/clang/test/CodeGenCXX/dynamic-cast-address-space.cpp
+++ b/clang/test/CodeGenCXX/dynamic-cast-address-space.cpp
@@ -13,7 +13,6 @@ B fail;
 // CHECK: @_ZTI1B = linkonce_odr addrspace(1) constant { ptr addrspace(1), ptr addrspace(1), ptr addrspace(1) } { ptr addrspace(1) getelementptr inbounds (ptr addrspace(1), ptr addrspace(1) @_ZTVN10__cxxabiv120__si_class_type_infoE, i64 2), ptr addrspace(1) @_ZTS1B, ptr addrspace(1) @_ZTI1A }, comdat, align 8
 // CHECK: @_ZTVN10__cxxabiv120__si_class_type_infoE = external addrspace(1) global [0 x ptr addrspace(1)]
 // CHECK: @_ZTS1B = linkonce_odr addrspace(1) constant [3 x i8] c"1B\00", comdat, align 1
-// CHECK: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 600
 //.
 // WITH-NONZERO-DEFAULT-AS: @_ZTV1B = linkonce_odr unnamed_addr addrspace(1) constant { [3 x ptr addrspace(1)] } { [3 x ptr addrspace(1)] [ptr addrspace(1) null, ptr addrspace(1) @_ZTI1B, ptr addrspace(1) addrspacecast (ptr addrspace(4) @_ZN1A1fEv to ptr addrspace(1))] }, comdat, align 8
 // WITH-NONZERO-DEFAULT-AS: @fail = addrspace(1) global { ptr addrspace(1) } { ptr addrspace(1) getelementptr inbounds inrange(-16, 8) ({ [3 x ptr addrspace(1)] }, ptr addrspace(1) @_ZTV1B, i32 0, i32 0, i32 2) }, align 8
diff --git a/clang/test/CodeGenHIP/default-attributes.hip b/clang/test/CodeGenHIP/default-attributes.hip
index f4dbad021987f..9aa40f18696c8 100644
--- a/clang/test/CodeGenHIP/default-attributes.hip
+++ b/clang/test/CodeGenHIP/default-attributes.hip
@@ -8,7 +8,6 @@
 //.
 // OPTNONE: @__hip_cuid_ = addrspace(1) global i8 0
 // OPTNONE: @llvm.compiler.used = appending addrspace(1) global [1 x ptr] [ptr addrspacecast (ptr addrspace(1) @__hip_cuid_ to ptr)], section "llvm.metadata"
-// OPTNONE: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 600
 //.
 __device__ void extern_func();
 
diff --git a/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl b/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl
index 7e847367e1a13..caae5666de29e 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl
@@ -68,7 +68,6 @@ kernel void test_target_features_kernel(global int *i) {
 // CHECK: @__block_literal_global = internal addrspace(1) constant { i32, i32, ptr } { i32 16, i32 8, ptr @__test_target_features_kernel_block_invoke }, align 8 #0
 // CHECK: @__test_target_features_kernel_block_invoke_kernel.runtime.handle = internal addrspace(1) externally_initialized constant %block.runtime.handle.t.3 zeroinitializer, section ".amdgpu.kernel.runtime.handle"
 // CHECK: @llvm.used = appending addrspace(1) global [10 x ptr] [ptr @__test_block_invoke_kernel, ptr addrspacecast (ptr addrspace(1) @__test_block_invoke_kernel.runtime.handle to ptr), ptr @__test_block_invoke_2_kernel, ptr addrspacecast (ptr addrspace(1) @__test_block_invoke_2_kernel.runtime.handle to ptr), ptr @__test_block_invoke_3_kernel, ptr addrspacecast (ptr addrspace(1) @__test_block_invoke_3_kernel.runtime.handle to ptr), ptr @__test_block_invoke_4_kernel, ptr addrspacecast (ptr addrspace(1) @__test_block_invoke_4_kernel.runtime.handle to ptr), ptr @__test_target_features_kernel_block_invoke_kernel, ptr addrspacecast (ptr addrspace(1) @__test_target_features_kernel_block_invoke_kernel.runtime.handle to ptr)], section "llvm.metadata"
-// CHECK: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 600
 //.
 // NOCPU: Function Attrs: convergent noinline norecurse nounwind optnone
 // NOCPU-LABEL: define {{[^@]+}}@callee
diff --git a/clang/test/OpenMP/amdgcn_target_global_constructor.cpp b/clang/test/OpenMP/amdgcn_target_global_constructor.cpp
index d728dc1233e2c..bbfb0c4d9b11e 100644
--- a/clang/test/OpenMP/amdgcn_target_global_constructor.cpp
+++ b/clang/test/OpenMP/amdgcn_target_global_constructor.cpp
@@ -29,7 +29,6 @@ S A;
 // CHECK: @A = addrspace(1) global %struct.S zeroinitializer, align 4
 // CHECK: @llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 65535, ptr @_GLOBAL__sub_I_amdgcn_target_global_constructor.cpp, ptr null }]
 // CHECK: @llvm.global_dtors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 65535, ptr @__dtor_A, ptr null }]
-// CHECK: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 600
 //.
 // CHECK-LABEL: define {{[^@]+}}@__cxx_global_var_init
 // CHECK-SAME: () #[[ATTR0:[0-9]+]] {
diff --git a/compiler-rt/cmake/builtin-config-ix.cmake b/compiler-rt/cmake/builtin-config-ix.cmake
index b1bde47ec8555..3bc07b04f08f2 100644
--- a/compiler-rt/cmake/builtin-config-ix.cmake
+++ b/compiler-rt/cmake/builtin-config-ix.cmake
@@ -22,7 +22,6 @@ builtin_check_c_compiler_flag(-Wno-pedantic         COMPILER_RT_HAS_WNO_PEDANTIC
 builtin_check_c_compiler_flag(-nogpulib             COMPILER_RT_HAS_NOGPULIB_FLAG)
 builtin_check_c_compiler_flag(-flto                 COMPILER_RT_HAS_FLTO_FLAG)
 builtin_check_c_compiler_flag(-fconvergent-functions COMPILER_RT_HAS_FCONVERGENT_FUNCTIONS_FLAG)
-builtin_check_c_compiler_flag("-Xclang -mcode-object-version=none" COMPILER_RT_HAS_CODE_OBJECT_VERSION_FLAG)
 builtin_check_c_compiler_flag(-Wbuiltin-declaration-mismatch COMPILER_RT_HAS_WBUILTIN_DECLARATION_MISMATCH_FLAG)
 builtin_check_c_compiler_flag(/Zl COMPILER_RT_HAS_ZL_FLAG)
 
diff --git a/compiler-rt/lib/builtins/CMakeLists.txt b/compiler-rt/lib/builtins/CMakeLists.txt
index 19316c52d12ce..626b21e30ed6b 100644
--- a/compiler-rt/lib/builtins/CMakeLists.txt
+++ b/compiler-rt/lib/builtins/CMakeLists.txt
@@ -833,12 +833,6 @@ else ()
     append_list_if(COMPILER_RT_HAS_FLTO_FLAG -flto BUILTIN_CFLAGS)
     append_list_if(COMPILER_RT_HAS_FCONVERGENT_FUNCTIONS_FLAG
                    -fconvergent-functions BUILTIN_CFLAGS)
-
-    # AMDGPU targets want to use a generic ABI.
-    if("${COMPILER_RT_DEFAULT_TARGET_ARCH}" MATCHES "amdgcn")
-      append_list_if(COMPILER_RT_HAS_CODE_OBJECT_VERSION_FLAG
-                     "SHELL:-Xclang -mcode-object-version=none" BUILTIN_CFLAGS)
-    endif()
   endif()
 
   set(BUILTIN_DEFS "")
diff --git a/libc/cmake/modules/LLVMLibCCompileOptionRules.cmake b/libc/cmake/modules/LLVMLibCCompileOptionRules.cmake
index 0facb0b9be0c1..ddd18ef293c8d 100644
--- a/libc/cmake/modules/LLVMLibCCompileOptionRules.cmake
+++ b/libc/cmake/modules/LLVMLibCCompileOptionRules.cmake
@@ -215,8 +215,6 @@ function(_get_common_compile_options output_var flags)
       if(LIBC_CUDA_ROOT)
         list(APPEND compile_options "--cuda-path=${LIBC_CUDA_ROOT}")
       endif()
-    elseif(LIBC_TARGET_ARCHITECTURE_IS_AMDGPU)
-      list(APPEND compile_options "SHELL:-Xclang -mcode-object-version=none")
     endif()
   endif()
   set(${output_var} ${compile_options} PARENT_SCOPE)
diff --git a/libcxx/cmake/caches/AMDGPU.cmake b/libcxx/cmake/caches/AMDGPU.cmake
index e7bf3f53891f0..d4aa28b4134ea 100644
--- a/libcxx/cmake/caches/AMDGPU.cmake
+++ b/libcxx/cmake/caches/AMDGPU.cmake
@@ -32,8 +32,6 @@ set(LIBCXX_TEST_CONFIG "amdgpu-libc++-shared.cfg.in" CACHE STRING "")
 set(LIBCXX_TEST_PARAMS "optimization=none;long_tests=False;executor=amdhsa-loader" CACHE STRING "")
 
 # Necessary compile flags for AMDGPU.
-set(LIBCXX_ADDITIONAL_COMPILE_FLAGS
-    "-nogpulib;-flto;-fconvergent-functions;SHELL:-Xclang -mcode-object-version=none" CACHE STRING "")
-set(LIBCXXABI_ADDITIONAL_COMPILE_FLAGS
-    "-nogpulib;-flto;-fconvergent-functions;SHELL:-Xclang -mcode-object-version=none" CACHE STRING "")
+set(LIBCXX_ADDITIONAL_COMPILE_FLAGS "-nogpulib;-flto;-fconvergent-functions" CACHE STRING "")
+set(LIBCXXABI_ADDITIONAL_COMPILE_FLAGS "-nogpulib;-flto;-fconvergent-functions" CACHE STRING "")
 set(CMAKE_REQUIRED_FLAGS "-nogpulib" CACHE STRING "")
diff --git a/offload/DeviceRTL/CMakeLists.txt b/offload/DeviceRTL/CMakeLists.txt
index 8f2a1fd01fabc..07888217b6c68 100644
--- a/offload/DeviceRTL/CMakeLists.txt
+++ b/offload/DeviceRTL/CMakeLists.txt
@@ -255,7 +255,7 @@ function(compileDeviceRTLLibrary target_name target_triple)
 endfunction()
 
 add_custom_target(omptarget.devicertl.amdgpu)
-compileDeviceRTLLibrary(amdgpu amdgcn-amd-amdhsa -Xclang -mcode-object-version=none)
+compileDeviceRTLLibrary(amdgpu amdgcn-amd-amdhsa)
 
 add_custom_target(omptarget.devicertl.nvptx)
 compileDeviceRTLLibrary(nvptx nvptx64-nvidia-cuda --cuda-feature=+ptx63)



More information about the llvm-commits mailing list