[flang-commits] [llvm] [lld] [flang] [clang] [AMDGPU] Introduce Code Object V6 (PR #76954)
via flang-commits
flang-commits at lists.llvm.org
Thu Jan 4 05:59:48 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
@llvm/pr-subscribers-clang
Author: Pierre van Houtryve (Pierre-vh)
<details>
<summary>Changes</summary>
Introduce Code Object V6 in Clang, LLD, Flang and LLVM. This is the same as V5 except a new "generic version" flag can be present in EFLAGS. This is related to new generic targets that'll be added in a follow-up patch. It's also likely V6 will have new changes (possibly new metadata entries) added later.
Docs change are not included, I'm planning to do them in a follow-up patch all at once (when generic targets land too).
---
Patch is 90.40 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/76954.diff
49 Files Affected:
- (modified) clang/include/clang/Driver/Options.td (+2-2)
- (modified) clang/lib/CodeGen/CGBuiltin.cpp (+3-3)
- (modified) clang/lib/Driver/ToolChains/CommonArgs.cpp (+1-1)
- (modified) clang/test/CodeGenCUDA/amdgpu-code-object-version-linking.cu (+37)
- (modified) clang/test/CodeGenCUDA/amdgpu-code-object-version.cu (+4)
- (modified) clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu (+4)
- (added) clang/test/Driver/Inputs/rocm/amdgcn/bitcode/oclc_abi_version_600.bc ()
- (modified) clang/test/Driver/hip-code-object-version.hip (+12)
- (modified) clang/test/Driver/hip-device-libs.hip (+17-1)
- (modified) flang/lib/Frontend/CompilerInvocation.cpp (+2)
- (modified) flang/test/Lower/AMD/code-object-version.f90 (+2-1)
- (modified) lld/ELF/Arch/AMDGPU.cpp (+22)
- (modified) lld/test/ELF/amdgpu-tid.s (+16)
- (modified) llvm/include/llvm/BinaryFormat/ELF.h (+11-1)
- (modified) llvm/include/llvm/Support/AMDGPUMetadata.h (+5)
- (modified) llvm/include/llvm/Support/ScopedPrinter.h (+3-1)
- (modified) llvm/include/llvm/Target/TargetOptions.h (+1)
- (modified) llvm/lib/ObjectYAML/ELFYAML.cpp (+6)
- (modified) llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp (+3)
- (modified) llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp (+10)
- (modified) llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h (+10-1)
- (modified) llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp (+27)
- (modified) llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h (+1)
- (modified) llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp (+13)
- (modified) llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h (+4-1)
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/implicit-kernarg-backend-usage-global-isel.ll (+2)
- (modified) llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll (+1)
- (modified) llvm/test/CodeGen/AMDGPU/codegen-internal-only-func.ll (+2)
- (modified) llvm/test/CodeGen/AMDGPU/elf-header-osabi.ll (+4)
- (modified) llvm/test/CodeGen/AMDGPU/enable-scratch-only-dynamic-stack.ll (+1)
- (modified) llvm/test/CodeGen/AMDGPU/implicit-kernarg-backend-usage.ll (+2)
- (modified) llvm/test/CodeGen/AMDGPU/implicitarg-offset-attributes.ll (+46)
- (modified) llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll (+1)
- (modified) llvm/test/CodeGen/AMDGPU/non-entry-alloca.ll (+1)
- (modified) llvm/test/CodeGen/AMDGPU/recursion.ll (+1)
- (modified) llvm/test/CodeGen/AMDGPU/resource-usage-dead-function.ll (+1)
- (modified) llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-any.ll (+6)
- (modified) llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-not-supported.ll (+6)
- (modified) llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-off.ll (+6)
- (modified) llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-on.ll (+6)
- (modified) llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-off-1.ll (+6)
- (modified) llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-off-2.ll (+6)
- (modified) llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-on-1.ll (+6)
- (modified) llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-on-2.ll (+6)
- (modified) llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-not-supported.ll (+6)
- (modified) llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-off.ll (+6)
- (modified) llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-on.ll (+6)
- (modified) llvm/test/MC/AMDGPU/hsa-v5-uses-dynamic-stack.s (+5)
- (modified) llvm/tools/llvm-readobj/ELFDumper.cpp (+99-123)
``````````diff
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 2b93ddf033499c..0bfe0e7739960e 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -4753,9 +4753,9 @@ defm amdgpu_ieee : BoolOption<"m", "amdgpu-ieee",
def mcode_object_version_EQ : Joined<["-"], "mcode-object-version=">, Group<m_Group>,
HelpText<"Specify code object ABI version. Defaults to 4. (AMDGPU only)">,
Visibility<[ClangOption, FlangOption, CC1Option, FC1Option]>,
- Values<"none,4,5">,
+ Values<"none,4,5,6">,
NormalizedValuesScope<"llvm::CodeObjectVersionKind">,
- NormalizedValues<["COV_None", "COV_4", "COV_5"]>,
+ NormalizedValues<["COV_None", "COV_4", "COV_5", "COV_6"]>,
MarshallingInfoEnum<TargetOpts<"CodeObjectVersion">, "COV_4">;
defm cumode : SimpleMFlag<"cumode",
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index f71dbf1729a1d6..be86731ed912ea 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -17481,9 +17481,9 @@ Value *EmitAMDGPUImplicitArgPtr(CodeGenFunction &CGF) {
// \p Index is 0, 1, and 2 for x, y, and z dimension, respectively.
/// Emit code based on Code Object ABI version.
/// COV_4 : Emit code to use dispatch ptr
-/// COV_5 : Emit code to use implicitarg ptr
+/// COV_5+ : Emit code to use implicitarg ptr
/// COV_NONE : Emit code to load a global variable "__oclc_ABI_version"
-/// and use its value for COV_4 or COV_5 approach. It is used for
+/// and use its value for COV_4 or COV_5+ approach. It is used for
/// compiling device libraries in an ABI-agnostic way.
///
/// Note: "__oclc_ABI_version" is supposed to be emitted and intialized by
@@ -17526,7 +17526,7 @@ Value *EmitAMDGPUWorkGroupSize(CodeGenFunction &CGF, unsigned Index) {
Address(Result, CGF.Int16Ty, CharUnits::fromQuantity(2)));
} else {
Value *GEP = nullptr;
- if (Cov == CodeObjectVersionKind::COV_5) {
+ if (Cov >= CodeObjectVersionKind::COV_5) {
// Indexing the implicit kernarg segment.
GEP = CGF.Builder.CreateConstGEP1_32(
CGF.Int8Ty, EmitAMDGPUImplicitArgPtr(CGF), 12 + Index * 2);
diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp
index 2340191ca97d98..75582f6b5669d5 100644
--- a/clang/lib/Driver/ToolChains/CommonArgs.cpp
+++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp
@@ -2585,7 +2585,7 @@ getAMDGPUCodeObjectArgument(const Driver &D, const llvm::opt::ArgList &Args) {
void tools::checkAMDGPUCodeObjectVersion(const Driver &D,
const llvm::opt::ArgList &Args) {
const unsigned MinCodeObjVer = 4;
- const unsigned MaxCodeObjVer = 5;
+ const unsigned MaxCodeObjVer = 6;
if (auto *CodeObjArg = getAMDGPUCodeObjectArgument(D, Args)) {
if (CodeObjArg->getOption().getID() ==
diff --git a/clang/test/CodeGenCUDA/amdgpu-code-object-version-linking.cu b/clang/test/CodeGenCUDA/amdgpu-code-object-version-linking.cu
index 663687ae227f23..d33acdf7eb8bed 100644
--- a/clang/test/CodeGenCUDA/amdgpu-code-object-version-linking.cu
+++ b/clang/test/CodeGenCUDA/amdgpu-code-object-version-linking.cu
@@ -4,6 +4,9 @@
// RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm-bc \
// RUN: -mcode-object-version=5 -DUSER -x hip -o %t_5.bc %s
+// RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm-bc \
+// RUN: -mcode-object-version=6 -DUSER -x hip -o %t_6.bc %s
+
// RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm-bc \
// RUN: -mcode-object-version=none -DDEVICELIB -x hip -o %t_0.bc %s
@@ -15,6 +18,10 @@
// RUN: %t_5.bc -mlink-builtin-bitcode %t_0.bc -o - |\
// RUN: FileCheck -check-prefix=LINKED5 %s
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -emit-llvm -O3 \
+// RUN: %t_6.bc -mlink-builtin-bitcode %t_0.bc -o - |\
+// RUN: FileCheck -check-prefix=LINKED6 %s
+
#include "Inputs/cuda.h"
// LINKED4: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 400
@@ -77,6 +84,36 @@
// LINKED5: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef
// LINKED5: "amdgpu_code_object_version", i32 500
+// LINKED6: __oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 600
+// LINKED6-LABEL: bar
+// LINKED6-NOT: load i32, ptr addrspacecast (ptr addrspace(4) @__oclc_ABI_version to ptr), align {{.*}}
+// LINKED6-NOT: icmp sge i32 %{{.*}}, 500
+// LINKED6: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
+// LINKED6: [[GEP_5_X:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 12
+// LINKED6: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
+// LINKED6: [[GEP_4_X:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 4
+// LINKED6: select i1 true, ptr addrspace(4) [[GEP_5_X]], ptr addrspace(4) [[GEP_4_X]]
+// LINKED6: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef
+
+// LINKED6-NOT: load i32, ptr addrspacecast (ptr addrspace(4) @__oclc_ABI_version to ptr), align {{.*}}
+// LINKED6-NOT: icmp sge i32 %{{.*}}, 500
+// LINKED6: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
+// LINKED6: [[GEP_5_Y:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 14
+// LINKED6: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
+// LINKED6: [[GEP_4_Y:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 6
+// LINKED6: select i1 true, ptr addrspace(4) [[GEP_5_Y]], ptr addrspace(4) [[GEP_4_Y]]
+// LINKED6: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef
+
+// LINKED6-NOT: load i32, ptr addrspacecast (ptr addrspace(4) @__oclc_ABI_version to ptr), align {{.*}}
+// LINKED6-NOT: icmp sge i32 %{{.*}}, 500
+// LINKED6: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
+// LINKED6: [[GEP_5_Z:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 16
+// LINKED6: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
+// LINKED6: [[GEP_4_Z:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 8
+// LINKED6: select i1 true, ptr addrspace(4) [[GEP_5_Z]], ptr addrspace(4) [[GEP_4_Z]]
+// LINKED6: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef
+// LINKED6: "amdgpu_code_object_version", i32 600
+
#ifdef DEVICELIB
__device__ void bar(int *x, int *y, int *z)
{
diff --git a/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu b/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu
index ff5deaf9ab850d..59636e622731b8 100644
--- a/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu
+++ b/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu
@@ -9,6 +9,9 @@
// RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm \
// RUN: -mcode-object-version=5 -o - %s | FileCheck -check-prefix=V5 %s
+// RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm \
+// RUN: -mcode-object-version=6 -o - %s | FileCheck -check-prefix=V6 %s
+
// RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm \
// RUN: -mcode-object-version=none -o - %s | FileCheck %s -check-prefix=NONE
@@ -17,5 +20,6 @@
// V4: !{{.*}} = !{i32 1, !"amdgpu_code_object_version", i32 400}
// V5: !{{.*}} = !{i32 1, !"amdgpu_code_object_version", i32 500}
+// V6: !{{.*}} = !{i32 1, !"amdgpu_code_object_version", i32 600}
// NONE-NOT: !{{.*}} = !{i32 1, !"amdgpu_code_object_version",
// INV: error: invalid value '4.1' in '-mcode-object-version=4.1'
diff --git a/clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu b/clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu
index 282e0a49b9aa10..7f56fe91704870 100644
--- a/clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu
+++ b/clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu
@@ -7,6 +7,10 @@
// RUN: -fcuda-is-device -mcode-object-version=5 -emit-llvm -o - -x hip %s \
// RUN: | FileCheck -check-prefix=COV5 %s
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa \
+// RUN: -fcuda-is-device -mcode-object-version=6 -emit-llvm -o - -x hip %s \
+// RUN: | FileCheck -check-prefix=COV5 %s
+
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa \
// RUN: -fcuda-is-device -mcode-object-version=none -emit-llvm -o - -x hip %s \
// RUN: | FileCheck -check-prefix=COVNONE %s
diff --git a/clang/test/Driver/Inputs/rocm/amdgcn/bitcode/oclc_abi_version_600.bc b/clang/test/Driver/Inputs/rocm/amdgcn/bitcode/oclc_abi_version_600.bc
new file mode 100644
index 00000000000000..e69de29bb2d1d6
diff --git a/clang/test/Driver/hip-code-object-version.hip b/clang/test/Driver/hip-code-object-version.hip
index af5f9a3da21dfd..d63130115588e0 100644
--- a/clang/test/Driver/hip-code-object-version.hip
+++ b/clang/test/Driver/hip-code-object-version.hip
@@ -23,6 +23,18 @@
// V5: "-mllvm" "--amdhsa-code-object-version=5"
// V5: "-targets=host-x86_64-unknown-linux,hipv4-amdgcn-amd-amdhsa--gfx906"
+// Check bundle ID for code object version 6.
+
+// RUN: not %clang -### --target=x86_64-linux-gnu \
+// RUN: -mcode-object-version=6 \
+// RUN: --offload-arch=gfx906 --rocm-path=%S/Inputs/rocm \
+// RUN: %s 2>&1 | FileCheck -check-prefix=V6 %s
+
+// V6: "-mcode-object-version=6"
+// V6: "-mllvm" "--amdhsa-code-object-version=6"
+// V6: "-targets=host-x86_64-unknown-linux,hipv4-amdgcn-amd-amdhsa--gfx906"
+
+
// Check bundle ID for code object version default
// RUN: %clang -### --target=x86_64-linux-gnu \
diff --git a/clang/test/Driver/hip-device-libs.hip b/clang/test/Driver/hip-device-libs.hip
index 6ac5778721ba5b..a998db531d6683 100644
--- a/clang/test/Driver/hip-device-libs.hip
+++ b/clang/test/Driver/hip-device-libs.hip
@@ -187,13 +187,26 @@
// RUN: --rocm-path=%S/Inputs/rocm %S/Inputs/hip_multiple_inputs/b.hip \
// RUN: 2>&1 | FileCheck %s --check-prefixes=ABI5
-// Test -mcode-object-version=5 with old device library without abi_version_400.bc
+// Test -mcode-object-version=5 with old device library without abi_version_500.bc
// RUN: not %clang -### --target=x86_64-linux-gnu --offload-arch=gfx900 \
// RUN: -mcode-object-version=5 \
// RUN: --hip-device-lib-path=%S/Inputs/rocm/amdgcn/bitcode-no-abi-ver \
// RUN: --rocm-path=%S/Inputs/rocm %S/Inputs/hip_multiple_inputs/b.hip \
// RUN: 2>&1 | FileCheck %s --check-prefixes=NOABI5
+// Test -mcode-object-version=6
+// RUN: %clang -### --target=x86_64-linux-gnu --offload-arch=gfx900 \
+// RUN: -mcode-object-version=6 \
+// RUN: --rocm-path=%S/Inputs/rocm %S/Inputs/hip_multiple_inputs/b.hip \
+// RUN: 2>&1 | FileCheck %s --check-prefixes=ABI6
+
+// Test -mcode-object-version=6 with old device library without abi_version_600.bc
+// RUN: not %clang -### --target=x86_64-linux-gnu --offload-arch=gfx900 \
+// RUN: -mcode-object-version=6 \
+// RUN: --hip-device-lib-path=%S/Inputs/rocm/amdgcn/bitcode-no-abi-ver \
+// RUN: --rocm-path=%S/Inputs/rocm %S/Inputs/hip_multiple_inputs/b.hip \
+// RUN: 2>&1 | FileCheck %s --check-prefixes=NOABI6
+
// ALL-NOT: error:
// ALL: {{"[^"]*clang[^"]*"}}
@@ -237,7 +250,10 @@
// ABI4: "-mlink-builtin-bitcode" "{{.*}}oclc_abi_version_400.bc"
// ABI5-NOT: error:
// ABI5: "-mlink-builtin-bitcode" "{{.*}}oclc_abi_version_500.bc"
+// ABI6-NOT: error:
+// ABI6: "-mlink-builtin-bitcode" "{{.*}}oclc_abi_version_600.bc"
// NOABI4-NOT: error:
// NOABI4-NOT: "-mlink-builtin-bitcode" "{{.*}}oclc_abi_version_400.bc"
// NOABI4-NOT: "-mlink-builtin-bitcode" "{{.*}}oclc_abi_version_500.bc"
// NOABI5: error: cannot find ROCm device libraryfor ABI version 5; provide its path via '--rocm-path' or '--rocm-device-lib-path', or pass '-nogpulib' to build without ROCm device library
+// NOABI6: error: cannot find ROCm device libraryfor ABI version 6; provide its path via '--rocm-path' or '--rocm-device-lib-path', or pass '-nogpulib' to build without ROCm device library
diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp
index b65b6e31bea821..cf4b2a38bff7a8 100644
--- a/flang/lib/Frontend/CompilerInvocation.cpp
+++ b/flang/lib/Frontend/CompilerInvocation.cpp
@@ -284,6 +284,8 @@ static void parseCodeGenArgs(Fortran::frontend::CodeGenOptions &opts,
if (const llvm::opt::Arg *a = args.getLastArg(
clang::driver::options::OPT_mcode_object_version_EQ)) {
llvm::StringRef s = a->getValue();
+ if (s == "6")
+ opts.CodeObjectVersion = llvm::CodeObjectVersionKind::COV_6;
if (s == "5")
opts.CodeObjectVersion = llvm::CodeObjectVersionKind::COV_5;
if (s == "4")
diff --git a/flang/test/Lower/AMD/code-object-version.f90 b/flang/test/Lower/AMD/code-object-version.f90
index 7cb9dc079724e7..455f4547252829 100644
--- a/flang/test/Lower/AMD/code-object-version.f90
+++ b/flang/test/Lower/AMD/code-object-version.f90
@@ -3,11 +3,12 @@
!RUN: %flang_fc1 -emit-hlfir -triple amdgcn-amd-amdhsa -target-cpu gfx908 -mcode-object-version=none %s -o - | FileCheck --check-prefix=COV_NONE %s
!RUN: %flang_fc1 -emit-hlfir -triple amdgcn-amd-amdhsa -target-cpu gfx908 -mcode-object-version=4 %s -o - | FileCheck --check-prefix=COV_4 %s
!RUN: %flang_fc1 -emit-hlfir -triple amdgcn-amd-amdhsa -target-cpu gfx908 -mcode-object-version=5 %s -o - | FileCheck --check-prefix=COV_5 %s
+!RUN: %flang_fc1 -emit-hlfir -triple amdgcn-amd-amdhsa -target-cpu gfx908 -mcode-object-version=6 %s -o - | FileCheck --check-prefix=COV_6 %s
!COV_DEFAULT: llvm.mlir.global weak_odr hidden local_unnamed_addr constant @__oclc_ABI_version(400 : i32) {addr_space = 4 : i32} : i32
!COV_NONE-NOT: llvm.mlir.global weak_odr hidden local_unnamed_addr constant @__oclc_ABI_version(400 : i32) {addr_space = 4 : i32} : i32
!COV_4: llvm.mlir.global weak_odr hidden local_unnamed_addr constant @__oclc_ABI_version(400 : i32) {addr_space = 4 : i32} : i32
!COV_5: llvm.mlir.global weak_odr hidden local_unnamed_addr constant @__oclc_ABI_version(500 : i32) {addr_space = 4 : i32} : i32
+!COV_6: llvm.mlir.global weak_odr hidden local_unnamed_addr constant @__oclc_ABI_version(600 : i32) {addr_space = 4 : i32} : i32
subroutine target_simple
end subroutine target_simple
-
diff --git a/lld/ELF/Arch/AMDGPU.cpp b/lld/ELF/Arch/AMDGPU.cpp
index 650744db7dee32..bc1e78cfcc963d 100644
--- a/lld/ELF/Arch/AMDGPU.cpp
+++ b/lld/ELF/Arch/AMDGPU.cpp
@@ -25,6 +25,7 @@ class AMDGPU final : public TargetInfo {
private:
uint32_t calcEFlagsV3() const;
uint32_t calcEFlagsV4() const;
+ uint32_t calcEFlagsV6() const;
public:
AMDGPU();
@@ -106,6 +107,25 @@ uint32_t AMDGPU::calcEFlagsV4() const {
return retMach | retXnack | retSramEcc;
}
+uint32_t AMDGPU::calcEFlagsV6() const {
+ uint32_t flags = calcEFlagsV4();
+
+ uint32_t genericVersion =
+ getEFlags(ctx.objectFiles[0]) & EF_AMDGPU_GENERIC_VERSION;
+
+ // Verify that all input files have compatible generic version.
+ for (InputFile *f : ArrayRef(ctx.objectFiles).slice(1)) {
+ if (genericVersion != (getEFlags(f) & EF_AMDGPU_GENERIC_VERSION)) {
+ // TODO: test
+ error("incompatible generic version: " + toString(f));
+ return 0;
+ }
+ }
+
+ flags |= genericVersion;
+ return flags;
+}
+
uint32_t AMDGPU::calcEFlags() const {
if (ctx.objectFiles.empty())
return 0;
@@ -121,6 +141,8 @@ uint32_t AMDGPU::calcEFlags() const {
case ELFABIVERSION_AMDGPU_HSA_V4:
case ELFABIVERSION_AMDGPU_HSA_V5:
return calcEFlagsV4();
+ case ELFABIVERSION_AMDGPU_HSA_V6:
+ return calcEFlagsV6();
default:
error("unknown abi version: " + Twine(abiVersion));
return 0;
diff --git a/lld/test/ELF/amdgpu-tid.s b/lld/test/ELF/amdgpu-tid.s
index 6623443a4541d7..ee0062eb750c86 100644
--- a/lld/test/ELF/amdgpu-tid.s
+++ b/lld/test/ELF/amdgpu-tid.s
@@ -43,3 +43,19 @@
# SRAMECC-OFF: EF_AMDGPU_FEATURE_SRAMECC_OFF_V4 (0x800)
# SRAMECC-ON: EF_AMDGPU_FEATURE_SRAMECC_ON_V4 (0xC00)
# SRAMECC-INCOMPATIBLE: incompatible sramecc:
+
+# RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=6 --amdgpu-force-generic-version=1 -filetype=obj %s -o %t-genericv1_0.o
+# RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=6 --amdgpu-force-generic-version=1 -filetype=obj %s -o %t-genericv1_1.o
+# RUN: ld.lld -shared %t-genericv1_0.o %t-genericv1_1.o -o %t-genericv1_2.so
+# RUN: llvm-readobj --file-headers %t-genericv1_2.so | FileCheck --check-prefix=GENERICV1 %s
+
+# RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=6 --amdgpu-force-generic-version=2 -filetype=obj %s -o %t-genericv2_0.o
+# RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=6 --amdgpu-force-generic-version=2 -filetype=obj %s -o %t-genericv2_1.o
+# RUN: ld.lld -shared %t-genericv2_0.o %t-genericv2_1.o -o %t-genericv2_2.so
+# RUN: llvm-readobj --file-headers %t-genericv2_2.so | FileCheck --check-prefix=GENERICV2 %s
+
+# RUN: not ld.lld -shared %t-genericv1_0.o %t-genericv2_0.o -o /dev/null 2>&1 | FileCheck --check-prefix=GENERIC-INCOMPATIBLE %s
+
+# GENERICV1: EF_AMDGPU_GENERIC_VERSION_V1 (0x1000000)
+# GENERICV2: EF_AMDGPU_GENERIC_VERSION_V2 (0x2000000)
+# GENERIC-INCOMPATIBLE: incompatible generic version
diff --git a/llvm/include/llvm/BinaryFormat/ELF.h b/llvm/include/llvm/BinaryFormat/ELF.h
index 0f968eac36e72f..6bfdd94b7f372f 100644
--- a/llvm/include/llvm/BinaryFormat/ELF.h
+++ b/llvm/include/llvm/BinaryFormat/ELF.h
@@ -374,7 +374,8 @@ enum {
ELFABIVERSION_AMDGPU_HSA_V2 = 0,
ELFABIVERSION_AMDGPU_HSA_V3 = 1,
ELFABIVERSION_AMDGPU_HSA_V4 = 2,
- ELFABIVERSION_AMDGPU_HSA_V5 = 3
+ ELFABIVERSION_AMDGPU_HSA_V5 = 3,
+ ELFABIVERSION_AMDGPU_HSA_V6 = 4,
};
#define ELF_RELOC(name, value) name = value,
@@ -839,6 +840,15 @@ enum : unsigned {
EF_AMDGPU_FEATURE_SRAMECC_OFF_V4 = 0x800,
// SRAMECC is on.
EF_AMDGPU_FEATURE_SRAMECC_ON_V4 = 0xc00,
+
+ // Generic target versioning. This is contained in the list byte of EFLAGS.
+ EF_AMDGPU_GENERIC_VERSION = 0xff000000,
+ EF_AMDGPU_GENERIC_VERSION_OFFSET = 24,
+ EF_AMDGPU_GENERIC_VERSION_V1 = 0x01000000, // 1 << 24
+ EF_AMDGPU_GENERIC_VERSION_V2 = 0x02000000, // 2 << 24
+ EF_AMDGPU_GENERIC_VERSION_V3 = 0x03000000, // 3 << 24
+ EF_AMDGPU_GENERIC_VERSION_V4 = 0x04000000, // 4 << 24
+ EF_AMDGPU_GENERIC_VERSION_MAX = 4,
};
// ELF Relocation types for AMDGPU
diff --git a/llvm/include/llvm/Support/AMDGPUMetadata.h b/llvm/include/llvm/Support/AMDGPUMetadata.h
index e0838a1f425ea5..4065549277f3b2 100644
--- a/llvm/include/llvm/Support/AMDGPUMetadata.h
+++ b/llvm/include/llvm/Support/AMDGPUMetadata.h
@@ -49,6 +49,11 @@ constexpr uint32_t VersionMajorV5 = 1;
/// HSA metadata minor version for code object V5.
constexpr uint32_t VersionMinorV5 = 2;
+/// HSA metadata major version for code object V5.
+constexpr uint32_t VersionMajorV6 = 1;
+/// HSA metadata minor version for code object V5.
+constexpr uint32_t VersionMinorV6 = 3;
+
/// HSA metadata beginning assembler directive.
constexpr char AssemblerDirectiveBegin[] = ".amd_amdgpu_hsa_metadata";
/// HSA metadata ending assembler directive.
diff --git a/llvm/include/llvm/Support/ScopedPrinter.h b/llvm/include/llvm/Support/ScopedPrinter.h
index aaaed3f5ceac62..7f627cdd90b4ce 100644
--- a/llvm/include/llvm/Support/ScopedPrinter.h
+++ b/llvm/include/llvm/Support/ScopedPrinter.h
@@ -160,7 +160,7 @@ class ScopedPrinter {
template <typename T, typename TFlag>
void printFlags(StringRef Label, T Value, ArrayRef<EnumEntry<TFlag>> Flags,
TFlag EnumMask1 = {}, TFlag EnumMask2 = {},
- TFlag EnumMask3 = {}) {
+ TFlag EnumMask3 = {}, TFlag EnumMask4 = {}) {
SmallVector<FlagEntry, 10> SetFlags;
for (const auto &Flag : Flags) {
@@ -174,6 +174,8 @@ class ScopedPrinter {
EnumMask = EnumMask2;
else if (Flag.Value & EnumMask3)
EnumMask = EnumMask3;
+ else if (Flag.Value & EnumMask4)
+ EnumMask = EnumMask4;
bool IsEnum = (Flag.Value & EnumMask) != 0;
if ((!IsEnum && (Value & Flag.Value) == Flag.Value) ||
(IsEnum && (Value & EnumMask) == Flag.Value)) {
diff --git a/llvm/include/llvm/Target/TargetOptions.h b/llvm/include/llvm/Target/TargetOptions.h
index 4df8...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/76954
More information about the flang-commits
mailing list