[flang] [llvm] [clang] [lld] [AMDGPU] Introduce GFX9/10.1/10.3/11 Generic Targets (PR #76955)
Pierre van Houtryve via cfe-commits
cfe-commits at lists.llvm.org
Wed Jan 10 07:28:07 PST 2024
https://github.com/Pierre-vh updated https://github.com/llvm/llvm-project/pull/76955
>From 6368d8210e211948b5a03ab326b9966977775b8d Mon Sep 17 00:00:00 2001
From: pvanhout <pierre.vanhoutryve at amd.com>
Date: Thu, 4 Jan 2024 14:12:00 +0100
Subject: [PATCH 1/3] [AMDGPU] Introduce Code Object V6
Introduce Code Object V6 in Clang, LLD, Flang and LLVM.
This is the same as V5 except a new "generic version" flag can be present in EFLAGS. This is related to new generic targets that'll be added in a follow-up patch. It's also likely V6 will have new changes (possibly new metadata entries) added later.
Docs change are not included, I'm planning to do them in a follow-up patch all at once (when generic targets land too).
---
clang/include/clang/Driver/Options.td | 4 +-
clang/lib/CodeGen/CGBuiltin.cpp | 6 +-
clang/lib/Driver/ToolChains/CommonArgs.cpp | 2 +-
.../amdgpu-code-object-version-linking.cu | 37 +++
.../CodeGenCUDA/amdgpu-code-object-version.cu | 4 +
.../test/CodeGenCUDA/amdgpu-workgroup-size.cu | 4 +
.../amdgcn/bitcode/oclc_abi_version_600.bc | 0
clang/test/Driver/hip-code-object-version.hip | 12 +
clang/test/Driver/hip-device-libs.hip | 18 +-
flang/lib/Frontend/CompilerInvocation.cpp | 2 +
flang/test/Lower/AMD/code-object-version.f90 | 3 +-
lld/ELF/Arch/AMDGPU.cpp | 21 ++
lld/test/ELF/amdgpu-tid.s | 16 ++
llvm/include/llvm/BinaryFormat/ELF.h | 9 +-
llvm/include/llvm/Support/AMDGPUMetadata.h | 5 +
llvm/include/llvm/Support/ScopedPrinter.h | 4 +-
llvm/include/llvm/Target/TargetOptions.h | 1 +
llvm/lib/ObjectYAML/ELFYAML.cpp | 9 +
llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp | 3 +
.../AMDGPU/AMDGPUHSAMetadataStreamer.cpp | 10 +
.../Target/AMDGPU/AMDGPUHSAMetadataStreamer.h | 11 +-
.../MCTargetDesc/AMDGPUTargetStreamer.cpp | 27 +++
.../MCTargetDesc/AMDGPUTargetStreamer.h | 1 +
.../Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 13 +
llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h | 5 +-
...licit-kernarg-backend-usage-global-isel.ll | 2 +
.../AMDGPU/call-graph-register-usage.ll | 1 +
.../AMDGPU/codegen-internal-only-func.ll | 2 +
llvm/test/CodeGen/AMDGPU/elf-header-osabi.ll | 4 +
.../enable-scratch-only-dynamic-stack.ll | 1 +
.../AMDGPU/implicit-kernarg-backend-usage.ll | 2 +
.../AMDGPU/implicitarg-offset-attributes.ll | 46 ++++
.../AMDGPU/llvm.amdgcn.implicitarg.ptr.ll | 1 +
llvm/test/CodeGen/AMDGPU/non-entry-alloca.ll | 1 +
llvm/test/CodeGen/AMDGPU/recursion.ll | 1 +
.../AMDGPU/resource-usage-dead-function.ll | 1 +
.../AMDGPU/tid-mul-func-xnack-all-any.ll | 6 +
.../tid-mul-func-xnack-all-not-supported.ll | 6 +
.../AMDGPU/tid-mul-func-xnack-all-off.ll | 6 +
.../AMDGPU/tid-mul-func-xnack-all-on.ll | 6 +
.../AMDGPU/tid-mul-func-xnack-any-off-1.ll | 6 +
.../AMDGPU/tid-mul-func-xnack-any-off-2.ll | 6 +
.../AMDGPU/tid-mul-func-xnack-any-on-1.ll | 6 +
.../AMDGPU/tid-mul-func-xnack-any-on-2.ll | 6 +
.../tid-one-func-xnack-not-supported.ll | 6 +
.../CodeGen/AMDGPU/tid-one-func-xnack-off.ll | 6 +
.../CodeGen/AMDGPU/tid-one-func-xnack-on.ll | 6 +
.../MC/AMDGPU/hsa-v5-uses-dynamic-stack.s | 5 +
.../elf-headers.test} | 0
.../ELF/AMDGPU/generic_versions.s | 16 ++
.../ELF/AMDGPU/generic_versions.test | 26 ++
llvm/tools/llvm-readobj/ELFDumper.cpp | 224 ++++++++----------
52 files changed, 491 insertions(+), 135 deletions(-)
create mode 100644 clang/test/Driver/Inputs/rocm/amdgcn/bitcode/oclc_abi_version_600.bc
rename llvm/test/tools/llvm-readobj/ELF/{amdgpu-elf-headers.test => AMDGPU/elf-headers.test} (100%)
create mode 100644 llvm/test/tools/llvm-readobj/ELF/AMDGPU/generic_versions.s
create mode 100644 llvm/test/tools/llvm-readobj/ELF/AMDGPU/generic_versions.test
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index bffdddc28aac60..f9381d0706f447 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -4761,9 +4761,9 @@ defm amdgpu_ieee : BoolOption<"m", "amdgpu-ieee",
def mcode_object_version_EQ : Joined<["-"], "mcode-object-version=">, Group<m_Group>,
HelpText<"Specify code object ABI version. Defaults to 4. (AMDGPU only)">,
Visibility<[ClangOption, FlangOption, CC1Option, FC1Option]>,
- Values<"none,4,5">,
+ Values<"none,4,5,6">,
NormalizedValuesScope<"llvm::CodeObjectVersionKind">,
- NormalizedValues<["COV_None", "COV_4", "COV_5"]>,
+ NormalizedValues<["COV_None", "COV_4", "COV_5", "COV_6"]>,
MarshallingInfoEnum<TargetOpts<"CodeObjectVersion">, "COV_4">;
defm cumode : SimpleMFlag<"cumode",
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index f71dbf1729a1d6..be86731ed912ea 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -17481,9 +17481,9 @@ Value *EmitAMDGPUImplicitArgPtr(CodeGenFunction &CGF) {
// \p Index is 0, 1, and 2 for x, y, and z dimension, respectively.
/// Emit code based on Code Object ABI version.
/// COV_4 : Emit code to use dispatch ptr
-/// COV_5 : Emit code to use implicitarg ptr
+/// COV_5+ : Emit code to use implicitarg ptr
/// COV_NONE : Emit code to load a global variable "__oclc_ABI_version"
-/// and use its value for COV_4 or COV_5 approach. It is used for
+/// and use its value for COV_4 or COV_5+ approach. It is used for
/// compiling device libraries in an ABI-agnostic way.
///
/// Note: "__oclc_ABI_version" is supposed to be emitted and intialized by
@@ -17526,7 +17526,7 @@ Value *EmitAMDGPUWorkGroupSize(CodeGenFunction &CGF, unsigned Index) {
Address(Result, CGF.Int16Ty, CharUnits::fromQuantity(2)));
} else {
Value *GEP = nullptr;
- if (Cov == CodeObjectVersionKind::COV_5) {
+ if (Cov >= CodeObjectVersionKind::COV_5) {
// Indexing the implicit kernarg segment.
GEP = CGF.Builder.CreateConstGEP1_32(
CGF.Int8Ty, EmitAMDGPUImplicitArgPtr(CGF), 12 + Index * 2);
diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp
index 2340191ca97d98..75582f6b5669d5 100644
--- a/clang/lib/Driver/ToolChains/CommonArgs.cpp
+++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp
@@ -2585,7 +2585,7 @@ getAMDGPUCodeObjectArgument(const Driver &D, const llvm::opt::ArgList &Args) {
void tools::checkAMDGPUCodeObjectVersion(const Driver &D,
const llvm::opt::ArgList &Args) {
const unsigned MinCodeObjVer = 4;
- const unsigned MaxCodeObjVer = 5;
+ const unsigned MaxCodeObjVer = 6;
if (auto *CodeObjArg = getAMDGPUCodeObjectArgument(D, Args)) {
if (CodeObjArg->getOption().getID() ==
diff --git a/clang/test/CodeGenCUDA/amdgpu-code-object-version-linking.cu b/clang/test/CodeGenCUDA/amdgpu-code-object-version-linking.cu
index 663687ae227f23..d33acdf7eb8bed 100644
--- a/clang/test/CodeGenCUDA/amdgpu-code-object-version-linking.cu
+++ b/clang/test/CodeGenCUDA/amdgpu-code-object-version-linking.cu
@@ -4,6 +4,9 @@
// RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm-bc \
// RUN: -mcode-object-version=5 -DUSER -x hip -o %t_5.bc %s
+// RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm-bc \
+// RUN: -mcode-object-version=6 -DUSER -x hip -o %t_6.bc %s
+
// RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm-bc \
// RUN: -mcode-object-version=none -DDEVICELIB -x hip -o %t_0.bc %s
@@ -15,6 +18,10 @@
// RUN: %t_5.bc -mlink-builtin-bitcode %t_0.bc -o - |\
// RUN: FileCheck -check-prefix=LINKED5 %s
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -emit-llvm -O3 \
+// RUN: %t_6.bc -mlink-builtin-bitcode %t_0.bc -o - |\
+// RUN: FileCheck -check-prefix=LINKED6 %s
+
#include "Inputs/cuda.h"
// LINKED4: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 400
@@ -77,6 +84,36 @@
// LINKED5: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef
// LINKED5: "amdgpu_code_object_version", i32 500
+// LINKED6: __oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 600
+// LINKED6-LABEL: bar
+// LINKED6-NOT: load i32, ptr addrspacecast (ptr addrspace(4) @__oclc_ABI_version to ptr), align {{.*}}
+// LINKED6-NOT: icmp sge i32 %{{.*}}, 500
+// LINKED6: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
+// LINKED6: [[GEP_5_X:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 12
+// LINKED6: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
+// LINKED6: [[GEP_4_X:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 4
+// LINKED6: select i1 true, ptr addrspace(4) [[GEP_5_X]], ptr addrspace(4) [[GEP_4_X]]
+// LINKED6: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef
+
+// LINKED6-NOT: load i32, ptr addrspacecast (ptr addrspace(4) @__oclc_ABI_version to ptr), align {{.*}}
+// LINKED6-NOT: icmp sge i32 %{{.*}}, 500
+// LINKED6: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
+// LINKED6: [[GEP_5_Y:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 14
+// LINKED6: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
+// LINKED6: [[GEP_4_Y:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 6
+// LINKED6: select i1 true, ptr addrspace(4) [[GEP_5_Y]], ptr addrspace(4) [[GEP_4_Y]]
+// LINKED6: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef
+
+// LINKED6-NOT: load i32, ptr addrspacecast (ptr addrspace(4) @__oclc_ABI_version to ptr), align {{.*}}
+// LINKED6-NOT: icmp sge i32 %{{.*}}, 500
+// LINKED6: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
+// LINKED6: [[GEP_5_Z:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 16
+// LINKED6: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
+// LINKED6: [[GEP_4_Z:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 8
+// LINKED6: select i1 true, ptr addrspace(4) [[GEP_5_Z]], ptr addrspace(4) [[GEP_4_Z]]
+// LINKED6: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef
+// LINKED6: "amdgpu_code_object_version", i32 600
+
#ifdef DEVICELIB
__device__ void bar(int *x, int *y, int *z)
{
diff --git a/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu b/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu
index ff5deaf9ab850d..59636e622731b8 100644
--- a/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu
+++ b/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu
@@ -9,6 +9,9 @@
// RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm \
// RUN: -mcode-object-version=5 -o - %s | FileCheck -check-prefix=V5 %s
+// RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm \
+// RUN: -mcode-object-version=6 -o - %s | FileCheck -check-prefix=V6 %s
+
// RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm \
// RUN: -mcode-object-version=none -o - %s | FileCheck %s -check-prefix=NONE
@@ -17,5 +20,6 @@
// V4: !{{.*}} = !{i32 1, !"amdgpu_code_object_version", i32 400}
// V5: !{{.*}} = !{i32 1, !"amdgpu_code_object_version", i32 500}
+// V6: !{{.*}} = !{i32 1, !"amdgpu_code_object_version", i32 600}
// NONE-NOT: !{{.*}} = !{i32 1, !"amdgpu_code_object_version",
// INV: error: invalid value '4.1' in '-mcode-object-version=4.1'
diff --git a/clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu b/clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu
index 282e0a49b9aa10..7f56fe91704870 100644
--- a/clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu
+++ b/clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu
@@ -7,6 +7,10 @@
// RUN: -fcuda-is-device -mcode-object-version=5 -emit-llvm -o - -x hip %s \
// RUN: | FileCheck -check-prefix=COV5 %s
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa \
+// RUN: -fcuda-is-device -mcode-object-version=6 -emit-llvm -o - -x hip %s \
+// RUN: | FileCheck -check-prefix=COV5 %s
+
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa \
// RUN: -fcuda-is-device -mcode-object-version=none -emit-llvm -o - -x hip %s \
// RUN: | FileCheck -check-prefix=COVNONE %s
diff --git a/clang/test/Driver/Inputs/rocm/amdgcn/bitcode/oclc_abi_version_600.bc b/clang/test/Driver/Inputs/rocm/amdgcn/bitcode/oclc_abi_version_600.bc
new file mode 100644
index 00000000000000..e69de29bb2d1d6
diff --git a/clang/test/Driver/hip-code-object-version.hip b/clang/test/Driver/hip-code-object-version.hip
index af5f9a3da21dfd..d63130115588e0 100644
--- a/clang/test/Driver/hip-code-object-version.hip
+++ b/clang/test/Driver/hip-code-object-version.hip
@@ -23,6 +23,18 @@
// V5: "-mllvm" "--amdhsa-code-object-version=5"
// V5: "-targets=host-x86_64-unknown-linux,hipv4-amdgcn-amd-amdhsa--gfx906"
+// Check bundle ID for code object version 6.
+
+// RUN: not %clang -### --target=x86_64-linux-gnu \
+// RUN: -mcode-object-version=6 \
+// RUN: --offload-arch=gfx906 --rocm-path=%S/Inputs/rocm \
+// RUN: %s 2>&1 | FileCheck -check-prefix=V6 %s
+
+// V6: "-mcode-object-version=6"
+// V6: "-mllvm" "--amdhsa-code-object-version=6"
+// V6: "-targets=host-x86_64-unknown-linux,hipv4-amdgcn-amd-amdhsa--gfx906"
+
+
// Check bundle ID for code object version default
// RUN: %clang -### --target=x86_64-linux-gnu \
diff --git a/clang/test/Driver/hip-device-libs.hip b/clang/test/Driver/hip-device-libs.hip
index 6ac5778721ba5b..a998db531d6683 100644
--- a/clang/test/Driver/hip-device-libs.hip
+++ b/clang/test/Driver/hip-device-libs.hip
@@ -187,13 +187,26 @@
// RUN: --rocm-path=%S/Inputs/rocm %S/Inputs/hip_multiple_inputs/b.hip \
// RUN: 2>&1 | FileCheck %s --check-prefixes=ABI5
-// Test -mcode-object-version=5 with old device library without abi_version_400.bc
+// Test -mcode-object-version=5 with old device library without abi_version_500.bc
// RUN: not %clang -### --target=x86_64-linux-gnu --offload-arch=gfx900 \
// RUN: -mcode-object-version=5 \
// RUN: --hip-device-lib-path=%S/Inputs/rocm/amdgcn/bitcode-no-abi-ver \
// RUN: --rocm-path=%S/Inputs/rocm %S/Inputs/hip_multiple_inputs/b.hip \
// RUN: 2>&1 | FileCheck %s --check-prefixes=NOABI5
+// Test -mcode-object-version=6
+// RUN: %clang -### --target=x86_64-linux-gnu --offload-arch=gfx900 \
+// RUN: -mcode-object-version=6 \
+// RUN: --rocm-path=%S/Inputs/rocm %S/Inputs/hip_multiple_inputs/b.hip \
+// RUN: 2>&1 | FileCheck %s --check-prefixes=ABI6
+
+// Test -mcode-object-version=6 with old device library without abi_version_600.bc
+// RUN: not %clang -### --target=x86_64-linux-gnu --offload-arch=gfx900 \
+// RUN: -mcode-object-version=6 \
+// RUN: --hip-device-lib-path=%S/Inputs/rocm/amdgcn/bitcode-no-abi-ver \
+// RUN: --rocm-path=%S/Inputs/rocm %S/Inputs/hip_multiple_inputs/b.hip \
+// RUN: 2>&1 | FileCheck %s --check-prefixes=NOABI6
+
// ALL-NOT: error:
// ALL: {{"[^"]*clang[^"]*"}}
@@ -237,7 +250,10 @@
// ABI4: "-mlink-builtin-bitcode" "{{.*}}oclc_abi_version_400.bc"
// ABI5-NOT: error:
// ABI5: "-mlink-builtin-bitcode" "{{.*}}oclc_abi_version_500.bc"
+// ABI6-NOT: error:
+// ABI6: "-mlink-builtin-bitcode" "{{.*}}oclc_abi_version_600.bc"
// NOABI4-NOT: error:
// NOABI4-NOT: "-mlink-builtin-bitcode" "{{.*}}oclc_abi_version_400.bc"
// NOABI4-NOT: "-mlink-builtin-bitcode" "{{.*}}oclc_abi_version_500.bc"
// NOABI5: error: cannot find ROCm device libraryfor ABI version 5; provide its path via '--rocm-path' or '--rocm-device-lib-path', or pass '-nogpulib' to build without ROCm device library
+// NOABI6: error: cannot find ROCm device libraryfor ABI version 6; provide its path via '--rocm-path' or '--rocm-device-lib-path', or pass '-nogpulib' to build without ROCm device library
diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp
index b65b6e31bea821..cf4b2a38bff7a8 100644
--- a/flang/lib/Frontend/CompilerInvocation.cpp
+++ b/flang/lib/Frontend/CompilerInvocation.cpp
@@ -284,6 +284,8 @@ static void parseCodeGenArgs(Fortran::frontend::CodeGenOptions &opts,
if (const llvm::opt::Arg *a = args.getLastArg(
clang::driver::options::OPT_mcode_object_version_EQ)) {
llvm::StringRef s = a->getValue();
+ if (s == "6")
+ opts.CodeObjectVersion = llvm::CodeObjectVersionKind::COV_6;
if (s == "5")
opts.CodeObjectVersion = llvm::CodeObjectVersionKind::COV_5;
if (s == "4")
diff --git a/flang/test/Lower/AMD/code-object-version.f90 b/flang/test/Lower/AMD/code-object-version.f90
index 7cb9dc079724e7..455f4547252829 100644
--- a/flang/test/Lower/AMD/code-object-version.f90
+++ b/flang/test/Lower/AMD/code-object-version.f90
@@ -3,11 +3,12 @@
!RUN: %flang_fc1 -emit-hlfir -triple amdgcn-amd-amdhsa -target-cpu gfx908 -mcode-object-version=none %s -o - | FileCheck --check-prefix=COV_NONE %s
!RUN: %flang_fc1 -emit-hlfir -triple amdgcn-amd-amdhsa -target-cpu gfx908 -mcode-object-version=4 %s -o - | FileCheck --check-prefix=COV_4 %s
!RUN: %flang_fc1 -emit-hlfir -triple amdgcn-amd-amdhsa -target-cpu gfx908 -mcode-object-version=5 %s -o - | FileCheck --check-prefix=COV_5 %s
+!RUN: %flang_fc1 -emit-hlfir -triple amdgcn-amd-amdhsa -target-cpu gfx908 -mcode-object-version=6 %s -o - | FileCheck --check-prefix=COV_6 %s
!COV_DEFAULT: llvm.mlir.global weak_odr hidden local_unnamed_addr constant @__oclc_ABI_version(400 : i32) {addr_space = 4 : i32} : i32
!COV_NONE-NOT: llvm.mlir.global weak_odr hidden local_unnamed_addr constant @__oclc_ABI_version(400 : i32) {addr_space = 4 : i32} : i32
!COV_4: llvm.mlir.global weak_odr hidden local_unnamed_addr constant @__oclc_ABI_version(400 : i32) {addr_space = 4 : i32} : i32
!COV_5: llvm.mlir.global weak_odr hidden local_unnamed_addr constant @__oclc_ABI_version(500 : i32) {addr_space = 4 : i32} : i32
+!COV_6: llvm.mlir.global weak_odr hidden local_unnamed_addr constant @__oclc_ABI_version(600 : i32) {addr_space = 4 : i32} : i32
subroutine target_simple
end subroutine target_simple
-
diff --git a/lld/ELF/Arch/AMDGPU.cpp b/lld/ELF/Arch/AMDGPU.cpp
index 650744db7dee32..d9440acec9ddad 100644
--- a/lld/ELF/Arch/AMDGPU.cpp
+++ b/lld/ELF/Arch/AMDGPU.cpp
@@ -25,6 +25,7 @@ class AMDGPU final : public TargetInfo {
private:
uint32_t calcEFlagsV3() const;
uint32_t calcEFlagsV4() const;
+ uint32_t calcEFlagsV6() const;
public:
AMDGPU();
@@ -106,6 +107,24 @@ uint32_t AMDGPU::calcEFlagsV4() const {
return retMach | retXnack | retSramEcc;
}
+uint32_t AMDGPU::calcEFlagsV6() const {
+ uint32_t flags = calcEFlagsV4();
+
+ uint32_t genericVersion =
+ getEFlags(ctx.objectFiles[0]) & EF_AMDGPU_GENERIC_VERSION;
+
+ // Verify that all input files have compatible generic version.
+ for (InputFile *f : ArrayRef(ctx.objectFiles).slice(1)) {
+ if (genericVersion != (getEFlags(f) & EF_AMDGPU_GENERIC_VERSION)) {
+ error("incompatible generic version: " + toString(f));
+ return 0;
+ }
+ }
+
+ flags |= genericVersion;
+ return flags;
+}
+
uint32_t AMDGPU::calcEFlags() const {
if (ctx.objectFiles.empty())
return 0;
@@ -121,6 +140,8 @@ uint32_t AMDGPU::calcEFlags() const {
case ELFABIVERSION_AMDGPU_HSA_V4:
case ELFABIVERSION_AMDGPU_HSA_V5:
return calcEFlagsV4();
+ case ELFABIVERSION_AMDGPU_HSA_V6:
+ return calcEFlagsV6();
default:
error("unknown abi version: " + Twine(abiVersion));
return 0;
diff --git a/lld/test/ELF/amdgpu-tid.s b/lld/test/ELF/amdgpu-tid.s
index 6623443a4541d7..ee0062eb750c86 100644
--- a/lld/test/ELF/amdgpu-tid.s
+++ b/lld/test/ELF/amdgpu-tid.s
@@ -43,3 +43,19 @@
# SRAMECC-OFF: EF_AMDGPU_FEATURE_SRAMECC_OFF_V4 (0x800)
# SRAMECC-ON: EF_AMDGPU_FEATURE_SRAMECC_ON_V4 (0xC00)
# SRAMECC-INCOMPATIBLE: incompatible sramecc:
+
+# RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=6 --amdgpu-force-generic-version=1 -filetype=obj %s -o %t-genericv1_0.o
+# RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=6 --amdgpu-force-generic-version=1 -filetype=obj %s -o %t-genericv1_1.o
+# RUN: ld.lld -shared %t-genericv1_0.o %t-genericv1_1.o -o %t-genericv1_2.so
+# RUN: llvm-readobj --file-headers %t-genericv1_2.so | FileCheck --check-prefix=GENERICV1 %s
+
+# RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=6 --amdgpu-force-generic-version=2 -filetype=obj %s -o %t-genericv2_0.o
+# RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=6 --amdgpu-force-generic-version=2 -filetype=obj %s -o %t-genericv2_1.o
+# RUN: ld.lld -shared %t-genericv2_0.o %t-genericv2_1.o -o %t-genericv2_2.so
+# RUN: llvm-readobj --file-headers %t-genericv2_2.so | FileCheck --check-prefix=GENERICV2 %s
+
+# RUN: not ld.lld -shared %t-genericv1_0.o %t-genericv2_0.o -o /dev/null 2>&1 | FileCheck --check-prefix=GENERIC-INCOMPATIBLE %s
+
+# GENERICV1: EF_AMDGPU_GENERIC_VERSION_V1 (0x1000000)
+# GENERICV2: EF_AMDGPU_GENERIC_VERSION_V2 (0x2000000)
+# GENERIC-INCOMPATIBLE: incompatible generic version
diff --git a/llvm/include/llvm/BinaryFormat/ELF.h b/llvm/include/llvm/BinaryFormat/ELF.h
index 9b8128a9ec4060..256b4229e66bd7 100644
--- a/llvm/include/llvm/BinaryFormat/ELF.h
+++ b/llvm/include/llvm/BinaryFormat/ELF.h
@@ -375,7 +375,8 @@ enum {
ELFABIVERSION_AMDGPU_HSA_V2 = 0,
ELFABIVERSION_AMDGPU_HSA_V3 = 1,
ELFABIVERSION_AMDGPU_HSA_V4 = 2,
- ELFABIVERSION_AMDGPU_HSA_V5 = 3
+ ELFABIVERSION_AMDGPU_HSA_V5 = 3,
+ ELFABIVERSION_AMDGPU_HSA_V6 = 4,
};
#define ELF_RELOC(name, value) name = value,
@@ -840,6 +841,12 @@ enum : unsigned {
EF_AMDGPU_FEATURE_SRAMECC_OFF_V4 = 0x800,
// SRAMECC is on.
EF_AMDGPU_FEATURE_SRAMECC_ON_V4 = 0xc00,
+
+ // Generic target versioning. This is contained in the list byte of EFLAGS.
+ EF_AMDGPU_GENERIC_VERSION = 0xff000000,
+ EF_AMDGPU_GENERIC_VERSION_OFFSET = 24,
+ EF_AMDGPU_GENERIC_VERSION_MIN = 1,
+ EF_AMDGPU_GENERIC_VERSION_MAX = 0xff,
};
// ELF Relocation types for AMDGPU
diff --git a/llvm/include/llvm/Support/AMDGPUMetadata.h b/llvm/include/llvm/Support/AMDGPUMetadata.h
index e0838a1f425ea5..4065549277f3b2 100644
--- a/llvm/include/llvm/Support/AMDGPUMetadata.h
+++ b/llvm/include/llvm/Support/AMDGPUMetadata.h
@@ -49,6 +49,11 @@ constexpr uint32_t VersionMajorV5 = 1;
/// HSA metadata minor version for code object V5.
constexpr uint32_t VersionMinorV5 = 2;
+/// HSA metadata major version for code object V5.
+constexpr uint32_t VersionMajorV6 = 1;
+/// HSA metadata minor version for code object V5.
+constexpr uint32_t VersionMinorV6 = 3;
+
/// HSA metadata beginning assembler directive.
constexpr char AssemblerDirectiveBegin[] = ".amd_amdgpu_hsa_metadata";
/// HSA metadata ending assembler directive.
diff --git a/llvm/include/llvm/Support/ScopedPrinter.h b/llvm/include/llvm/Support/ScopedPrinter.h
index aaaed3f5ceac62..7f627cdd90b4ce 100644
--- a/llvm/include/llvm/Support/ScopedPrinter.h
+++ b/llvm/include/llvm/Support/ScopedPrinter.h
@@ -160,7 +160,7 @@ class ScopedPrinter {
template <typename T, typename TFlag>
void printFlags(StringRef Label, T Value, ArrayRef<EnumEntry<TFlag>> Flags,
TFlag EnumMask1 = {}, TFlag EnumMask2 = {},
- TFlag EnumMask3 = {}) {
+ TFlag EnumMask3 = {}, TFlag EnumMask4 = {}) {
SmallVector<FlagEntry, 10> SetFlags;
for (const auto &Flag : Flags) {
@@ -174,6 +174,8 @@ class ScopedPrinter {
EnumMask = EnumMask2;
else if (Flag.Value & EnumMask3)
EnumMask = EnumMask3;
+ else if (Flag.Value & EnumMask4)
+ EnumMask = EnumMask4;
bool IsEnum = (Flag.Value & EnumMask) != 0;
if ((!IsEnum && (Value & Flag.Value) == Flag.Value) ||
(IsEnum && (Value & EnumMask) == Flag.Value)) {
diff --git a/llvm/include/llvm/Target/TargetOptions.h b/llvm/include/llvm/Target/TargetOptions.h
index 4df897c047a38a..c977e450c489d5 100644
--- a/llvm/include/llvm/Target/TargetOptions.h
+++ b/llvm/include/llvm/Target/TargetOptions.h
@@ -129,6 +129,7 @@ namespace llvm {
COV_3 = 300, // Unsupported.
COV_4 = 400,
COV_5 = 500,
+ COV_6 = 600,
};
class TargetOptions {
diff --git a/llvm/lib/ObjectYAML/ELFYAML.cpp b/llvm/lib/ObjectYAML/ELFYAML.cpp
index 6ad4a067415ac8..ff7151e016a9ba 100644
--- a/llvm/lib/ObjectYAML/ELFYAML.cpp
+++ b/llvm/lib/ObjectYAML/ELFYAML.cpp
@@ -620,6 +620,15 @@ void ScalarBitSetTraits<ELFYAML::ELF_EF>::bitset(IO &IO,
BCase(EF_AMDGPU_FEATURE_XNACK_V3);
BCase(EF_AMDGPU_FEATURE_SRAMECC_V3);
break;
+ case ELF::ELFABIVERSION_AMDGPU_HSA_V6:
+ for (unsigned K = ELF::EF_AMDGPU_GENERIC_VERSION_MIN;
+ K <= ELF::EF_AMDGPU_GENERIC_VERSION_MAX; ++K) {
+ std::string Key = "EF_AMDGPU_GENERIC_VERSION_V" + std::to_string(K);
+ IO.maskedBitSetCase(Value, Key.c_str(),
+ K << ELF::EF_AMDGPU_GENERIC_VERSION_OFFSET,
+ ELF::EF_AMDGPU_GENERIC_VERSION);
+ }
+ [[fallthrough]];
case ELF::ELFABIVERSION_AMDGPU_HSA_V4:
case ELF::ELFABIVERSION_AMDGPU_HSA_V5:
BCaseMask(EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index d317a733d4331c..3185be373454b4 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -333,6 +333,9 @@ bool AMDGPUAsmPrinter::doInitialization(Module &M) {
case AMDGPU::AMDHSA_COV5:
HSAMetadataStream.reset(new HSAMD::MetadataStreamerMsgPackV5());
break;
+ case AMDGPU::AMDHSA_COV6:
+ HSAMetadataStream.reset(new HSAMD::MetadataStreamerMsgPackV6());
+ break;
default:
report_fatal_error("Unexpected code object version");
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
index 74e9cd7d09654c..69ef5cde199776 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
@@ -677,6 +677,16 @@ void MetadataStreamerMsgPackV5::emitKernelAttrs(const Function &Func,
Kern[".uniform_work_group_size"] = Kern.getDocument()->getNode(1);
}
+//===----------------------------------------------------------------------===//
+// HSAMetadataStreamerV6
+//===----------------------------------------------------------------------===//
+
+void MetadataStreamerMsgPackV6::emitVersion() {
+ auto Version = HSAMetadataDoc->getArrayNode();
+ Version.push_back(Version.getDocument()->getNode(VersionMajorV6));
+ Version.push_back(Version.getDocument()->getNode(VersionMinorV6));
+ getRootMetadata("amdhsa.version") = Version;
+}
} // end namespace HSAMD
} // end namespace AMDGPU
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h
index 6d6bd86711b13f..26229af638f22a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h
@@ -135,7 +135,7 @@ class MetadataStreamerMsgPackV4 : public MetadataStreamer {
const SIProgramInfo &ProgramInfo) override;
};
-class MetadataStreamerMsgPackV5 final : public MetadataStreamerMsgPackV4 {
+class MetadataStreamerMsgPackV5 : public MetadataStreamerMsgPackV4 {
protected:
void emitVersion() override;
void emitHiddenKernelArgs(const MachineFunction &MF, unsigned &Offset,
@@ -147,6 +147,15 @@ class MetadataStreamerMsgPackV5 final : public MetadataStreamerMsgPackV4 {
~MetadataStreamerMsgPackV5() = default;
};
+class MetadataStreamerMsgPackV6 final : public MetadataStreamerMsgPackV5 {
+protected:
+ void emitVersion() override;
+
+public:
+ MetadataStreamerMsgPackV6() = default;
+ ~MetadataStreamerMsgPackV6() = default;
+};
+
} // end namespace HSAMD
} // end namespace AMDGPU
} // end namespace llvm
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
index e135a4e25dd15a..8b3822677345d8 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
@@ -25,6 +25,7 @@
#include "llvm/Support/AMDGPUMetadata.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/TargetParser/TargetParser.h"
@@ -35,6 +36,12 @@ using namespace llvm::AMDGPU;
// AMDGPUTargetStreamer
//===----------------------------------------------------------------------===//
+static cl::opt<unsigned>
+ ForceGenericVersion("amdgpu-force-generic-version",
+ cl::desc("Force a specific generic_v<N> flag to be "
+ "added. For testing purposes only."),
+ cl::ReallyHidden, cl::init(0));
+
static void convertIsaVersionV2(uint32_t &Major, uint32_t &Minor,
uint32_t &Stepping, bool Sramecc, bool Xnack) {
if (Major == 9 && Minor == 0) {
@@ -434,6 +441,7 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
break;
case AMDGPU::AMDHSA_COV4:
case AMDGPU::AMDHSA_COV5:
+ case AMDGPU::AMDHSA_COV6:
if (getTargetID()->isXnackSupported())
OS << "\t\t.amdhsa_reserve_xnack_mask " << getTargetID()->isXnackOnOrAny() << '\n';
break;
@@ -623,6 +631,8 @@ unsigned AMDGPUTargetELFStreamer::getEFlagsAMDHSA() {
case ELF::ELFABIVERSION_AMDGPU_HSA_V4:
case ELF::ELFABIVERSION_AMDGPU_HSA_V5:
return getEFlagsV4();
+ case ELF::ELFABIVERSION_AMDGPU_HSA_V6:
+ return getEFlagsV6();
}
}
@@ -697,6 +707,23 @@ unsigned AMDGPUTargetELFStreamer::getEFlagsV4() {
return EFlagsV4;
}
+unsigned AMDGPUTargetELFStreamer::getEFlagsV6() {
+ unsigned Flags = getEFlagsV4();
+
+ unsigned Version = ForceGenericVersion;
+
+ // Versions start at 1.
+ if (Version) {
+ if (Version > ELF::EF_AMDGPU_GENERIC_VERSION_MAX)
+ report_fatal_error("Cannot encode generic code object version " +
+ Twine(Version) +
+ " - no ELF flag can represent this version!");
+ Flags |= (Version << ELF::EF_AMDGPU_GENERIC_VERSION_OFFSET);
+ }
+
+ return Flags;
+}
+
void AMDGPUTargetELFStreamer::EmitDirectiveAMDGCNTarget() {}
void AMDGPUTargetELFStreamer::EmitDirectiveHSACodeObjectVersion(
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
index 55b5246c92100a..a2f9b414dd50c5 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
@@ -188,6 +188,7 @@ class AMDGPUTargetELFStreamer final : public AMDGPUTargetStreamer {
unsigned getEFlagsV3();
unsigned getEFlagsV4();
+ unsigned getEFlagsV6();
public:
AMDGPUTargetELFStreamer(MCStreamer &S, const MCSubtargetInfo &STI);
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 26ba2575ff34ac..e8b9cec90fc69f 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -132,6 +132,8 @@ std::optional<uint8_t> getHsaAbiVersion(const MCSubtargetInfo *STI) {
return ELF::ELFABIVERSION_AMDGPU_HSA_V4;
case 5:
return ELF::ELFABIVERSION_AMDGPU_HSA_V5;
+ case 6:
+ return ELF::ELFABIVERSION_AMDGPU_HSA_V6;
default:
report_fatal_error(Twine("Unsupported AMDHSA Code Object Version ") +
Twine(AmdhsaCodeObjectVersion));
@@ -150,6 +152,12 @@ bool isHsaAbiVersion5(const MCSubtargetInfo *STI) {
return false;
}
+bool isHsaAbiVersion6(const MCSubtargetInfo *STI) {
+ if (std::optional<uint8_t> HsaAbiVer = getHsaAbiVersion(STI))
+ return *HsaAbiVer == ELF::ELFABIVERSION_AMDGPU_HSA_V6;
+ return false;
+}
+
unsigned getAmdhsaCodeObjectVersion() {
return AmdhsaCodeObjectVersion;
}
@@ -169,6 +177,7 @@ unsigned getMultigridSyncArgImplicitArgPosition(unsigned CodeObjectVersion) {
case AMDHSA_COV4:
return 48;
case AMDHSA_COV5:
+ case AMDHSA_COV6:
default:
return AMDGPU::ImplicitArg::MULTIGRID_SYNC_ARG_OFFSET;
}
@@ -182,6 +191,7 @@ unsigned getHostcallImplicitArgPosition(unsigned CodeObjectVersion) {
case AMDHSA_COV4:
return 24;
case AMDHSA_COV5:
+ case AMDHSA_COV6:
default:
return AMDGPU::ImplicitArg::HOSTCALL_PTR_OFFSET;
}
@@ -192,6 +202,7 @@ unsigned getDefaultQueueImplicitArgPosition(unsigned CodeObjectVersion) {
case AMDHSA_COV4:
return 32;
case AMDHSA_COV5:
+ case AMDHSA_COV6:
default:
return AMDGPU::ImplicitArg::DEFAULT_QUEUE_OFFSET;
}
@@ -202,6 +213,7 @@ unsigned getCompletionActionImplicitArgPosition(unsigned CodeObjectVersion) {
case AMDHSA_COV4:
return 40;
case AMDHSA_COV5:
+ case AMDHSA_COV6:
default:
return AMDGPU::ImplicitArg::COMPLETION_ACTION_OFFSET;
}
@@ -782,6 +794,7 @@ std::string AMDGPUTargetID::toString() const {
switch (CodeObjectVersion) {
case AMDGPU::AMDHSA_COV4:
case AMDGPU::AMDHSA_COV5:
+ case AMDGPU::AMDHSA_COV6:
// sramecc.
if (getSramEccSetting() == TargetIDSetting::Off)
Features += ":sramecc-";
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index 50c741760d7143..c2fdb4e817099c 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -42,7 +42,7 @@ namespace AMDGPU {
struct IsaVersion;
-enum { AMDHSA_COV4 = 4, AMDHSA_COV5 = 5 };
+enum { AMDHSA_COV4 = 4, AMDHSA_COV5 = 5, AMDHSA_COV6 = 6 };
/// \returns True if \p STI is AMDHSA.
bool isHsaAbi(const MCSubtargetInfo &STI);
@@ -54,6 +54,9 @@ bool isHsaAbiVersion4(const MCSubtargetInfo *STI);
/// \returns True if HSA OS ABI Version identification is 5,
/// false otherwise.
bool isHsaAbiVersion5(const MCSubtargetInfo *STI);
+/// \returns True if HSA OS ABI Version identification is 6,
+/// false otherwise.
+bool isHsaAbiVersion6(const MCSubtargetInfo *STI);
/// \returns The offset of the multigrid_sync_arg argument from implicitarg_ptr
unsigned getMultigridSyncArgImplicitArgPosition(unsigned COV);
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/implicit-kernarg-backend-usage-global-isel.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/implicit-kernarg-backend-usage-global-isel.ll
index 4bdbe6604782a8..03374e62e7e9f6 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/implicit-kernarg-backend-usage-global-isel.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/implicit-kernarg-backend-usage-global-isel.ll
@@ -1,9 +1,11 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 | FileCheck --check-prefix=GFX8V4 %s
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 | FileCheck --check-prefix=GFX8V5 %s
+; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 | FileCheck --check-prefix=GFX8V5 %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 | FileCheck --check-prefixes=GFX9V4 %s
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 | FileCheck --check-prefixes=GFX9V5 %s
+; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 | FileCheck --check-prefixes=GFX9V5 %s
define amdgpu_kernel void @addrspacecast(ptr addrspace(5) %ptr.private, ptr addrspace(3) %ptr.local) {
; GFX8V4-LABEL: addrspacecast:
diff --git a/llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll b/llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll
index bae693ba2fa3be..2e43f685fd70a0 100644
--- a/llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll
@@ -1,5 +1,6 @@
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -enable-ipra=0 -verify-machineinstrs | FileCheck -check-prefixes=GCN,CI %s
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -enable-ipra=0 -verify-machineinstrs | FileCheck -check-prefixes=GCN-V5 %s
+; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -enable-ipra=0 -verify-machineinstrs | FileCheck -check-prefixes=GCN-V5 %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -enable-ipra=0 -verify-machineinstrs | FileCheck -check-prefixes=GCN,VI,VI-NOBUG %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=iceland -enable-ipra=0 -verify-machineinstrs | FileCheck -check-prefixes=GCN,VI,VI-BUG %s
diff --git a/llvm/test/CodeGen/AMDGPU/codegen-internal-only-func.ll b/llvm/test/CodeGen/AMDGPU/codegen-internal-only-func.ll
index 680fae1869600e..a47a54e3e1d668 100644
--- a/llvm/test/CodeGen/AMDGPU/codegen-internal-only-func.ll
+++ b/llvm/test/CodeGen/AMDGPU/codegen-internal-only-func.ll
@@ -2,6 +2,7 @@
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck -check-prefixes=OPT,COV4 %s
; RUN: not llc --crash -O0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=null %s
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck -check-prefixes=OPT,COV5 %s
+; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck -check-prefixes=OPT,COV6 %s
; AMDGPUAttributor deletes the function "by accident" so it's never
; codegened with optimizations.
@@ -17,6 +18,7 @@
; OPT-NEXT: - 1
; COV4: - 1
; COV5: - 2
+; COV6: - 3
; OPT: ...
define internal i32 @func() {
ret i32 0
diff --git a/llvm/test/CodeGen/AMDGPU/elf-header-osabi.ll b/llvm/test/CodeGen/AMDGPU/elf-header-osabi.ll
index f8fc3e1e764801..8178fecbbbe5f4 100644
--- a/llvm/test/CodeGen/AMDGPU/elf-header-osabi.ll
+++ b/llvm/test/CodeGen/AMDGPU/elf-header-osabi.ll
@@ -7,6 +7,9 @@
; RUN: llc -filetype=obj -mtriple=amdgcn--amdhsa -mcpu=gfx801 --amdhsa-code-object-version=5 < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=HSA,HSA5 %s
; RUN: llc -filetype=obj -mtriple=amdgcn-amd-amdhsa -mcpu=gfx801 --amdhsa-code-object-version=5 < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=HSA,HSA5 %s
; RUN: llc -filetype=obj -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx801 --amdhsa-code-object-version=5 < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=HSA,HSA5 %s
+; RUN: llc -filetype=obj -mtriple=amdgcn--amdhsa -mcpu=gfx801 --amdhsa-code-object-version=6 < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=HSA,HSA6 %s
+; RUN: llc -filetype=obj -mtriple=amdgcn-amd-amdhsa -mcpu=gfx801 --amdhsa-code-object-version=6 < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=HSA,HSA6 %s
+; RUN: llc -filetype=obj -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx801 --amdhsa-code-object-version=6 < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=HSA,HSA6 %s
; RUN: llc -filetype=obj -mtriple=amdgcn--amdpal -mcpu=gfx801 < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=PAL %s
; RUN: llc -filetype=obj -mtriple=amdgcn-amd-amdpal -mcpu=gfx801 < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=PAL %s
; RUN: llc -filetype=obj -mtriple=amdgcn-unknown-amdpal -mcpu=gfx801 < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=PAL %s
@@ -18,6 +21,7 @@
; HSA: OS/ABI: AMDGPU_HSA (0x40)
; HSA4: ABIVersion: 2
; HSA5: ABIVersion: 3
+; HSA6: ABIVersion: 4
; PAL: OS/ABI: AMDGPU_PAL (0x41)
; PAL: ABIVersion: 0
; MESA3D: OS/ABI: AMDGPU_MESA3D (0x42)
diff --git a/llvm/test/CodeGen/AMDGPU/enable-scratch-only-dynamic-stack.ll b/llvm/test/CodeGen/AMDGPU/enable-scratch-only-dynamic-stack.ll
index 22f90682aa9738..d91c899a27ebf3 100644
--- a/llvm/test/CodeGen/AMDGPU/enable-scratch-only-dynamic-stack.ll
+++ b/llvm/test/CodeGen/AMDGPU/enable-scratch-only-dynamic-stack.ll
@@ -1,3 +1,4 @@
+; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck -check-prefixes=GCN,COV5 %s
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck -check-prefixes=GCN,COV5 %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck -check-prefixes=GCN,COV4 %s
diff --git a/llvm/test/CodeGen/AMDGPU/implicit-kernarg-backend-usage.ll b/llvm/test/CodeGen/AMDGPU/implicit-kernarg-backend-usage.ll
index 9e6c0ef86906dd..30fe4a80e693b9 100644
--- a/llvm/test/CodeGen/AMDGPU/implicit-kernarg-backend-usage.ll
+++ b/llvm/test/CodeGen/AMDGPU/implicit-kernarg-backend-usage.ll
@@ -1,9 +1,11 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 | FileCheck --check-prefix=GFX8V4 %s
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 | FileCheck --check-prefix=GFX8V5 %s
+; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 | FileCheck --check-prefix=GFX8V5 %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 | FileCheck --check-prefixes=GFX9V4 %s
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 | FileCheck --check-prefixes=GFX9V5 %s
+; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 | FileCheck --check-prefixes=GFX9V5 %s
define amdgpu_kernel void @addrspacecast(ptr addrspace(5) %ptr.private, ptr addrspace(3) %ptr.local) {
; GFX8V4-LABEL: addrspacecast:
diff --git a/llvm/test/CodeGen/AMDGPU/implicitarg-offset-attributes.ll b/llvm/test/CodeGen/AMDGPU/implicitarg-offset-attributes.ll
index d5590754d78bc7..a8263a317baac8 100644
--- a/llvm/test/CodeGen/AMDGPU/implicitarg-offset-attributes.ll
+++ b/llvm/test/CodeGen/AMDGPU/implicitarg-offset-attributes.ll
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | opt -S -mtriple=amdgcn-unknown-unknown -passes=amdgpu-attributor | FileCheck -check-prefixes=CHECK,V4 %s
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | opt -S -mtriple=amdgcn-unknown-unknown -passes=amdgpu-attributor | FileCheck -check-prefixes=CHECK,V5 %s
+; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | opt -S -mtriple=amdgcn-unknown-unknown -passes=amdgpu-attributor | FileCheck -check-prefixes=CHECK,V6 %s
declare ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() #0
@@ -122,6 +123,15 @@ define amdgpu_kernel void @test_completion_action_offset_v4_0(ptr addrspace(1) %
; V5-NEXT: [[LOAD:%.*]] = load ptr, ptr addrspace(4) [[GEP]], align 8
; V5-NEXT: store ptr [[LOAD]], ptr addrspace(1) [[KERNARG]], align 8
; V5-NEXT: ret void
+;
+; V6-LABEL: define {{[^@]+}}@test_completion_action_offset_v4_0
+; V6-SAME: (ptr addrspace(1) [[KERNARG:%.*]]) #[[ATTR2]] {
+; V6-NEXT: call void @use_everything_else()
+; V6-NEXT: [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
+; V6-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 40
+; V6-NEXT: [[LOAD:%.*]] = load ptr, ptr addrspace(4) [[GEP]], align 8
+; V6-NEXT: store ptr [[LOAD]], ptr addrspace(1) [[KERNARG]], align 8
+; V6-NEXT: ret void
;
call void @use_everything_else()
%implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
@@ -149,6 +159,15 @@ define amdgpu_kernel void @test_completion_action_offset_v5_0(ptr addrspace(1) %
; V5-NEXT: [[LOAD:%.*]] = load ptr, ptr addrspace(4) [[GEP]], align 8
; V5-NEXT: store ptr [[LOAD]], ptr addrspace(1) [[KERNARG]], align 8
; V5-NEXT: ret void
+;
+; V6-LABEL: define {{[^@]+}}@test_completion_action_offset_v5_0
+; V6-SAME: (ptr addrspace(1) [[KERNARG:%.*]]) #[[ATTR4:[0-9]+]] {
+; V6-NEXT: call void @use_everything_else()
+; V6-NEXT: [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
+; V6-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 112
+; V6-NEXT: [[LOAD:%.*]] = load ptr, ptr addrspace(4) [[GEP]], align 8
+; V6-NEXT: store ptr [[LOAD]], ptr addrspace(1) [[KERNARG]], align 8
+; V6-NEXT: ret void
;
call void @use_everything_else()
%implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
@@ -176,6 +195,15 @@ define amdgpu_kernel void @test_default_queue_completion_action_offset_v3_0(ptr
; V5-NEXT: [[LOAD:%.*]] = load <2 x ptr>, ptr addrspace(4) [[GEP]], align 16
; V5-NEXT: store <2 x ptr> [[LOAD]], ptr addrspace(1) [[KERNARG]], align 16
; V5-NEXT: ret void
+;
+; V6-LABEL: define {{[^@]+}}@test_default_queue_completion_action_offset_v3_0
+; V6-SAME: (ptr addrspace(1) [[KERNARG:%.*]]) #[[ATTR2]] {
+; V6-NEXT: call void @use_everything_else()
+; V6-NEXT: [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
+; V6-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 32
+; V6-NEXT: [[LOAD:%.*]] = load <2 x ptr>, ptr addrspace(4) [[GEP]], align 16
+; V6-NEXT: store <2 x ptr> [[LOAD]], ptr addrspace(1) [[KERNARG]], align 16
+; V6-NEXT: ret void
;
call void @use_everything_else()
%implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
@@ -203,6 +231,15 @@ define amdgpu_kernel void @test_default_queue_completion_action_offset_v5_0(ptr
; V5-NEXT: [[LOAD:%.*]] = load <2 x ptr>, ptr addrspace(4) [[GEP]], align 16
; V5-NEXT: store <2 x ptr> [[LOAD]], ptr addrspace(1) [[KERNARG]], align 16
; V5-NEXT: ret void
+;
+; V6-LABEL: define {{[^@]+}}@test_default_queue_completion_action_offset_v5_0
+; V6-SAME: (ptr addrspace(1) [[KERNARG:%.*]]) #[[ATTR5:[0-9]+]] {
+; V6-NEXT: call void @use_everything_else()
+; V6-NEXT: [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
+; V6-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 104
+; V6-NEXT: [[LOAD:%.*]] = load <2 x ptr>, ptr addrspace(4) [[GEP]], align 16
+; V6-NEXT: store <2 x ptr> [[LOAD]], ptr addrspace(1) [[KERNARG]], align 16
+; V6-NEXT: ret void
;
call void @use_everything_else()%implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
@@ -234,7 +271,16 @@ attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memo
; V5: attributes #[[ATTR4]] = { "amdgpu-no-default-queue" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
; V5: attributes #[[ATTR5]] = { "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
;.
+; V6: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+; V6: attributes #[[ATTR1]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
+; V6: attributes #[[ATTR2]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
+; V6: attributes #[[ATTR3]] = { "amdgpu-no-completion-action" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
+; V6: attributes #[[ATTR4]] = { "amdgpu-no-default-queue" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
+; V6: attributes #[[ATTR5]] = { "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
+;.
; V4: [[META0:![0-9]+]] = !{i32 1, !"amdgpu_code_object_version", i32 400}
;.
; V5: [[META0:![0-9]+]] = !{i32 1, !"amdgpu_code_object_version", i32 500}
;.
+; V6: [[META0:![0-9]+]] = !{i32 1, !"amdgpu_code_object_version", i32 600}
+;.
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll
index f4c55e602c64cc..ebbbe8aaa3a113 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll
@@ -1,3 +1,4 @@
+; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -verify-machineinstrs | FileCheck -check-prefixes=GCN,HSA,COV5 %s
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -verify-machineinstrs | FileCheck -check-prefixes=GCN,HSA,COV5 %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -verify-machineinstrs | FileCheck -check-prefixes=GCN,HSA,COV4 %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -verify-machineinstrs | FileCheck -check-prefixes=GCN,MESA %s
diff --git a/llvm/test/CodeGen/AMDGPU/non-entry-alloca.ll b/llvm/test/CodeGen/AMDGPU/non-entry-alloca.ll
index ff06f98df56371..494ace8a641e8f 100644
--- a/llvm/test/CodeGen/AMDGPU/non-entry-alloca.ll
+++ b/llvm/test/CodeGen/AMDGPU/non-entry-alloca.ll
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs | FileCheck -check-prefixes=MUBUF,DEFAULTSIZE %s
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs | FileCheck -check-prefixes=MUBUF,DEFAULTSIZE-V5 %s
+; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs | FileCheck -check-prefixes=MUBUF,DEFAULTSIZE-V5 %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -amdgpu-assume-dynamic-stack-object-size=1024 | FileCheck -check-prefixes=MUBUF,ASSUME1024 %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -amdgpu-assume-dynamic-stack-object-size=1024 | FileCheck -check-prefixes=MUBUF,ASSUME1024 %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -mattr=+enable-flat-scratch | FileCheck -check-prefixes=FLATSCR,DEFAULTSIZE %s
diff --git a/llvm/test/CodeGen/AMDGPU/recursion.ll b/llvm/test/CodeGen/AMDGPU/recursion.ll
index 95c1a085ee8cf4..ccf30b5a593f7a 100644
--- a/llvm/test/CodeGen/AMDGPU/recursion.ll
+++ b/llvm/test/CodeGen/AMDGPU/recursion.ll
@@ -1,5 +1,6 @@
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs | FileCheck %s
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs | FileCheck -check-prefixes=V5 %s
+; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs | FileCheck -check-prefixes=V5 %s
; CHECK-LABEL: {{^}}recursive:
; CHECK: ScratchSize: 16
diff --git a/llvm/test/CodeGen/AMDGPU/resource-usage-dead-function.ll b/llvm/test/CodeGen/AMDGPU/resource-usage-dead-function.ll
index c30089a8dd32a0..503b3348757971 100644
--- a/llvm/test/CodeGen/AMDGPU/resource-usage-dead-function.ll
+++ b/llvm/test/CodeGen/AMDGPU/resource-usage-dead-function.ll
@@ -1,5 +1,6 @@
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -o - | FileCheck -check-prefix=GCN %s
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -o - | FileCheck -check-prefix=GCN-V5 %s
+; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -o - | FileCheck -check-prefix=GCN-V5 %s
; Make sure there's no assertion when trying to report the resource
; usage for a function which becomes dead during codegen.
diff --git a/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-any.ll b/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-any.ll
index b78f6412cd677a..223738345242c8 100644
--- a/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-any.ll
+++ b/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-any.ll
@@ -1,10 +1,14 @@
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck --check-prefixes=ASM,ASM4 %s
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck --check-prefixes=ASM,ASM5 %s
+; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck --check-prefixes=ASM,ASM6 %s
+
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s
+; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=6 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF6 %s
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s
+; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=6 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF6 %s
; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx900"
; ASM: amdhsa.target: amdgcn-amd-amdhsa--gfx900
@@ -12,10 +16,12 @@
; ASM: - 1
; ASM4: - 1
; ASM5: - 2
+; ASM6: - 3
; ELF: OS/ABI: AMDGPU_HSA (0x40)
; ELF4: ABIVersion: 2
; ELF5: ABIVersion: 3
+; ELF6: ABIVersion: 4
; ELF: Flags [ (0x12C)
; ELF-NEXT: EF_AMDGPU_FEATURE_XNACK_ANY_V4 (0x100)
; ELF-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX900 (0x2C)
diff --git a/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-not-supported.ll b/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-not-supported.ll
index a3c75e09975331..073bdcb8a37fbc 100644
--- a/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-not-supported.ll
+++ b/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-not-supported.ll
@@ -1,10 +1,14 @@
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 | FileCheck --check-prefixes=ASM,ASM4 %s
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 | FileCheck --check-prefixes=ASM,ASM5 %s
+; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 | FileCheck --check-prefixes=ASM,ASM6 %s
+
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s
+; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=6 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF6 %s
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s
+; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=6 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF6 %s
; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx700"
; ASM: amdhsa.target: amdgcn-amd-amdhsa--gfx700
@@ -12,10 +16,12 @@
; ASM: - 1
; ASM4: - 1
; ASM5: - 2
+; ASM6: - 3
; ELF: OS/ABI: AMDGPU_HSA (0x40)
; ELF4: ABIVersion: 2
; ELF5: ABIVersion: 3
+; ELF6: ABIVersion: 4
; ELF: Flags [ (0x22)
; ELF-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX700 (0x22)
; ELF-NEXT: ]
diff --git a/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-off.ll b/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-off.ll
index d4d8af8c1a99be..ce775c5bc124d9 100644
--- a/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-off.ll
+++ b/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-off.ll
@@ -1,10 +1,14 @@
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck --check-prefixes=ASM,ASM4 %s
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck --check-prefixes=ASM,ASM5 %s
+; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck --check-prefixes=ASM,ASM6 %s
+
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s
+; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=6 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF6 %s
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s
+; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=6 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF6 %s
; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx900:xnack-"
; ASM: amdhsa.target: 'amdgcn-amd-amdhsa--gfx900:xnack-'
@@ -12,10 +16,12 @@
; ASM: - 1
; ASM4: - 1
; ASM5: - 2
+; ASM6: - 3
; ELF: OS/ABI: AMDGPU_HSA (0x40)
; ELF4: ABIVersion: 2
; ELF5: ABIVersion: 3
+; ELF6: ABIVersion: 4
; ELF: Flags [ (0x22C)
; ELF-NEXT: EF_AMDGPU_FEATURE_XNACK_OFF_V4 (0x200)
; ELF-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX900 (0x2C)
diff --git a/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-on.ll b/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-on.ll
index 9ca8b055a27979..568a2312aa479d 100644
--- a/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-on.ll
+++ b/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-on.ll
@@ -1,10 +1,14 @@
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck --check-prefixes=ASM,ASM4 %s
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck --check-prefixes=ASM,ASM5 %s
+; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck --check-prefixes=ASM,ASM6 %s
+
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s
+; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=6 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF6 %s
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s
+; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=6 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF6 %s
; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx900:xnack+"
; ASM: amdhsa.target: 'amdgcn-amd-amdhsa--gfx900:xnack+'
@@ -12,10 +16,12 @@
; ASM: - 1
; ASM4: - 1
; ASM5: - 2
+; ASM6: - 3
; ELF: OS/ABI: AMDGPU_HSA (0x40)
; ELF4: ABIVersion: 2
; ELF5: ABIVersion: 3
+; ELF6: ABIVersion: 4
; ELF: Flags [ (0x32C)
; ELF-NEXT: EF_AMDGPU_FEATURE_XNACK_ON_V4 (0x300)
; ELF-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX900 (0x2C)
diff --git a/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-off-1.ll b/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-off-1.ll
index fd3f5878469e66..0e011054738c45 100644
--- a/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-off-1.ll
+++ b/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-off-1.ll
@@ -1,10 +1,14 @@
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck --check-prefixes=ASM,ASM4 %s
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck --check-prefixes=ASM,ASM5 %s
+; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck --check-prefixes=ASM,ASM6 %s
+
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s
+; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=6 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF6 %s
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s
+; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=6 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF6 %s
; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx900:xnack-"
; ASM: amdhsa.target: 'amdgcn-amd-amdhsa--gfx900:xnack-'
@@ -12,10 +16,12 @@
; ASM: - 1
; ASM4: - 1
; ASM5: - 2
+; ASM6: - 3
; ELF: OS/ABI: AMDGPU_HSA (0x40)
; ELF4: ABIVersion: 2
; ELF5: ABIVersion: 3
+; ELF6: ABIVersion: 4
; ELF: Flags [ (0x22C)
; ELF-NEXT: EF_AMDGPU_FEATURE_XNACK_OFF_V4 (0x200)
; ELF-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX900 (0x2C)
diff --git a/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-off-2.ll b/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-off-2.ll
index 34673dd5b89197..bb79876dee6dc7 100644
--- a/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-off-2.ll
+++ b/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-off-2.ll
@@ -1,10 +1,14 @@
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck --check-prefixes=ASM,ASM4 %s
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck --check-prefixes=ASM,ASM5 %s
+; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck --check-prefixes=ASM,ASM6 %s
+
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s
+; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=6 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF6 %s
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s
+; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=6 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF6 %s
; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx900:xnack-"
; ASM: amdhsa.target: 'amdgcn-amd-amdhsa--gfx900:xnack-'
@@ -12,10 +16,12 @@
; ASM: - 1
; ASM4: - 1
; ASM5: - 2
+; ASM6: - 3
; ELF: OS/ABI: AMDGPU_HSA (0x40)
; ELF4: ABIVersion: 2
; ELF5: ABIVersion: 3
+; ELF6: ABIVersion: 4
; ELF: Flags [ (0x22C)
; ELF-NEXT: EF_AMDGPU_FEATURE_XNACK_OFF_V4 (0x200)
; ELF-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX900 (0x2C)
diff --git a/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-on-1.ll b/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-on-1.ll
index c283ece7e8bdff..038cd4cd8355c1 100644
--- a/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-on-1.ll
+++ b/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-on-1.ll
@@ -1,10 +1,14 @@
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck --check-prefixes=ASM,ASM4 %s
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck --check-prefixes=ASM,ASM5 %s
+; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck --check-prefixes=ASM,ASM6 %s
+
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s
+; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=6 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF6 %s
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s
+; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=6 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF6 %s
; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx900:xnack+"
; ASM: amdhsa.target: 'amdgcn-amd-amdhsa--gfx900:xnack+'
@@ -12,10 +16,12 @@
; ASM: - 1
; ASM4: - 1
; ASM5: - 2
+; ASM6: - 3
; ELF: OS/ABI: AMDGPU_HSA (0x40)
; ELF4: ABIVersion: 2
; ELF5: ABIVersion: 3
+; ELF6: ABIVersion: 4
; ELF: Flags [ (0x32C)
; ELF-NEXT: EF_AMDGPU_FEATURE_XNACK_ON_V4 (0x300)
; ELF-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX900 (0x2C)
diff --git a/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-on-2.ll b/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-on-2.ll
index 869254cae5258b..32013837f0788d 100644
--- a/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-on-2.ll
+++ b/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-on-2.ll
@@ -1,10 +1,14 @@
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck --check-prefixes=ASM,ASM4 %s
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck --check-prefixes=ASM,ASM5 %s
+; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck --check-prefixes=ASM,ASM6 %s
+
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s
+; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=6 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF6 %s
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s
+; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=6 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF6 %s
; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx900:xnack+"
; ASM: amdhsa.target: 'amdgcn-amd-amdhsa--gfx900:xnack+'
@@ -12,10 +16,12 @@
; ASM: - 1
; ASM4: - 1
; ASM5: - 2
+; ASM6: - 3
; ELF: OS/ABI: AMDGPU_HSA (0x40)
; ELF4: ABIVersion: 2
; ELF5: ABIVersion: 3
+; ELF6: ABIVersion: 4
; ELF: Flags [ (0x32C)
; ELF-NEXT: EF_AMDGPU_FEATURE_XNACK_ON_V4 (0x300)
; ELF-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX900 (0x2C)
diff --git a/llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-not-supported.ll b/llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-not-supported.ll
index cc04eb0e661d65..e30ff18bd4873f 100644
--- a/llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-not-supported.ll
+++ b/llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-not-supported.ll
@@ -1,10 +1,14 @@
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 | FileCheck --check-prefixes=ASM,ASM4 %s
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 | FileCheck --check-prefixes=ASM,ASM5 %s
+; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 | FileCheck --check-prefixes=ASM,ASM6 %s
+
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s
+; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=6 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF6 %s
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s
+; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=6 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF6 %s
; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx700"
; ASM: amdhsa.target: amdgcn-amd-amdhsa--gfx700
@@ -12,10 +16,12 @@
; ASM: - 1
; ASM4: - 1
; ASM5: - 2
+; ASM6: - 3
; ELF: OS/ABI: AMDGPU_HSA (0x40)
; ELF4: ABIVersion: 2
; ELF5: ABIVersion: 3
+; ELF6: ABIVersion: 4
; ELF: Flags [ (0x22)
; ELF-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX700 (0x22)
; ELF-NEXT: ]
diff --git a/llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-off.ll b/llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-off.ll
index e84778b526f11d..a11b15ad526cc7 100644
--- a/llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-off.ll
+++ b/llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-off.ll
@@ -1,10 +1,14 @@
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck --check-prefixes=ASM,ASM4 %s
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck --check-prefixes=ASM,ASM5 %s
+; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck --check-prefixes=ASM,ASM6 %s
+
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s
+; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=6 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF6 %s
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s
+; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=6 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF6 %s
; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx900:xnack-"
; ASM: amdhsa.target: 'amdgcn-amd-amdhsa--gfx900:xnack-'
@@ -12,10 +16,12 @@
; ASM: - 1
; ASM4: - 1
; ASM5: - 2
+; ASM6: - 3
; ELF: OS/ABI: AMDGPU_HSA (0x40)
; ELF4: ABIVersion: 2
; ELF5: ABIVersion: 3
+; ELF6: ABIVersion: 4
; ELF: Flags [ (0x22C)
; ELF-NEXT: EF_AMDGPU_FEATURE_XNACK_OFF_V4 (0x200)
; ELF-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX900 (0x2C)
diff --git a/llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-on.ll b/llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-on.ll
index a1ab6ed5f082ac..50f8bc81909f3a 100644
--- a/llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-on.ll
+++ b/llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-on.ll
@@ -1,10 +1,14 @@
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck --check-prefixes=ASM,ASM4 %s
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck --check-prefixes=ASM,ASM5 %s
+; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck --check-prefixes=ASM,ASM6 %s
+
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s
+; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=6 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF6 %s
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s
+; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=6 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF6 %s
; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx900:xnack+"
; ASM: amdhsa.target: 'amdgcn-amd-amdhsa--gfx900:xnack+'
@@ -12,10 +16,12 @@
; ASM: - 1
; ASM4: - 1
; ASM5: - 2
+; ASM6: - 3
; ELF: OS/ABI: AMDGPU_HSA (0x40)
; ELF4: ABIVersion: 2
; ELF5: ABIVersion: 3
+; ELF6: ABIVersion: 4
; ELF: Flags [ (0x32C)
; ELF-NEXT: EF_AMDGPU_FEATURE_XNACK_ON_V4 (0x300)
; ELF-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX900 (0x2C)
diff --git a/llvm/test/MC/AMDGPU/hsa-v5-uses-dynamic-stack.s b/llvm/test/MC/AMDGPU/hsa-v5-uses-dynamic-stack.s
index 6edac771faa07e..999d1bd187664f 100644
--- a/llvm/test/MC/AMDGPU/hsa-v5-uses-dynamic-stack.s
+++ b/llvm/test/MC/AMDGPU/hsa-v5-uses-dynamic-stack.s
@@ -3,6 +3,11 @@
// RUN: llvm-readelf -S -r -s %t | FileCheck --check-prefix=READOBJ %s
// RUN: llvm-objdump -s -j .rodata %t | FileCheck --check-prefix=OBJDUMP %s
+// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx904 --amdhsa-code-object-version=6 -mattr=+xnack < %s | FileCheck --check-prefix=ASM %s
+// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx904 --amdhsa-code-object-version=6 -mattr=+xnack -filetype=obj < %s > %t
+// RUN: llvm-readelf -S -r -s %t | FileCheck --check-prefix=READOBJ %s
+// RUN: llvm-objdump -s -j .rodata %t | FileCheck --check-prefix=OBJDUMP %s
+
// READOBJ: Section Headers
// READOBJ: .text PROGBITS {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9]+}} AX {{[0-9]+}} {{[0-9]+}} 256
// READOBJ: .rodata PROGBITS {{[0-9a-f]+}} {{[0-9a-f]+}} 000100 {{[0-9]+}} A {{[0-9]+}} {{[0-9]+}} 64
diff --git a/llvm/test/tools/llvm-readobj/ELF/amdgpu-elf-headers.test b/llvm/test/tools/llvm-readobj/ELF/AMDGPU/elf-headers.test
similarity index 100%
rename from llvm/test/tools/llvm-readobj/ELF/amdgpu-elf-headers.test
rename to llvm/test/tools/llvm-readobj/ELF/AMDGPU/elf-headers.test
diff --git a/llvm/test/tools/llvm-readobj/ELF/AMDGPU/generic_versions.s b/llvm/test/tools/llvm-readobj/ELF/AMDGPU/generic_versions.s
new file mode 100644
index 00000000000000..337938e2a57baf
--- /dev/null
+++ b/llvm/test/tools/llvm-readobj/ELF/AMDGPU/generic_versions.s
@@ -0,0 +1,16 @@
+; RUN: llvm-mc %s --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj --amdhsa-code-object-version=6 --amdgpu-force-generic-version=1 -o %t.o
+; RUN: llvm-readelf -h %t.o | FileCheck %s --check-prefix=V1
+
+; RUN: llvm-mc %s --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj --amdhsa-code-object-version=6 --amdgpu-force-generic-version=4 -o %t.o
+; RUN: llvm-readelf -h %t.o | FileCheck %s --check-prefix=V4
+
+; RUN: llvm-mc %s --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj --amdhsa-code-object-version=6 --amdgpu-force-generic-version=32 -o %t.o
+; RUN: llvm-readelf -h %t.o | FileCheck %s --check-prefix=V32
+
+; RUN: llvm-mc %s --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj --amdhsa-code-object-version=6 --amdgpu-force-generic-version=255 -o %t.o
+; RUN: llvm-readelf -h %t.o | FileCheck %s --check-prefix=V255
+
+; V1: generic_v1
+; V4: generic_v4
+; V32: generic_v32
+; V255: generic_v255
diff --git a/llvm/test/tools/llvm-readobj/ELF/AMDGPU/generic_versions.test b/llvm/test/tools/llvm-readobj/ELF/AMDGPU/generic_versions.test
new file mode 100644
index 00000000000000..ae7f96c92c266e
--- /dev/null
+++ b/llvm/test/tools/llvm-readobj/ELF/AMDGPU/generic_versions.test
@@ -0,0 +1,26 @@
+# RUN: yaml2obj %s -o %t -DABI_VERSION=4 -DGENERICVER=EF_AMDGPU_GENERIC_VERSION_V1
+# RUN: llvm-readelf -h %t | FileCheck %s --check-prefixes=V1
+
+# RUN: yaml2obj %s -o %t -DABI_VERSION=4 -DGENERICVER=EF_AMDGPU_GENERIC_VERSION_V32
+# RUN: llvm-readelf -h %t | FileCheck %s --check-prefixes=V32
+
+# RUN: yaml2obj %s -o %t -DABI_VERSION=4 -DGENERICVER=EF_AMDGPU_GENERIC_VERSION_V126
+# RUN: llvm-readelf -h %t | FileCheck %s --check-prefixes=V126
+
+# RUN: yaml2obj %s -o %t -DABI_VERSION=4 -DGENERICVER=EF_AMDGPU_GENERIC_VERSION_V255
+# RUN: llvm-readelf -h %t | FileCheck %s --check-prefixes=V255
+
+# V1: generic_v1
+# V32: generic_v32
+# V126: generic_v126
+# V255: generic_v255
+
+--- !ELF
+FileHeader:
+ Class: ELFCLASS64
+ Data: ELFDATA2LSB
+ OSABI: ELFOSABI_AMDGPU_HSA
+ ABIVersion: [[ABI_VERSION]]
+ Type: ET_REL
+ Machine: EM_AMDGPU
+ Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX900, [[GENERICVER]] ]
diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp
index f369a63add1149..d1382e829bb3aa 100644
--- a/llvm/tools/llvm-readobj/ELFDumper.cpp
+++ b/llvm/tools/llvm-readobj/ELFDumper.cpp
@@ -621,7 +621,7 @@ template <typename ELFT> class GNUELFDumper : public ELFDumper<ELFT> {
template <typename T, typename TEnum>
std::string printFlags(T Value, ArrayRef<EnumEntry<TEnum>> EnumValues,
TEnum EnumMask1 = {}, TEnum EnumMask2 = {},
- TEnum EnumMask3 = {}) const {
+ TEnum EnumMask3 = {}, TEnum EnumMask4 = {}) const {
std::string Str;
for (const EnumEntry<TEnum> &Flag : EnumValues) {
if (Flag.Value == 0)
@@ -634,6 +634,8 @@ template <typename ELFT> class GNUELFDumper : public ELFDumper<ELFT> {
EnumMask = EnumMask2;
else if (Flag.Value & EnumMask3)
EnumMask = EnumMask3;
+ else if (Flag.Value & EnumMask4)
+ EnumMask = EnumMask4;
bool IsEnum = (Flag.Value & EnumMask) != 0;
if ((!IsEnum && (Value & Flag.Value) == Flag.Value) ||
(IsEnum && (Value & EnumMask) == Flag.Value)) {
@@ -1558,134 +1560,89 @@ const EnumEntry<unsigned> ElfHeaderMipsFlags[] = {
ENUM_ENT(EF_MIPS_ARCH_64R6, "mips64r6")
};
+// clang-format off
+#define AMDGPU_MACH_ENUM_ENTS \
+ ENUM_ENT(EF_AMDGPU_MACH_NONE, "none"), \
+ ENUM_ENT(EF_AMDGPU_MACH_R600_R600, "r600"), \
+ ENUM_ENT(EF_AMDGPU_MACH_R600_R630, "r630"), \
+ ENUM_ENT(EF_AMDGPU_MACH_R600_RS880, "rs880"), \
+ ENUM_ENT(EF_AMDGPU_MACH_R600_RV670, "rv670"), \
+ ENUM_ENT(EF_AMDGPU_MACH_R600_RV710, "rv710"), \
+ ENUM_ENT(EF_AMDGPU_MACH_R600_RV730, "rv730"), \
+ ENUM_ENT(EF_AMDGPU_MACH_R600_RV770, "rv770"), \
+ ENUM_ENT(EF_AMDGPU_MACH_R600_CEDAR, "cedar"), \
+ ENUM_ENT(EF_AMDGPU_MACH_R600_CYPRESS, "cypress"), \
+ ENUM_ENT(EF_AMDGPU_MACH_R600_JUNIPER, "juniper"), \
+ ENUM_ENT(EF_AMDGPU_MACH_R600_REDWOOD, "redwood"), \
+ ENUM_ENT(EF_AMDGPU_MACH_R600_SUMO, "sumo"), \
+ ENUM_ENT(EF_AMDGPU_MACH_R600_BARTS, "barts"), \
+ ENUM_ENT(EF_AMDGPU_MACH_R600_CAICOS, "caicos"), \
+ ENUM_ENT(EF_AMDGPU_MACH_R600_CAYMAN, "cayman"), \
+ ENUM_ENT(EF_AMDGPU_MACH_R600_TURKS, "turks"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX600, "gfx600"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX601, "gfx601"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX602, "gfx602"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX700, "gfx700"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX701, "gfx701"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX702, "gfx702"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX703, "gfx703"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX704, "gfx704"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX705, "gfx705"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX801, "gfx801"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX802, "gfx802"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX803, "gfx803"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX805, "gfx805"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX810, "gfx810"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX900, "gfx900"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX902, "gfx902"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX904, "gfx904"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX906, "gfx906"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX908, "gfx908"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX909, "gfx909"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX90A, "gfx90a"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX90C, "gfx90c"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX940, "gfx940"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX941, "gfx941"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX942, "gfx942"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1010, "gfx1010"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1011, "gfx1011"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1012, "gfx1012"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1013, "gfx1013"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1030, "gfx1030"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1031, "gfx1031"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1032, "gfx1032"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1033, "gfx1033"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1034, "gfx1034"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1035, "gfx1035"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1036, "gfx1036"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1100, "gfx1100"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1101, "gfx1101"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1102, "gfx1102"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1103, "gfx1103"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1150, "gfx1150"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1151, "gfx1151"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1200, "gfx1200"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1201, "gfx1201")
+// clang-format on
+
const EnumEntry<unsigned> ElfHeaderAMDGPUFlagsABIVersion3[] = {
- ENUM_ENT(EF_AMDGPU_MACH_NONE, "none"),
- ENUM_ENT(EF_AMDGPU_MACH_R600_R600, "r600"),
- ENUM_ENT(EF_AMDGPU_MACH_R600_R630, "r630"),
- ENUM_ENT(EF_AMDGPU_MACH_R600_RS880, "rs880"),
- ENUM_ENT(EF_AMDGPU_MACH_R600_RV670, "rv670"),
- ENUM_ENT(EF_AMDGPU_MACH_R600_RV710, "rv710"),
- ENUM_ENT(EF_AMDGPU_MACH_R600_RV730, "rv730"),
- ENUM_ENT(EF_AMDGPU_MACH_R600_RV770, "rv770"),
- ENUM_ENT(EF_AMDGPU_MACH_R600_CEDAR, "cedar"),
- ENUM_ENT(EF_AMDGPU_MACH_R600_CYPRESS, "cypress"),
- ENUM_ENT(EF_AMDGPU_MACH_R600_JUNIPER, "juniper"),
- ENUM_ENT(EF_AMDGPU_MACH_R600_REDWOOD, "redwood"),
- ENUM_ENT(EF_AMDGPU_MACH_R600_SUMO, "sumo"),
- ENUM_ENT(EF_AMDGPU_MACH_R600_BARTS, "barts"),
- ENUM_ENT(EF_AMDGPU_MACH_R600_CAICOS, "caicos"),
- ENUM_ENT(EF_AMDGPU_MACH_R600_CAYMAN, "cayman"),
- ENUM_ENT(EF_AMDGPU_MACH_R600_TURKS, "turks"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX600, "gfx600"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX601, "gfx601"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX602, "gfx602"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX700, "gfx700"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX701, "gfx701"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX702, "gfx702"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX703, "gfx703"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX704, "gfx704"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX705, "gfx705"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX801, "gfx801"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX802, "gfx802"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX803, "gfx803"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX805, "gfx805"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX810, "gfx810"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX900, "gfx900"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX902, "gfx902"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX904, "gfx904"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX906, "gfx906"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX908, "gfx908"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX909, "gfx909"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX90A, "gfx90a"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX90C, "gfx90c"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX940, "gfx940"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX941, "gfx941"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX942, "gfx942"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1010, "gfx1010"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1011, "gfx1011"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1012, "gfx1012"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1013, "gfx1013"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1030, "gfx1030"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1031, "gfx1031"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1032, "gfx1032"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1033, "gfx1033"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1034, "gfx1034"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1035, "gfx1035"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1036, "gfx1036"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1100, "gfx1100"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1101, "gfx1101"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1102, "gfx1102"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1103, "gfx1103"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1150, "gfx1150"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1151, "gfx1151"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1200, "gfx1200"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1201, "gfx1201"),
+ AMDGPU_MACH_ENUM_ENTS,
ENUM_ENT(EF_AMDGPU_FEATURE_XNACK_V3, "xnack"),
ENUM_ENT(EF_AMDGPU_FEATURE_SRAMECC_V3, "sramecc"),
};
const EnumEntry<unsigned> ElfHeaderAMDGPUFlagsABIVersion4[] = {
- ENUM_ENT(EF_AMDGPU_MACH_NONE, "none"),
- ENUM_ENT(EF_AMDGPU_MACH_R600_R600, "r600"),
- ENUM_ENT(EF_AMDGPU_MACH_R600_R630, "r630"),
- ENUM_ENT(EF_AMDGPU_MACH_R600_RS880, "rs880"),
- ENUM_ENT(EF_AMDGPU_MACH_R600_RV670, "rv670"),
- ENUM_ENT(EF_AMDGPU_MACH_R600_RV710, "rv710"),
- ENUM_ENT(EF_AMDGPU_MACH_R600_RV730, "rv730"),
- ENUM_ENT(EF_AMDGPU_MACH_R600_RV770, "rv770"),
- ENUM_ENT(EF_AMDGPU_MACH_R600_CEDAR, "cedar"),
- ENUM_ENT(EF_AMDGPU_MACH_R600_CYPRESS, "cypress"),
- ENUM_ENT(EF_AMDGPU_MACH_R600_JUNIPER, "juniper"),
- ENUM_ENT(EF_AMDGPU_MACH_R600_REDWOOD, "redwood"),
- ENUM_ENT(EF_AMDGPU_MACH_R600_SUMO, "sumo"),
- ENUM_ENT(EF_AMDGPU_MACH_R600_BARTS, "barts"),
- ENUM_ENT(EF_AMDGPU_MACH_R600_CAICOS, "caicos"),
- ENUM_ENT(EF_AMDGPU_MACH_R600_CAYMAN, "cayman"),
- ENUM_ENT(EF_AMDGPU_MACH_R600_TURKS, "turks"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX600, "gfx600"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX601, "gfx601"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX602, "gfx602"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX700, "gfx700"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX701, "gfx701"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX702, "gfx702"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX703, "gfx703"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX704, "gfx704"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX705, "gfx705"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX801, "gfx801"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX802, "gfx802"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX803, "gfx803"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX805, "gfx805"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX810, "gfx810"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX900, "gfx900"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX902, "gfx902"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX904, "gfx904"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX906, "gfx906"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX908, "gfx908"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX909, "gfx909"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX90A, "gfx90a"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX90C, "gfx90c"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX940, "gfx940"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX941, "gfx941"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX942, "gfx942"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1010, "gfx1010"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1011, "gfx1011"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1012, "gfx1012"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1013, "gfx1013"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1030, "gfx1030"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1031, "gfx1031"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1032, "gfx1032"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1033, "gfx1033"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1034, "gfx1034"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1035, "gfx1035"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1036, "gfx1036"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1100, "gfx1100"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1101, "gfx1101"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1102, "gfx1102"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1103, "gfx1103"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1150, "gfx1150"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1151, "gfx1151"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1200, "gfx1200"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1201, "gfx1201"),
+ AMDGPU_MACH_ENUM_ENTS,
+ ENUM_ENT(EF_AMDGPU_FEATURE_XNACK_ANY_V4, "xnack"),
+ ENUM_ENT(EF_AMDGPU_FEATURE_XNACK_OFF_V4, "xnack-"),
+ ENUM_ENT(EF_AMDGPU_FEATURE_XNACK_ON_V4, "xnack+"),
+ ENUM_ENT(EF_AMDGPU_FEATURE_SRAMECC_ANY_V4, "sramecc"),
+ ENUM_ENT(EF_AMDGPU_FEATURE_SRAMECC_OFF_V4, "sramecc-"),
+ ENUM_ENT(EF_AMDGPU_FEATURE_SRAMECC_ON_V4, "sramecc+"),
+};
+
+const EnumEntry<unsigned> ElfHeaderAMDGPUFlagsABIVersion6[] = {
+ AMDGPU_MACH_ENUM_ENTS,
ENUM_ENT(EF_AMDGPU_FEATURE_XNACK_ANY_V4, "xnack"),
ENUM_ENT(EF_AMDGPU_FEATURE_XNACK_OFF_V4, "xnack-"),
ENUM_ENT(EF_AMDGPU_FEATURE_XNACK_ON_V4, "xnack+"),
@@ -3678,6 +3635,19 @@ template <class ELFT> void GNUELFDumper<ELFT>::printFileHeaders() {
unsigned(ELF::EF_AMDGPU_FEATURE_XNACK_V4),
unsigned(ELF::EF_AMDGPU_FEATURE_SRAMECC_V4));
break;
+ case ELF::ELFABIVERSION_AMDGPU_HSA_V6:
+ ElfFlags =
+ printFlags(e.e_flags, ArrayRef(ElfHeaderAMDGPUFlagsABIVersion6),
+ unsigned(ELF::EF_AMDGPU_MACH),
+ unsigned(ELF::EF_AMDGPU_FEATURE_XNACK_V4),
+ unsigned(ELF::EF_AMDGPU_FEATURE_SRAMECC_V4),
+ unsigned(ELF::EF_AMDGPU_GENERIC_VERSION));
+ if (auto GenericV = e.e_flags & ELF::EF_AMDGPU_GENERIC_VERSION) {
+ ElfFlags +=
+ ", generic_v" +
+ to_string(GenericV >> ELF::EF_AMDGPU_GENERIC_VERSION_OFFSET);
+ }
+ break;
}
}
Str = "0x" + utohexstr(e.e_flags);
@@ -6949,6 +6919,14 @@ template <class ELFT> void LLVMELFDumper<ELFT>::printFileHeaders() {
unsigned(ELF::EF_AMDGPU_FEATURE_XNACK_V4),
unsigned(ELF::EF_AMDGPU_FEATURE_SRAMECC_V4));
break;
+ case ELF::ELFABIVERSION_AMDGPU_HSA_V6:
+ W.printFlags("Flags", E.e_flags,
+ ArrayRef(ElfHeaderAMDGPUFlagsABIVersion6),
+ unsigned(ELF::EF_AMDGPU_MACH),
+ unsigned(ELF::EF_AMDGPU_FEATURE_XNACK_V4),
+ unsigned(ELF::EF_AMDGPU_FEATURE_SRAMECC_V4),
+ unsigned(ELF::EF_AMDGPU_GENERIC_VERSION));
+ break;
}
} else if (E.e_machine == EM_RISCV)
W.printFlags("Flags", E.e_flags, ArrayRef(ElfHeaderRISCVFlags));
>From 76cd2ea53cb90e4300df233ea694ad159b5cd2f8 Mon Sep 17 00:00:00 2001
From: pvanhout <pierre.vanhoutryve at amd.com>
Date: Thu, 4 Jan 2024 14:48:05 +0100
Subject: [PATCH 2/3] [AMDGPU] Introduce GFX9/10.1/10.3/11 Generic Targets
These generic targets include multiple GPUs and will, in the future, provide a way to build once and run on multiple GPU, at the cost of less optimization opportunities.
Note that this is just doing the compiler side of things, device libs an runtimes/loader/etc. don't know about these targets yet, so none of them actually work in practice right now. This is just the initial commit to make LLVM aware of them.
No docs in this patch either as I plan to do it all in a follow-up patch.
---
clang/lib/Basic/Targets/AMDGPU.cpp | 20 ++-
clang/test/Driver/amdgpu-macros.cl | 5 +
clang/test/Driver/amdgpu-mcpu.cl | 10 ++
llvm/include/llvm/BinaryFormat/ELF.h | 6 +-
llvm/include/llvm/TargetParser/TargetParser.h | 10 ++
llvm/lib/Object/ELFObjectFile.cpp | 10 ++
llvm/lib/ObjectYAML/ELFYAML.cpp | 4 +
llvm/lib/Target/AMDGPU/AMDGPU.td | 81 +++++------
.../AMDGPURemoveIncompatibleFunctions.cpp | 6 +-
llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp | 7 +
llvm/lib/Target/AMDGPU/GCNProcessors.td | 20 +++
llvm/lib/Target/AMDGPU/GCNSubtarget.h | 2 +
.../MCTargetDesc/AMDGPUTargetStreamer.cpp | 26 ++++
llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h | 11 ++
llvm/lib/TargetParser/TargetParser.cpp | 34 +++++
.../CodeGen/AMDGPU/directive-amdgcn-target.ll | 10 ++
.../CodeGen/AMDGPU/elf-header-flags-mach.ll | 10 ++
llvm/test/CodeGen/AMDGPU/gds-allocation.ll | 1 +
llvm/test/CodeGen/AMDGPU/gds-atomic.ll | 1 +
.../AMDGPU/generic-targets-require-v6.ll | 18 +++
.../llvm.amdgcn.image.gather4.d16.dim.ll | 3 +
.../AMDGPU/llvm.amdgcn.image.sample.dim.ll | 3 +
.../AMDGPU/unsupported-image-sample.ll | 12 +-
.../Object/AMDGPU/elf-header-flags-mach.yaml | 29 ++++
.../llvm-objdump/ELF/AMDGPU/subtarget.ll | 20 +++
.../llvm-readobj/ELF/AMDGPU/elf-headers.test | 12 ++
llvm/tools/llvm-readobj/ELFDumper.cpp | 128 +++++++++---------
27 files changed, 384 insertions(+), 115 deletions(-)
create mode 100644 llvm/test/CodeGen/AMDGPU/generic-targets-require-v6.ll
diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp b/clang/lib/Basic/Targets/AMDGPU.cpp
index 6f3a4908623da7..ca935174c05cec 100644
--- a/clang/lib/Basic/Targets/AMDGPU.cpp
+++ b/clang/lib/Basic/Targets/AMDGPU.cpp
@@ -275,13 +275,25 @@ void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts,
Builder.defineMacro("__R600__");
if (GPUKind != llvm::AMDGPU::GK_NONE) {
- StringRef CanonName = isAMDGCN(getTriple()) ?
- getArchNameAMDGCN(GPUKind) : getArchNameR600(GPUKind);
+ std::string CanonName = (isAMDGCN(getTriple()) ? getArchNameAMDGCN(GPUKind)
+ : getArchNameR600(GPUKind))
+ .str();
+
+ // Sanitize the name of generic targets.
+ // e.g. gfx10.1-generic -> gfx10_1_generic
+ if (GPUKind >= llvm::AMDGPU::GK_AMDGCN_GENERIC_FIRST &&
+ GPUKind <= llvm::AMDGPU::GK_AMDGCN_GENERIC_LAST) {
+ std::replace(CanonName.begin(), CanonName.end(), '.', '_');
+ std::replace(CanonName.begin(), CanonName.end(), '-', '_');
+ }
+
Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__"));
// Emit macros for gfx family e.g. gfx906 -> __GFX9__, gfx1030 -> __GFX10___
if (isAMDGCN(getTriple())) {
- assert(CanonName.starts_with("gfx") && "Invalid amdgcn canonical name");
- Builder.defineMacro(Twine("__") + Twine(CanonName.drop_back(2).upper()) +
+ assert(StringRef(CanonName).starts_with("gfx") &&
+ "Invalid amdgcn canonical name");
+ StringRef CanonFamilyName = getArchFamilyNameAMDGCN(GPUKind);
+ Builder.defineMacro(Twine("__") + Twine(CanonFamilyName.upper()) +
Twine("__"));
}
if (isAMDGCN(getTriple())) {
diff --git a/clang/test/Driver/amdgpu-macros.cl b/clang/test/Driver/amdgpu-macros.cl
index 81c22af460d12d..3b10444ef71d36 100644
--- a/clang/test/Driver/amdgpu-macros.cl
+++ b/clang/test/Driver/amdgpu-macros.cl
@@ -131,6 +131,11 @@
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx1200 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx1200 -DFAMILY=GFX12
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx1201 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx1201 -DFAMILY=GFX12
+// RUN: %clang -E -dM -target amdgcn -mcpu=gfx9-generic %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=64 -DCPU=gfx9_generic -DFAMILY=GFX9
+// RUN: %clang -E -dM -target amdgcn -mcpu=gfx10.1-generic %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx10_1_generic -DFAMILY=GFX10
+// RUN: %clang -E -dM -target amdgcn -mcpu=gfx10.3-generic %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx10_3_generic -DFAMILY=GFX10
+// RUN: %clang -E -dM -target amdgcn -mcpu=gfx11-generic %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx11_generic -DFAMILY=GFX11
+
// ARCH-GCN-DAG: #define FP_FAST_FMA 1
// FAST_FMAF-DAG: #define FP_FAST_FMAF 1
diff --git a/clang/test/Driver/amdgpu-mcpu.cl b/clang/test/Driver/amdgpu-mcpu.cl
index eeb16ae98ebad7..6f18ea0615cb69 100644
--- a/clang/test/Driver/amdgpu-mcpu.cl
+++ b/clang/test/Driver/amdgpu-mcpu.cl
@@ -115,6 +115,11 @@
// RUN: %clang -### -target amdgcn -mcpu=gfx1200 %s 2>&1 | FileCheck --check-prefix=GFX1200 %s
// RUN: %clang -### -target amdgcn -mcpu=gfx1201 %s 2>&1 | FileCheck --check-prefix=GFX1201 %s
+// RUN: %clang -### -target amdgcn -mcpu=gfx9-generic %s 2>&1 | FileCheck --check-prefix=GFX9_GENERIC %s
+// RUN: %clang -### -target amdgcn -mcpu=gfx10.1-generic %s 2>&1 | FileCheck --check-prefix=GFX10_1_GENERIC %s
+// RUN: %clang -### -target amdgcn -mcpu=gfx10.3-generic %s 2>&1 | FileCheck --check-prefix=GFX10_3_GENERIC %s
+// RUN: %clang -### -target amdgcn -mcpu=gfx11-generic %s 2>&1 | FileCheck --check-prefix=GFX11_GENERIC %s
+
// GCNDEFAULT-NOT: -target-cpu
// GFX600: "-target-cpu" "gfx600"
// GFX601: "-target-cpu" "gfx601"
@@ -160,3 +165,8 @@
// GFX1151: "-target-cpu" "gfx1151"
// GFX1200: "-target-cpu" "gfx1200"
// GFX1201: "-target-cpu" "gfx1201"
+
+// GFX9_GENERIC: "-target-cpu" "gfx9-generic"
+// GFX10_1_GENERIC: "-target-cpu" "gfx10.1-generic"
+// GFX10_3_GENERIC: "-target-cpu" "gfx10.3-generic"
+// GFX11_GENERIC: "-target-cpu" "gfx11-generic"
diff --git a/llvm/include/llvm/BinaryFormat/ELF.h b/llvm/include/llvm/BinaryFormat/ELF.h
index 256b4229e66bd7..f04d4657d8f51d 100644
--- a/llvm/include/llvm/BinaryFormat/ELF.h
+++ b/llvm/include/llvm/BinaryFormat/ELF.h
@@ -788,11 +788,15 @@ enum : unsigned {
EF_AMDGPU_MACH_AMDGCN_GFX942 = 0x04c,
EF_AMDGPU_MACH_AMDGCN_RESERVED_0X4D = 0x04d,
EF_AMDGPU_MACH_AMDGCN_GFX1201 = 0x04e,
+ EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC = 0x04f,
+ EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC = 0x050,
+ EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC = 0x051,
+ EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC = 0x052,
// clang-format on
// First/last AMDGCN-based processors.
EF_AMDGPU_MACH_AMDGCN_FIRST = EF_AMDGPU_MACH_AMDGCN_GFX600,
- EF_AMDGPU_MACH_AMDGCN_LAST = EF_AMDGPU_MACH_AMDGCN_GFX1201,
+ EF_AMDGPU_MACH_AMDGCN_LAST = EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC,
// Indicates if the "xnack" target feature is enabled for all code contained
// in the object.
diff --git a/llvm/include/llvm/TargetParser/TargetParser.h b/llvm/include/llvm/TargetParser/TargetParser.h
index 6464285980f00a..7da11701fefb8b 100644
--- a/llvm/include/llvm/TargetParser/TargetParser.h
+++ b/llvm/include/llvm/TargetParser/TargetParser.h
@@ -111,6 +111,14 @@ enum GPUKind : uint32_t {
GK_AMDGCN_FIRST = GK_GFX600,
GK_AMDGCN_LAST = GK_GFX1201,
+
+ GK_GFX9_GENERIC = 192,
+ GK_GFX10_1_GENERIC = 193,
+ GK_GFX10_3_GENERIC = 194,
+ GK_GFX11_GENERIC = 195,
+
+ GK_AMDGCN_GENERIC_FIRST = GK_GFX9_GENERIC,
+ GK_AMDGCN_GENERIC_LAST = GK_GFX11_GENERIC,
};
/// Instruction set architecture version.
@@ -147,6 +155,8 @@ enum ArchFeatureKind : uint32_t {
FEATURE_WGP = 1 << 9,
};
+StringRef getArchFamilyNameAMDGCN(GPUKind AK);
+
StringRef getArchNameAMDGCN(GPUKind AK);
StringRef getArchNameR600(GPUKind AK);
StringRef getCanonicalArchName(const Triple &T, StringRef Arch);
diff --git a/llvm/lib/Object/ELFObjectFile.cpp b/llvm/lib/Object/ELFObjectFile.cpp
index ae21b81c10c82a..c9009be5fdbf6e 100644
--- a/llvm/lib/Object/ELFObjectFile.cpp
+++ b/llvm/lib/Object/ELFObjectFile.cpp
@@ -514,6 +514,16 @@ StringRef ELFObjectFileBase::getAMDGPUCPUName() const {
return "gfx1200";
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1201:
return "gfx1201";
+
+ // Generic AMDGCN targets
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC:
+ return "gfx9-generic";
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC:
+ return "gfx10.1-generic";
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC:
+ return "gfx10.3-generic";
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC:
+ return "gfx11-generic";
default:
llvm_unreachable("Unknown EF_AMDGPU_MACH value");
}
diff --git a/llvm/lib/ObjectYAML/ELFYAML.cpp b/llvm/lib/ObjectYAML/ELFYAML.cpp
index ff7151e016a9ba..9ebc3eb95c5548 100644
--- a/llvm/lib/ObjectYAML/ELFYAML.cpp
+++ b/llvm/lib/ObjectYAML/ELFYAML.cpp
@@ -612,6 +612,10 @@ void ScalarBitSetTraits<ELFYAML::ELF_EF>::bitset(IO &IO,
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1151, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1200, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1201, EF_AMDGPU_MACH);
+ BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC, EF_AMDGPU_MACH);
+ BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC, EF_AMDGPU_MACH);
+ BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC, EF_AMDGPU_MACH);
+ BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC, EF_AMDGPU_MACH);
switch (Object->Header.ABIVersion) {
default:
// ELFOSABI_AMDGPU_PAL, ELFOSABI_AMDGPU_MESA3D support *_V3 flags.
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index df8c35ffd4575a..35369f1c3a98e1 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -984,6 +984,12 @@ def FeatureGWS : SubtargetFeature<"gws",
"Has Global Wave Sync"
>;
+def FeatureRequiresCOV6 : SubtargetFeature<"requires-cov6",
+ "RequiresCOV6",
+ "true",
+ "Target Requires Code Object V6"
+>;
+
// Dummy feature used to disable assembler instructions.
def FeatureDisable : SubtargetFeature<"",
"FeatureDisable","true",
@@ -1193,6 +1199,17 @@ def FeatureISAVersion9_0_Common : FeatureSet<
FeatureImageInsts,
FeatureMadMacF32Insts]>;
+def FeatureISAVersion9_0_Consumer_Common : FeatureSet<
+ !listconcat(FeatureISAVersion9_0_Common.Features,
+ [FeatureImageGather4D16Bug,
+ FeatureDsSrc2Insts,
+ FeatureExtendedImageInsts,
+ FeatureGDS])>;
+
+def FeatureISAVersion9_Generic : FeatureSet<
+ !listconcat(FeatureISAVersion9_0_Consumer_Common.Features,
+ [FeatureRequiresCOV6])>;
+
def FeatureISAVersion9_0_MI_Common : FeatureSet<
!listconcat(FeatureISAVersion9_0_Common.Features,
[FeatureFmaMixInsts,
@@ -1211,43 +1228,27 @@ def FeatureISAVersion9_0_MI_Common : FeatureSet<
FeatureSupportsSRAMECC])>;
def FeatureISAVersion9_0_0 : FeatureSet<
- !listconcat(FeatureISAVersion9_0_Common.Features,
- [FeatureGDS,
- FeatureMadMixInsts,
- FeatureDsSrc2Insts,
- FeatureExtendedImageInsts,
- FeatureImageGather4D16Bug])>;
+ !listconcat(FeatureISAVersion9_0_Consumer_Common.Features,
+ [FeatureMadMixInsts])>;
def FeatureISAVersion9_0_2 : FeatureSet<
- !listconcat(FeatureISAVersion9_0_Common.Features,
- [FeatureGDS,
- FeatureMadMixInsts,
- FeatureDsSrc2Insts,
- FeatureExtendedImageInsts,
- FeatureImageGather4D16Bug])>;
+ !listconcat(FeatureISAVersion9_0_Consumer_Common.Features,
+ [FeatureMadMixInsts])>;
def FeatureISAVersion9_0_4 : FeatureSet<
- !listconcat(FeatureISAVersion9_0_Common.Features,
- [FeatureGDS,
- FeatureDsSrc2Insts,
- FeatureExtendedImageInsts,
- FeatureFmaMixInsts,
- FeatureImageGather4D16Bug])>;
+ !listconcat(FeatureISAVersion9_0_Consumer_Common.Features,
+ [FeatureFmaMixInsts])>;
def FeatureISAVersion9_0_6 : FeatureSet<
- !listconcat(FeatureISAVersion9_0_Common.Features,
- [FeatureGDS,
- HalfRate64Ops,
+ !listconcat(FeatureISAVersion9_0_Consumer_Common.Features,
+ [HalfRate64Ops,
FeatureFmaMixInsts,
- FeatureDsSrc2Insts,
- FeatureExtendedImageInsts,
FeatureDLInsts,
FeatureDot1Insts,
FeatureDot2Insts,
FeatureDot7Insts,
FeatureDot10Insts,
- FeatureSupportsSRAMECC,
- FeatureImageGather4D16Bug])>;
+ FeatureSupportsSRAMECC])>;
def FeatureISAVersion9_0_8 : FeatureSet<
!listconcat(FeatureISAVersion9_0_MI_Common.Features,
@@ -1260,13 +1261,9 @@ def FeatureISAVersion9_0_8 : FeatureSet<
FeatureImageGather4D16Bug])>;
def FeatureISAVersion9_0_9 : FeatureSet<
- !listconcat(FeatureISAVersion9_0_Common.Features,
- [FeatureGDS,
- FeatureMadMixInsts,
- FeatureDsSrc2Insts,
- FeatureExtendedImageInsts,
- FeatureImageInsts,
- FeatureImageGather4D16Bug])>;
+ !listconcat(FeatureISAVersion9_0_Consumer_Common.Features,
+ [FeatureMadMixInsts,
+ FeatureImageInsts])>;
def FeatureISAVersion9_0_A : FeatureSet<
!listconcat(FeatureISAVersion9_0_MI_Common.Features,
@@ -1282,12 +1279,8 @@ def FeatureISAVersion9_0_A : FeatureSet<
FeatureKernargPreload])>;
def FeatureISAVersion9_0_C : FeatureSet<
- !listconcat(FeatureISAVersion9_0_Common.Features,
- [FeatureGDS,
- FeatureMadMixInsts,
- FeatureDsSrc2Insts,
- FeatureExtendedImageInsts,
- FeatureImageGather4D16Bug])>;
+ !listconcat(FeatureISAVersion9_0_Consumer_Common.Features,
+ [FeatureMadMixInsts])>;
def FeatureISAVersion9_4_Common : FeatureSet<
[FeatureGFX9,
@@ -1367,6 +1360,10 @@ def FeatureISAVersion10_1_Common : FeatureSet<
FeatureFlatSegmentOffsetBug,
FeatureNegativeUnalignedScratchOffsetBug])>;
+def FeatureISAVersion10_1_Generic : FeatureSet<
+ !listconcat(FeatureISAVersion10_1_Common.Features,
+ [FeatureRequiresCOV6])>;
+
def FeatureISAVersion10_1_0 : FeatureSet<
!listconcat(FeatureISAVersion10_1_Common.Features,
[])>;
@@ -1406,6 +1403,10 @@ def FeatureISAVersion10_3_0 : FeatureSet<
FeatureDot10Insts,
FeatureShaderCyclesRegister])>;
+def FeatureISAVersion10_3_Generic: FeatureSet<
+ !listconcat(FeatureISAVersion10_3_0.Features,
+ [FeatureRequiresCOV6])>;
+
def FeatureISAVersion11_Common : FeatureSet<
[FeatureGFX11,
FeatureLDSBankCount32,
@@ -1428,6 +1429,10 @@ def FeatureISAVersion11_Common : FeatureSet<
FeatureVcmpxPermlaneHazard,
FeatureMADIntraFwdBug]>;
+def FeatureISAVersion11_Generic: FeatureSet<
+ !listconcat(FeatureISAVersion11_Common.Features,
+ [FeatureRequiresCOV6])>;
+
def FeatureISAVersion11_0_Common : FeatureSet<
!listconcat(FeatureISAVersion11_Common.Features,
[FeatureMSAALoadDstSelBug,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURemoveIncompatibleFunctions.cpp b/llvm/lib/Target/AMDGPU/AMDGPURemoveIncompatibleFunctions.cpp
index 552380d54dfd09..9fab47b9cdf742 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURemoveIncompatibleFunctions.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURemoveIncompatibleFunctions.cpp
@@ -138,10 +138,10 @@ bool AMDGPURemoveIncompatibleFunctions::checkFunction(Function &F) {
const GCNSubtarget *ST =
static_cast<const GCNSubtarget *>(TM->getSubtargetImpl(F));
- // Check the GPU isn't generic. Generic is used for testing only
- // and we don't want this pass to interfere with it.
+ // Check the GPU isn't generic or generic-hsa. Generic is used for testing
+ // only and we don't want this pass to interfere with it.
StringRef GPUName = ST->getCPU();
- if (GPUName.empty() || GPUName.contains("generic"))
+ if (GPUName.empty() || GPUName.starts_with("generic"))
return false;
// Try to fetch the GPU's info. If we can't, it's likely an unknown processor
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index f19c5766856408..387a3cbc0a34b9 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -162,6 +162,13 @@ GCNSubtarget::initializeSubtargetDependencies(const Triple &TT,
LLVM_DEBUG(dbgs() << "sramecc setting for subtarget: "
<< TargetID.getSramEccSetting() << '\n');
+ // FIXME(?): It's very ugly to crash, it'd be better to print a diagnostic.
+ if (RequiresCOV6 &&
+ AMDGPU::getAmdhsaCodeObjectVersion() < AMDGPU::AMDHSA_COV6)
+ report_fatal_error(
+ GPU + " is only available on code object version 6 or better",
+ /*gen_crash_diag*/ false);
+
return *this;
}
diff --git a/llvm/lib/Target/AMDGPU/GCNProcessors.td b/llvm/lib/Target/AMDGPU/GCNProcessors.td
index 96af1a6aab3da7..23c3b11a6fe935 100644
--- a/llvm/lib/Target/AMDGPU/GCNProcessors.td
+++ b/llvm/lib/Target/AMDGPU/GCNProcessors.td
@@ -204,6 +204,11 @@ def : ProcessorModel<"gfx942", SIDPGFX940FullSpeedModel,
FeatureISAVersion9_4_2.Features
>;
+// [gfx900, gfx902, gfx904, gfx906, gfx909, gfx90c]
+def : ProcessorModel<"gfx9-generic", SIQuarterSpeedModel,
+ FeatureISAVersion9_Generic.Features
+>;
+
//===----------------------------------------------------------------------===//
// GCN GFX10.
//===----------------------------------------------------------------------===//
@@ -252,6 +257,16 @@ def : ProcessorModel<"gfx1036", GFX10SpeedModel,
FeatureISAVersion10_3_0.Features
>;
+// [gfx1010, gfx1011, gfx1012, gfx1013]
+def : ProcessorModel<"gfx10.1-generic", GFX10SpeedModel,
+ FeatureISAVersion10_1_Generic.Features
+>;
+
+// [gfx1030, gfx1031, gfx1032, gfx1033, gfx1034, gfx1035, gfx1036]
+def : ProcessorModel<"gfx10.3-generic", GFX10SpeedModel,
+ FeatureISAVersion10_3_Generic.Features
+>;
+
//===----------------------------------------------------------------------===//
// GCN GFX11.
//===----------------------------------------------------------------------===//
@@ -280,6 +295,11 @@ def : ProcessorModel<"gfx1151", GFX11SpeedModel,
FeatureISAVersion11_5_1.Features
>;
+// [gfx1100, gfx1101, gfx1102, gfx1103, 1150, 1151]
+def : ProcessorModel<"gfx11-generic", GFX11SpeedModel,
+ FeatureISAVersion11_Generic.Features
+>;
+
//===----------------------------------------------------------------------===//
// GCN GFX12.
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index f6f37f5170a403..863bd694182cdc 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -221,6 +221,8 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool HasVALUTransUseHazard = false;
bool HasForceStoreSC0SC1 = false;
+ bool RequiresCOV6 = false;
+
// Dummy feature to use for assembler in tablegen.
bool FeatureDisable = false;
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
index 8b3822677345d8..fe7cc2b99a9a8f 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
@@ -135,6 +135,10 @@ StringRef AMDGPUTargetStreamer::getArchNameFromElfMach(unsigned ElfMach) {
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1151: AK = GK_GFX1151; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1200: AK = GK_GFX1200; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1201: AK = GK_GFX1201; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC: AK = GK_GFX9_GENERIC; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC: AK = GK_GFX10_1_GENERIC; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC: AK = GK_GFX10_3_GENERIC; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC: AK = GK_GFX11_GENERIC; break;
case ELF::EF_AMDGPU_MACH_NONE: AK = GK_NONE; break;
default: AK = GK_NONE; break;
}
@@ -213,6 +217,10 @@ unsigned AMDGPUTargetStreamer::getElfMach(StringRef GPU) {
case GK_GFX1151: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1151;
case GK_GFX1200: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1200;
case GK_GFX1201: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1201;
+ case GK_GFX9_GENERIC: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC;
+ case GK_GFX10_1_GENERIC: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC;
+ case GK_GFX10_3_GENERIC: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC;
+ case GK_GFX11_GENERIC: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC;
case GK_NONE: return ELF::EF_AMDGPU_MACH_NONE;
}
// clang-format on
@@ -711,6 +719,24 @@ unsigned AMDGPUTargetELFStreamer::getEFlagsV6() {
unsigned Flags = getEFlagsV4();
unsigned Version = ForceGenericVersion;
+ if (!Version) {
+ switch (parseArchAMDGCN(STI.getCPU())) {
+ case AMDGPU::GK_GFX9_GENERIC:
+ Version = GenericVersion::GFX9;
+ break;
+ case AMDGPU::GK_GFX10_1_GENERIC:
+ Version = GenericVersion::GFX10_1;
+ break;
+ case AMDGPU::GK_GFX10_3_GENERIC:
+ Version = GenericVersion::GFX10_3;
+ break;
+ case AMDGPU::GK_GFX11_GENERIC:
+ Version = GenericVersion::GFX11;
+ break;
+ default:
+ break;
+ }
+ }
// Versions start at 1.
if (Version) {
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index c2fdb4e817099c..46c8202d0026c6 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -42,6 +42,17 @@ namespace AMDGPU {
struct IsaVersion;
+/// Generic target versions emitted by this version of LLVM.
+///
+/// These numbers are incremented every time a codegen breaking change occurs
+/// within a generic family.
+namespace GenericVersion {
+static constexpr unsigned GFX9 = 1;
+static constexpr unsigned GFX10_1 = 1;
+static constexpr unsigned GFX10_3 = 1;
+static constexpr unsigned GFX11 = 1;
+} // namespace GenericVersion
+
enum { AMDHSA_COV4 = 4, AMDHSA_COV5 = 5, AMDHSA_COV6 = 6 };
/// \returns True if \p STI is AMDHSA.
diff --git a/llvm/lib/TargetParser/TargetParser.cpp b/llvm/lib/TargetParser/TargetParser.cpp
index d741d2ce7942df..64736c0cfeabab 100644
--- a/llvm/lib/TargetParser/TargetParser.cpp
+++ b/llvm/lib/TargetParser/TargetParser.cpp
@@ -126,6 +126,11 @@ constexpr GPUInfo AMDGCNGPUs[] = {
{{"gfx1151"}, {"gfx1151"}, GK_GFX1151, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
{{"gfx1200"}, {"gfx1200"}, GK_GFX1200, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
{{"gfx1201"}, {"gfx1201"}, GK_GFX1201, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
+
+ {{"gfx9-generic"}, {"gfx9-generic"}, GK_GFX9_GENERIC, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK},
+ {{"gfx10.1-generic"}, {"gfx10.1-generic"}, GK_GFX10_1_GENERIC, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK|FEATURE_WGP},
+ {{"gfx10.3-generic"}, {"gfx10.3-generic"}, GK_GFX10_3_GENERIC, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
+ {{"gfx11-generic"}, {"gfx11-generic"}, GK_GFX11_GENERIC, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
// clang-format on
};
@@ -144,6 +149,22 @@ const GPUInfo *getArchEntry(AMDGPU::GPUKind AK, ArrayRef<GPUInfo> Table) {
} // namespace
+StringRef llvm::AMDGPU::getArchFamilyNameAMDGCN(GPUKind AK) {
+ switch (AK) {
+ case AMDGPU::GK_GFX9_GENERIC:
+ return "gfx9";
+ case AMDGPU::GK_GFX10_1_GENERIC:
+ case AMDGPU::GK_GFX10_3_GENERIC:
+ return "gfx10";
+ case AMDGPU::GK_GFX11_GENERIC:
+ return "gfx11";
+ default: {
+ StringRef ArchName = getArchNameAMDGCN(AK);
+ return ArchName.empty() ? "" : ArchName.drop_back(2);
+ }
+ }
+}
+
StringRef llvm::AMDGPU::getArchNameAMDGCN(GPUKind AK) {
if (const auto *Entry = getArchEntry(AK, AMDGCNGPUs))
return Entry->CanonicalName;
@@ -253,6 +274,12 @@ AMDGPU::IsaVersion AMDGPU::getIsaVersion(StringRef GPU) {
case GK_GFX1151: return {11, 5, 1};
case GK_GFX1200: return {12, 0, 0};
case GK_GFX1201: return {12, 0, 1};
+
+ // Generic targets use the earliest ISA version in their group.
+ case GK_GFX9_GENERIC: return {9, 0, 0};
+ case GK_GFX10_1_GENERIC: return {10, 1, 0};
+ case GK_GFX10_3_GENERIC: return {10, 3, 0};
+ case GK_GFX11_GENERIC: return {11, 0, 0};
default: return {0, 0, 0};
}
// clang-format on
@@ -299,6 +326,7 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
case GK_GFX1102:
case GK_GFX1101:
case GK_GFX1100:
+ case GK_GFX11_GENERIC:
Features["ci-insts"] = true;
Features["dot5-insts"] = true;
Features["dot7-insts"] = true;
@@ -324,6 +352,7 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
case GK_GFX1032:
case GK_GFX1031:
case GK_GFX1030:
+ case GK_GFX10_3_GENERIC:
Features["ci-insts"] = true;
Features["dot1-insts"] = true;
Features["dot2-insts"] = true;
@@ -354,6 +383,7 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
[[fallthrough]];
case GK_GFX1013:
case GK_GFX1010:
+ case GK_GFX10_1_GENERIC:
Features["dl-insts"] = true;
Features["ci-insts"] = true;
Features["16-bit-insts"] = true;
@@ -420,6 +450,7 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
case GK_GFX904:
case GK_GFX902:
case GK_GFX900:
+ case GK_GFX9_GENERIC:
Features["gfx9-insts"] = true;
[[fallthrough]];
case GK_GFX810:
@@ -506,6 +537,9 @@ static bool isWave32Capable(StringRef GPU, const Triple &T) {
case GK_GFX1011:
case GK_GFX1013:
case GK_GFX1010:
+ case GK_GFX11_GENERIC:
+ case GK_GFX10_3_GENERIC:
+ case GK_GFX10_1_GENERIC:
IsWave32Capable = true;
break;
default:
diff --git a/llvm/test/CodeGen/AMDGPU/directive-amdgcn-target.ll b/llvm/test/CodeGen/AMDGPU/directive-amdgcn-target.ll
index 357fcf8ef15617..e890b25442c998 100644
--- a/llvm/test/CodeGen/AMDGPU/directive-amdgcn-target.ll
+++ b/llvm/test/CodeGen/AMDGPU/directive-amdgcn-target.ll
@@ -108,6 +108,11 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX1200 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1201 < %s | FileCheck --check-prefixes=GFX1201 %s
+; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx9-generic < %s | FileCheck --check-prefixes=GFX9_GENERIC %s
+; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx10.1-generic < %s | FileCheck --check-prefixes=GFX10_1_GENERIC %s
+; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx10.3-generic < %s | FileCheck --check-prefixes=GFX10_3_GENERIC %s
+; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx11-generic < %s | FileCheck --check-prefixes=GFX11_GENERIC %s
+
; GFX600: .amdgcn_target "amdgcn-amd-amdhsa--gfx600"
; GFX601: .amdgcn_target "amdgcn-amd-amdhsa--gfx601"
; GFX602: .amdgcn_target "amdgcn-amd-amdhsa--gfx602"
@@ -196,6 +201,11 @@
; GFX1200: .amdgcn_target "amdgcn-amd-amdhsa--gfx1200"
; GFX1201: .amdgcn_target "amdgcn-amd-amdhsa--gfx1201"
+; GFX9_GENERIC: .amdgcn_target "amdgcn-amd-amdhsa--gfx9-generic"
+; GFX10_1_GENERIC: .amdgcn_target "amdgcn-amd-amdhsa--gfx10.1-generic"
+; GFX10_3_GENERIC: .amdgcn_target "amdgcn-amd-amdhsa--gfx10.3-generic"
+; GFX11_GENERIC: .amdgcn_target "amdgcn-amd-amdhsa--gfx11-generic"
+
define amdgpu_kernel void @directive_amdgcn_target() {
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/elf-header-flags-mach.ll b/llvm/test/CodeGen/AMDGPU/elf-header-flags-mach.ll
index 380439d8cd9c65..a70c3eb4666505 100644
--- a/llvm/test/CodeGen/AMDGPU/elf-header-flags-mach.ll
+++ b/llvm/test/CodeGen/AMDGPU/elf-header-flags-mach.ll
@@ -77,6 +77,11 @@
; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx1200 < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX1200 %s
; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx1201 < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX1201 %s
+; RUN: llc -filetype=obj --amdhsa-code-object-version=6 -mtriple=amdgcn -mcpu=gfx9-generic < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX9_GENERIC %s
+; RUN: llc -filetype=obj --amdhsa-code-object-version=6 -mtriple=amdgcn -mcpu=gfx10.1-generic < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX10_1_GENERIC %s
+; RUN: llc -filetype=obj --amdhsa-code-object-version=6 -mtriple=amdgcn -mcpu=gfx10.3-generic < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX10_3_GENERIC %s
+; RUN: llc -filetype=obj --amdhsa-code-object-version=6 -mtriple=amdgcn -mcpu=gfx11-generic < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX11_GENERIC %s
+
; FIXME: With the default attributes the eflags are not accurate for
; xnack and sramecc. Subsequent Target-ID patches will address this.
@@ -149,6 +154,11 @@
; GFX1151: EF_AMDGPU_MACH_AMDGCN_GFX1151 (0x4A)
; GFX1200: EF_AMDGPU_MACH_AMDGCN_GFX1200 (0x48)
; GFX1201: EF_AMDGPU_MACH_AMDGCN_GFX1201 (0x4E)
+
+; GFX9_GENERIC: EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC (0x4F)
+; GFX10_1_GENERIC: EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC (0x50)
+; GFX10_3_GENERIC: EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC (0x51)
+; GFX11_GENERIC: EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC (0x52)
; ALL: ]
define amdgpu_kernel void @elf_header() {
diff --git a/llvm/test/CodeGen/AMDGPU/gds-allocation.ll b/llvm/test/CodeGen/AMDGPU/gds-allocation.ll
index dc6fea4ba1a379..1a9334706cb927 100644
--- a/llvm/test/CodeGen/AMDGPU/gds-allocation.ll
+++ b/llvm/test/CodeGen/AMDGPU/gds-allocation.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -amdgpu-atomic-optimizer-strategy=None -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx9-generic --amdhsa-code-object-version=6 -amdgpu-atomic-optimizer-strategy=None -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
@gds0 = internal addrspace(2) global [4 x i32] undef, align 4
@lds0 = internal addrspace(3) global [4 x i32] undef, align 128
diff --git a/llvm/test/CodeGen/AMDGPU/gds-atomic.ll b/llvm/test/CodeGen/AMDGPU/gds-atomic.ll
index 982d2c1f33dd4a..3a421eaef49bcf 100644
--- a/llvm/test/CodeGen/AMDGPU/gds-atomic.ll
+++ b/llvm/test/CodeGen/AMDGPU/gds-atomic.ll
@@ -2,6 +2,7 @@
; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,FUNC %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,FUNC %s
+; RUN: llc -march=amdgcn -mcpu=gfx9-generic --amdhsa-code-object-version=6 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,FUNC %s
; FUNC-LABEL: {{^}}atomic_add_ret_gds:
; GCN-DAG: v_mov_b32_e32 v[[OFF:[0-9]+]], s
diff --git a/llvm/test/CodeGen/AMDGPU/generic-targets-require-v6.ll b/llvm/test/CodeGen/AMDGPU/generic-targets-require-v6.ll
new file mode 100644
index 00000000000000..e3f4b14bac0c16
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/generic-targets-require-v6.ll
@@ -0,0 +1,18 @@
+; RUN: not llc -march=amdgcn -mcpu=gfx9-generic --amdhsa-code-object-version=5 -o - %s 2>&1 | FileCheck --check-prefix=GFX9-V5 %s
+; RUN: not llc -march=amdgcn -mcpu=gfx10.1-generic --amdhsa-code-object-version=5 -o - %s 2>&1 | FileCheck --check-prefix=GFX101-V5 %s
+; RUN: not llc -march=amdgcn -mcpu=gfx10.3-generic --amdhsa-code-object-version=5 -o - %s 2>&1 | FileCheck --check-prefix=GFX103-V5 %s
+; RUN: not llc -march=amdgcn -mcpu=gfx11-generic --amdhsa-code-object-version=5 -o - %s 2>&1 | FileCheck --check-prefix=GFX11-V5 %s
+
+; RUN: llc -march=amdgcn -mcpu=gfx9-generic --amdhsa-code-object-version=6 -o - %s
+; RUN: llc -march=amdgcn -mcpu=gfx10.1-generic --amdhsa-code-object-version=6 -o - %s
+; RUN: llc -march=amdgcn -mcpu=gfx10.3-generic --amdhsa-code-object-version=6 -o - %s
+; RUN: llc -march=amdgcn -mcpu=gfx11-generic --amdhsa-code-object-version=6 -o - %s
+
+; GFX9-V5: gfx9-generic is only available on code object version 6 or better
+; GFX101-V5: gfx10.1-generic is only available on code object version 6 or better
+; GFX103-V5: gfx10.3-generic is only available on code object version 6 or better
+; GFX11-V5: gfx11-generic is only available on code object version 6 or better
+
+define void @foo() {
+ ret void
+}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.d16.dim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.d16.dim.ll
index 094a1fd88e583f..3da88d57c5d66f 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.d16.dim.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.d16.dim.ll
@@ -1,8 +1,11 @@
; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck -check-prefixes=GCN,UNPACKED %s
; RUN: llc < %s -march=amdgcn -mcpu=gfx810 -verify-machineinstrs | FileCheck --check-prefix=GCN %s
; RUN: llc < %s -march=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck -check-prefixes=GCN,GFX9 %s
+; RUN: llc < %s -march=amdgcn -mcpu=gfx9-generic --amdhsa-code-object-version=6 -verify-machineinstrs | FileCheck -check-prefixes=GCN,GFX9 %s
; RUN: llc < %s -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck -check-prefixes=GCN,GFX10 %s
+; RUN: llc < %s -march=amdgcn -mcpu=gfx10.1-generic --amdhsa-code-object-version=6 -verify-machineinstrs | FileCheck -check-prefixes=GCN,GFX10 %s
; RUN: llc < %s -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs | FileCheck -check-prefixes=GCN,GFX10 %s
+; RUN: llc < %s -march=amdgcn -mcpu=gfx11-generic --amdhsa-code-object-version=6 -verify-machineinstrs | FileCheck -check-prefixes=GCN,GFX10 %s
; RUN: llc < %s -march=amdgcn -mcpu=gfx1200 -verify-machineinstrs | FileCheck -check-prefixes=GCN,GFX12 %s
; GCN-LABEL: {{^}}image_gather4_b_2d_v4f16:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll
index 86bf6f03bcfd1c..a236c433e3a4dc 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll
@@ -1,8 +1,11 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefixes=VERDE %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX6789 %s
+; RUN: llc -march=amdgcn -mcpu=gfx9-generic --amdhsa-code-object-version=6 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX6789 %s
; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s
+; RUN: llc -march=amdgcn -mcpu=gfx10.1-generic --amdhsa-code-object-version=6 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s
; RUN: llc -march=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s
+; RUN: llc -march=amdgcn -mcpu=gfx11-generic --amdhsa-code-object-version=6 -amdgpu-enable-delay-alu=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s
; RUN: llc -march=amdgcn -mcpu=gfx1200 -amdgpu-enable-delay-alu=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12 %s
define amdgpu_ps <4 x float> @sample_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
diff --git a/llvm/test/CodeGen/AMDGPU/unsupported-image-sample.ll b/llvm/test/CodeGen/AMDGPU/unsupported-image-sample.ll
index 9972f490fed6f9..9fe188a9e052d7 100644
--- a/llvm/test/CodeGen/AMDGPU/unsupported-image-sample.ll
+++ b/llvm/test/CodeGen/AMDGPU/unsupported-image-sample.ll
@@ -1,15 +1,13 @@
-; RUN: llc -O0 -march=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX906 %s
-; RUN: llc -O0 -march=amdgcn -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX908 %s
+; RUN: llc -O0 -march=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9 %s
+; RUN: llc -O0 -march=amdgcn -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9 %s
+; RUN: llc -O0 -march=amdgcn -mcpu=gfx9-generic --amdhsa-code-object-version=6 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9 %s
; RUN: not --crash llc -O0 -march=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefixes=GFX90A %s
; RUN: not --crash llc -O0 -march=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefixes=GFX940 %s
; RUN: llc -O0 -march=amdgcn -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX1030 %s
; RUN: llc -O0 -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX1100 %s
-; GFX906-LABEL: image_sample_test:
-; GFX906: image_sample_lz
-
-; GFX908-LABEL: image_sample_test:
-; GFX908: image_sample_lz
+; GFX9-LABEL: image_sample_test:
+; GFX9: image_sample_lz
; GFX90A: LLVM ERROR: requested image instruction is not supported on this GPU
diff --git a/llvm/test/Object/AMDGPU/elf-header-flags-mach.yaml b/llvm/test/Object/AMDGPU/elf-header-flags-mach.yaml
index 7fb33ca662b190..806d9ad8aad229 100644
--- a/llvm/test/Object/AMDGPU/elf-header-flags-mach.yaml
+++ b/llvm/test/Object/AMDGPU/elf-header-flags-mach.yaml
@@ -238,6 +238,23 @@
# RUN: llvm-readobj -S --file-headers %t.o.AMDGCN_GFX1201 | FileCheck --check-prefixes=ELF-AMDGCN-ALL,ELF-AMDGCN-GFX1201 %s
# RUN: obj2yaml %t.o.AMDGCN_GFX1201 | FileCheck --check-prefixes=YAML-AMDGCN-ALL,YAML-AMDGCN-GFX1201 %s
+# RUN: sed -e 's/<BITS>/64/' -e 's/<MACH>/AMDGCN_GFX9_GENERIC/' %s | yaml2obj -o %t.o.AMDGCN_GFX9_GENERIC
+# RUN: llvm-readobj -S --file-headers %t.o.AMDGCN_GFX9_GENERIC | FileCheck --check-prefixes=ELF-AMDGCN-ALL,ELF-AMDGCN-GFX9_GENERIC %s
+# RUN: obj2yaml %t.o.AMDGCN_GFX9_GENERIC | FileCheck --check-prefixes=YAML-AMDGCN-ALL,YAML-AMDGCN-GFX9_GENERIC %s
+
+# RUN: sed -e 's/<BITS>/64/' -e 's/<MACH>/AMDGCN_GFX10_1_GENERIC/' %s | yaml2obj -o %t.o.AMDGCN_GFX10_1_GENERIC
+# RUN: llvm-readobj -S --file-headers %t.o.AMDGCN_GFX10_1_GENERIC | FileCheck --check-prefixes=ELF-AMDGCN-ALL,ELF-AMDGCN-GFX10_1_GENERIC %s
+# RUN: obj2yaml %t.o.AMDGCN_GFX10_1_GENERIC | FileCheck --check-prefixes=YAML-AMDGCN-ALL,YAML-AMDGCN-GFX10_1_GENERIC %s
+
+# RUN: sed -e 's/<BITS>/64/' -e 's/<MACH>/AMDGCN_GFX10_3_GENERIC/' %s | yaml2obj -o %t.o.AMDGCN_GFX10_3_GENERIC
+# RUN: llvm-readobj -S --file-headers %t.o.AMDGCN_GFX10_3_GENERIC | FileCheck --check-prefixes=ELF-AMDGCN-ALL,ELF-AMDGCN-GFX10_3_GENERIC %s
+# RUN: obj2yaml %t.o.AMDGCN_GFX10_3_GENERIC | FileCheck --check-prefixes=YAML-AMDGCN-ALL,YAML-AMDGCN-GFX10_3_GENERIC %s
+
+# RUN: sed -e 's/<BITS>/64/' -e 's/<MACH>/AMDGCN_GFX11_GENERIC/' %s | yaml2obj -o %t.o.AMDGCN_GFX11_GENERIC
+# RUN: llvm-readobj -S --file-headers %t.o.AMDGCN_GFX11_GENERIC | FileCheck --check-prefixes=ELF-AMDGCN-ALL,ELF-AMDGCN-GFX11_GENERIC %s
+# RUN: obj2yaml %t.o.AMDGCN_GFX11_GENERIC | FileCheck --check-prefixes=YAML-AMDGCN-ALL,YAML-AMDGCN-GFX11_GENERIC %s
+
+
# ELF-R600-ALL: Format: elf32-amdgpu
# ELF-R600-ALL: Arch: r600
# ELF-R600-ALL: AddressSize: 32bit
@@ -435,6 +452,18 @@
# ELF-AMDGCN-GFX1201: EF_AMDGPU_MACH_AMDGCN_GFX1201 (0x4E)
# YAML-AMDGCN-GFX1201: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX1201 ]
+# ELF-AMDGCN-GFX9_GENERIC: EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC (0xC0)
+# YAML-AMDGCN-GFX9_GENERIC: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC ]
+
+# ELF-AMDGCN-GFX10_1_GENERIC: EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC (0xC1)
+# YAML-AMDGCN-GFX10_1_GENERIC: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC ]
+
+# ELF-AMDGCN-GFX10_3_GENERIC: EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC (0xC2)
+# YAML-AMDGCN-GFX10_3_GENERIC: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC ]
+
+# ELF-AMDGCN-GFX11_GENERIC: EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC (0xC3)
+# YAML-AMDGCN-GFX11_GENERIC: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC ]
+
# ELF-AMDGCN-ALL: ]
diff --git a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/subtarget.ll b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/subtarget.ll
index e296d7fb1fc8f2..ca136a6a0d5ba2 100644
--- a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/subtarget.ll
+++ b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/subtarget.ll
@@ -18,6 +18,11 @@ define amdgpu_kernel void @test_kernel() {
; ----------------------------------GFX11--------------------------------------
;
+; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=6 -mcpu=gfx11-generic -filetype=obj -O0 -o %t.o %s
+; RUN: llvm-objdump -D --arch-name=amdgcn -mllvm --amdhsa-code-object-version=6 --mcpu=gfx11-generic %t.o > %t-specify.txt
+; RUN: llvm-objdump -D -mllvm --amdhsa-code-object-version=6 %t.o > %t-detect.txt
+; RUN: diff %t-specify.txt %t-detect.txt
+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1151 -filetype=obj -O0 -o %t.o %s
; RUN: llvm-objdump -D --arch-name=amdgcn --mcpu=gfx1151 %t.o > %t-specify.txt
; RUN: llvm-objdump -D %t.o > %t-detect.txt
@@ -49,6 +54,11 @@ define amdgpu_kernel void @test_kernel() {
; RUN: diff %t-specify.txt %t-detect.txt
; ----------------------------------GFX10--------------------------------------
+; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=6 -mcpu=gfx10.3-generic -filetype=obj -O0 -o %t.o %s
+; RUN: llvm-objdump -D --arch-name=amdgcn -mllvm --amdhsa-code-object-version=6 --mcpu=gfx10.3-generic %t.o > %t-specify.txt
+; RUN: llvm-objdump -D -mllvm --amdhsa-code-object-version=6 %t.o > %t-detect.txt
+; RUN: diff %t-specify.txt %t-detect.txt
+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1036 -filetype=obj -O0 -o %t.o %s
; RUN: llvm-objdump -D --arch-name=amdgcn --mcpu=gfx1036 %t.o > %t-specify.txt
; RUN: llvm-objdump -D %t.o > %t-detect.txt
@@ -84,6 +94,11 @@ define amdgpu_kernel void @test_kernel() {
; RUN: llvm-objdump -D %t.o > %t-detect.txt
; RUN: diff %t-specify.txt %t-detect.txt
+; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=6 -mcpu=gfx10.1-generic -filetype=obj -O0 -o %t.o %s
+; RUN: llvm-objdump -D --arch-name=amdgcn -mllvm --amdhsa-code-object-version=6 --mcpu=gfx10.1-generic %t.o > %t-specify.txt
+; RUN: llvm-objdump -D -mllvm --amdhsa-code-object-version=6 %t.o > %t-detect.txt
+; RUN: diff %t-specify.txt %t-detect.txt
+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1013 -filetype=obj -O0 -o %t.o %s
; RUN: llvm-objdump -D --arch-name=amdgcn --mcpu=gfx1013 %t.o > %t-specify.txt
; RUN: llvm-objdump -D %t.o > %t-detect.txt
@@ -107,6 +122,11 @@ define amdgpu_kernel void @test_kernel() {
; ----------------------------------GFX9---------------------------------------
;
+; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=6 -mcpu=gfx9-generic -filetype=obj -O0 -o %t.o %s
+; RUN: llvm-objdump -D --arch-name=amdgcn -mllvm --amdhsa-code-object-version=6 --mcpu=gfx9-generic %t.o > %t-specify.txt
+; RUN: llvm-objdump -D -mllvm --amdhsa-code-object-version=6 %t.o > %t-detect.txt
+; RUN: diff %t-specify.txt %t-detect.txt
+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -filetype=obj -O0 -o %t.o %s
; RUN: llvm-objdump -D --arch-name=amdgcn --mcpu=gfx942 %t.o > %t-specify.txt
; RUN: llvm-objdump -D %t.o > %t-detect.txt
diff --git a/llvm/test/tools/llvm-readobj/ELF/AMDGPU/elf-headers.test b/llvm/test/tools/llvm-readobj/ELF/AMDGPU/elf-headers.test
index e2266d81d1a597..e6b9375c898a93 100644
--- a/llvm/test/tools/llvm-readobj/ELF/AMDGPU/elf-headers.test
+++ b/llvm/test/tools/llvm-readobj/ELF/AMDGPU/elf-headers.test
@@ -253,6 +253,9 @@
# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1013
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1013 -DFLAG_VALUE=0x42
+# RUN: yaml2obj %s -o %t -DABI_VERSION=4 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=4 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC -DFLAG_VALUE=0xC1
+
# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1013
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1013 -DFLAG_VALUE=0x42
@@ -322,6 +325,9 @@
# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1036
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1036 -DFLAG_VALUE=0x45
+# RUN: yaml2obj %s -o %t -DABI_VERSION=4 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=4 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC -DFLAG_VALUE=0xC2
+
# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME="EF_AMDGPU_MACH_AMDGCN_GFX90A, EF_AMDGPU_FEATURE_XNACK_V3"
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,DOUBLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_0="EF_AMDGPU_FEATURE_XNACK_V3 (0x100)" -DFLAG_1="EF_AMDGPU_MACH_AMDGCN_GFX90A (0x3F)" -DFLAG_VALUE=0x13F
@@ -355,6 +361,9 @@
# RUN: yaml2obj %s -o %t -DABI_VERSION=16 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX90A
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,UNKNOWN-ABI-VERSION --match-full-lines -DABI_VERSION=16 -DFILE=%t -DFLAG_VALUE=0x3F
+# RUN: yaml2obj %s -o %t -DABI_VERSION=4 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=4 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC -DFLAG_VALUE=0xC0
+
# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1100
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1100 -DFLAG_VALUE=0x41
@@ -391,6 +400,9 @@
# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1103
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1103 -DFLAG_VALUE=0x44
+# RUN: yaml2obj %s -o %t -DABI_VERSION=4 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=4 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC -DFLAG_VALUE=0xC3
+
# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1150
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1150 -DFLAG_VALUE=0x43
diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp
index d1382e829bb3aa..4a4dcf07ec8a36 100644
--- a/llvm/tools/llvm-readobj/ELFDumper.cpp
+++ b/llvm/tools/llvm-readobj/ELFDumper.cpp
@@ -1561,68 +1561,72 @@ const EnumEntry<unsigned> ElfHeaderMipsFlags[] = {
};
// clang-format off
-#define AMDGPU_MACH_ENUM_ENTS \
- ENUM_ENT(EF_AMDGPU_MACH_NONE, "none"), \
- ENUM_ENT(EF_AMDGPU_MACH_R600_R600, "r600"), \
- ENUM_ENT(EF_AMDGPU_MACH_R600_R630, "r630"), \
- ENUM_ENT(EF_AMDGPU_MACH_R600_RS880, "rs880"), \
- ENUM_ENT(EF_AMDGPU_MACH_R600_RV670, "rv670"), \
- ENUM_ENT(EF_AMDGPU_MACH_R600_RV710, "rv710"), \
- ENUM_ENT(EF_AMDGPU_MACH_R600_RV730, "rv730"), \
- ENUM_ENT(EF_AMDGPU_MACH_R600_RV770, "rv770"), \
- ENUM_ENT(EF_AMDGPU_MACH_R600_CEDAR, "cedar"), \
- ENUM_ENT(EF_AMDGPU_MACH_R600_CYPRESS, "cypress"), \
- ENUM_ENT(EF_AMDGPU_MACH_R600_JUNIPER, "juniper"), \
- ENUM_ENT(EF_AMDGPU_MACH_R600_REDWOOD, "redwood"), \
- ENUM_ENT(EF_AMDGPU_MACH_R600_SUMO, "sumo"), \
- ENUM_ENT(EF_AMDGPU_MACH_R600_BARTS, "barts"), \
- ENUM_ENT(EF_AMDGPU_MACH_R600_CAICOS, "caicos"), \
- ENUM_ENT(EF_AMDGPU_MACH_R600_CAYMAN, "cayman"), \
- ENUM_ENT(EF_AMDGPU_MACH_R600_TURKS, "turks"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX600, "gfx600"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX601, "gfx601"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX602, "gfx602"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX700, "gfx700"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX701, "gfx701"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX702, "gfx702"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX703, "gfx703"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX704, "gfx704"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX705, "gfx705"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX801, "gfx801"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX802, "gfx802"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX803, "gfx803"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX805, "gfx805"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX810, "gfx810"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX900, "gfx900"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX902, "gfx902"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX904, "gfx904"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX906, "gfx906"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX908, "gfx908"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX909, "gfx909"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX90A, "gfx90a"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX90C, "gfx90c"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX940, "gfx940"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX941, "gfx941"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX942, "gfx942"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1010, "gfx1010"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1011, "gfx1011"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1012, "gfx1012"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1013, "gfx1013"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1030, "gfx1030"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1031, "gfx1031"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1032, "gfx1032"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1033, "gfx1033"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1034, "gfx1034"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1035, "gfx1035"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1036, "gfx1036"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1100, "gfx1100"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1101, "gfx1101"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1102, "gfx1102"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1103, "gfx1103"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1150, "gfx1150"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1151, "gfx1151"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1200, "gfx1200"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1201, "gfx1201")
+#define AMDGPU_MACH_ENUM_ENTS \
+ ENUM_ENT(EF_AMDGPU_MACH_NONE, "none"), \
+ ENUM_ENT(EF_AMDGPU_MACH_R600_R600, "r600"), \
+ ENUM_ENT(EF_AMDGPU_MACH_R600_R630, "r630"), \
+ ENUM_ENT(EF_AMDGPU_MACH_R600_RS880, "rs880"), \
+ ENUM_ENT(EF_AMDGPU_MACH_R600_RV670, "rv670"), \
+ ENUM_ENT(EF_AMDGPU_MACH_R600_RV710, "rv710"), \
+ ENUM_ENT(EF_AMDGPU_MACH_R600_RV730, "rv730"), \
+ ENUM_ENT(EF_AMDGPU_MACH_R600_RV770, "rv770"), \
+ ENUM_ENT(EF_AMDGPU_MACH_R600_CEDAR, "cedar"), \
+ ENUM_ENT(EF_AMDGPU_MACH_R600_CYPRESS, "cypress"), \
+ ENUM_ENT(EF_AMDGPU_MACH_R600_JUNIPER, "juniper"), \
+ ENUM_ENT(EF_AMDGPU_MACH_R600_REDWOOD, "redwood"), \
+ ENUM_ENT(EF_AMDGPU_MACH_R600_SUMO, "sumo"), \
+ ENUM_ENT(EF_AMDGPU_MACH_R600_BARTS, "barts"), \
+ ENUM_ENT(EF_AMDGPU_MACH_R600_CAICOS, "caicos"), \
+ ENUM_ENT(EF_AMDGPU_MACH_R600_CAYMAN, "cayman"), \
+ ENUM_ENT(EF_AMDGPU_MACH_R600_TURKS, "turks"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX600, "gfx600"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX601, "gfx601"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX602, "gfx602"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX700, "gfx700"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX701, "gfx701"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX702, "gfx702"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX703, "gfx703"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX704, "gfx704"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX705, "gfx705"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX801, "gfx801"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX802, "gfx802"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX803, "gfx803"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX805, "gfx805"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX810, "gfx810"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX900, "gfx900"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX902, "gfx902"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX904, "gfx904"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX906, "gfx906"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX908, "gfx908"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX909, "gfx909"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX90A, "gfx90a"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX90C, "gfx90c"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX940, "gfx940"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX941, "gfx941"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX942, "gfx942"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1010, "gfx1010"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1011, "gfx1011"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1012, "gfx1012"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1013, "gfx1013"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1030, "gfx1030"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1031, "gfx1031"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1032, "gfx1032"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1033, "gfx1033"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1034, "gfx1034"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1035, "gfx1035"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1036, "gfx1036"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1100, "gfx1100"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1101, "gfx1101"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1102, "gfx1102"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1103, "gfx1103"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1150, "gfx1150"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1151, "gfx1151"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1200, "gfx1200"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1201, "gfx1201"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC, "gfx9-generic"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC, "gfx10.1-generic"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC, "gfx10.3-generic"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC, "gfx11-generic")
// clang-format on
const EnumEntry<unsigned> ElfHeaderAMDGPUFlagsABIVersion3[] = {
>From 11e1956baaa0d7fa0441991a2de83d87f9edd1e0 Mon Sep 17 00:00:00 2001
From: pvanhout <pierre.vanhoutryve at amd.com>
Date: Wed, 10 Jan 2024 16:27:53 +0100
Subject: [PATCH 3/3] Fix GFX11-generic features
---
llvm/lib/Target/AMDGPU/AMDGPU.td | 8 +++++++-
1 file changed, 7 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 35369f1c3a98e1..12c3d771e6eb24 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -1429,9 +1429,15 @@ def FeatureISAVersion11_Common : FeatureSet<
FeatureVcmpxPermlaneHazard,
FeatureMADIntraFwdBug]>;
+// There are few workarounds that need to be
+// added to all targets. This pessimizes codegen
+// a bit on the generic GFX11 target.
def FeatureISAVersion11_Generic: FeatureSet<
!listconcat(FeatureISAVersion11_Common.Features,
- [FeatureRequiresCOV6])>;
+ [FeatureMSAALoadDstSelBug,
+ FeatureVALUTransUseHazard,
+ FeatureUserSGPRInit16Bug,
+ FeatureRequiresCOV6])>;
def FeatureISAVersion11_0_Common : FeatureSet<
!listconcat(FeatureISAVersion11_Common.Features,
More information about the cfe-commits
mailing list