[clang] [llvm] [lld] [flang] [AMDGPU] Introduce GFX9/10.1/10.3/11 Generic Targets (PR #76955)
Pierre van Houtryve via cfe-commits
cfe-commits at lists.llvm.org
Thu Jan 18 04:28:47 PST 2024
https://github.com/Pierre-vh updated https://github.com/llvm/llvm-project/pull/76955
>From 47d4f3ed4e27f2ce2b3b33c9b0ca4838b3011f22 Mon Sep 17 00:00:00 2001
From: pvanhout <pierre.vanhoutryve at amd.com>
Date: Thu, 4 Jan 2024 14:12:00 +0100
Subject: [PATCH 1/3] [AMDGPU] Introduce Code Object V6
Introduce Code Object V6 in Clang, LLD, Flang and LLVM.
This is the same as V5 except a new "generic version" flag can be present in EFLAGS. This is related to new generic targets that'll be added in a follow-up patch. It's also likely V6 will have new changes (possibly new metadata entries) added later.
Docs change are not included, I'm planning to do them in a follow-up patch all at once (when generic targets land too).
---
clang/include/clang/Driver/Options.td | 4 +-
clang/lib/CodeGen/CGBuiltin.cpp | 6 +-
clang/lib/Driver/ToolChains/CommonArgs.cpp | 2 +-
.../amdgpu-code-object-version-linking.cu | 37 +++
.../CodeGenCUDA/amdgpu-code-object-version.cu | 4 +
.../test/CodeGenCUDA/amdgpu-workgroup-size.cu | 4 +
.../amdgcn/bitcode/oclc_abi_version_600.bc | 0
clang/test/Driver/hip-code-object-version.hip | 12 +
clang/test/Driver/hip-device-libs.hip | 18 +-
flang/lib/Frontend/CompilerInvocation.cpp | 2 +
flang/test/Lower/AMD/code-object-version.f90 | 3 +-
lld/ELF/Arch/AMDGPU.cpp | 21 ++
lld/test/ELF/amdgpu-tid.s | 16 ++
llvm/include/llvm/BinaryFormat/ELF.h | 9 +-
llvm/include/llvm/Support/AMDGPUMetadata.h | 5 +
llvm/include/llvm/Support/ScopedPrinter.h | 4 +-
llvm/include/llvm/Target/TargetOptions.h | 1 +
llvm/lib/ObjectYAML/ELFYAML.cpp | 9 +
llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp | 3 +
.../AMDGPU/AMDGPUHSAMetadataStreamer.cpp | 10 +
.../Target/AMDGPU/AMDGPUHSAMetadataStreamer.h | 11 +-
.../MCTargetDesc/AMDGPUTargetStreamer.cpp | 27 +++
.../MCTargetDesc/AMDGPUTargetStreamer.h | 1 +
.../Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 13 +
llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h | 5 +-
...licit-kernarg-backend-usage-global-isel.ll | 2 +
.../AMDGPU/call-graph-register-usage.ll | 1 +
.../AMDGPU/codegen-internal-only-func.ll | 2 +
llvm/test/CodeGen/AMDGPU/elf-header-osabi.ll | 4 +
.../enable-scratch-only-dynamic-stack.ll | 1 +
.../AMDGPU/implicit-kernarg-backend-usage.ll | 2 +
.../AMDGPU/implicitarg-offset-attributes.ll | 46 ++++
.../AMDGPU/llvm.amdgcn.implicitarg.ptr.ll | 1 +
llvm/test/CodeGen/AMDGPU/non-entry-alloca.ll | 1 +
llvm/test/CodeGen/AMDGPU/recursion.ll | 1 +
.../AMDGPU/resource-usage-dead-function.ll | 1 +
.../AMDGPU/tid-mul-func-xnack-all-any.ll | 6 +
.../tid-mul-func-xnack-all-not-supported.ll | 6 +
.../AMDGPU/tid-mul-func-xnack-all-off.ll | 6 +
.../AMDGPU/tid-mul-func-xnack-all-on.ll | 6 +
.../AMDGPU/tid-mul-func-xnack-any-off-1.ll | 6 +
.../AMDGPU/tid-mul-func-xnack-any-off-2.ll | 6 +
.../AMDGPU/tid-mul-func-xnack-any-on-1.ll | 6 +
.../AMDGPU/tid-mul-func-xnack-any-on-2.ll | 6 +
.../tid-one-func-xnack-not-supported.ll | 6 +
.../CodeGen/AMDGPU/tid-one-func-xnack-off.ll | 6 +
.../CodeGen/AMDGPU/tid-one-func-xnack-on.ll | 6 +
.../MC/AMDGPU/hsa-v5-uses-dynamic-stack.s | 5 +
.../elf-headers.test} | 0
.../ELF/AMDGPU/generic_versions.s | 16 ++
.../ELF/AMDGPU/generic_versions.test | 26 ++
llvm/tools/llvm-readobj/ELFDumper.cpp | 224 ++++++++----------
52 files changed, 491 insertions(+), 135 deletions(-)
create mode 100644 clang/test/Driver/Inputs/rocm/amdgcn/bitcode/oclc_abi_version_600.bc
rename llvm/test/tools/llvm-readobj/ELF/{amdgpu-elf-headers.test => AMDGPU/elf-headers.test} (100%)
create mode 100644 llvm/test/tools/llvm-readobj/ELF/AMDGPU/generic_versions.s
create mode 100644 llvm/test/tools/llvm-readobj/ELF/AMDGPU/generic_versions.test
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index e4fdad8265c863..a6b96ea027056e 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -4763,9 +4763,9 @@ defm amdgpu_ieee : BoolOption<"m", "amdgpu-ieee",
def mcode_object_version_EQ : Joined<["-"], "mcode-object-version=">, Group<m_Group>,
HelpText<"Specify code object ABI version. Defaults to 4. (AMDGPU only)">,
Visibility<[ClangOption, FlangOption, CC1Option, FC1Option]>,
- Values<"none,4,5">,
+ Values<"none,4,5,6">,
NormalizedValuesScope<"llvm::CodeObjectVersionKind">,
- NormalizedValues<["COV_None", "COV_4", "COV_5"]>,
+ NormalizedValues<["COV_None", "COV_4", "COV_5", "COV_6"]>,
MarshallingInfoEnum<TargetOpts<"CodeObjectVersion">, "COV_4">;
defm cumode : SimpleMFlag<"cumode",
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index f4246c5e8f68e8..16dbb4bd835df5 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -17721,9 +17721,9 @@ Value *EmitAMDGPUImplicitArgPtr(CodeGenFunction &CGF) {
// \p Index is 0, 1, and 2 for x, y, and z dimension, respectively.
/// Emit code based on Code Object ABI version.
/// COV_4 : Emit code to use dispatch ptr
-/// COV_5 : Emit code to use implicitarg ptr
+/// COV_5+ : Emit code to use implicitarg ptr
/// COV_NONE : Emit code to load a global variable "__oclc_ABI_version"
-/// and use its value for COV_4 or COV_5 approach. It is used for
+/// and use its value for COV_4 or COV_5+ approach. It is used for
/// compiling device libraries in an ABI-agnostic way.
///
/// Note: "__oclc_ABI_version" is supposed to be emitted and intialized by
@@ -17766,7 +17766,7 @@ Value *EmitAMDGPUWorkGroupSize(CodeGenFunction &CGF, unsigned Index) {
Address(Result, CGF.Int16Ty, CharUnits::fromQuantity(2)));
} else {
Value *GEP = nullptr;
- if (Cov == CodeObjectVersionKind::COV_5) {
+ if (Cov >= CodeObjectVersionKind::COV_5) {
// Indexing the implicit kernarg segment.
GEP = CGF.Builder.CreateConstGEP1_32(
CGF.Int8Ty, EmitAMDGPUImplicitArgPtr(CGF), 12 + Index * 2);
diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp
index 385f66f3782bc1..bb2a9501c6b5d7 100644
--- a/clang/lib/Driver/ToolChains/CommonArgs.cpp
+++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp
@@ -2588,7 +2588,7 @@ getAMDGPUCodeObjectArgument(const Driver &D, const llvm::opt::ArgList &Args) {
void tools::checkAMDGPUCodeObjectVersion(const Driver &D,
const llvm::opt::ArgList &Args) {
const unsigned MinCodeObjVer = 4;
- const unsigned MaxCodeObjVer = 5;
+ const unsigned MaxCodeObjVer = 6;
if (auto *CodeObjArg = getAMDGPUCodeObjectArgument(D, Args)) {
if (CodeObjArg->getOption().getID() ==
diff --git a/clang/test/CodeGenCUDA/amdgpu-code-object-version-linking.cu b/clang/test/CodeGenCUDA/amdgpu-code-object-version-linking.cu
index 663687ae227f23..d33acdf7eb8bed 100644
--- a/clang/test/CodeGenCUDA/amdgpu-code-object-version-linking.cu
+++ b/clang/test/CodeGenCUDA/amdgpu-code-object-version-linking.cu
@@ -4,6 +4,9 @@
// RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm-bc \
// RUN: -mcode-object-version=5 -DUSER -x hip -o %t_5.bc %s
+// RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm-bc \
+// RUN: -mcode-object-version=6 -DUSER -x hip -o %t_6.bc %s
+
// RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm-bc \
// RUN: -mcode-object-version=none -DDEVICELIB -x hip -o %t_0.bc %s
@@ -15,6 +18,10 @@
// RUN: %t_5.bc -mlink-builtin-bitcode %t_0.bc -o - |\
// RUN: FileCheck -check-prefix=LINKED5 %s
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -emit-llvm -O3 \
+// RUN: %t_6.bc -mlink-builtin-bitcode %t_0.bc -o - |\
+// RUN: FileCheck -check-prefix=LINKED6 %s
+
#include "Inputs/cuda.h"
// LINKED4: @__oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 400
@@ -77,6 +84,36 @@
// LINKED5: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef
// LINKED5: "amdgpu_code_object_version", i32 500
+// LINKED6: __oclc_ABI_version = weak_odr hidden local_unnamed_addr addrspace(4) constant i32 600
+// LINKED6-LABEL: bar
+// LINKED6-NOT: load i32, ptr addrspacecast (ptr addrspace(4) @__oclc_ABI_version to ptr), align {{.*}}
+// LINKED6-NOT: icmp sge i32 %{{.*}}, 500
+// LINKED6: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
+// LINKED6: [[GEP_5_X:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 12
+// LINKED6: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
+// LINKED6: [[GEP_4_X:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 4
+// LINKED6: select i1 true, ptr addrspace(4) [[GEP_5_X]], ptr addrspace(4) [[GEP_4_X]]
+// LINKED6: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef
+
+// LINKED6-NOT: load i32, ptr addrspacecast (ptr addrspace(4) @__oclc_ABI_version to ptr), align {{.*}}
+// LINKED6-NOT: icmp sge i32 %{{.*}}, 500
+// LINKED6: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
+// LINKED6: [[GEP_5_Y:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 14
+// LINKED6: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
+// LINKED6: [[GEP_4_Y:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 6
+// LINKED6: select i1 true, ptr addrspace(4) [[GEP_5_Y]], ptr addrspace(4) [[GEP_4_Y]]
+// LINKED6: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef
+
+// LINKED6-NOT: load i32, ptr addrspacecast (ptr addrspace(4) @__oclc_ABI_version to ptr), align {{.*}}
+// LINKED6-NOT: icmp sge i32 %{{.*}}, 500
+// LINKED6: call align 8 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
+// LINKED6: [[GEP_5_Z:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 16
+// LINKED6: call align 4 dereferenceable(64) ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
+// LINKED6: [[GEP_4_Z:%.*]] = getelementptr i8, ptr addrspace(4) %{{.*}}, i32 8
+// LINKED6: select i1 true, ptr addrspace(4) [[GEP_5_Z]], ptr addrspace(4) [[GEP_4_Z]]
+// LINKED6: load i16, ptr addrspace(4) %{{.*}}, align 2, !range [[$WS_RANGE:![0-9]*]], !invariant.load{{.*}}, !noundef
+// LINKED6: "amdgpu_code_object_version", i32 600
+
#ifdef DEVICELIB
__device__ void bar(int *x, int *y, int *z)
{
diff --git a/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu b/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu
index ff5deaf9ab850d..59636e622731b8 100644
--- a/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu
+++ b/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu
@@ -9,6 +9,9 @@
// RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm \
// RUN: -mcode-object-version=5 -o - %s | FileCheck -check-prefix=V5 %s
+// RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm \
+// RUN: -mcode-object-version=6 -o - %s | FileCheck -check-prefix=V6 %s
+
// RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm \
// RUN: -mcode-object-version=none -o - %s | FileCheck %s -check-prefix=NONE
@@ -17,5 +20,6 @@
// V4: !{{.*}} = !{i32 1, !"amdgpu_code_object_version", i32 400}
// V5: !{{.*}} = !{i32 1, !"amdgpu_code_object_version", i32 500}
+// V6: !{{.*}} = !{i32 1, !"amdgpu_code_object_version", i32 600}
// NONE-NOT: !{{.*}} = !{i32 1, !"amdgpu_code_object_version",
// INV: error: invalid value '4.1' in '-mcode-object-version=4.1'
diff --git a/clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu b/clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu
index 282e0a49b9aa10..7f56fe91704870 100644
--- a/clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu
+++ b/clang/test/CodeGenCUDA/amdgpu-workgroup-size.cu
@@ -7,6 +7,10 @@
// RUN: -fcuda-is-device -mcode-object-version=5 -emit-llvm -o - -x hip %s \
// RUN: | FileCheck -check-prefix=COV5 %s
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa \
+// RUN: -fcuda-is-device -mcode-object-version=6 -emit-llvm -o - -x hip %s \
+// RUN: | FileCheck -check-prefix=COV5 %s
+
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa \
// RUN: -fcuda-is-device -mcode-object-version=none -emit-llvm -o - -x hip %s \
// RUN: | FileCheck -check-prefix=COVNONE %s
diff --git a/clang/test/Driver/Inputs/rocm/amdgcn/bitcode/oclc_abi_version_600.bc b/clang/test/Driver/Inputs/rocm/amdgcn/bitcode/oclc_abi_version_600.bc
new file mode 100644
index 00000000000000..e69de29bb2d1d6
diff --git a/clang/test/Driver/hip-code-object-version.hip b/clang/test/Driver/hip-code-object-version.hip
index af5f9a3da21dfd..d63130115588e0 100644
--- a/clang/test/Driver/hip-code-object-version.hip
+++ b/clang/test/Driver/hip-code-object-version.hip
@@ -23,6 +23,18 @@
// V5: "-mllvm" "--amdhsa-code-object-version=5"
// V5: "-targets=host-x86_64-unknown-linux,hipv4-amdgcn-amd-amdhsa--gfx906"
+// Check bundle ID for code object version 6.
+
+// RUN: not %clang -### --target=x86_64-linux-gnu \
+// RUN: -mcode-object-version=6 \
+// RUN: --offload-arch=gfx906 --rocm-path=%S/Inputs/rocm \
+// RUN: %s 2>&1 | FileCheck -check-prefix=V6 %s
+
+// V6: "-mcode-object-version=6"
+// V6: "-mllvm" "--amdhsa-code-object-version=6"
+// V6: "-targets=host-x86_64-unknown-linux,hipv4-amdgcn-amd-amdhsa--gfx906"
+
+
// Check bundle ID for code object version default
// RUN: %clang -### --target=x86_64-linux-gnu \
diff --git a/clang/test/Driver/hip-device-libs.hip b/clang/test/Driver/hip-device-libs.hip
index 6ac5778721ba5b..a998db531d6683 100644
--- a/clang/test/Driver/hip-device-libs.hip
+++ b/clang/test/Driver/hip-device-libs.hip
@@ -187,13 +187,26 @@
// RUN: --rocm-path=%S/Inputs/rocm %S/Inputs/hip_multiple_inputs/b.hip \
// RUN: 2>&1 | FileCheck %s --check-prefixes=ABI5
-// Test -mcode-object-version=5 with old device library without abi_version_400.bc
+// Test -mcode-object-version=5 with old device library without abi_version_500.bc
// RUN: not %clang -### --target=x86_64-linux-gnu --offload-arch=gfx900 \
// RUN: -mcode-object-version=5 \
// RUN: --hip-device-lib-path=%S/Inputs/rocm/amdgcn/bitcode-no-abi-ver \
// RUN: --rocm-path=%S/Inputs/rocm %S/Inputs/hip_multiple_inputs/b.hip \
// RUN: 2>&1 | FileCheck %s --check-prefixes=NOABI5
+// Test -mcode-object-version=6
+// RUN: %clang -### --target=x86_64-linux-gnu --offload-arch=gfx900 \
+// RUN: -mcode-object-version=6 \
+// RUN: --rocm-path=%S/Inputs/rocm %S/Inputs/hip_multiple_inputs/b.hip \
+// RUN: 2>&1 | FileCheck %s --check-prefixes=ABI6
+
+// Test -mcode-object-version=6 with old device library without abi_version_600.bc
+// RUN: not %clang -### --target=x86_64-linux-gnu --offload-arch=gfx900 \
+// RUN: -mcode-object-version=6 \
+// RUN: --hip-device-lib-path=%S/Inputs/rocm/amdgcn/bitcode-no-abi-ver \
+// RUN: --rocm-path=%S/Inputs/rocm %S/Inputs/hip_multiple_inputs/b.hip \
+// RUN: 2>&1 | FileCheck %s --check-prefixes=NOABI6
+
// ALL-NOT: error:
// ALL: {{"[^"]*clang[^"]*"}}
@@ -237,7 +250,10 @@
// ABI4: "-mlink-builtin-bitcode" "{{.*}}oclc_abi_version_400.bc"
// ABI5-NOT: error:
// ABI5: "-mlink-builtin-bitcode" "{{.*}}oclc_abi_version_500.bc"
+// ABI6-NOT: error:
+// ABI6: "-mlink-builtin-bitcode" "{{.*}}oclc_abi_version_600.bc"
// NOABI4-NOT: error:
// NOABI4-NOT: "-mlink-builtin-bitcode" "{{.*}}oclc_abi_version_400.bc"
// NOABI4-NOT: "-mlink-builtin-bitcode" "{{.*}}oclc_abi_version_500.bc"
// NOABI5: error: cannot find ROCm device libraryfor ABI version 5; provide its path via '--rocm-path' or '--rocm-device-lib-path', or pass '-nogpulib' to build without ROCm device library
+// NOABI6: error: cannot find ROCm device libraryfor ABI version 6; provide its path via '--rocm-path' or '--rocm-device-lib-path', or pass '-nogpulib' to build without ROCm device library
diff --git a/flang/lib/Frontend/CompilerInvocation.cpp b/flang/lib/Frontend/CompilerInvocation.cpp
index a3c41fb4611f56..ffde7f50087e52 100644
--- a/flang/lib/Frontend/CompilerInvocation.cpp
+++ b/flang/lib/Frontend/CompilerInvocation.cpp
@@ -284,6 +284,8 @@ static void parseCodeGenArgs(Fortran::frontend::CodeGenOptions &opts,
if (const llvm::opt::Arg *a = args.getLastArg(
clang::driver::options::OPT_mcode_object_version_EQ)) {
llvm::StringRef s = a->getValue();
+ if (s == "6")
+ opts.CodeObjectVersion = llvm::CodeObjectVersionKind::COV_6;
if (s == "5")
opts.CodeObjectVersion = llvm::CodeObjectVersionKind::COV_5;
if (s == "4")
diff --git a/flang/test/Lower/AMD/code-object-version.f90 b/flang/test/Lower/AMD/code-object-version.f90
index 7cb9dc079724e7..455f4547252829 100644
--- a/flang/test/Lower/AMD/code-object-version.f90
+++ b/flang/test/Lower/AMD/code-object-version.f90
@@ -3,11 +3,12 @@
!RUN: %flang_fc1 -emit-hlfir -triple amdgcn-amd-amdhsa -target-cpu gfx908 -mcode-object-version=none %s -o - | FileCheck --check-prefix=COV_NONE %s
!RUN: %flang_fc1 -emit-hlfir -triple amdgcn-amd-amdhsa -target-cpu gfx908 -mcode-object-version=4 %s -o - | FileCheck --check-prefix=COV_4 %s
!RUN: %flang_fc1 -emit-hlfir -triple amdgcn-amd-amdhsa -target-cpu gfx908 -mcode-object-version=5 %s -o - | FileCheck --check-prefix=COV_5 %s
+!RUN: %flang_fc1 -emit-hlfir -triple amdgcn-amd-amdhsa -target-cpu gfx908 -mcode-object-version=6 %s -o - | FileCheck --check-prefix=COV_6 %s
!COV_DEFAULT: llvm.mlir.global weak_odr hidden local_unnamed_addr constant @__oclc_ABI_version(400 : i32) {addr_space = 4 : i32} : i32
!COV_NONE-NOT: llvm.mlir.global weak_odr hidden local_unnamed_addr constant @__oclc_ABI_version(400 : i32) {addr_space = 4 : i32} : i32
!COV_4: llvm.mlir.global weak_odr hidden local_unnamed_addr constant @__oclc_ABI_version(400 : i32) {addr_space = 4 : i32} : i32
!COV_5: llvm.mlir.global weak_odr hidden local_unnamed_addr constant @__oclc_ABI_version(500 : i32) {addr_space = 4 : i32} : i32
+!COV_6: llvm.mlir.global weak_odr hidden local_unnamed_addr constant @__oclc_ABI_version(600 : i32) {addr_space = 4 : i32} : i32
subroutine target_simple
end subroutine target_simple
-
diff --git a/lld/ELF/Arch/AMDGPU.cpp b/lld/ELF/Arch/AMDGPU.cpp
index 650744db7dee32..d9440acec9ddad 100644
--- a/lld/ELF/Arch/AMDGPU.cpp
+++ b/lld/ELF/Arch/AMDGPU.cpp
@@ -25,6 +25,7 @@ class AMDGPU final : public TargetInfo {
private:
uint32_t calcEFlagsV3() const;
uint32_t calcEFlagsV4() const;
+ uint32_t calcEFlagsV6() const;
public:
AMDGPU();
@@ -106,6 +107,24 @@ uint32_t AMDGPU::calcEFlagsV4() const {
return retMach | retXnack | retSramEcc;
}
+uint32_t AMDGPU::calcEFlagsV6() const {
+ uint32_t flags = calcEFlagsV4();
+
+ uint32_t genericVersion =
+ getEFlags(ctx.objectFiles[0]) & EF_AMDGPU_GENERIC_VERSION;
+
+ // Verify that all input files have compatible generic version.
+ for (InputFile *f : ArrayRef(ctx.objectFiles).slice(1)) {
+ if (genericVersion != (getEFlags(f) & EF_AMDGPU_GENERIC_VERSION)) {
+ error("incompatible generic version: " + toString(f));
+ return 0;
+ }
+ }
+
+ flags |= genericVersion;
+ return flags;
+}
+
uint32_t AMDGPU::calcEFlags() const {
if (ctx.objectFiles.empty())
return 0;
@@ -121,6 +140,8 @@ uint32_t AMDGPU::calcEFlags() const {
case ELFABIVERSION_AMDGPU_HSA_V4:
case ELFABIVERSION_AMDGPU_HSA_V5:
return calcEFlagsV4();
+ case ELFABIVERSION_AMDGPU_HSA_V6:
+ return calcEFlagsV6();
default:
error("unknown abi version: " + Twine(abiVersion));
return 0;
diff --git a/lld/test/ELF/amdgpu-tid.s b/lld/test/ELF/amdgpu-tid.s
index 6623443a4541d7..ee0062eb750c86 100644
--- a/lld/test/ELF/amdgpu-tid.s
+++ b/lld/test/ELF/amdgpu-tid.s
@@ -43,3 +43,19 @@
# SRAMECC-OFF: EF_AMDGPU_FEATURE_SRAMECC_OFF_V4 (0x800)
# SRAMECC-ON: EF_AMDGPU_FEATURE_SRAMECC_ON_V4 (0xC00)
# SRAMECC-INCOMPATIBLE: incompatible sramecc:
+
+# RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=6 --amdgpu-force-generic-version=1 -filetype=obj %s -o %t-genericv1_0.o
+# RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=6 --amdgpu-force-generic-version=1 -filetype=obj %s -o %t-genericv1_1.o
+# RUN: ld.lld -shared %t-genericv1_0.o %t-genericv1_1.o -o %t-genericv1_2.so
+# RUN: llvm-readobj --file-headers %t-genericv1_2.so | FileCheck --check-prefix=GENERICV1 %s
+
+# RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=6 --amdgpu-force-generic-version=2 -filetype=obj %s -o %t-genericv2_0.o
+# RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=6 --amdgpu-force-generic-version=2 -filetype=obj %s -o %t-genericv2_1.o
+# RUN: ld.lld -shared %t-genericv2_0.o %t-genericv2_1.o -o %t-genericv2_2.so
+# RUN: llvm-readobj --file-headers %t-genericv2_2.so | FileCheck --check-prefix=GENERICV2 %s
+
+# RUN: not ld.lld -shared %t-genericv1_0.o %t-genericv2_0.o -o /dev/null 2>&1 | FileCheck --check-prefix=GENERIC-INCOMPATIBLE %s
+
+# GENERICV1: EF_AMDGPU_GENERIC_VERSION_V1 (0x1000000)
+# GENERICV2: EF_AMDGPU_GENERIC_VERSION_V2 (0x2000000)
+# GENERIC-INCOMPATIBLE: incompatible generic version
diff --git a/llvm/include/llvm/BinaryFormat/ELF.h b/llvm/include/llvm/BinaryFormat/ELF.h
index 81cdd39afc6bab..efd41f9812baa0 100644
--- a/llvm/include/llvm/BinaryFormat/ELF.h
+++ b/llvm/include/llvm/BinaryFormat/ELF.h
@@ -375,7 +375,8 @@ enum {
ELFABIVERSION_AMDGPU_HSA_V2 = 0,
ELFABIVERSION_AMDGPU_HSA_V3 = 1,
ELFABIVERSION_AMDGPU_HSA_V4 = 2,
- ELFABIVERSION_AMDGPU_HSA_V5 = 3
+ ELFABIVERSION_AMDGPU_HSA_V5 = 3,
+ ELFABIVERSION_AMDGPU_HSA_V6 = 4,
};
#define ELF_RELOC(name, value) name = value,
@@ -842,6 +843,12 @@ enum : unsigned {
EF_AMDGPU_FEATURE_SRAMECC_OFF_V4 = 0x800,
// SRAMECC is on.
EF_AMDGPU_FEATURE_SRAMECC_ON_V4 = 0xc00,
+
+ // Generic target versioning. This is contained in the list byte of EFLAGS.
+ EF_AMDGPU_GENERIC_VERSION = 0xff000000,
+ EF_AMDGPU_GENERIC_VERSION_OFFSET = 24,
+ EF_AMDGPU_GENERIC_VERSION_MIN = 1,
+ EF_AMDGPU_GENERIC_VERSION_MAX = 0xff,
};
// ELF Relocation types for AMDGPU
diff --git a/llvm/include/llvm/Support/AMDGPUMetadata.h b/llvm/include/llvm/Support/AMDGPUMetadata.h
index e0838a1f425ea5..46bca63e910aef 100644
--- a/llvm/include/llvm/Support/AMDGPUMetadata.h
+++ b/llvm/include/llvm/Support/AMDGPUMetadata.h
@@ -49,6 +49,11 @@ constexpr uint32_t VersionMajorV5 = 1;
/// HSA metadata minor version for code object V5.
constexpr uint32_t VersionMinorV5 = 2;
+/// HSA metadata major version for code object V6.
+constexpr uint32_t VersionMajorV6 = 1;
+/// HSA metadata minor version for code object V6.
+constexpr uint32_t VersionMinorV6 = 3;
+
/// HSA metadata beginning assembler directive.
constexpr char AssemblerDirectiveBegin[] = ".amd_amdgpu_hsa_metadata";
/// HSA metadata ending assembler directive.
diff --git a/llvm/include/llvm/Support/ScopedPrinter.h b/llvm/include/llvm/Support/ScopedPrinter.h
index aaaed3f5ceac62..7f627cdd90b4ce 100644
--- a/llvm/include/llvm/Support/ScopedPrinter.h
+++ b/llvm/include/llvm/Support/ScopedPrinter.h
@@ -160,7 +160,7 @@ class ScopedPrinter {
template <typename T, typename TFlag>
void printFlags(StringRef Label, T Value, ArrayRef<EnumEntry<TFlag>> Flags,
TFlag EnumMask1 = {}, TFlag EnumMask2 = {},
- TFlag EnumMask3 = {}) {
+ TFlag EnumMask3 = {}, TFlag EnumMask4 = {}) {
SmallVector<FlagEntry, 10> SetFlags;
for (const auto &Flag : Flags) {
@@ -174,6 +174,8 @@ class ScopedPrinter {
EnumMask = EnumMask2;
else if (Flag.Value & EnumMask3)
EnumMask = EnumMask3;
+ else if (Flag.Value & EnumMask4)
+ EnumMask = EnumMask4;
bool IsEnum = (Flag.Value & EnumMask) != 0;
if ((!IsEnum && (Value & Flag.Value) == Flag.Value) ||
(IsEnum && (Value & EnumMask) == Flag.Value)) {
diff --git a/llvm/include/llvm/Target/TargetOptions.h b/llvm/include/llvm/Target/TargetOptions.h
index 4df897c047a38a..c977e450c489d5 100644
--- a/llvm/include/llvm/Target/TargetOptions.h
+++ b/llvm/include/llvm/Target/TargetOptions.h
@@ -129,6 +129,7 @@ namespace llvm {
COV_3 = 300, // Unsupported.
COV_4 = 400,
COV_5 = 500,
+ COV_6 = 600,
};
class TargetOptions {
diff --git a/llvm/lib/ObjectYAML/ELFYAML.cpp b/llvm/lib/ObjectYAML/ELFYAML.cpp
index 6ad4a067415ac8..ff7151e016a9ba 100644
--- a/llvm/lib/ObjectYAML/ELFYAML.cpp
+++ b/llvm/lib/ObjectYAML/ELFYAML.cpp
@@ -620,6 +620,15 @@ void ScalarBitSetTraits<ELFYAML::ELF_EF>::bitset(IO &IO,
BCase(EF_AMDGPU_FEATURE_XNACK_V3);
BCase(EF_AMDGPU_FEATURE_SRAMECC_V3);
break;
+ case ELF::ELFABIVERSION_AMDGPU_HSA_V6:
+ for (unsigned K = ELF::EF_AMDGPU_GENERIC_VERSION_MIN;
+ K <= ELF::EF_AMDGPU_GENERIC_VERSION_MAX; ++K) {
+ std::string Key = "EF_AMDGPU_GENERIC_VERSION_V" + std::to_string(K);
+ IO.maskedBitSetCase(Value, Key.c_str(),
+ K << ELF::EF_AMDGPU_GENERIC_VERSION_OFFSET,
+ ELF::EF_AMDGPU_GENERIC_VERSION);
+ }
+ [[fallthrough]];
case ELF::ELFABIVERSION_AMDGPU_HSA_V4:
case ELF::ELFABIVERSION_AMDGPU_HSA_V5:
BCaseMask(EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index 10f7e7a26edb4a..74f1a2e8561b89 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -333,6 +333,9 @@ bool AMDGPUAsmPrinter::doInitialization(Module &M) {
case AMDGPU::AMDHSA_COV5:
HSAMetadataStream.reset(new HSAMD::MetadataStreamerMsgPackV5());
break;
+ case AMDGPU::AMDHSA_COV6:
+ HSAMetadataStream.reset(new HSAMD::MetadataStreamerMsgPackV6());
+ break;
default:
report_fatal_error("Unexpected code object version");
}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
index 74e9cd7d09654c..69ef5cde199776 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
@@ -677,6 +677,16 @@ void MetadataStreamerMsgPackV5::emitKernelAttrs(const Function &Func,
Kern[".uniform_work_group_size"] = Kern.getDocument()->getNode(1);
}
+//===----------------------------------------------------------------------===//
+// HSAMetadataStreamerV6
+//===----------------------------------------------------------------------===//
+
+void MetadataStreamerMsgPackV6::emitVersion() {
+ auto Version = HSAMetadataDoc->getArrayNode();
+ Version.push_back(Version.getDocument()->getNode(VersionMajorV6));
+ Version.push_back(Version.getDocument()->getNode(VersionMinorV6));
+ getRootMetadata("amdhsa.version") = Version;
+}
} // end namespace HSAMD
} // end namespace AMDGPU
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h
index 6d6bd86711b13f..26229af638f22a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h
@@ -135,7 +135,7 @@ class MetadataStreamerMsgPackV4 : public MetadataStreamer {
const SIProgramInfo &ProgramInfo) override;
};
-class MetadataStreamerMsgPackV5 final : public MetadataStreamerMsgPackV4 {
+class MetadataStreamerMsgPackV5 : public MetadataStreamerMsgPackV4 {
protected:
void emitVersion() override;
void emitHiddenKernelArgs(const MachineFunction &MF, unsigned &Offset,
@@ -147,6 +147,15 @@ class MetadataStreamerMsgPackV5 final : public MetadataStreamerMsgPackV4 {
~MetadataStreamerMsgPackV5() = default;
};
+class MetadataStreamerMsgPackV6 final : public MetadataStreamerMsgPackV5 {
+protected:
+ void emitVersion() override;
+
+public:
+ MetadataStreamerMsgPackV6() = default;
+ ~MetadataStreamerMsgPackV6() = default;
+};
+
} // end namespace HSAMD
} // end namespace AMDGPU
} // end namespace llvm
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
index e135a4e25dd15a..8b3822677345d8 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
@@ -25,6 +25,7 @@
#include "llvm/Support/AMDGPUMetadata.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/TargetParser/TargetParser.h"
@@ -35,6 +36,12 @@ using namespace llvm::AMDGPU;
// AMDGPUTargetStreamer
//===----------------------------------------------------------------------===//
+static cl::opt<unsigned>
+ ForceGenericVersion("amdgpu-force-generic-version",
+ cl::desc("Force a specific generic_v<N> flag to be "
+ "added. For testing purposes only."),
+ cl::ReallyHidden, cl::init(0));
+
static void convertIsaVersionV2(uint32_t &Major, uint32_t &Minor,
uint32_t &Stepping, bool Sramecc, bool Xnack) {
if (Major == 9 && Minor == 0) {
@@ -434,6 +441,7 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
break;
case AMDGPU::AMDHSA_COV4:
case AMDGPU::AMDHSA_COV5:
+ case AMDGPU::AMDHSA_COV6:
if (getTargetID()->isXnackSupported())
OS << "\t\t.amdhsa_reserve_xnack_mask " << getTargetID()->isXnackOnOrAny() << '\n';
break;
@@ -623,6 +631,8 @@ unsigned AMDGPUTargetELFStreamer::getEFlagsAMDHSA() {
case ELF::ELFABIVERSION_AMDGPU_HSA_V4:
case ELF::ELFABIVERSION_AMDGPU_HSA_V5:
return getEFlagsV4();
+ case ELF::ELFABIVERSION_AMDGPU_HSA_V6:
+ return getEFlagsV6();
}
}
@@ -697,6 +707,23 @@ unsigned AMDGPUTargetELFStreamer::getEFlagsV4() {
return EFlagsV4;
}
+unsigned AMDGPUTargetELFStreamer::getEFlagsV6() {
+ unsigned Flags = getEFlagsV4();
+
+ unsigned Version = ForceGenericVersion;
+
+ // Versions start at 1.
+ if (Version) {
+ if (Version > ELF::EF_AMDGPU_GENERIC_VERSION_MAX)
+ report_fatal_error("Cannot encode generic code object version " +
+ Twine(Version) +
+ " - no ELF flag can represent this version!");
+ Flags |= (Version << ELF::EF_AMDGPU_GENERIC_VERSION_OFFSET);
+ }
+
+ return Flags;
+}
+
void AMDGPUTargetELFStreamer::EmitDirectiveAMDGCNTarget() {}
void AMDGPUTargetELFStreamer::EmitDirectiveHSACodeObjectVersion(
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
index 55b5246c92100a..a2f9b414dd50c5 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.h
@@ -188,6 +188,7 @@ class AMDGPUTargetELFStreamer final : public AMDGPUTargetStreamer {
unsigned getEFlagsV3();
unsigned getEFlagsV4();
+ unsigned getEFlagsV6();
public:
AMDGPUTargetELFStreamer(MCStreamer &S, const MCSubtargetInfo &STI);
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index 26ba2575ff34ac..e8b9cec90fc69f 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -132,6 +132,8 @@ std::optional<uint8_t> getHsaAbiVersion(const MCSubtargetInfo *STI) {
return ELF::ELFABIVERSION_AMDGPU_HSA_V4;
case 5:
return ELF::ELFABIVERSION_AMDGPU_HSA_V5;
+ case 6:
+ return ELF::ELFABIVERSION_AMDGPU_HSA_V6;
default:
report_fatal_error(Twine("Unsupported AMDHSA Code Object Version ") +
Twine(AmdhsaCodeObjectVersion));
@@ -150,6 +152,12 @@ bool isHsaAbiVersion5(const MCSubtargetInfo *STI) {
return false;
}
+bool isHsaAbiVersion6(const MCSubtargetInfo *STI) {
+ if (std::optional<uint8_t> HsaAbiVer = getHsaAbiVersion(STI))
+ return *HsaAbiVer == ELF::ELFABIVERSION_AMDGPU_HSA_V6;
+ return false;
+}
+
unsigned getAmdhsaCodeObjectVersion() {
return AmdhsaCodeObjectVersion;
}
@@ -169,6 +177,7 @@ unsigned getMultigridSyncArgImplicitArgPosition(unsigned CodeObjectVersion) {
case AMDHSA_COV4:
return 48;
case AMDHSA_COV5:
+ case AMDHSA_COV6:
default:
return AMDGPU::ImplicitArg::MULTIGRID_SYNC_ARG_OFFSET;
}
@@ -182,6 +191,7 @@ unsigned getHostcallImplicitArgPosition(unsigned CodeObjectVersion) {
case AMDHSA_COV4:
return 24;
case AMDHSA_COV5:
+ case AMDHSA_COV6:
default:
return AMDGPU::ImplicitArg::HOSTCALL_PTR_OFFSET;
}
@@ -192,6 +202,7 @@ unsigned getDefaultQueueImplicitArgPosition(unsigned CodeObjectVersion) {
case AMDHSA_COV4:
return 32;
case AMDHSA_COV5:
+ case AMDHSA_COV6:
default:
return AMDGPU::ImplicitArg::DEFAULT_QUEUE_OFFSET;
}
@@ -202,6 +213,7 @@ unsigned getCompletionActionImplicitArgPosition(unsigned CodeObjectVersion) {
case AMDHSA_COV4:
return 40;
case AMDHSA_COV5:
+ case AMDHSA_COV6:
default:
return AMDGPU::ImplicitArg::COMPLETION_ACTION_OFFSET;
}
@@ -782,6 +794,7 @@ std::string AMDGPUTargetID::toString() const {
switch (CodeObjectVersion) {
case AMDGPU::AMDHSA_COV4:
case AMDGPU::AMDHSA_COV5:
+ case AMDGPU::AMDHSA_COV6:
// sramecc.
if (getSramEccSetting() == TargetIDSetting::Off)
Features += ":sramecc-";
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index 50c741760d7143..c2fdb4e817099c 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -42,7 +42,7 @@ namespace AMDGPU {
struct IsaVersion;
-enum { AMDHSA_COV4 = 4, AMDHSA_COV5 = 5 };
+enum { AMDHSA_COV4 = 4, AMDHSA_COV5 = 5, AMDHSA_COV6 = 6 };
/// \returns True if \p STI is AMDHSA.
bool isHsaAbi(const MCSubtargetInfo &STI);
@@ -54,6 +54,9 @@ bool isHsaAbiVersion4(const MCSubtargetInfo *STI);
/// \returns True if HSA OS ABI Version identification is 5,
/// false otherwise.
bool isHsaAbiVersion5(const MCSubtargetInfo *STI);
+/// \returns True if HSA OS ABI Version identification is 6,
+/// false otherwise.
+bool isHsaAbiVersion6(const MCSubtargetInfo *STI);
/// \returns The offset of the multigrid_sync_arg argument from implicitarg_ptr
unsigned getMultigridSyncArgImplicitArgPosition(unsigned COV);
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/implicit-kernarg-backend-usage-global-isel.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/implicit-kernarg-backend-usage-global-isel.ll
index 4bdbe6604782a8..03374e62e7e9f6 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/implicit-kernarg-backend-usage-global-isel.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/implicit-kernarg-backend-usage-global-isel.ll
@@ -1,9 +1,11 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 | FileCheck --check-prefix=GFX8V4 %s
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 | FileCheck --check-prefix=GFX8V5 %s
+; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 | FileCheck --check-prefix=GFX8V5 %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 | FileCheck --check-prefixes=GFX9V4 %s
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 | FileCheck --check-prefixes=GFX9V5 %s
+; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 | FileCheck --check-prefixes=GFX9V5 %s
define amdgpu_kernel void @addrspacecast(ptr addrspace(5) %ptr.private, ptr addrspace(3) %ptr.local) {
; GFX8V4-LABEL: addrspacecast:
diff --git a/llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll b/llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll
index bae693ba2fa3be..2e43f685fd70a0 100644
--- a/llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll
@@ -1,5 +1,6 @@
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -enable-ipra=0 -verify-machineinstrs | FileCheck -check-prefixes=GCN,CI %s
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -enable-ipra=0 -verify-machineinstrs | FileCheck -check-prefixes=GCN-V5 %s
+; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -enable-ipra=0 -verify-machineinstrs | FileCheck -check-prefixes=GCN-V5 %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -enable-ipra=0 -verify-machineinstrs | FileCheck -check-prefixes=GCN,VI,VI-NOBUG %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=iceland -enable-ipra=0 -verify-machineinstrs | FileCheck -check-prefixes=GCN,VI,VI-BUG %s
diff --git a/llvm/test/CodeGen/AMDGPU/codegen-internal-only-func.ll b/llvm/test/CodeGen/AMDGPU/codegen-internal-only-func.ll
index 680fae1869600e..a47a54e3e1d668 100644
--- a/llvm/test/CodeGen/AMDGPU/codegen-internal-only-func.ll
+++ b/llvm/test/CodeGen/AMDGPU/codegen-internal-only-func.ll
@@ -2,6 +2,7 @@
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck -check-prefixes=OPT,COV4 %s
; RUN: not llc --crash -O0 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -filetype=null %s
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck -check-prefixes=OPT,COV5 %s
+; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck -check-prefixes=OPT,COV6 %s
; AMDGPUAttributor deletes the function "by accident" so it's never
; codegened with optimizations.
@@ -17,6 +18,7 @@
; OPT-NEXT: - 1
; COV4: - 1
; COV5: - 2
+; COV6: - 3
; OPT: ...
define internal i32 @func() {
ret i32 0
diff --git a/llvm/test/CodeGen/AMDGPU/elf-header-osabi.ll b/llvm/test/CodeGen/AMDGPU/elf-header-osabi.ll
index f8fc3e1e764801..8178fecbbbe5f4 100644
--- a/llvm/test/CodeGen/AMDGPU/elf-header-osabi.ll
+++ b/llvm/test/CodeGen/AMDGPU/elf-header-osabi.ll
@@ -7,6 +7,9 @@
; RUN: llc -filetype=obj -mtriple=amdgcn--amdhsa -mcpu=gfx801 --amdhsa-code-object-version=5 < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=HSA,HSA5 %s
; RUN: llc -filetype=obj -mtriple=amdgcn-amd-amdhsa -mcpu=gfx801 --amdhsa-code-object-version=5 < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=HSA,HSA5 %s
; RUN: llc -filetype=obj -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx801 --amdhsa-code-object-version=5 < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=HSA,HSA5 %s
+; RUN: llc -filetype=obj -mtriple=amdgcn--amdhsa -mcpu=gfx801 --amdhsa-code-object-version=6 < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=HSA,HSA6 %s
+; RUN: llc -filetype=obj -mtriple=amdgcn-amd-amdhsa -mcpu=gfx801 --amdhsa-code-object-version=6 < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=HSA,HSA6 %s
+; RUN: llc -filetype=obj -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx801 --amdhsa-code-object-version=6 < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=HSA,HSA6 %s
; RUN: llc -filetype=obj -mtriple=amdgcn--amdpal -mcpu=gfx801 < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=PAL %s
; RUN: llc -filetype=obj -mtriple=amdgcn-amd-amdpal -mcpu=gfx801 < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=PAL %s
; RUN: llc -filetype=obj -mtriple=amdgcn-unknown-amdpal -mcpu=gfx801 < %s | llvm-readobj --file-headers - | FileCheck --check-prefixes=PAL %s
@@ -18,6 +21,7 @@
; HSA: OS/ABI: AMDGPU_HSA (0x40)
; HSA4: ABIVersion: 2
; HSA5: ABIVersion: 3
+; HSA6: ABIVersion: 4
; PAL: OS/ABI: AMDGPU_PAL (0x41)
; PAL: ABIVersion: 0
; MESA3D: OS/ABI: AMDGPU_MESA3D (0x42)
diff --git a/llvm/test/CodeGen/AMDGPU/enable-scratch-only-dynamic-stack.ll b/llvm/test/CodeGen/AMDGPU/enable-scratch-only-dynamic-stack.ll
index 22f90682aa9738..d91c899a27ebf3 100644
--- a/llvm/test/CodeGen/AMDGPU/enable-scratch-only-dynamic-stack.ll
+++ b/llvm/test/CodeGen/AMDGPU/enable-scratch-only-dynamic-stack.ll
@@ -1,3 +1,4 @@
+; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck -check-prefixes=GCN,COV5 %s
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck -check-prefixes=GCN,COV5 %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck -check-prefixes=GCN,COV4 %s
diff --git a/llvm/test/CodeGen/AMDGPU/implicit-kernarg-backend-usage.ll b/llvm/test/CodeGen/AMDGPU/implicit-kernarg-backend-usage.ll
index 9e6c0ef86906dd..30fe4a80e693b9 100644
--- a/llvm/test/CodeGen/AMDGPU/implicit-kernarg-backend-usage.ll
+++ b/llvm/test/CodeGen/AMDGPU/implicit-kernarg-backend-usage.ll
@@ -1,9 +1,11 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 | FileCheck --check-prefix=GFX8V4 %s
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 | FileCheck --check-prefix=GFX8V5 %s
+; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 | FileCheck --check-prefix=GFX8V5 %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 | FileCheck --check-prefixes=GFX9V4 %s
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 | FileCheck --check-prefixes=GFX9V5 %s
+; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 | FileCheck --check-prefixes=GFX9V5 %s
define amdgpu_kernel void @addrspacecast(ptr addrspace(5) %ptr.private, ptr addrspace(3) %ptr.local) {
; GFX8V4-LABEL: addrspacecast:
diff --git a/llvm/test/CodeGen/AMDGPU/implicitarg-offset-attributes.ll b/llvm/test/CodeGen/AMDGPU/implicitarg-offset-attributes.ll
index d5590754d78bc7..a8263a317baac8 100644
--- a/llvm/test/CodeGen/AMDGPU/implicitarg-offset-attributes.ll
+++ b/llvm/test/CodeGen/AMDGPU/implicitarg-offset-attributes.ll
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | opt -S -mtriple=amdgcn-unknown-unknown -passes=amdgpu-attributor | FileCheck -check-prefixes=CHECK,V4 %s
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | opt -S -mtriple=amdgcn-unknown-unknown -passes=amdgpu-attributor | FileCheck -check-prefixes=CHECK,V5 %s
+; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | opt -S -mtriple=amdgcn-unknown-unknown -passes=amdgpu-attributor | FileCheck -check-prefixes=CHECK,V6 %s
declare ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() #0
@@ -122,6 +123,15 @@ define amdgpu_kernel void @test_completion_action_offset_v4_0(ptr addrspace(1) %
; V5-NEXT: [[LOAD:%.*]] = load ptr, ptr addrspace(4) [[GEP]], align 8
; V5-NEXT: store ptr [[LOAD]], ptr addrspace(1) [[KERNARG]], align 8
; V5-NEXT: ret void
+;
+; V6-LABEL: define {{[^@]+}}@test_completion_action_offset_v4_0
+; V6-SAME: (ptr addrspace(1) [[KERNARG:%.*]]) #[[ATTR2]] {
+; V6-NEXT: call void @use_everything_else()
+; V6-NEXT: [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
+; V6-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 40
+; V6-NEXT: [[LOAD:%.*]] = load ptr, ptr addrspace(4) [[GEP]], align 8
+; V6-NEXT: store ptr [[LOAD]], ptr addrspace(1) [[KERNARG]], align 8
+; V6-NEXT: ret void
;
call void @use_everything_else()
%implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
@@ -149,6 +159,15 @@ define amdgpu_kernel void @test_completion_action_offset_v5_0(ptr addrspace(1) %
; V5-NEXT: [[LOAD:%.*]] = load ptr, ptr addrspace(4) [[GEP]], align 8
; V5-NEXT: store ptr [[LOAD]], ptr addrspace(1) [[KERNARG]], align 8
; V5-NEXT: ret void
+;
+; V6-LABEL: define {{[^@]+}}@test_completion_action_offset_v5_0
+; V6-SAME: (ptr addrspace(1) [[KERNARG:%.*]]) #[[ATTR4:[0-9]+]] {
+; V6-NEXT: call void @use_everything_else()
+; V6-NEXT: [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
+; V6-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 112
+; V6-NEXT: [[LOAD:%.*]] = load ptr, ptr addrspace(4) [[GEP]], align 8
+; V6-NEXT: store ptr [[LOAD]], ptr addrspace(1) [[KERNARG]], align 8
+; V6-NEXT: ret void
;
call void @use_everything_else()
%implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
@@ -176,6 +195,15 @@ define amdgpu_kernel void @test_default_queue_completion_action_offset_v3_0(ptr
; V5-NEXT: [[LOAD:%.*]] = load <2 x ptr>, ptr addrspace(4) [[GEP]], align 16
; V5-NEXT: store <2 x ptr> [[LOAD]], ptr addrspace(1) [[KERNARG]], align 16
; V5-NEXT: ret void
+;
+; V6-LABEL: define {{[^@]+}}@test_default_queue_completion_action_offset_v3_0
+; V6-SAME: (ptr addrspace(1) [[KERNARG:%.*]]) #[[ATTR2]] {
+; V6-NEXT: call void @use_everything_else()
+; V6-NEXT: [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
+; V6-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 32
+; V6-NEXT: [[LOAD:%.*]] = load <2 x ptr>, ptr addrspace(4) [[GEP]], align 16
+; V6-NEXT: store <2 x ptr> [[LOAD]], ptr addrspace(1) [[KERNARG]], align 16
+; V6-NEXT: ret void
;
call void @use_everything_else()
%implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
@@ -203,6 +231,15 @@ define amdgpu_kernel void @test_default_queue_completion_action_offset_v5_0(ptr
; V5-NEXT: [[LOAD:%.*]] = load <2 x ptr>, ptr addrspace(4) [[GEP]], align 16
; V5-NEXT: store <2 x ptr> [[LOAD]], ptr addrspace(1) [[KERNARG]], align 16
; V5-NEXT: ret void
+;
+; V6-LABEL: define {{[^@]+}}@test_default_queue_completion_action_offset_v5_0
+; V6-SAME: (ptr addrspace(1) [[KERNARG:%.*]]) #[[ATTR5:[0-9]+]] {
+; V6-NEXT: call void @use_everything_else()
+; V6-NEXT: [[IMPLICITARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
+; V6-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[IMPLICITARG_PTR]], i64 104
+; V6-NEXT: [[LOAD:%.*]] = load <2 x ptr>, ptr addrspace(4) [[GEP]], align 16
+; V6-NEXT: store <2 x ptr> [[LOAD]], ptr addrspace(1) [[KERNARG]], align 16
+; V6-NEXT: ret void
;
call void @use_everything_else()%implicitarg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
@@ -234,7 +271,16 @@ attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memo
; V5: attributes #[[ATTR4]] = { "amdgpu-no-default-queue" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
; V5: attributes #[[ATTR5]] = { "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
;.
+; V6: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+; V6: attributes #[[ATTR1]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-waves-per-eu"="4,10" "uniform-work-group-size"="false" }
+; V6: attributes #[[ATTR2]] = { "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
+; V6: attributes #[[ATTR3]] = { "amdgpu-no-completion-action" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
+; V6: attributes #[[ATTR4]] = { "amdgpu-no-default-queue" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
+; V6: attributes #[[ATTR5]] = { "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
+;.
; V4: [[META0:![0-9]+]] = !{i32 1, !"amdgpu_code_object_version", i32 400}
;.
; V5: [[META0:![0-9]+]] = !{i32 1, !"amdgpu_code_object_version", i32 500}
;.
+; V6: [[META0:![0-9]+]] = !{i32 1, !"amdgpu_code_object_version", i32 600}
+;.
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll
index f4c55e602c64cc..ebbbe8aaa3a113 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.implicitarg.ptr.ll
@@ -1,3 +1,4 @@
+; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -verify-machineinstrs | FileCheck -check-prefixes=GCN,HSA,COV5 %s
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -verify-machineinstrs | FileCheck -check-prefixes=GCN,HSA,COV5 %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -verify-machineinstrs | FileCheck -check-prefixes=GCN,HSA,COV4 %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -verify-machineinstrs | FileCheck -check-prefixes=GCN,MESA %s
diff --git a/llvm/test/CodeGen/AMDGPU/non-entry-alloca.ll b/llvm/test/CodeGen/AMDGPU/non-entry-alloca.ll
index ff06f98df56371..494ace8a641e8f 100644
--- a/llvm/test/CodeGen/AMDGPU/non-entry-alloca.ll
+++ b/llvm/test/CodeGen/AMDGPU/non-entry-alloca.ll
@@ -1,6 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs | FileCheck -check-prefixes=MUBUF,DEFAULTSIZE %s
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs | FileCheck -check-prefixes=MUBUF,DEFAULTSIZE-V5 %s
+; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs | FileCheck -check-prefixes=MUBUF,DEFAULTSIZE-V5 %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -amdgpu-assume-dynamic-stack-object-size=1024 | FileCheck -check-prefixes=MUBUF,ASSUME1024 %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -amdgpu-assume-dynamic-stack-object-size=1024 | FileCheck -check-prefixes=MUBUF,ASSUME1024 %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs -mattr=+enable-flat-scratch | FileCheck -check-prefixes=FLATSCR,DEFAULTSIZE %s
diff --git a/llvm/test/CodeGen/AMDGPU/recursion.ll b/llvm/test/CodeGen/AMDGPU/recursion.ll
index 95c1a085ee8cf4..ccf30b5a593f7a 100644
--- a/llvm/test/CodeGen/AMDGPU/recursion.ll
+++ b/llvm/test/CodeGen/AMDGPU/recursion.ll
@@ -1,5 +1,6 @@
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs | FileCheck %s
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs | FileCheck -check-prefixes=V5 %s
+; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs | FileCheck -check-prefixes=V5 %s
; CHECK-LABEL: {{^}}recursive:
; CHECK: ScratchSize: 16
diff --git a/llvm/test/CodeGen/AMDGPU/resource-usage-dead-function.ll b/llvm/test/CodeGen/AMDGPU/resource-usage-dead-function.ll
index c30089a8dd32a0..503b3348757971 100644
--- a/llvm/test/CodeGen/AMDGPU/resource-usage-dead-function.ll
+++ b/llvm/test/CodeGen/AMDGPU/resource-usage-dead-function.ll
@@ -1,5 +1,6 @@
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -o - | FileCheck -check-prefix=GCN %s
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -o - | FileCheck -check-prefix=GCN-V5 %s
+; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -o - | FileCheck -check-prefix=GCN-V5 %s
; Make sure there's no assertion when trying to report the resource
; usage for a function which becomes dead during codegen.
diff --git a/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-any.ll b/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-any.ll
index b78f6412cd677a..223738345242c8 100644
--- a/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-any.ll
+++ b/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-any.ll
@@ -1,10 +1,14 @@
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck --check-prefixes=ASM,ASM4 %s
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck --check-prefixes=ASM,ASM5 %s
+; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck --check-prefixes=ASM,ASM6 %s
+
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s
+; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=6 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF6 %s
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s
+; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=6 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF6 %s
; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx900"
; ASM: amdhsa.target: amdgcn-amd-amdhsa--gfx900
@@ -12,10 +16,12 @@
; ASM: - 1
; ASM4: - 1
; ASM5: - 2
+; ASM6: - 3
; ELF: OS/ABI: AMDGPU_HSA (0x40)
; ELF4: ABIVersion: 2
; ELF5: ABIVersion: 3
+; ELF6: ABIVersion: 4
; ELF: Flags [ (0x12C)
; ELF-NEXT: EF_AMDGPU_FEATURE_XNACK_ANY_V4 (0x100)
; ELF-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX900 (0x2C)
diff --git a/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-not-supported.ll b/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-not-supported.ll
index a3c75e09975331..073bdcb8a37fbc 100644
--- a/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-not-supported.ll
+++ b/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-not-supported.ll
@@ -1,10 +1,14 @@
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 | FileCheck --check-prefixes=ASM,ASM4 %s
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 | FileCheck --check-prefixes=ASM,ASM5 %s
+; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 | FileCheck --check-prefixes=ASM,ASM6 %s
+
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s
+; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=6 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF6 %s
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s
+; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=6 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF6 %s
; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx700"
; ASM: amdhsa.target: amdgcn-amd-amdhsa--gfx700
@@ -12,10 +16,12 @@
; ASM: - 1
; ASM4: - 1
; ASM5: - 2
+; ASM6: - 3
; ELF: OS/ABI: AMDGPU_HSA (0x40)
; ELF4: ABIVersion: 2
; ELF5: ABIVersion: 3
+; ELF6: ABIVersion: 4
; ELF: Flags [ (0x22)
; ELF-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX700 (0x22)
; ELF-NEXT: ]
diff --git a/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-off.ll b/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-off.ll
index d4d8af8c1a99be..ce775c5bc124d9 100644
--- a/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-off.ll
+++ b/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-off.ll
@@ -1,10 +1,14 @@
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck --check-prefixes=ASM,ASM4 %s
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck --check-prefixes=ASM,ASM5 %s
+; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck --check-prefixes=ASM,ASM6 %s
+
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s
+; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=6 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF6 %s
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s
+; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=6 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF6 %s
; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx900:xnack-"
; ASM: amdhsa.target: 'amdgcn-amd-amdhsa--gfx900:xnack-'
@@ -12,10 +16,12 @@
; ASM: - 1
; ASM4: - 1
; ASM5: - 2
+; ASM6: - 3
; ELF: OS/ABI: AMDGPU_HSA (0x40)
; ELF4: ABIVersion: 2
; ELF5: ABIVersion: 3
+; ELF6: ABIVersion: 4
; ELF: Flags [ (0x22C)
; ELF-NEXT: EF_AMDGPU_FEATURE_XNACK_OFF_V4 (0x200)
; ELF-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX900 (0x2C)
diff --git a/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-on.ll b/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-on.ll
index 9ca8b055a27979..568a2312aa479d 100644
--- a/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-on.ll
+++ b/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-all-on.ll
@@ -1,10 +1,14 @@
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck --check-prefixes=ASM,ASM4 %s
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck --check-prefixes=ASM,ASM5 %s
+; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck --check-prefixes=ASM,ASM6 %s
+
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s
+; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=6 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF6 %s
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s
+; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=6 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF6 %s
; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx900:xnack+"
; ASM: amdhsa.target: 'amdgcn-amd-amdhsa--gfx900:xnack+'
@@ -12,10 +16,12 @@
; ASM: - 1
; ASM4: - 1
; ASM5: - 2
+; ASM6: - 3
; ELF: OS/ABI: AMDGPU_HSA (0x40)
; ELF4: ABIVersion: 2
; ELF5: ABIVersion: 3
+; ELF6: ABIVersion: 4
; ELF: Flags [ (0x32C)
; ELF-NEXT: EF_AMDGPU_FEATURE_XNACK_ON_V4 (0x300)
; ELF-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX900 (0x2C)
diff --git a/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-off-1.ll b/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-off-1.ll
index fd3f5878469e66..0e011054738c45 100644
--- a/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-off-1.ll
+++ b/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-off-1.ll
@@ -1,10 +1,14 @@
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck --check-prefixes=ASM,ASM4 %s
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck --check-prefixes=ASM,ASM5 %s
+; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck --check-prefixes=ASM,ASM6 %s
+
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s
+; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=6 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF6 %s
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s
+; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=6 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF6 %s
; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx900:xnack-"
; ASM: amdhsa.target: 'amdgcn-amd-amdhsa--gfx900:xnack-'
@@ -12,10 +16,12 @@
; ASM: - 1
; ASM4: - 1
; ASM5: - 2
+; ASM6: - 3
; ELF: OS/ABI: AMDGPU_HSA (0x40)
; ELF4: ABIVersion: 2
; ELF5: ABIVersion: 3
+; ELF6: ABIVersion: 4
; ELF: Flags [ (0x22C)
; ELF-NEXT: EF_AMDGPU_FEATURE_XNACK_OFF_V4 (0x200)
; ELF-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX900 (0x2C)
diff --git a/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-off-2.ll b/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-off-2.ll
index 34673dd5b89197..bb79876dee6dc7 100644
--- a/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-off-2.ll
+++ b/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-off-2.ll
@@ -1,10 +1,14 @@
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck --check-prefixes=ASM,ASM4 %s
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck --check-prefixes=ASM,ASM5 %s
+; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck --check-prefixes=ASM,ASM6 %s
+
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s
+; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=6 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF6 %s
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s
+; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=6 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF6 %s
; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx900:xnack-"
; ASM: amdhsa.target: 'amdgcn-amd-amdhsa--gfx900:xnack-'
@@ -12,10 +16,12 @@
; ASM: - 1
; ASM4: - 1
; ASM5: - 2
+; ASM6: - 3
; ELF: OS/ABI: AMDGPU_HSA (0x40)
; ELF4: ABIVersion: 2
; ELF5: ABIVersion: 3
+; ELF6: ABIVersion: 4
; ELF: Flags [ (0x22C)
; ELF-NEXT: EF_AMDGPU_FEATURE_XNACK_OFF_V4 (0x200)
; ELF-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX900 (0x2C)
diff --git a/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-on-1.ll b/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-on-1.ll
index c283ece7e8bdff..038cd4cd8355c1 100644
--- a/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-on-1.ll
+++ b/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-on-1.ll
@@ -1,10 +1,14 @@
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck --check-prefixes=ASM,ASM4 %s
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck --check-prefixes=ASM,ASM5 %s
+; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck --check-prefixes=ASM,ASM6 %s
+
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s
+; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=6 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF6 %s
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s
+; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=6 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF6 %s
; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx900:xnack+"
; ASM: amdhsa.target: 'amdgcn-amd-amdhsa--gfx900:xnack+'
@@ -12,10 +16,12 @@
; ASM: - 1
; ASM4: - 1
; ASM5: - 2
+; ASM6: - 3
; ELF: OS/ABI: AMDGPU_HSA (0x40)
; ELF4: ABIVersion: 2
; ELF5: ABIVersion: 3
+; ELF6: ABIVersion: 4
; ELF: Flags [ (0x32C)
; ELF-NEXT: EF_AMDGPU_FEATURE_XNACK_ON_V4 (0x300)
; ELF-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX900 (0x2C)
diff --git a/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-on-2.ll b/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-on-2.ll
index 869254cae5258b..32013837f0788d 100644
--- a/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-on-2.ll
+++ b/llvm/test/CodeGen/AMDGPU/tid-mul-func-xnack-any-on-2.ll
@@ -1,10 +1,14 @@
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck --check-prefixes=ASM,ASM4 %s
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck --check-prefixes=ASM,ASM5 %s
+; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck --check-prefixes=ASM,ASM6 %s
+
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s
+; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=6 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF6 %s
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s
+; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=6 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF6 %s
; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx900:xnack+"
; ASM: amdhsa.target: 'amdgcn-amd-amdhsa--gfx900:xnack+'
@@ -12,10 +16,12 @@
; ASM: - 1
; ASM4: - 1
; ASM5: - 2
+; ASM6: - 3
; ELF: OS/ABI: AMDGPU_HSA (0x40)
; ELF4: ABIVersion: 2
; ELF5: ABIVersion: 3
+; ELF6: ABIVersion: 4
; ELF: Flags [ (0x32C)
; ELF-NEXT: EF_AMDGPU_FEATURE_XNACK_ON_V4 (0x300)
; ELF-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX900 (0x2C)
diff --git a/llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-not-supported.ll b/llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-not-supported.ll
index cc04eb0e661d65..e30ff18bd4873f 100644
--- a/llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-not-supported.ll
+++ b/llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-not-supported.ll
@@ -1,10 +1,14 @@
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 | FileCheck --check-prefixes=ASM,ASM4 %s
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 | FileCheck --check-prefixes=ASM,ASM5 %s
+; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 | FileCheck --check-prefixes=ASM,ASM6 %s
+
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s
+; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=6 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF6 %s
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s
+; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 --amdhsa-code-object-version=6 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF6 %s
; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx700"
; ASM: amdhsa.target: amdgcn-amd-amdhsa--gfx700
@@ -12,10 +16,12 @@
; ASM: - 1
; ASM4: - 1
; ASM5: - 2
+; ASM6: - 3
; ELF: OS/ABI: AMDGPU_HSA (0x40)
; ELF4: ABIVersion: 2
; ELF5: ABIVersion: 3
+; ELF6: ABIVersion: 4
; ELF: Flags [ (0x22)
; ELF-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX700 (0x22)
; ELF-NEXT: ]
diff --git a/llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-off.ll b/llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-off.ll
index e84778b526f11d..a11b15ad526cc7 100644
--- a/llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-off.ll
+++ b/llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-off.ll
@@ -1,10 +1,14 @@
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck --check-prefixes=ASM,ASM4 %s
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck --check-prefixes=ASM,ASM5 %s
+; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck --check-prefixes=ASM,ASM6 %s
+
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s
+; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=6 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF6 %s
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s
+; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=6 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF6 %s
; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx900:xnack-"
; ASM: amdhsa.target: 'amdgcn-amd-amdhsa--gfx900:xnack-'
@@ -12,10 +16,12 @@
; ASM: - 1
; ASM4: - 1
; ASM5: - 2
+; ASM6: - 3
; ELF: OS/ABI: AMDGPU_HSA (0x40)
; ELF4: ABIVersion: 2
; ELF5: ABIVersion: 3
+; ELF6: ABIVersion: 4
; ELF: Flags [ (0x22C)
; ELF-NEXT: EF_AMDGPU_FEATURE_XNACK_OFF_V4 (0x200)
; ELF-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX900 (0x2C)
diff --git a/llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-on.ll b/llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-on.ll
index a1ab6ed5f082ac..50f8bc81909f3a 100644
--- a/llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-on.ll
+++ b/llvm/test/CodeGen/AMDGPU/tid-one-func-xnack-on.ll
@@ -1,10 +1,14 @@
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck --check-prefixes=ASM,ASM4 %s
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck --check-prefixes=ASM,ASM5 %s
+; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck --check-prefixes=ASM,ASM6 %s
+
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s
+; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=6 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF6 %s
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF4 %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=5 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF5 %s
+; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=6 --filetype=obj | llvm-readobj --file-headers - | FileCheck --check-prefixes=ELF,ELF6 %s
; ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx900:xnack+"
; ASM: amdhsa.target: 'amdgcn-amd-amdhsa--gfx900:xnack+'
@@ -12,10 +16,12 @@
; ASM: - 1
; ASM4: - 1
; ASM5: - 2
+; ASM6: - 3
; ELF: OS/ABI: AMDGPU_HSA (0x40)
; ELF4: ABIVersion: 2
; ELF5: ABIVersion: 3
+; ELF6: ABIVersion: 4
; ELF: Flags [ (0x32C)
; ELF-NEXT: EF_AMDGPU_FEATURE_XNACK_ON_V4 (0x300)
; ELF-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX900 (0x2C)
diff --git a/llvm/test/MC/AMDGPU/hsa-v5-uses-dynamic-stack.s b/llvm/test/MC/AMDGPU/hsa-v5-uses-dynamic-stack.s
index 6edac771faa07e..999d1bd187664f 100644
--- a/llvm/test/MC/AMDGPU/hsa-v5-uses-dynamic-stack.s
+++ b/llvm/test/MC/AMDGPU/hsa-v5-uses-dynamic-stack.s
@@ -3,6 +3,11 @@
// RUN: llvm-readelf -S -r -s %t | FileCheck --check-prefix=READOBJ %s
// RUN: llvm-objdump -s -j .rodata %t | FileCheck --check-prefix=OBJDUMP %s
+// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx904 --amdhsa-code-object-version=6 -mattr=+xnack < %s | FileCheck --check-prefix=ASM %s
+// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx904 --amdhsa-code-object-version=6 -mattr=+xnack -filetype=obj < %s > %t
+// RUN: llvm-readelf -S -r -s %t | FileCheck --check-prefix=READOBJ %s
+// RUN: llvm-objdump -s -j .rodata %t | FileCheck --check-prefix=OBJDUMP %s
+
// READOBJ: Section Headers
// READOBJ: .text PROGBITS {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9]+}} AX {{[0-9]+}} {{[0-9]+}} 256
// READOBJ: .rodata PROGBITS {{[0-9a-f]+}} {{[0-9a-f]+}} 000100 {{[0-9]+}} A {{[0-9]+}} {{[0-9]+}} 64
diff --git a/llvm/test/tools/llvm-readobj/ELF/amdgpu-elf-headers.test b/llvm/test/tools/llvm-readobj/ELF/AMDGPU/elf-headers.test
similarity index 100%
rename from llvm/test/tools/llvm-readobj/ELF/amdgpu-elf-headers.test
rename to llvm/test/tools/llvm-readobj/ELF/AMDGPU/elf-headers.test
diff --git a/llvm/test/tools/llvm-readobj/ELF/AMDGPU/generic_versions.s b/llvm/test/tools/llvm-readobj/ELF/AMDGPU/generic_versions.s
new file mode 100644
index 00000000000000..337938e2a57baf
--- /dev/null
+++ b/llvm/test/tools/llvm-readobj/ELF/AMDGPU/generic_versions.s
@@ -0,0 +1,16 @@
+; RUN: llvm-mc %s --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj --amdhsa-code-object-version=6 --amdgpu-force-generic-version=1 -o %t.o
+; RUN: llvm-readelf -h %t.o | FileCheck %s --check-prefix=V1
+
+; RUN: llvm-mc %s --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj --amdhsa-code-object-version=6 --amdgpu-force-generic-version=4 -o %t.o
+; RUN: llvm-readelf -h %t.o | FileCheck %s --check-prefix=V4
+
+; RUN: llvm-mc %s --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj --amdhsa-code-object-version=6 --amdgpu-force-generic-version=32 -o %t.o
+; RUN: llvm-readelf -h %t.o | FileCheck %s --check-prefix=V32
+
+; RUN: llvm-mc %s --triple=amdgcn-amd-amdhsa -mcpu=gfx908 -filetype=obj --amdhsa-code-object-version=6 --amdgpu-force-generic-version=255 -o %t.o
+; RUN: llvm-readelf -h %t.o | FileCheck %s --check-prefix=V255
+
+; V1: generic_v1
+; V4: generic_v4
+; V32: generic_v32
+; V255: generic_v255
diff --git a/llvm/test/tools/llvm-readobj/ELF/AMDGPU/generic_versions.test b/llvm/test/tools/llvm-readobj/ELF/AMDGPU/generic_versions.test
new file mode 100644
index 00000000000000..ae7f96c92c266e
--- /dev/null
+++ b/llvm/test/tools/llvm-readobj/ELF/AMDGPU/generic_versions.test
@@ -0,0 +1,26 @@
+# RUN: yaml2obj %s -o %t -DABI_VERSION=4 -DGENERICVER=EF_AMDGPU_GENERIC_VERSION_V1
+# RUN: llvm-readelf -h %t | FileCheck %s --check-prefixes=V1
+
+# RUN: yaml2obj %s -o %t -DABI_VERSION=4 -DGENERICVER=EF_AMDGPU_GENERIC_VERSION_V32
+# RUN: llvm-readelf -h %t | FileCheck %s --check-prefixes=V32
+
+# RUN: yaml2obj %s -o %t -DABI_VERSION=4 -DGENERICVER=EF_AMDGPU_GENERIC_VERSION_V126
+# RUN: llvm-readelf -h %t | FileCheck %s --check-prefixes=V126
+
+# RUN: yaml2obj %s -o %t -DABI_VERSION=4 -DGENERICVER=EF_AMDGPU_GENERIC_VERSION_V255
+# RUN: llvm-readelf -h %t | FileCheck %s --check-prefixes=V255
+
+# V1: generic_v1
+# V32: generic_v32
+# V126: generic_v126
+# V255: generic_v255
+
+--- !ELF
+FileHeader:
+ Class: ELFCLASS64
+ Data: ELFDATA2LSB
+ OSABI: ELFOSABI_AMDGPU_HSA
+ ABIVersion: [[ABI_VERSION]]
+ Type: ET_REL
+ Machine: EM_AMDGPU
+ Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX900, [[GENERICVER]] ]
diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp
index f369a63add1149..d1382e829bb3aa 100644
--- a/llvm/tools/llvm-readobj/ELFDumper.cpp
+++ b/llvm/tools/llvm-readobj/ELFDumper.cpp
@@ -621,7 +621,7 @@ template <typename ELFT> class GNUELFDumper : public ELFDumper<ELFT> {
template <typename T, typename TEnum>
std::string printFlags(T Value, ArrayRef<EnumEntry<TEnum>> EnumValues,
TEnum EnumMask1 = {}, TEnum EnumMask2 = {},
- TEnum EnumMask3 = {}) const {
+ TEnum EnumMask3 = {}, TEnum EnumMask4 = {}) const {
std::string Str;
for (const EnumEntry<TEnum> &Flag : EnumValues) {
if (Flag.Value == 0)
@@ -634,6 +634,8 @@ template <typename ELFT> class GNUELFDumper : public ELFDumper<ELFT> {
EnumMask = EnumMask2;
else if (Flag.Value & EnumMask3)
EnumMask = EnumMask3;
+ else if (Flag.Value & EnumMask4)
+ EnumMask = EnumMask4;
bool IsEnum = (Flag.Value & EnumMask) != 0;
if ((!IsEnum && (Value & Flag.Value) == Flag.Value) ||
(IsEnum && (Value & EnumMask) == Flag.Value)) {
@@ -1558,134 +1560,89 @@ const EnumEntry<unsigned> ElfHeaderMipsFlags[] = {
ENUM_ENT(EF_MIPS_ARCH_64R6, "mips64r6")
};
+// clang-format off
+#define AMDGPU_MACH_ENUM_ENTS \
+ ENUM_ENT(EF_AMDGPU_MACH_NONE, "none"), \
+ ENUM_ENT(EF_AMDGPU_MACH_R600_R600, "r600"), \
+ ENUM_ENT(EF_AMDGPU_MACH_R600_R630, "r630"), \
+ ENUM_ENT(EF_AMDGPU_MACH_R600_RS880, "rs880"), \
+ ENUM_ENT(EF_AMDGPU_MACH_R600_RV670, "rv670"), \
+ ENUM_ENT(EF_AMDGPU_MACH_R600_RV710, "rv710"), \
+ ENUM_ENT(EF_AMDGPU_MACH_R600_RV730, "rv730"), \
+ ENUM_ENT(EF_AMDGPU_MACH_R600_RV770, "rv770"), \
+ ENUM_ENT(EF_AMDGPU_MACH_R600_CEDAR, "cedar"), \
+ ENUM_ENT(EF_AMDGPU_MACH_R600_CYPRESS, "cypress"), \
+ ENUM_ENT(EF_AMDGPU_MACH_R600_JUNIPER, "juniper"), \
+ ENUM_ENT(EF_AMDGPU_MACH_R600_REDWOOD, "redwood"), \
+ ENUM_ENT(EF_AMDGPU_MACH_R600_SUMO, "sumo"), \
+ ENUM_ENT(EF_AMDGPU_MACH_R600_BARTS, "barts"), \
+ ENUM_ENT(EF_AMDGPU_MACH_R600_CAICOS, "caicos"), \
+ ENUM_ENT(EF_AMDGPU_MACH_R600_CAYMAN, "cayman"), \
+ ENUM_ENT(EF_AMDGPU_MACH_R600_TURKS, "turks"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX600, "gfx600"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX601, "gfx601"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX602, "gfx602"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX700, "gfx700"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX701, "gfx701"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX702, "gfx702"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX703, "gfx703"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX704, "gfx704"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX705, "gfx705"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX801, "gfx801"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX802, "gfx802"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX803, "gfx803"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX805, "gfx805"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX810, "gfx810"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX900, "gfx900"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX902, "gfx902"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX904, "gfx904"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX906, "gfx906"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX908, "gfx908"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX909, "gfx909"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX90A, "gfx90a"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX90C, "gfx90c"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX940, "gfx940"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX941, "gfx941"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX942, "gfx942"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1010, "gfx1010"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1011, "gfx1011"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1012, "gfx1012"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1013, "gfx1013"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1030, "gfx1030"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1031, "gfx1031"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1032, "gfx1032"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1033, "gfx1033"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1034, "gfx1034"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1035, "gfx1035"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1036, "gfx1036"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1100, "gfx1100"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1101, "gfx1101"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1102, "gfx1102"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1103, "gfx1103"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1150, "gfx1150"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1151, "gfx1151"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1200, "gfx1200"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1201, "gfx1201")
+// clang-format on
+
const EnumEntry<unsigned> ElfHeaderAMDGPUFlagsABIVersion3[] = {
- ENUM_ENT(EF_AMDGPU_MACH_NONE, "none"),
- ENUM_ENT(EF_AMDGPU_MACH_R600_R600, "r600"),
- ENUM_ENT(EF_AMDGPU_MACH_R600_R630, "r630"),
- ENUM_ENT(EF_AMDGPU_MACH_R600_RS880, "rs880"),
- ENUM_ENT(EF_AMDGPU_MACH_R600_RV670, "rv670"),
- ENUM_ENT(EF_AMDGPU_MACH_R600_RV710, "rv710"),
- ENUM_ENT(EF_AMDGPU_MACH_R600_RV730, "rv730"),
- ENUM_ENT(EF_AMDGPU_MACH_R600_RV770, "rv770"),
- ENUM_ENT(EF_AMDGPU_MACH_R600_CEDAR, "cedar"),
- ENUM_ENT(EF_AMDGPU_MACH_R600_CYPRESS, "cypress"),
- ENUM_ENT(EF_AMDGPU_MACH_R600_JUNIPER, "juniper"),
- ENUM_ENT(EF_AMDGPU_MACH_R600_REDWOOD, "redwood"),
- ENUM_ENT(EF_AMDGPU_MACH_R600_SUMO, "sumo"),
- ENUM_ENT(EF_AMDGPU_MACH_R600_BARTS, "barts"),
- ENUM_ENT(EF_AMDGPU_MACH_R600_CAICOS, "caicos"),
- ENUM_ENT(EF_AMDGPU_MACH_R600_CAYMAN, "cayman"),
- ENUM_ENT(EF_AMDGPU_MACH_R600_TURKS, "turks"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX600, "gfx600"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX601, "gfx601"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX602, "gfx602"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX700, "gfx700"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX701, "gfx701"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX702, "gfx702"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX703, "gfx703"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX704, "gfx704"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX705, "gfx705"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX801, "gfx801"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX802, "gfx802"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX803, "gfx803"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX805, "gfx805"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX810, "gfx810"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX900, "gfx900"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX902, "gfx902"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX904, "gfx904"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX906, "gfx906"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX908, "gfx908"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX909, "gfx909"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX90A, "gfx90a"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX90C, "gfx90c"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX940, "gfx940"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX941, "gfx941"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX942, "gfx942"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1010, "gfx1010"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1011, "gfx1011"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1012, "gfx1012"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1013, "gfx1013"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1030, "gfx1030"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1031, "gfx1031"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1032, "gfx1032"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1033, "gfx1033"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1034, "gfx1034"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1035, "gfx1035"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1036, "gfx1036"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1100, "gfx1100"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1101, "gfx1101"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1102, "gfx1102"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1103, "gfx1103"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1150, "gfx1150"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1151, "gfx1151"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1200, "gfx1200"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1201, "gfx1201"),
+ AMDGPU_MACH_ENUM_ENTS,
ENUM_ENT(EF_AMDGPU_FEATURE_XNACK_V3, "xnack"),
ENUM_ENT(EF_AMDGPU_FEATURE_SRAMECC_V3, "sramecc"),
};
const EnumEntry<unsigned> ElfHeaderAMDGPUFlagsABIVersion4[] = {
- ENUM_ENT(EF_AMDGPU_MACH_NONE, "none"),
- ENUM_ENT(EF_AMDGPU_MACH_R600_R600, "r600"),
- ENUM_ENT(EF_AMDGPU_MACH_R600_R630, "r630"),
- ENUM_ENT(EF_AMDGPU_MACH_R600_RS880, "rs880"),
- ENUM_ENT(EF_AMDGPU_MACH_R600_RV670, "rv670"),
- ENUM_ENT(EF_AMDGPU_MACH_R600_RV710, "rv710"),
- ENUM_ENT(EF_AMDGPU_MACH_R600_RV730, "rv730"),
- ENUM_ENT(EF_AMDGPU_MACH_R600_RV770, "rv770"),
- ENUM_ENT(EF_AMDGPU_MACH_R600_CEDAR, "cedar"),
- ENUM_ENT(EF_AMDGPU_MACH_R600_CYPRESS, "cypress"),
- ENUM_ENT(EF_AMDGPU_MACH_R600_JUNIPER, "juniper"),
- ENUM_ENT(EF_AMDGPU_MACH_R600_REDWOOD, "redwood"),
- ENUM_ENT(EF_AMDGPU_MACH_R600_SUMO, "sumo"),
- ENUM_ENT(EF_AMDGPU_MACH_R600_BARTS, "barts"),
- ENUM_ENT(EF_AMDGPU_MACH_R600_CAICOS, "caicos"),
- ENUM_ENT(EF_AMDGPU_MACH_R600_CAYMAN, "cayman"),
- ENUM_ENT(EF_AMDGPU_MACH_R600_TURKS, "turks"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX600, "gfx600"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX601, "gfx601"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX602, "gfx602"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX700, "gfx700"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX701, "gfx701"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX702, "gfx702"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX703, "gfx703"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX704, "gfx704"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX705, "gfx705"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX801, "gfx801"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX802, "gfx802"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX803, "gfx803"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX805, "gfx805"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX810, "gfx810"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX900, "gfx900"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX902, "gfx902"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX904, "gfx904"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX906, "gfx906"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX908, "gfx908"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX909, "gfx909"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX90A, "gfx90a"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX90C, "gfx90c"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX940, "gfx940"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX941, "gfx941"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX942, "gfx942"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1010, "gfx1010"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1011, "gfx1011"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1012, "gfx1012"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1013, "gfx1013"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1030, "gfx1030"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1031, "gfx1031"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1032, "gfx1032"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1033, "gfx1033"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1034, "gfx1034"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1035, "gfx1035"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1036, "gfx1036"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1100, "gfx1100"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1101, "gfx1101"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1102, "gfx1102"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1103, "gfx1103"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1150, "gfx1150"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1151, "gfx1151"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1200, "gfx1200"),
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1201, "gfx1201"),
+ AMDGPU_MACH_ENUM_ENTS,
+ ENUM_ENT(EF_AMDGPU_FEATURE_XNACK_ANY_V4, "xnack"),
+ ENUM_ENT(EF_AMDGPU_FEATURE_XNACK_OFF_V4, "xnack-"),
+ ENUM_ENT(EF_AMDGPU_FEATURE_XNACK_ON_V4, "xnack+"),
+ ENUM_ENT(EF_AMDGPU_FEATURE_SRAMECC_ANY_V4, "sramecc"),
+ ENUM_ENT(EF_AMDGPU_FEATURE_SRAMECC_OFF_V4, "sramecc-"),
+ ENUM_ENT(EF_AMDGPU_FEATURE_SRAMECC_ON_V4, "sramecc+"),
+};
+
+const EnumEntry<unsigned> ElfHeaderAMDGPUFlagsABIVersion6[] = {
+ AMDGPU_MACH_ENUM_ENTS,
ENUM_ENT(EF_AMDGPU_FEATURE_XNACK_ANY_V4, "xnack"),
ENUM_ENT(EF_AMDGPU_FEATURE_XNACK_OFF_V4, "xnack-"),
ENUM_ENT(EF_AMDGPU_FEATURE_XNACK_ON_V4, "xnack+"),
@@ -3678,6 +3635,19 @@ template <class ELFT> void GNUELFDumper<ELFT>::printFileHeaders() {
unsigned(ELF::EF_AMDGPU_FEATURE_XNACK_V4),
unsigned(ELF::EF_AMDGPU_FEATURE_SRAMECC_V4));
break;
+ case ELF::ELFABIVERSION_AMDGPU_HSA_V6:
+ ElfFlags =
+ printFlags(e.e_flags, ArrayRef(ElfHeaderAMDGPUFlagsABIVersion6),
+ unsigned(ELF::EF_AMDGPU_MACH),
+ unsigned(ELF::EF_AMDGPU_FEATURE_XNACK_V4),
+ unsigned(ELF::EF_AMDGPU_FEATURE_SRAMECC_V4),
+ unsigned(ELF::EF_AMDGPU_GENERIC_VERSION));
+ if (auto GenericV = e.e_flags & ELF::EF_AMDGPU_GENERIC_VERSION) {
+ ElfFlags +=
+ ", generic_v" +
+ to_string(GenericV >> ELF::EF_AMDGPU_GENERIC_VERSION_OFFSET);
+ }
+ break;
}
}
Str = "0x" + utohexstr(e.e_flags);
@@ -6949,6 +6919,14 @@ template <class ELFT> void LLVMELFDumper<ELFT>::printFileHeaders() {
unsigned(ELF::EF_AMDGPU_FEATURE_XNACK_V4),
unsigned(ELF::EF_AMDGPU_FEATURE_SRAMECC_V4));
break;
+ case ELF::ELFABIVERSION_AMDGPU_HSA_V6:
+ W.printFlags("Flags", E.e_flags,
+ ArrayRef(ElfHeaderAMDGPUFlagsABIVersion6),
+ unsigned(ELF::EF_AMDGPU_MACH),
+ unsigned(ELF::EF_AMDGPU_FEATURE_XNACK_V4),
+ unsigned(ELF::EF_AMDGPU_FEATURE_SRAMECC_V4),
+ unsigned(ELF::EF_AMDGPU_GENERIC_VERSION));
+ break;
}
} else if (E.e_machine == EM_RISCV)
W.printFlags("Flags", E.e_flags, ArrayRef(ElfHeaderRISCVFlags));
>From 1f556ac08a1c517acba4c75c3bc14431ca15330f Mon Sep 17 00:00:00 2001
From: pvanhout <pierre.vanhoutryve at amd.com>
Date: Thu, 4 Jan 2024 14:48:05 +0100
Subject: [PATCH 2/3] [AMDGPU] Introduce GFX9/10.1/10.3/11 Generic Targets
These generic targets include multiple GPUs and will, in the future, provide a way to build once and run on multiple GPU, at the cost of less optimization opportunities.
Note that this is just doing the compiler side of things, device libs an runtimes/loader/etc. don't know about these targets yet, so none of them actually work in practice right now. This is just the initial commit to make LLVM aware of them.
No docs in this patch either as I plan to do it all in a follow-up patch.
---
clang/lib/Basic/Targets/AMDGPU.cpp | 20 +-
clang/test/Driver/amdgpu-macros.cl | 5 +
clang/test/Driver/amdgpu-mcpu.cl | 10 +
llvm/docs/AMDGPUUsage.rst | 323 +++++++++++++-----
llvm/include/llvm/BinaryFormat/ELF.h | 6 +-
llvm/include/llvm/TargetParser/TargetParser.h | 10 +
llvm/lib/Object/ELFObjectFile.cpp | 10 +
llvm/lib/ObjectYAML/ELFYAML.cpp | 4 +
llvm/lib/Target/AMDGPU/AMDGPU.td | 87 ++---
llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp | 7 +
.../AMDGPURemoveIncompatibleFunctions.cpp | 6 +-
llvm/lib/Target/AMDGPU/GCNProcessors.td | 22 ++
llvm/lib/Target/AMDGPU/GCNSubtarget.h | 4 +
.../MCTargetDesc/AMDGPUTargetStreamer.cpp | 26 ++
llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h | 11 +
llvm/lib/TargetParser/TargetParser.cpp | 46 +++
.../GlobalISel/llvm.amdgcn.workitem.id.ll | 1 +
.../CodeGen/AMDGPU/directive-amdgcn-target.ll | 14 +
.../CodeGen/AMDGPU/elf-header-flags-mach.ll | 10 +
llvm/test/CodeGen/AMDGPU/gds-allocation.ll | 1 +
llvm/test/CodeGen/AMDGPU/gds-atomic.ll | 1 +
.../AMDGPU/generic-targets-require-v6.ll | 18 +
.../AMDGPU/hsa-generic-target-features.ll | 31 ++
.../llvm.amdgcn.image.gather4.d16.dim.ll | 3 +
.../AMDGPU/llvm.amdgcn.image.sample.dim.ll | 3 +
.../AMDGPU/unsupported-image-sample.ll | 12 +-
.../Object/AMDGPU/elf-header-flags-mach.yaml | 29 ++
.../llvm-objdump/ELF/AMDGPU/subtarget.ll | 20 ++
.../llvm-readobj/ELF/AMDGPU/elf-headers.test | 12 +
llvm/tools/llvm-readobj/ELFDumper.cpp | 128 +++----
30 files changed, 689 insertions(+), 191 deletions(-)
create mode 100644 llvm/test/CodeGen/AMDGPU/generic-targets-require-v6.ll
create mode 100644 llvm/test/CodeGen/AMDGPU/hsa-generic-target-features.ll
diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp b/clang/lib/Basic/Targets/AMDGPU.cpp
index 6f3a4908623da7..ca935174c05cec 100644
--- a/clang/lib/Basic/Targets/AMDGPU.cpp
+++ b/clang/lib/Basic/Targets/AMDGPU.cpp
@@ -275,13 +275,25 @@ void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts,
Builder.defineMacro("__R600__");
if (GPUKind != llvm::AMDGPU::GK_NONE) {
- StringRef CanonName = isAMDGCN(getTriple()) ?
- getArchNameAMDGCN(GPUKind) : getArchNameR600(GPUKind);
+ std::string CanonName = (isAMDGCN(getTriple()) ? getArchNameAMDGCN(GPUKind)
+ : getArchNameR600(GPUKind))
+ .str();
+
+ // Sanitize the name of generic targets.
+ // e.g. gfx10.1-generic -> gfx10_1_generic
+ if (GPUKind >= llvm::AMDGPU::GK_AMDGCN_GENERIC_FIRST &&
+ GPUKind <= llvm::AMDGPU::GK_AMDGCN_GENERIC_LAST) {
+ std::replace(CanonName.begin(), CanonName.end(), '.', '_');
+ std::replace(CanonName.begin(), CanonName.end(), '-', '_');
+ }
+
Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__"));
// Emit macros for gfx family e.g. gfx906 -> __GFX9__, gfx1030 -> __GFX10___
if (isAMDGCN(getTriple())) {
- assert(CanonName.starts_with("gfx") && "Invalid amdgcn canonical name");
- Builder.defineMacro(Twine("__") + Twine(CanonName.drop_back(2).upper()) +
+ assert(StringRef(CanonName).starts_with("gfx") &&
+ "Invalid amdgcn canonical name");
+ StringRef CanonFamilyName = getArchFamilyNameAMDGCN(GPUKind);
+ Builder.defineMacro(Twine("__") + Twine(CanonFamilyName.upper()) +
Twine("__"));
}
if (isAMDGCN(getTriple())) {
diff --git a/clang/test/Driver/amdgpu-macros.cl b/clang/test/Driver/amdgpu-macros.cl
index 81c22af460d12d..3b10444ef71d36 100644
--- a/clang/test/Driver/amdgpu-macros.cl
+++ b/clang/test/Driver/amdgpu-macros.cl
@@ -131,6 +131,11 @@
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx1200 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx1200 -DFAMILY=GFX12
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx1201 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx1201 -DFAMILY=GFX12
+// RUN: %clang -E -dM -target amdgcn -mcpu=gfx9-generic %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=64 -DCPU=gfx9_generic -DFAMILY=GFX9
+// RUN: %clang -E -dM -target amdgcn -mcpu=gfx10.1-generic %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx10_1_generic -DFAMILY=GFX10
+// RUN: %clang -E -dM -target amdgcn -mcpu=gfx10.3-generic %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx10_3_generic -DFAMILY=GFX10
+// RUN: %clang -E -dM -target amdgcn -mcpu=gfx11-generic %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx11_generic -DFAMILY=GFX11
+
// ARCH-GCN-DAG: #define FP_FAST_FMA 1
// FAST_FMAF-DAG: #define FP_FAST_FMAF 1
diff --git a/clang/test/Driver/amdgpu-mcpu.cl b/clang/test/Driver/amdgpu-mcpu.cl
index eeb16ae98ebad7..6f18ea0615cb69 100644
--- a/clang/test/Driver/amdgpu-mcpu.cl
+++ b/clang/test/Driver/amdgpu-mcpu.cl
@@ -115,6 +115,11 @@
// RUN: %clang -### -target amdgcn -mcpu=gfx1200 %s 2>&1 | FileCheck --check-prefix=GFX1200 %s
// RUN: %clang -### -target amdgcn -mcpu=gfx1201 %s 2>&1 | FileCheck --check-prefix=GFX1201 %s
+// RUN: %clang -### -target amdgcn -mcpu=gfx9-generic %s 2>&1 | FileCheck --check-prefix=GFX9_GENERIC %s
+// RUN: %clang -### -target amdgcn -mcpu=gfx10.1-generic %s 2>&1 | FileCheck --check-prefix=GFX10_1_GENERIC %s
+// RUN: %clang -### -target amdgcn -mcpu=gfx10.3-generic %s 2>&1 | FileCheck --check-prefix=GFX10_3_GENERIC %s
+// RUN: %clang -### -target amdgcn -mcpu=gfx11-generic %s 2>&1 | FileCheck --check-prefix=GFX11_GENERIC %s
+
// GCNDEFAULT-NOT: -target-cpu
// GFX600: "-target-cpu" "gfx600"
// GFX601: "-target-cpu" "gfx601"
@@ -160,3 +165,8 @@
// GFX1151: "-target-cpu" "gfx1151"
// GFX1200: "-target-cpu" "gfx1200"
// GFX1201: "-target-cpu" "gfx1201"
+
+// GFX9_GENERIC: "-target-cpu" "gfx9-generic"
+// GFX10_1_GENERIC: "-target-cpu" "gfx10.1-generic"
+// GFX10_3_GENERIC: "-target-cpu" "gfx10.3-generic"
+// GFX11_GENERIC: "-target-cpu" "gfx11-generic"
diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst
index 848cd7471be23b..345f3137bedc5c 100644
--- a/llvm/docs/AMDGPUUsage.rst
+++ b/llvm/docs/AMDGPUUsage.rst
@@ -520,6 +520,104 @@ Every processor supports every OS ABI (see :ref:`amdgpu-os`) with the following
=========== =============== ============ ===== ================= =============== =============== ======================
+Generic processors also exist.
+Generic processor code objects can be executed on any of the processors that
+are supported by the generic processor. Such code objects may not perform as well
+as those for the non-generic processors.
+
+Generic processors are only available on code object V6 and above (see :ref:`amdgpu-elf-code-object`).
+
+Generic processor code objects are versioned (see :ref:`amdgpu-elf-header-e_flags-table-v6-onwards`).
+The version number is used by runtimes to determine if a code object can be run on a specific agent.
+The version number may be increased as-needed, e.g., when a major codegen change occurs for a generic
+target.
+
+ .. table:: AMDGPU Generic Processors
+ :name: amdgpu-generic-processor-table
+
+ ==================== ============== ================= ================== ================= =================================
+ Processor Target Supported Target Features Target Properties Target Restrictions
+ Triple Processors Supported
+ Architecture
+
+ ==================== ============== ================= ================== ================= =================================
+ ``gfx9-generic`` ``amdgcn`` - ``gfx900`` - xnack - Absolute flat - ``v_mad_mix`` instructions
+ - ``gfx902`` scratch are not available on
+ - ``gfx904`` ``gfx900``, ``gfx902``,
+ - ``gfx906`` ``gfx909``, ``gfx90c``
+ - ``gfx909`` - ``v_fma_mix`` instructions
+ - ``gfx90c`` are not available on ``gfx904``
+ - sramecc is not available on
+ ``gfx906``
+ - The following instructions
+ are not available on ``gfx906``:
+
+ - ``v_fmac_f32``
+ - ``v_xnor_b32``
+ - ``v_dot4_i32_i8``
+ - ``v_dot8_i32_i4``
+ - ``v_dot2_i32_i16``
+ - ``v_dot2_u32_u16``
+ - ``v_dot4_u32_u8``
+ - ``v_dot8_u32_u4``
+ - ``v_dot2_f32_f16``
+
+
+ ``gfx10.1-generic`` ``amdgcn`` - ``gfx1010`` - xnack - Absolute flat - The following instructions are
+ - ``gfx1011`` - wavefrontsize64 scratch not available on ``gfx1011``
+ - ``gfx1012`` - cumode and ``gfx1012``
+ - ``gfx1013``
+ - ``v_dot4_i32_i8``
+ - ``v_dot8_i32_i4``
+ - ``v_dot2_i32_i16``
+ - ``v_dot2_u32_u16``
+ - ``v_dot2c_f32_f16``
+ - ``v_dot4c_i32_i8``
+ - ``v_dot4_u32_u8``
+ - ``v_dot8_u32_u4``
+ - ``v_dot2_f32_f16``
+
+ - BVH Ray Tracing instructions
+ are not available on
+ ``gfx1013``
+
+
+ ``gfx10.3-generic`` ``amdgcn`` - ``gfx1030`` - wavefrontsize64 - Absolute flat No restrictions.
+ - ``gfx1031`` - cumode scratch
+ - ``gfx1032``
+ - ``gfx1033``
+ - ``gfx1034``
+ - ``gfx1035``
+ - ``gfx1036``
+
+
+ ``gfx11-generic`` ``amdgcn`` - ``gfx1100`` - wavefrontsize64 - Architected Various codegen pessimizations
+ - ``gfx1101`` - cumode flat scratch are applied to work around some
+ - ``gfx1102`` - Packed hazards specific to some targets
+ - ``gfx1103`` work-item within this family.
+ - ``gfx1150`` IDs
+ - ``gfx1151`` Not all VGPRs can be used on:
+
+ - ``gfx1100``
+ - ``gfx1101``
+ - ``gfx1151``
+
+ SALU floating point instructions
+ and single-use VGPR hint
+ instructions are not available
+ on:
+
+ - ``gfx1150``
+ - ``gfx1151``
+
+ SGPRs are not supported for src1
+ in dpp instructions for:
+
+ - ``gfx1150``
+ - ``gfx1151``
+ ==================== ============== ================= ================== ================= =================================
+
+
.. _amdgpu-target-features:
Target Features
@@ -1439,6 +1537,7 @@ The AMDGPU backend uses the following ELF header:
- ``ELFABIVERSION_AMDGPU_HSA_V3``
- ``ELFABIVERSION_AMDGPU_HSA_V4``
- ``ELFABIVERSION_AMDGPU_HSA_V5``
+ - ``ELFABIVERSION_AMDGPU_HSA_V6``
- ``ELFABIVERSION_AMDGPU_PAL``
- ``ELFABIVERSION_AMDGPU_MESA3D``
``e_type`` - ``ET_REL``
@@ -1447,7 +1546,8 @@ The AMDGPU backend uses the following ELF header:
``e_entry`` 0
``e_flags`` See :ref:`amdgpu-elf-header-e_flags-v2-table`,
:ref:`amdgpu-elf-header-e_flags-table-v3`,
- and :ref:`amdgpu-elf-header-e_flags-table-v4-onwards`
+ :ref:`amdgpu-elf-header-e_flags-table-v4-v5`,
+ and :ref:`amdgpu-elf-header-e_flags-table-v6-onwards`
========================== ===============================
..
@@ -1467,6 +1567,7 @@ The AMDGPU backend uses the following ELF header:
``ELFABIVERSION_AMDGPU_HSA_V3`` 1
``ELFABIVERSION_AMDGPU_HSA_V4`` 2
``ELFABIVERSION_AMDGPU_HSA_V5`` 3
+ ``ELFABIVERSION_AMDGPU_HSA_V6`` 4
``ELFABIVERSION_AMDGPU_PAL`` 0
``ELFABIVERSION_AMDGPU_MESA3D`` 0
=============================== =====
@@ -1513,6 +1614,10 @@ The AMDGPU backend uses the following ELF header:
runtime ABI for code object V5. Specify using the Clang option
``-mcode-object-version=5``.
+ * ``ELFABIVERSION_AMDGPU_HSA_V6`` is used to specify the version of AMD HSA
+ runtime ABI for code object V6. Specify using the Clang option
+ ``-mcode-object-version=6``.
+
* ``ELFABIVERSION_AMDGPU_PAL`` is used to specify the version of AMD PAL
runtime ABI.
@@ -1539,8 +1644,9 @@ The AMDGPU backend uses the following ELF header:
``NT_AMD_HSA_ISA_VERSION`` note record for code object V2 (see
:ref:`amdgpu-note-records-v2`) and in the ``EF_AMDGPU_MACH`` bit field of the
``e_flags`` for code object V3 and above (see
- :ref:`amdgpu-elf-header-e_flags-table-v3` and
- :ref:`amdgpu-elf-header-e_flags-table-v4-onwards`).
+ :ref:`amdgpu-elf-header-e_flags-table-v3`,
+ :ref:`amdgpu-elf-header-e_flags-table-v4-v5` and
+ :ref:`amdgpu-elf-header-e_flags-table-v6-onwards`).
``e_entry``
The entry point is 0 as the entry points for individual kernels must be
@@ -1611,8 +1717,8 @@ The AMDGPU backend uses the following ELF header:
:ref:`amdgpu-target-features`.
================================= ===== =============================
- .. table:: AMDGPU ELF Header ``e_flags`` for Code Object V4 and After
- :name: amdgpu-elf-header-e_flags-table-v4-onwards
+ .. table:: AMDGPU ELF Header ``e_flags`` for Code Object V4 and V5
+ :name: amdgpu-elf-header-e_flags-table-v4-v5
============================================ ===== ===================================
Name Value Description
@@ -1638,80 +1744,118 @@ The AMDGPU backend uses the following ELF header:
``EF_AMDGPU_FEATURE_SRAMECC_ON_V4`` 0xc00 SRAMECC enabled.
============================================ ===== ===================================
+ .. table:: AMDGPU ELF Header ``e_flags`` for Code Object V6 and After
+ :name: amdgpu-elf-header-e_flags-table-v6-onwards
+
+ ============================================ ========== =========================================
+ Name Value Description
+ ============================================ ========== =========================================
+ ``EF_AMDGPU_MACH`` 0x0ff AMDGPU processor selection
+ mask for
+ ``EF_AMDGPU_MACH_xxx`` values
+ defined in
+ :ref:`amdgpu-ef-amdgpu-mach-table`.
+ ``EF_AMDGPU_FEATURE_XNACK_V4`` 0x300 XNACK selection mask for
+ ``EF_AMDGPU_FEATURE_XNACK_*_V4``
+ values.
+ ``EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4`` 0x000 XNACK unsupported.
+ ``EF_AMDGPU_FEATURE_XNACK_ANY_V4`` 0x100 XNACK can have any value.
+ ``EF_AMDGPU_FEATURE_XNACK_OFF_V4`` 0x200 XNACK disabled.
+ ``EF_AMDGPU_FEATURE_XNACK_ON_V4`` 0x300 XNACK enabled.
+ ``EF_AMDGPU_FEATURE_SRAMECC_V4`` 0xc00 SRAMECC selection mask for
+ ``EF_AMDGPU_FEATURE_SRAMECC_*_V4``
+ values.
+ ``EF_AMDGPU_FEATURE_SRAMECC_UNSUPPORTED_V4`` 0x000 SRAMECC unsupported.
+ ``EF_AMDGPU_FEATURE_SRAMECC_ANY_V4`` 0x400 SRAMECC can have any value.
+ ``EF_AMDGPU_FEATURE_SRAMECC_OFF_V4`` 0x800 SRAMECC disabled,
+ ``EF_AMDGPU_FEATURE_SRAMECC_ON_V4`` 0xc00 SRAMECC enabled.
+ ``EF_AMDGPU_GENERIC_VERSION_V<X>`` 0x01000000 Value between 1 and 255 for generic code
+ to object versioning, stored in the MSB.
+ 0xff000000 See :ref:`amdgpu-generic-processor-table`
+ ============================================ ========== =========================================
+
.. table:: AMDGPU ``EF_AMDGPU_MACH`` Values
:name: amdgpu-ef-amdgpu-mach-table
- ==================================== ========== =============================
- Name Value Description (see
- :ref:`amdgpu-processor-table`)
- ==================================== ========== =============================
- ``EF_AMDGPU_MACH_NONE`` 0x000 *not specified*
- ``EF_AMDGPU_MACH_R600_R600`` 0x001 ``r600``
- ``EF_AMDGPU_MACH_R600_R630`` 0x002 ``r630``
- ``EF_AMDGPU_MACH_R600_RS880`` 0x003 ``rs880``
- ``EF_AMDGPU_MACH_R600_RV670`` 0x004 ``rv670``
- ``EF_AMDGPU_MACH_R600_RV710`` 0x005 ``rv710``
- ``EF_AMDGPU_MACH_R600_RV730`` 0x006 ``rv730``
- ``EF_AMDGPU_MACH_R600_RV770`` 0x007 ``rv770``
- ``EF_AMDGPU_MACH_R600_CEDAR`` 0x008 ``cedar``
- ``EF_AMDGPU_MACH_R600_CYPRESS`` 0x009 ``cypress``
- ``EF_AMDGPU_MACH_R600_JUNIPER`` 0x00a ``juniper``
- ``EF_AMDGPU_MACH_R600_REDWOOD`` 0x00b ``redwood``
- ``EF_AMDGPU_MACH_R600_SUMO`` 0x00c ``sumo``
- ``EF_AMDGPU_MACH_R600_BARTS`` 0x00d ``barts``
- ``EF_AMDGPU_MACH_R600_CAICOS`` 0x00e ``caicos``
- ``EF_AMDGPU_MACH_R600_CAYMAN`` 0x00f ``cayman``
- ``EF_AMDGPU_MACH_R600_TURKS`` 0x010 ``turks``
- *reserved* 0x011 - Reserved for ``r600``
- 0x01f architecture processors.
- ``EF_AMDGPU_MACH_AMDGCN_GFX600`` 0x020 ``gfx600``
- ``EF_AMDGPU_MACH_AMDGCN_GFX601`` 0x021 ``gfx601``
- ``EF_AMDGPU_MACH_AMDGCN_GFX700`` 0x022 ``gfx700``
- ``EF_AMDGPU_MACH_AMDGCN_GFX701`` 0x023 ``gfx701``
- ``EF_AMDGPU_MACH_AMDGCN_GFX702`` 0x024 ``gfx702``
- ``EF_AMDGPU_MACH_AMDGCN_GFX703`` 0x025 ``gfx703``
- ``EF_AMDGPU_MACH_AMDGCN_GFX704`` 0x026 ``gfx704``
- *reserved* 0x027 Reserved.
- ``EF_AMDGPU_MACH_AMDGCN_GFX801`` 0x028 ``gfx801``
- ``EF_AMDGPU_MACH_AMDGCN_GFX802`` 0x029 ``gfx802``
- ``EF_AMDGPU_MACH_AMDGCN_GFX803`` 0x02a ``gfx803``
- ``EF_AMDGPU_MACH_AMDGCN_GFX810`` 0x02b ``gfx810``
- ``EF_AMDGPU_MACH_AMDGCN_GFX900`` 0x02c ``gfx900``
- ``EF_AMDGPU_MACH_AMDGCN_GFX902`` 0x02d ``gfx902``
- ``EF_AMDGPU_MACH_AMDGCN_GFX904`` 0x02e ``gfx904``
- ``EF_AMDGPU_MACH_AMDGCN_GFX906`` 0x02f ``gfx906``
- ``EF_AMDGPU_MACH_AMDGCN_GFX908`` 0x030 ``gfx908``
- ``EF_AMDGPU_MACH_AMDGCN_GFX909`` 0x031 ``gfx909``
- ``EF_AMDGPU_MACH_AMDGCN_GFX90C`` 0x032 ``gfx90c``
- ``EF_AMDGPU_MACH_AMDGCN_GFX1010`` 0x033 ``gfx1010``
- ``EF_AMDGPU_MACH_AMDGCN_GFX1011`` 0x034 ``gfx1011``
- ``EF_AMDGPU_MACH_AMDGCN_GFX1012`` 0x035 ``gfx1012``
- ``EF_AMDGPU_MACH_AMDGCN_GFX1030`` 0x036 ``gfx1030``
- ``EF_AMDGPU_MACH_AMDGCN_GFX1031`` 0x037 ``gfx1031``
- ``EF_AMDGPU_MACH_AMDGCN_GFX1032`` 0x038 ``gfx1032``
- ``EF_AMDGPU_MACH_AMDGCN_GFX1033`` 0x039 ``gfx1033``
- ``EF_AMDGPU_MACH_AMDGCN_GFX602`` 0x03a ``gfx602``
- ``EF_AMDGPU_MACH_AMDGCN_GFX705`` 0x03b ``gfx705``
- ``EF_AMDGPU_MACH_AMDGCN_GFX805`` 0x03c ``gfx805``
- ``EF_AMDGPU_MACH_AMDGCN_GFX1035`` 0x03d ``gfx1035``
- ``EF_AMDGPU_MACH_AMDGCN_GFX1034`` 0x03e ``gfx1034``
- ``EF_AMDGPU_MACH_AMDGCN_GFX90A`` 0x03f ``gfx90a``
- ``EF_AMDGPU_MACH_AMDGCN_GFX940`` 0x040 ``gfx940``
- ``EF_AMDGPU_MACH_AMDGCN_GFX1100`` 0x041 ``gfx1100``
- ``EF_AMDGPU_MACH_AMDGCN_GFX1013`` 0x042 ``gfx1013``
- ``EF_AMDGPU_MACH_AMDGCN_GFX1150`` 0x043 ``gfx1150``
- ``EF_AMDGPU_MACH_AMDGCN_GFX1103`` 0x044 ``gfx1103``
- ``EF_AMDGPU_MACH_AMDGCN_GFX1036`` 0x045 ``gfx1036``
- ``EF_AMDGPU_MACH_AMDGCN_GFX1101`` 0x046 ``gfx1101``
- ``EF_AMDGPU_MACH_AMDGCN_GFX1102`` 0x047 ``gfx1102``
- ``EF_AMDGPU_MACH_AMDGCN_GFX1200`` 0x048 ``gfx1200``
- *reserved* 0x049 Reserved.
- ``EF_AMDGPU_MACH_AMDGCN_GFX1151`` 0x04a ``gfx1151``
- ``EF_AMDGPU_MACH_AMDGCN_GFX941`` 0x04b ``gfx941``
- ``EF_AMDGPU_MACH_AMDGCN_GFX942`` 0x04c ``gfx942``
- *reserved* 0x04d Reserved.
- ``EF_AMDGPU_MACH_AMDGCN_GFX1201`` 0x04e ``gfx1201``
- ==================================== ========== =============================
+ ========================================== ========== =============================
+ Name Value Description (see
+ :ref:`amdgpu-processor-table`)
+ ========================================== ========== =============================
+ ``EF_AMDGPU_MACH_NONE`` 0x000 *not specified*
+ ``EF_AMDGPU_MACH_R600_R600`` 0x001 ``r600``
+ ``EF_AMDGPU_MACH_R600_R630`` 0x002 ``r630``
+ ``EF_AMDGPU_MACH_R600_RS880`` 0x003 ``rs880``
+ ``EF_AMDGPU_MACH_R600_RV670`` 0x004 ``rv670``
+ ``EF_AMDGPU_MACH_R600_RV710`` 0x005 ``rv710``
+ ``EF_AMDGPU_MACH_R600_RV730`` 0x006 ``rv730``
+ ``EF_AMDGPU_MACH_R600_RV770`` 0x007 ``rv770``
+ ``EF_AMDGPU_MACH_R600_CEDAR`` 0x008 ``cedar``
+ ``EF_AMDGPU_MACH_R600_CYPRESS`` 0x009 ``cypress``
+ ``EF_AMDGPU_MACH_R600_JUNIPER`` 0x00a ``juniper``
+ ``EF_AMDGPU_MACH_R600_REDWOOD`` 0x00b ``redwood``
+ ``EF_AMDGPU_MACH_R600_SUMO`` 0x00c ``sumo``
+ ``EF_AMDGPU_MACH_R600_BARTS`` 0x00d ``barts``
+ ``EF_AMDGPU_MACH_R600_CAICOS`` 0x00e ``caicos``
+ ``EF_AMDGPU_MACH_R600_CAYMAN`` 0x00f ``cayman``
+ ``EF_AMDGPU_MACH_R600_TURKS`` 0x010 ``turks``
+ *reserved* 0x011 - Reserved for ``r600``
+ 0x01f architecture processors.
+ ``EF_AMDGPU_MACH_AMDGCN_GFX600`` 0x020 ``gfx600``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX601`` 0x021 ``gfx601``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX700`` 0x022 ``gfx700``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX701`` 0x023 ``gfx701``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX702`` 0x024 ``gfx702``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX703`` 0x025 ``gfx703``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX704`` 0x026 ``gfx704``
+ *reserved* 0x027 Reserved.
+ ``EF_AMDGPU_MACH_AMDGCN_GFX801`` 0x028 ``gfx801``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX802`` 0x029 ``gfx802``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX803`` 0x02a ``gfx803``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX810`` 0x02b ``gfx810``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX900`` 0x02c ``gfx900``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX902`` 0x02d ``gfx902``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX904`` 0x02e ``gfx904``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX906`` 0x02f ``gfx906``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX908`` 0x030 ``gfx908``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX909`` 0x031 ``gfx909``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX90C`` 0x032 ``gfx90c``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX1010`` 0x033 ``gfx1010``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX1011`` 0x034 ``gfx1011``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX1012`` 0x035 ``gfx1012``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX1030`` 0x036 ``gfx1030``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX1031`` 0x037 ``gfx1031``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX1032`` 0x038 ``gfx1032``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX1033`` 0x039 ``gfx1033``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX602`` 0x03a ``gfx602``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX705`` 0x03b ``gfx705``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX805`` 0x03c ``gfx805``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX1035`` 0x03d ``gfx1035``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX1034`` 0x03e ``gfx1034``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX90A`` 0x03f ``gfx90a``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX940`` 0x040 ``gfx940``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX1100`` 0x041 ``gfx1100``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX1013`` 0x042 ``gfx1013``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX1150`` 0x043 ``gfx1150``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX1103`` 0x044 ``gfx1103``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX1036`` 0x045 ``gfx1036``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX1101`` 0x046 ``gfx1101``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX1102`` 0x047 ``gfx1102``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX1200`` 0x048 ``gfx1200``
+ *reserved* 0x049 Reserved.
+ ``EF_AMDGPU_MACH_AMDGCN_GFX1151`` 0x04a ``gfx1151``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX941`` 0x04b ``gfx941``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX942`` 0x04c ``gfx942``
+ *reserved* 0x04d Reserved.
+ ``EF_AMDGPU_MACH_AMDGCN_GFX1201`` 0x04e ``gfx1201``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX1201`` 0x04e ``gfx1201``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX1201`` 0x04e ``gfx1201``
+ *reserved* 0x04f Reserved.
+ *reserved* 0x050 Reserved.
+ ``EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC`` 0x051 ``gfx9-generic``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC`` 0x052 ``gfx10.1-generic``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC`` 0x053 ``gfx10.3-generic``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC`` 0x054 ``gfx11-generic``
+ ========================================== ========== =============================
Sections
--------
@@ -4140,6 +4284,33 @@ Code object V5 metadata is the same as
====================== ============== ========= ================================
+.. _amdgpu-amdhsa-code-object-metadata-v6:
+
+Code Object V6 Metadata
++++++++++++++++++++++++
+
+.. warning::
+ Code object V6 is not the default code object version emitted by this version
+ of LLVM.
+
+
+Code object V6 metadata is the same as
+:ref:`amdgpu-amdhsa-code-object-metadata-v5` with the changes defined in table
+:ref:`amdgpu-amdhsa-code-object-metadata-map-table-v6`.
+
+ .. table:: AMDHSA Code Object V6 Metadata Map Changes
+ :name: amdgpu-amdhsa-code-object-metadata-map-table-v6
+
+ ================= ============== ========= =======================================
+ String Key Value Type Required? Description
+ ================= ============== ========= =======================================
+ "amdhsa.version" sequence of Required - The first integer is the major
+ 2 integers version. Currently 1.
+ - The second integer is the minor
+ version. Currently 3.
+ ================= ============== ========= =======================================
+
+
..
Kernel Dispatch
diff --git a/llvm/include/llvm/BinaryFormat/ELF.h b/llvm/include/llvm/BinaryFormat/ELF.h
index efd41f9812baa0..3eddaee4f7d1ad 100644
--- a/llvm/include/llvm/BinaryFormat/ELF.h
+++ b/llvm/include/llvm/BinaryFormat/ELF.h
@@ -790,11 +790,15 @@ enum : unsigned {
EF_AMDGPU_MACH_AMDGCN_GFX1201 = 0x04e,
EF_AMDGPU_MACH_AMDGCN_RESERVED_0X4F = 0x04f,
EF_AMDGPU_MACH_AMDGCN_RESERVED_0X50 = 0x050,
+ EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC = 0x051,
+ EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC = 0x052,
+ EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC = 0x053,
+ EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC = 0x054,
// clang-format on
// First/last AMDGCN-based processors.
EF_AMDGPU_MACH_AMDGCN_FIRST = EF_AMDGPU_MACH_AMDGCN_GFX600,
- EF_AMDGPU_MACH_AMDGCN_LAST = EF_AMDGPU_MACH_AMDGCN_GFX1201,
+ EF_AMDGPU_MACH_AMDGCN_LAST = EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC,
// Indicates if the "xnack" target feature is enabled for all code contained
// in the object.
diff --git a/llvm/include/llvm/TargetParser/TargetParser.h b/llvm/include/llvm/TargetParser/TargetParser.h
index 6464285980f00a..7da11701fefb8b 100644
--- a/llvm/include/llvm/TargetParser/TargetParser.h
+++ b/llvm/include/llvm/TargetParser/TargetParser.h
@@ -111,6 +111,14 @@ enum GPUKind : uint32_t {
GK_AMDGCN_FIRST = GK_GFX600,
GK_AMDGCN_LAST = GK_GFX1201,
+
+ GK_GFX9_GENERIC = 192,
+ GK_GFX10_1_GENERIC = 193,
+ GK_GFX10_3_GENERIC = 194,
+ GK_GFX11_GENERIC = 195,
+
+ GK_AMDGCN_GENERIC_FIRST = GK_GFX9_GENERIC,
+ GK_AMDGCN_GENERIC_LAST = GK_GFX11_GENERIC,
};
/// Instruction set architecture version.
@@ -147,6 +155,8 @@ enum ArchFeatureKind : uint32_t {
FEATURE_WGP = 1 << 9,
};
+StringRef getArchFamilyNameAMDGCN(GPUKind AK);
+
StringRef getArchNameAMDGCN(GPUKind AK);
StringRef getArchNameR600(GPUKind AK);
StringRef getCanonicalArchName(const Triple &T, StringRef Arch);
diff --git a/llvm/lib/Object/ELFObjectFile.cpp b/llvm/lib/Object/ELFObjectFile.cpp
index ae21b81c10c82a..c9009be5fdbf6e 100644
--- a/llvm/lib/Object/ELFObjectFile.cpp
+++ b/llvm/lib/Object/ELFObjectFile.cpp
@@ -514,6 +514,16 @@ StringRef ELFObjectFileBase::getAMDGPUCPUName() const {
return "gfx1200";
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1201:
return "gfx1201";
+
+ // Generic AMDGCN targets
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC:
+ return "gfx9-generic";
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC:
+ return "gfx10.1-generic";
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC:
+ return "gfx10.3-generic";
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC:
+ return "gfx11-generic";
default:
llvm_unreachable("Unknown EF_AMDGPU_MACH value");
}
diff --git a/llvm/lib/ObjectYAML/ELFYAML.cpp b/llvm/lib/ObjectYAML/ELFYAML.cpp
index ff7151e016a9ba..9ebc3eb95c5548 100644
--- a/llvm/lib/ObjectYAML/ELFYAML.cpp
+++ b/llvm/lib/ObjectYAML/ELFYAML.cpp
@@ -612,6 +612,10 @@ void ScalarBitSetTraits<ELFYAML::ELF_EF>::bitset(IO &IO,
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1151, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1200, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1201, EF_AMDGPU_MACH);
+ BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC, EF_AMDGPU_MACH);
+ BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC, EF_AMDGPU_MACH);
+ BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC, EF_AMDGPU_MACH);
+ BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC, EF_AMDGPU_MACH);
switch (Object->Header.ABIVersion) {
default:
// ELFOSABI_AMDGPU_PAL, ELFOSABI_AMDGPU_MESA3D support *_V3 flags.
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 0f79d775b44545..720df0f2c6c2de 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -1002,6 +1002,12 @@ def FeatureGWS : SubtargetFeature<"gws",
"Has Global Wave Sync"
>;
+def FeatureRequiresCOV6 : SubtargetFeature<"requires-cov6",
+ "RequiresCOV6",
+ "true",
+ "Target Requires Code Object V6"
+>;
+
// Dummy feature used to disable assembler instructions.
def FeatureDisable : SubtargetFeature<"",
"FeatureDisable","true",
@@ -1212,6 +1218,17 @@ def FeatureISAVersion9_0_Common : FeatureSet<
FeatureImageInsts,
FeatureMadMacF32Insts]>;
+def FeatureISAVersion9_0_Consumer_Common : FeatureSet<
+ !listconcat(FeatureISAVersion9_0_Common.Features,
+ [FeatureImageGather4D16Bug,
+ FeatureDsSrc2Insts,
+ FeatureExtendedImageInsts,
+ FeatureGDS])>;
+
+def FeatureISAVersion9_Generic : FeatureSet<
+ !listconcat(FeatureISAVersion9_0_Consumer_Common.Features,
+ [FeatureRequiresCOV6])>;
+
def FeatureISAVersion9_0_MI_Common : FeatureSet<
!listconcat(FeatureISAVersion9_0_Common.Features,
[FeatureFmaMixInsts,
@@ -1230,43 +1247,27 @@ def FeatureISAVersion9_0_MI_Common : FeatureSet<
FeatureSupportsSRAMECC])>;
def FeatureISAVersion9_0_0 : FeatureSet<
- !listconcat(FeatureISAVersion9_0_Common.Features,
- [FeatureGDS,
- FeatureMadMixInsts,
- FeatureDsSrc2Insts,
- FeatureExtendedImageInsts,
- FeatureImageGather4D16Bug])>;
+ !listconcat(FeatureISAVersion9_0_Consumer_Common.Features,
+ [FeatureMadMixInsts])>;
def FeatureISAVersion9_0_2 : FeatureSet<
- !listconcat(FeatureISAVersion9_0_Common.Features,
- [FeatureGDS,
- FeatureMadMixInsts,
- FeatureDsSrc2Insts,
- FeatureExtendedImageInsts,
- FeatureImageGather4D16Bug])>;
+ !listconcat(FeatureISAVersion9_0_Consumer_Common.Features,
+ [FeatureMadMixInsts])>;
def FeatureISAVersion9_0_4 : FeatureSet<
- !listconcat(FeatureISAVersion9_0_Common.Features,
- [FeatureGDS,
- FeatureDsSrc2Insts,
- FeatureExtendedImageInsts,
- FeatureFmaMixInsts,
- FeatureImageGather4D16Bug])>;
+ !listconcat(FeatureISAVersion9_0_Consumer_Common.Features,
+ [FeatureFmaMixInsts])>;
def FeatureISAVersion9_0_6 : FeatureSet<
- !listconcat(FeatureISAVersion9_0_Common.Features,
- [FeatureGDS,
- HalfRate64Ops,
+ !listconcat(FeatureISAVersion9_0_Consumer_Common.Features,
+ [HalfRate64Ops,
FeatureFmaMixInsts,
- FeatureDsSrc2Insts,
- FeatureExtendedImageInsts,
FeatureDLInsts,
FeatureDot1Insts,
FeatureDot2Insts,
FeatureDot7Insts,
FeatureDot10Insts,
- FeatureSupportsSRAMECC,
- FeatureImageGather4D16Bug])>;
+ FeatureSupportsSRAMECC])>;
def FeatureISAVersion9_0_8 : FeatureSet<
!listconcat(FeatureISAVersion9_0_MI_Common.Features,
@@ -1279,13 +1280,9 @@ def FeatureISAVersion9_0_8 : FeatureSet<
FeatureImageGather4D16Bug])>;
def FeatureISAVersion9_0_9 : FeatureSet<
- !listconcat(FeatureISAVersion9_0_Common.Features,
- [FeatureGDS,
- FeatureMadMixInsts,
- FeatureDsSrc2Insts,
- FeatureExtendedImageInsts,
- FeatureImageInsts,
- FeatureImageGather4D16Bug])>;
+ !listconcat(FeatureISAVersion9_0_Consumer_Common.Features,
+ [FeatureMadMixInsts,
+ FeatureImageInsts])>;
def FeatureISAVersion9_0_A : FeatureSet<
!listconcat(FeatureISAVersion9_0_MI_Common.Features,
@@ -1301,12 +1298,8 @@ def FeatureISAVersion9_0_A : FeatureSet<
FeatureKernargPreload])>;
def FeatureISAVersion9_0_C : FeatureSet<
- !listconcat(FeatureISAVersion9_0_Common.Features,
- [FeatureGDS,
- FeatureMadMixInsts,
- FeatureDsSrc2Insts,
- FeatureExtendedImageInsts,
- FeatureImageGather4D16Bug])>;
+ !listconcat(FeatureISAVersion9_0_Consumer_Common.Features,
+ [FeatureMadMixInsts])>;
def FeatureISAVersion9_4_Common : FeatureSet<
[FeatureGFX9,
@@ -1387,6 +1380,10 @@ def FeatureISAVersion10_1_Common : FeatureSet<
FeatureFlatSegmentOffsetBug,
FeatureNegativeUnalignedScratchOffsetBug])>;
+def FeatureISAVersion10_1_Generic : FeatureSet<
+ !listconcat(FeatureISAVersion10_1_Common.Features,
+ [FeatureRequiresCOV6])>;
+
def FeatureISAVersion10_1_0 : FeatureSet<
!listconcat(FeatureISAVersion10_1_Common.Features,
[])>;
@@ -1426,6 +1423,10 @@ def FeatureISAVersion10_3_0 : FeatureSet<
FeatureDot10Insts,
FeatureShaderCyclesRegister])>;
+def FeatureISAVersion10_3_Generic: FeatureSet<
+ !listconcat(FeatureISAVersion10_3_0.Features,
+ [FeatureRequiresCOV6])>;
+
def FeatureISAVersion11_Common : FeatureSet<
[FeatureGFX11,
FeatureLDSBankCount32,
@@ -1448,6 +1449,16 @@ def FeatureISAVersion11_Common : FeatureSet<
FeatureVcmpxPermlaneHazard,
FeatureMADIntraFwdBug]>;
+// There are few workarounds that need to be
+// added to all targets. This pessimizes codegen
+// a bit on the generic GFX11 target.
+def FeatureISAVersion11_Generic: FeatureSet<
+ !listconcat(FeatureISAVersion11_Common.Features,
+ [FeatureMSAALoadDstSelBug,
+ FeatureVALUTransUseHazard,
+ FeatureUserSGPRInit16Bug,
+ FeatureRequiresCOV6])>;
+
def FeatureISAVersion11_0_Common : FeatureSet<
!listconcat(FeatureISAVersion11_Common.Features,
[FeatureMSAALoadDstSelBug,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index 74f1a2e8561b89..5fb1468299c3a4 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -153,6 +153,13 @@ void AMDGPUAsmPrinter::emitFunctionBodyStart() {
const GCNSubtarget &STM = MF->getSubtarget<GCNSubtarget>();
const Function &F = MF->getFunction();
+ // TODO: We're checking this late, would be nice to check it earlier.
+ if (STM.requiresCodeObjectV6() &&
+ AMDGPU::getAmdhsaCodeObjectVersion() < AMDGPU::AMDHSA_COV6)
+ report_fatal_error(
+ STM.getCPU() + " is only available on code object version 6 or better",
+ /*gen_crash_diag*/ false);
+
// TODO: Which one is called first, emitStartOfAsmFile or
// emitFunctionBodyStart?
if (!getTargetStreamer()->getTargetID())
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURemoveIncompatibleFunctions.cpp b/llvm/lib/Target/AMDGPU/AMDGPURemoveIncompatibleFunctions.cpp
index 6f1236fd3b7dae..9d44b65d1698ca 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURemoveIncompatibleFunctions.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURemoveIncompatibleFunctions.cpp
@@ -139,10 +139,10 @@ bool AMDGPURemoveIncompatibleFunctions::checkFunction(Function &F) {
const GCNSubtarget *ST =
static_cast<const GCNSubtarget *>(TM->getSubtargetImpl(F));
- // Check the GPU isn't generic. Generic is used for testing only
- // and we don't want this pass to interfere with it.
+ // Check the GPU isn't generic or generic-hsa. Generic is used for testing
+ // only and we don't want this pass to interfere with it.
StringRef GPUName = ST->getCPU();
- if (GPUName.empty() || GPUName.contains("generic"))
+ if (GPUName.empty() || GPUName.starts_with("generic"))
return false;
// Try to fetch the GPU's info. If we can't, it's likely an unknown processor
diff --git a/llvm/lib/Target/AMDGPU/GCNProcessors.td b/llvm/lib/Target/AMDGPU/GCNProcessors.td
index 96af1a6aab3da7..4671e03d43b3ab 100644
--- a/llvm/lib/Target/AMDGPU/GCNProcessors.td
+++ b/llvm/lib/Target/AMDGPU/GCNProcessors.td
@@ -204,6 +204,11 @@ def : ProcessorModel<"gfx942", SIDPGFX940FullSpeedModel,
FeatureISAVersion9_4_2.Features
>;
+// [gfx900, gfx902, gfx904, gfx906, gfx909, gfx90c]
+def : ProcessorModel<"gfx9-generic", SIQuarterSpeedModel,
+ FeatureISAVersion9_Generic.Features
+>;
+
//===----------------------------------------------------------------------===//
// GCN GFX10.
//===----------------------------------------------------------------------===//
@@ -252,6 +257,16 @@ def : ProcessorModel<"gfx1036", GFX10SpeedModel,
FeatureISAVersion10_3_0.Features
>;
+// [gfx1010, gfx1011, gfx1012, gfx1013]
+def : ProcessorModel<"gfx10.1-generic", GFX10SpeedModel,
+ FeatureISAVersion10_1_Generic.Features
+>;
+
+// [gfx1030, gfx1031, gfx1032, gfx1033, gfx1034, gfx1035, gfx1036]
+def : ProcessorModel<"gfx10.3-generic", GFX10SpeedModel,
+ FeatureISAVersion10_3_Generic.Features
+>;
+
//===----------------------------------------------------------------------===//
// GCN GFX11.
//===----------------------------------------------------------------------===//
@@ -280,10 +295,17 @@ def : ProcessorModel<"gfx1151", GFX11SpeedModel,
FeatureISAVersion11_5_1.Features
>;
+// [gfx1100, gfx1101, gfx1102, gfx1103, gfx1150, gfx1151]
+def : ProcessorModel<"gfx11-generic", GFX11SpeedModel,
+ FeatureISAVersion11_Generic.Features
+>;
+
//===----------------------------------------------------------------------===//
// GCN GFX12.
//===----------------------------------------------------------------------===//
+// TODO: gfx12-generic ?
+
def : ProcessorModel<"gfx1200", GFX12SpeedModel,
FeatureISAVersion12.Features
>;
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index 1af83ca7bf27fd..27eee756229ff2 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -224,6 +224,8 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool HasVALUTransUseHazard = false;
bool HasForceStoreSC0SC1 = false;
+ bool RequiresCOV6 = false;
+
// Dummy feature to use for assembler in tablegen.
bool FeatureDisable = false;
@@ -1161,6 +1163,8 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool hasForceStoreSC0SC1() const { return HasForceStoreSC0SC1; }
+ bool requiresCodeObjectV6() const { return RequiresCOV6; }
+
bool hasVALUMaskWriteHazard() const { return getGeneration() == GFX11; }
/// Return if operations acting on VGPR tuples require even alignment.
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
index 8b3822677345d8..fe7cc2b99a9a8f 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
@@ -135,6 +135,10 @@ StringRef AMDGPUTargetStreamer::getArchNameFromElfMach(unsigned ElfMach) {
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1151: AK = GK_GFX1151; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1200: AK = GK_GFX1200; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1201: AK = GK_GFX1201; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC: AK = GK_GFX9_GENERIC; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC: AK = GK_GFX10_1_GENERIC; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC: AK = GK_GFX10_3_GENERIC; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC: AK = GK_GFX11_GENERIC; break;
case ELF::EF_AMDGPU_MACH_NONE: AK = GK_NONE; break;
default: AK = GK_NONE; break;
}
@@ -213,6 +217,10 @@ unsigned AMDGPUTargetStreamer::getElfMach(StringRef GPU) {
case GK_GFX1151: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1151;
case GK_GFX1200: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1200;
case GK_GFX1201: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1201;
+ case GK_GFX9_GENERIC: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC;
+ case GK_GFX10_1_GENERIC: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC;
+ case GK_GFX10_3_GENERIC: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC;
+ case GK_GFX11_GENERIC: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC;
case GK_NONE: return ELF::EF_AMDGPU_MACH_NONE;
}
// clang-format on
@@ -711,6 +719,24 @@ unsigned AMDGPUTargetELFStreamer::getEFlagsV6() {
unsigned Flags = getEFlagsV4();
unsigned Version = ForceGenericVersion;
+ if (!Version) {
+ switch (parseArchAMDGCN(STI.getCPU())) {
+ case AMDGPU::GK_GFX9_GENERIC:
+ Version = GenericVersion::GFX9;
+ break;
+ case AMDGPU::GK_GFX10_1_GENERIC:
+ Version = GenericVersion::GFX10_1;
+ break;
+ case AMDGPU::GK_GFX10_3_GENERIC:
+ Version = GenericVersion::GFX10_3;
+ break;
+ case AMDGPU::GK_GFX11_GENERIC:
+ Version = GenericVersion::GFX11;
+ break;
+ default:
+ break;
+ }
+ }
// Versions start at 1.
if (Version) {
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index c2fdb4e817099c..46c8202d0026c6 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -42,6 +42,17 @@ namespace AMDGPU {
struct IsaVersion;
+/// Generic target versions emitted by this version of LLVM.
+///
+/// These numbers are incremented every time a codegen breaking change occurs
+/// within a generic family.
+namespace GenericVersion {
+static constexpr unsigned GFX9 = 1;
+static constexpr unsigned GFX10_1 = 1;
+static constexpr unsigned GFX10_3 = 1;
+static constexpr unsigned GFX11 = 1;
+} // namespace GenericVersion
+
enum { AMDHSA_COV4 = 4, AMDHSA_COV5 = 5, AMDHSA_COV6 = 6 };
/// \returns True if \p STI is AMDHSA.
diff --git a/llvm/lib/TargetParser/TargetParser.cpp b/llvm/lib/TargetParser/TargetParser.cpp
index 732db23a8b9a80..814f2d7fa01edc 100644
--- a/llvm/lib/TargetParser/TargetParser.cpp
+++ b/llvm/lib/TargetParser/TargetParser.cpp
@@ -126,6 +126,11 @@ constexpr GPUInfo AMDGCNGPUs[] = {
{{"gfx1151"}, {"gfx1151"}, GK_GFX1151, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
{{"gfx1200"}, {"gfx1200"}, GK_GFX1200, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
{{"gfx1201"}, {"gfx1201"}, GK_GFX1201, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
+
+ {{"gfx9-generic"}, {"gfx9-generic"}, GK_GFX9_GENERIC, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK},
+ {{"gfx10.1-generic"}, {"gfx10.1-generic"}, GK_GFX10_1_GENERIC, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK|FEATURE_WGP},
+ {{"gfx10.3-generic"}, {"gfx10.3-generic"}, GK_GFX10_3_GENERIC, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
+ {{"gfx11-generic"}, {"gfx11-generic"}, GK_GFX11_GENERIC, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
// clang-format on
};
@@ -144,6 +149,22 @@ const GPUInfo *getArchEntry(AMDGPU::GPUKind AK, ArrayRef<GPUInfo> Table) {
} // namespace
+StringRef llvm::AMDGPU::getArchFamilyNameAMDGCN(GPUKind AK) {
+ switch (AK) {
+ case AMDGPU::GK_GFX9_GENERIC:
+ return "gfx9";
+ case AMDGPU::GK_GFX10_1_GENERIC:
+ case AMDGPU::GK_GFX10_3_GENERIC:
+ return "gfx10";
+ case AMDGPU::GK_GFX11_GENERIC:
+ return "gfx11";
+ default: {
+ StringRef ArchName = getArchNameAMDGCN(AK);
+ return ArchName.empty() ? "" : ArchName.drop_back(2);
+ }
+ }
+}
+
StringRef llvm::AMDGPU::getArchNameAMDGCN(GPUKind AK) {
if (const auto *Entry = getArchEntry(AK, AMDGCNGPUs))
return Entry->CanonicalName;
@@ -253,6 +274,24 @@ AMDGPU::IsaVersion AMDGPU::getIsaVersion(StringRef GPU) {
case GK_GFX1151: return {11, 5, 1};
case GK_GFX1200: return {12, 0, 0};
case GK_GFX1201: return {12, 0, 1};
+
+ // Generic targets return the lowest common denominator
+ // within their family. That is, the ISA that is the most
+ // restricted in terms of features.
+ //
+ // gfx9-generic is tricky because there is no lowest
+ // common denominator, so we return gfx900 which has mad-mix
+ // but this family doesn't have it.
+ //
+ // This API should never be used to check for a particular
+ // feature anyway.
+ //
+ // TODO: Split up this API depending on its caller so
+ // generic target handling is more obvious and less risky.
+ case GK_GFX9_GENERIC: return {9, 0, 0};
+ case GK_GFX10_1_GENERIC: return {10, 1, 0};
+ case GK_GFX10_3_GENERIC: return {10, 3, 0};
+ case GK_GFX11_GENERIC: return {11, 0, 3};
default: return {0, 0, 0};
}
// clang-format on
@@ -299,6 +338,7 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
case GK_GFX1102:
case GK_GFX1101:
case GK_GFX1100:
+ case GK_GFX11_GENERIC:
Features["ci-insts"] = true;
Features["dot5-insts"] = true;
Features["dot7-insts"] = true;
@@ -324,6 +364,7 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
case GK_GFX1032:
case GK_GFX1031:
case GK_GFX1030:
+ case GK_GFX10_3_GENERIC:
Features["ci-insts"] = true;
Features["dot1-insts"] = true;
Features["dot2-insts"] = true;
@@ -354,6 +395,7 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
[[fallthrough]];
case GK_GFX1013:
case GK_GFX1010:
+ case GK_GFX10_1_GENERIC:
Features["dl-insts"] = true;
Features["ci-insts"] = true;
Features["16-bit-insts"] = true;
@@ -421,6 +463,7 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
case GK_GFX904:
case GK_GFX902:
case GK_GFX900:
+ case GK_GFX9_GENERIC:
Features["gfx9-insts"] = true;
[[fallthrough]];
case GK_GFX810:
@@ -507,6 +550,9 @@ static bool isWave32Capable(StringRef GPU, const Triple &T) {
case GK_GFX1011:
case GK_GFX1013:
case GK_GFX1010:
+ case GK_GFX11_GENERIC:
+ case GK_GFX10_3_GENERIC:
+ case GK_GFX10_1_GENERIC:
IsWave32Capable = true;
break;
default:
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workitem.id.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workitem.id.ll
index 155f4c92f97d37..4e921f25620544 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workitem.id.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workitem.id.ll
@@ -6,6 +6,7 @@
; RUN: llc -global-isel -mtriple=amdgcn-unknown-mesa3d -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=ALL,MESA3D,UNPACKED %s
; RUN: llc -global-isel -mtriple=amdgcn -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -check-prefixes=ALL,PACKED-TID %s
; RUN: llc -global-isel -mtriple=amdgcn -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1100 -verify-machineinstrs -amdgpu-enable-vopd=0 < %s | FileCheck -check-prefixes=ALL,PACKED-TID %s
+; RUN: llc -global-isel -mtriple=amdgcn -mtriple=amdgcn-unknown-amdhsa --amdhsa-code-object-version=6 -mcpu=gfx11-generic -verify-machineinstrs -amdgpu-enable-vopd=0 < %s | FileCheck -check-prefixes=ALL,PACKED-TID %s
declare i32 @llvm.amdgcn.workitem.id.x() #0
declare i32 @llvm.amdgcn.workitem.id.y() #0
diff --git a/llvm/test/CodeGen/AMDGPU/directive-amdgcn-target.ll b/llvm/test/CodeGen/AMDGPU/directive-amdgcn-target.ll
index 357fcf8ef15617..038219fc374047 100644
--- a/llvm/test/CodeGen/AMDGPU/directive-amdgcn-target.ll
+++ b/llvm/test/CodeGen/AMDGPU/directive-amdgcn-target.ll
@@ -108,6 +108,13 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX1200 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1201 < %s | FileCheck --check-prefixes=GFX1201 %s
+; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx9-generic -mattr=-xnack < %s | FileCheck --check-prefixes=GFX9_GENERIC_NOXNACK %s
+; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx9-generic -mattr=+xnack < %s | FileCheck --check-prefixes=GFX9_GENERIC_XNACK %s
+; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx10.1-generic -mattr=-xnack < %s | FileCheck --check-prefixes=GFX10_1_GENERIC_NOXNACK %s
+; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx10.1-generic -mattr=+xnack < %s | FileCheck --check-prefixes=GFX10_1_GENERIC_XNACK %s
+; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx10.3-generic < %s | FileCheck --check-prefixes=GFX10_3_GENERIC %s
+; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx11-generic < %s | FileCheck --check-prefixes=GFX11_GENERIC %s
+
; GFX600: .amdgcn_target "amdgcn-amd-amdhsa--gfx600"
; GFX601: .amdgcn_target "amdgcn-amd-amdhsa--gfx601"
; GFX602: .amdgcn_target "amdgcn-amd-amdhsa--gfx602"
@@ -196,6 +203,13 @@
; GFX1200: .amdgcn_target "amdgcn-amd-amdhsa--gfx1200"
; GFX1201: .amdgcn_target "amdgcn-amd-amdhsa--gfx1201"
+; GFX9_GENERIC_NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx9-generic:xnack-"
+; GFX9_GENERIC_XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx9-generic:xnack+"
+; GFX10_1_GENERIC_NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx10.1-generic:xnack-"
+; GFX10_1_GENERIC_XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx10.1-generic:xnack+"
+; GFX10_3_GENERIC: .amdgcn_target "amdgcn-amd-amdhsa--gfx10.3-generic"
+; GFX11_GENERIC: .amdgcn_target "amdgcn-amd-amdhsa--gfx11-generic"
+
define amdgpu_kernel void @directive_amdgcn_target() {
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/elf-header-flags-mach.ll b/llvm/test/CodeGen/AMDGPU/elf-header-flags-mach.ll
index 380439d8cd9c65..9ba8176947174c 100644
--- a/llvm/test/CodeGen/AMDGPU/elf-header-flags-mach.ll
+++ b/llvm/test/CodeGen/AMDGPU/elf-header-flags-mach.ll
@@ -77,6 +77,11 @@
; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx1200 < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX1200 %s
; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx1201 < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX1201 %s
+; RUN: llc -filetype=obj --amdhsa-code-object-version=6 -mtriple=amdgcn -mcpu=gfx9-generic < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX9_GENERIC %s
+; RUN: llc -filetype=obj --amdhsa-code-object-version=6 -mtriple=amdgcn -mcpu=gfx10.1-generic < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX10_1_GENERIC %s
+; RUN: llc -filetype=obj --amdhsa-code-object-version=6 -mtriple=amdgcn -mcpu=gfx10.3-generic < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX10_3_GENERIC %s
+; RUN: llc -filetype=obj --amdhsa-code-object-version=6 -mtriple=amdgcn -mcpu=gfx11-generic < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX11_GENERIC %s
+
; FIXME: With the default attributes the eflags are not accurate for
; xnack and sramecc. Subsequent Target-ID patches will address this.
@@ -149,6 +154,11 @@
; GFX1151: EF_AMDGPU_MACH_AMDGCN_GFX1151 (0x4A)
; GFX1200: EF_AMDGPU_MACH_AMDGCN_GFX1200 (0x48)
; GFX1201: EF_AMDGPU_MACH_AMDGCN_GFX1201 (0x4E)
+
+; GFX9_GENERIC: EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC (0x51)
+; GFX10_1_GENERIC: EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC (0x52)
+; GFX10_3_GENERIC: EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC (0x53)
+; GFX11_GENERIC: EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC (0x54)
; ALL: ]
define amdgpu_kernel void @elf_header() {
diff --git a/llvm/test/CodeGen/AMDGPU/gds-allocation.ll b/llvm/test/CodeGen/AMDGPU/gds-allocation.ll
index dc6fea4ba1a379..1a9334706cb927 100644
--- a/llvm/test/CodeGen/AMDGPU/gds-allocation.ll
+++ b/llvm/test/CodeGen/AMDGPU/gds-allocation.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -amdgpu-atomic-optimizer-strategy=None -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx9-generic --amdhsa-code-object-version=6 -amdgpu-atomic-optimizer-strategy=None -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
@gds0 = internal addrspace(2) global [4 x i32] undef, align 4
@lds0 = internal addrspace(3) global [4 x i32] undef, align 128
diff --git a/llvm/test/CodeGen/AMDGPU/gds-atomic.ll b/llvm/test/CodeGen/AMDGPU/gds-atomic.ll
index 3e4e6938d72eb1..8d44330b1b9733 100644
--- a/llvm/test/CodeGen/AMDGPU/gds-atomic.ll
+++ b/llvm/test/CodeGen/AMDGPU/gds-atomic.ll
@@ -2,6 +2,7 @@
; RUN: llc -mtriple=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,FUNC %s
; RUN: llc -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,FUNC %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,FUNC %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx9-generic --amdhsa-code-object-version=6 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,FUNC %s
; FUNC-LABEL: {{^}}atomic_add_ret_gds:
; GCN-DAG: v_mov_b32_e32 v[[OFF:[0-9]+]], s
diff --git a/llvm/test/CodeGen/AMDGPU/generic-targets-require-v6.ll b/llvm/test/CodeGen/AMDGPU/generic-targets-require-v6.ll
new file mode 100644
index 00000000000000..e3f4b14bac0c16
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/generic-targets-require-v6.ll
@@ -0,0 +1,18 @@
+; RUN: not llc -march=amdgcn -mcpu=gfx9-generic --amdhsa-code-object-version=5 -o - %s 2>&1 | FileCheck --check-prefix=GFX9-V5 %s
+; RUN: not llc -march=amdgcn -mcpu=gfx10.1-generic --amdhsa-code-object-version=5 -o - %s 2>&1 | FileCheck --check-prefix=GFX101-V5 %s
+; RUN: not llc -march=amdgcn -mcpu=gfx10.3-generic --amdhsa-code-object-version=5 -o - %s 2>&1 | FileCheck --check-prefix=GFX103-V5 %s
+; RUN: not llc -march=amdgcn -mcpu=gfx11-generic --amdhsa-code-object-version=5 -o - %s 2>&1 | FileCheck --check-prefix=GFX11-V5 %s
+
+; RUN: llc -march=amdgcn -mcpu=gfx9-generic --amdhsa-code-object-version=6 -o - %s
+; RUN: llc -march=amdgcn -mcpu=gfx10.1-generic --amdhsa-code-object-version=6 -o - %s
+; RUN: llc -march=amdgcn -mcpu=gfx10.3-generic --amdhsa-code-object-version=6 -o - %s
+; RUN: llc -march=amdgcn -mcpu=gfx11-generic --amdhsa-code-object-version=6 -o - %s
+
+; GFX9-V5: gfx9-generic is only available on code object version 6 or better
+; GFX101-V5: gfx10.1-generic is only available on code object version 6 or better
+; GFX103-V5: gfx10.3-generic is only available on code object version 6 or better
+; GFX11-V5: gfx11-generic is only available on code object version 6 or better
+
+define void @foo() {
+ ret void
+}
diff --git a/llvm/test/CodeGen/AMDGPU/hsa-generic-target-features.ll b/llvm/test/CodeGen/AMDGPU/hsa-generic-target-features.ll
new file mode 100644
index 00000000000000..4fee563d1cc93b
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/hsa-generic-target-features.ll
@@ -0,0 +1,31 @@
+; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx10.1-generic -mattr=+cumode < %s | FileCheck -check-prefix=NOCU %s
+; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx10.1-generic < %s | FileCheck -check-prefix=CU %s
+; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx10.3-generic -mattr=+cumode < %s | FileCheck -check-prefix=NOCU %s
+; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx10.3-generic < %s | FileCheck -check-prefix=CU %s
+; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx11-generic -mattr=+cumode < %s | FileCheck -check-prefix=NOCU %s
+; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx11-generic < %s | FileCheck -check-prefix=CU %s
+
+; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx10.1-generic -mattr=+wavefrontsize32,-wavefrontsize64 < %s | FileCheck -check-prefix=W32 %s
+; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx10.1-generic -mattr=-wavefrontsize32,+wavefrontsize64 < %s | FileCheck -check-prefix=W64 %s
+; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx10.3-generic -mattr=+wavefrontsize32,-wavefrontsize64 < %s | FileCheck -check-prefix=W32 %s
+; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx10.3-generic -mattr=-wavefrontsize32,+wavefrontsize64 < %s | FileCheck -check-prefix=W64 %s
+; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx11-generic -mattr=+wavefrontsize32,-wavefrontsize64 < %s | FileCheck -check-prefix=W32 %s
+; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx11-generic -mattr=-wavefrontsize32,+wavefrontsize64 < %s | FileCheck -check-prefix=W64 %s
+
+; Checks 10.1, 10.3 and 11 generic targets allow cumode/wave64.
+
+; NOCU: .amdhsa_workgroup_processor_mode 0
+; NOCU: .workgroup_processor_mode: 0
+; CU: .amdhsa_workgroup_processor_mode 1
+; CU: .workgroup_processor_mode: 1
+
+; W64: .amdhsa_wavefront_size32 0
+; W32: .amdhsa_wavefront_size32 1
+
+define amdgpu_kernel void @wavefrontsize() {
+entry:
+ ret void
+}
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"amdgpu_code_object_version", i32 600}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.d16.dim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.d16.dim.ll
index 91284d3838675c..cf324d62e1de1e 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.d16.dim.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.d16.dim.ll
@@ -1,8 +1,11 @@
; RUN: llc < %s -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck -check-prefixes=GCN,UNPACKED %s
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx810 -verify-machineinstrs | FileCheck --check-prefix=GCN %s
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck -check-prefixes=GCN,GFX9 %s
+; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx9-generic --amdhsa-code-object-version=6 -verify-machineinstrs | FileCheck -check-prefixes=GCN,GFX9 %s
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck -check-prefixes=GCN,GFX10 %s
+; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx10.1-generic --amdhsa-code-object-version=6 -verify-machineinstrs | FileCheck -check-prefixes=GCN,GFX10 %s
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs | FileCheck -check-prefixes=GCN,GFX10 %s
+; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx11-generic --amdhsa-code-object-version=6 -verify-machineinstrs | FileCheck -check-prefixes=GCN,GFX10 %s
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs | FileCheck -check-prefixes=GCN,GFX12 %s
; GCN-LABEL: {{^}}image_gather4_b_2d_v4f16:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll
index da7e57c7e142c3..bdde385d545f1f 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll
@@ -1,8 +1,11 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefixes=VERDE %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX6789 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx9-generic --amdhsa-code-object-version=6 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX6789 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx10.1-generic --amdhsa-code-object-version=6 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx11-generic --amdhsa-code-object-version=6 -amdgpu-enable-delay-alu=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -amdgpu-enable-delay-alu=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12 %s
define amdgpu_ps <4 x float> @sample_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
diff --git a/llvm/test/CodeGen/AMDGPU/unsupported-image-sample.ll b/llvm/test/CodeGen/AMDGPU/unsupported-image-sample.ll
index 0878fc689cd4bd..b08586efe2f217 100644
--- a/llvm/test/CodeGen/AMDGPU/unsupported-image-sample.ll
+++ b/llvm/test/CodeGen/AMDGPU/unsupported-image-sample.ll
@@ -1,15 +1,13 @@
-; RUN: llc -O0 -mtriple=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX906 %s
-; RUN: llc -O0 -mtriple=amdgcn -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX908 %s
+; RUN: llc -O0 -mtriple=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9 %s
+; RUN: llc -O0 -mtriple=amdgcn -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9 %s
+; RUN: llc -O0 -mtriple=amdgcn -mcpu=gfx9-generic --amdhsa-code-object-version=6 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9 %s
; RUN: not --crash llc -O0 -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefixes=GFX90A %s
; RUN: not --crash llc -O0 -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefixes=GFX940 %s
; RUN: llc -O0 -mtriple=amdgcn -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX1030 %s
; RUN: llc -O0 -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX1100 %s
-; GFX906-LABEL: image_sample_test:
-; GFX906: image_sample_lz
-
-; GFX908-LABEL: image_sample_test:
-; GFX908: image_sample_lz
+; GFX9-LABEL: image_sample_test:
+; GFX9: image_sample_lz
; GFX90A: LLVM ERROR: requested image instruction is not supported on this GPU
diff --git a/llvm/test/Object/AMDGPU/elf-header-flags-mach.yaml b/llvm/test/Object/AMDGPU/elf-header-flags-mach.yaml
index 7fb33ca662b190..4c2b4479a4aa35 100644
--- a/llvm/test/Object/AMDGPU/elf-header-flags-mach.yaml
+++ b/llvm/test/Object/AMDGPU/elf-header-flags-mach.yaml
@@ -238,6 +238,23 @@
# RUN: llvm-readobj -S --file-headers %t.o.AMDGCN_GFX1201 | FileCheck --check-prefixes=ELF-AMDGCN-ALL,ELF-AMDGCN-GFX1201 %s
# RUN: obj2yaml %t.o.AMDGCN_GFX1201 | FileCheck --check-prefixes=YAML-AMDGCN-ALL,YAML-AMDGCN-GFX1201 %s
+# RUN: sed -e 's/<BITS>/64/' -e 's/<MACH>/AMDGCN_GFX9_GENERIC/' %s | yaml2obj -o %t.o.AMDGCN_GFX9_GENERIC
+# RUN: llvm-readobj -S --file-headers %t.o.AMDGCN_GFX9_GENERIC | FileCheck --check-prefixes=ELF-AMDGCN-ALL,ELF-AMDGCN-GFX9_GENERIC %s
+# RUN: obj2yaml %t.o.AMDGCN_GFX9_GENERIC | FileCheck --check-prefixes=YAML-AMDGCN-ALL,YAML-AMDGCN-GFX9_GENERIC %s
+
+# RUN: sed -e 's/<BITS>/64/' -e 's/<MACH>/AMDGCN_GFX10_1_GENERIC/' %s | yaml2obj -o %t.o.AMDGCN_GFX10_1_GENERIC
+# RUN: llvm-readobj -S --file-headers %t.o.AMDGCN_GFX10_1_GENERIC | FileCheck --check-prefixes=ELF-AMDGCN-ALL,ELF-AMDGCN-GFX10_1_GENERIC %s
+# RUN: obj2yaml %t.o.AMDGCN_GFX10_1_GENERIC | FileCheck --check-prefixes=YAML-AMDGCN-ALL,YAML-AMDGCN-GFX10_1_GENERIC %s
+
+# RUN: sed -e 's/<BITS>/64/' -e 's/<MACH>/AMDGCN_GFX10_3_GENERIC/' %s | yaml2obj -o %t.o.AMDGCN_GFX10_3_GENERIC
+# RUN: llvm-readobj -S --file-headers %t.o.AMDGCN_GFX10_3_GENERIC | FileCheck --check-prefixes=ELF-AMDGCN-ALL,ELF-AMDGCN-GFX10_3_GENERIC %s
+# RUN: obj2yaml %t.o.AMDGCN_GFX10_3_GENERIC | FileCheck --check-prefixes=YAML-AMDGCN-ALL,YAML-AMDGCN-GFX10_3_GENERIC %s
+
+# RUN: sed -e 's/<BITS>/64/' -e 's/<MACH>/AMDGCN_GFX11_GENERIC/' %s | yaml2obj -o %t.o.AMDGCN_GFX11_GENERIC
+# RUN: llvm-readobj -S --file-headers %t.o.AMDGCN_GFX11_GENERIC | FileCheck --check-prefixes=ELF-AMDGCN-ALL,ELF-AMDGCN-GFX11_GENERIC %s
+# RUN: obj2yaml %t.o.AMDGCN_GFX11_GENERIC | FileCheck --check-prefixes=YAML-AMDGCN-ALL,YAML-AMDGCN-GFX11_GENERIC %s
+
+
# ELF-R600-ALL: Format: elf32-amdgpu
# ELF-R600-ALL: Arch: r600
# ELF-R600-ALL: AddressSize: 32bit
@@ -435,6 +452,18 @@
# ELF-AMDGCN-GFX1201: EF_AMDGPU_MACH_AMDGCN_GFX1201 (0x4E)
# YAML-AMDGCN-GFX1201: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX1201 ]
+# ELF-AMDGCN-GFX9_GENERIC: EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC (0x51)
+# YAML-AMDGCN-GFX9_GENERIC: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC ]
+
+# ELF-AMDGCN-GFX10_1_GENERIC: EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC (0x52)
+# YAML-AMDGCN-GFX10_1_GENERIC: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC ]
+
+# ELF-AMDGCN-GFX10_3_GENERIC: EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC (0x53)
+# YAML-AMDGCN-GFX10_3_GENERIC: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC ]
+
+# ELF-AMDGCN-GFX11_GENERIC: EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC (0x54)
+# YAML-AMDGCN-GFX11_GENERIC: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC ]
+
# ELF-AMDGCN-ALL: ]
diff --git a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/subtarget.ll b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/subtarget.ll
index e296d7fb1fc8f2..ca136a6a0d5ba2 100644
--- a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/subtarget.ll
+++ b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/subtarget.ll
@@ -18,6 +18,11 @@ define amdgpu_kernel void @test_kernel() {
; ----------------------------------GFX11--------------------------------------
;
+; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=6 -mcpu=gfx11-generic -filetype=obj -O0 -o %t.o %s
+; RUN: llvm-objdump -D --arch-name=amdgcn -mllvm --amdhsa-code-object-version=6 --mcpu=gfx11-generic %t.o > %t-specify.txt
+; RUN: llvm-objdump -D -mllvm --amdhsa-code-object-version=6 %t.o > %t-detect.txt
+; RUN: diff %t-specify.txt %t-detect.txt
+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1151 -filetype=obj -O0 -o %t.o %s
; RUN: llvm-objdump -D --arch-name=amdgcn --mcpu=gfx1151 %t.o > %t-specify.txt
; RUN: llvm-objdump -D %t.o > %t-detect.txt
@@ -49,6 +54,11 @@ define amdgpu_kernel void @test_kernel() {
; RUN: diff %t-specify.txt %t-detect.txt
; ----------------------------------GFX10--------------------------------------
+; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=6 -mcpu=gfx10.3-generic -filetype=obj -O0 -o %t.o %s
+; RUN: llvm-objdump -D --arch-name=amdgcn -mllvm --amdhsa-code-object-version=6 --mcpu=gfx10.3-generic %t.o > %t-specify.txt
+; RUN: llvm-objdump -D -mllvm --amdhsa-code-object-version=6 %t.o > %t-detect.txt
+; RUN: diff %t-specify.txt %t-detect.txt
+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1036 -filetype=obj -O0 -o %t.o %s
; RUN: llvm-objdump -D --arch-name=amdgcn --mcpu=gfx1036 %t.o > %t-specify.txt
; RUN: llvm-objdump -D %t.o > %t-detect.txt
@@ -84,6 +94,11 @@ define amdgpu_kernel void @test_kernel() {
; RUN: llvm-objdump -D %t.o > %t-detect.txt
; RUN: diff %t-specify.txt %t-detect.txt
+; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=6 -mcpu=gfx10.1-generic -filetype=obj -O0 -o %t.o %s
+; RUN: llvm-objdump -D --arch-name=amdgcn -mllvm --amdhsa-code-object-version=6 --mcpu=gfx10.1-generic %t.o > %t-specify.txt
+; RUN: llvm-objdump -D -mllvm --amdhsa-code-object-version=6 %t.o > %t-detect.txt
+; RUN: diff %t-specify.txt %t-detect.txt
+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1013 -filetype=obj -O0 -o %t.o %s
; RUN: llvm-objdump -D --arch-name=amdgcn --mcpu=gfx1013 %t.o > %t-specify.txt
; RUN: llvm-objdump -D %t.o > %t-detect.txt
@@ -107,6 +122,11 @@ define amdgpu_kernel void @test_kernel() {
; ----------------------------------GFX9---------------------------------------
;
+; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=6 -mcpu=gfx9-generic -filetype=obj -O0 -o %t.o %s
+; RUN: llvm-objdump -D --arch-name=amdgcn -mllvm --amdhsa-code-object-version=6 --mcpu=gfx9-generic %t.o > %t-specify.txt
+; RUN: llvm-objdump -D -mllvm --amdhsa-code-object-version=6 %t.o > %t-detect.txt
+; RUN: diff %t-specify.txt %t-detect.txt
+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -filetype=obj -O0 -o %t.o %s
; RUN: llvm-objdump -D --arch-name=amdgcn --mcpu=gfx942 %t.o > %t-specify.txt
; RUN: llvm-objdump -D %t.o > %t-detect.txt
diff --git a/llvm/test/tools/llvm-readobj/ELF/AMDGPU/elf-headers.test b/llvm/test/tools/llvm-readobj/ELF/AMDGPU/elf-headers.test
index e2266d81d1a597..7fbf4aae6c2460 100644
--- a/llvm/test/tools/llvm-readobj/ELF/AMDGPU/elf-headers.test
+++ b/llvm/test/tools/llvm-readobj/ELF/AMDGPU/elf-headers.test
@@ -253,6 +253,9 @@
# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1013
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1013 -DFLAG_VALUE=0x42
+# RUN: yaml2obj %s -o %t -DABI_VERSION=4 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=4 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC -DFLAG_VALUE=0x52
+
# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1013
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1013 -DFLAG_VALUE=0x42
@@ -322,6 +325,9 @@
# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1036
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1036 -DFLAG_VALUE=0x45
+# RUN: yaml2obj %s -o %t -DABI_VERSION=4 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=4 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC -DFLAG_VALUE=0x53
+
# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME="EF_AMDGPU_MACH_AMDGCN_GFX90A, EF_AMDGPU_FEATURE_XNACK_V3"
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,DOUBLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_0="EF_AMDGPU_FEATURE_XNACK_V3 (0x100)" -DFLAG_1="EF_AMDGPU_MACH_AMDGCN_GFX90A (0x3F)" -DFLAG_VALUE=0x13F
@@ -355,6 +361,9 @@
# RUN: yaml2obj %s -o %t -DABI_VERSION=16 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX90A
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,UNKNOWN-ABI-VERSION --match-full-lines -DABI_VERSION=16 -DFILE=%t -DFLAG_VALUE=0x3F
+# RUN: yaml2obj %s -o %t -DABI_VERSION=4 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=4 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC -DFLAG_VALUE=0x51
+
# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1100
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1100 -DFLAG_VALUE=0x41
@@ -391,6 +400,9 @@
# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1103
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1103 -DFLAG_VALUE=0x44
+# RUN: yaml2obj %s -o %t -DABI_VERSION=4 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=4 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC -DFLAG_VALUE=0x54
+
# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1150
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1150 -DFLAG_VALUE=0x43
diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp
index d1382e829bb3aa..4a4dcf07ec8a36 100644
--- a/llvm/tools/llvm-readobj/ELFDumper.cpp
+++ b/llvm/tools/llvm-readobj/ELFDumper.cpp
@@ -1561,68 +1561,72 @@ const EnumEntry<unsigned> ElfHeaderMipsFlags[] = {
};
// clang-format off
-#define AMDGPU_MACH_ENUM_ENTS \
- ENUM_ENT(EF_AMDGPU_MACH_NONE, "none"), \
- ENUM_ENT(EF_AMDGPU_MACH_R600_R600, "r600"), \
- ENUM_ENT(EF_AMDGPU_MACH_R600_R630, "r630"), \
- ENUM_ENT(EF_AMDGPU_MACH_R600_RS880, "rs880"), \
- ENUM_ENT(EF_AMDGPU_MACH_R600_RV670, "rv670"), \
- ENUM_ENT(EF_AMDGPU_MACH_R600_RV710, "rv710"), \
- ENUM_ENT(EF_AMDGPU_MACH_R600_RV730, "rv730"), \
- ENUM_ENT(EF_AMDGPU_MACH_R600_RV770, "rv770"), \
- ENUM_ENT(EF_AMDGPU_MACH_R600_CEDAR, "cedar"), \
- ENUM_ENT(EF_AMDGPU_MACH_R600_CYPRESS, "cypress"), \
- ENUM_ENT(EF_AMDGPU_MACH_R600_JUNIPER, "juniper"), \
- ENUM_ENT(EF_AMDGPU_MACH_R600_REDWOOD, "redwood"), \
- ENUM_ENT(EF_AMDGPU_MACH_R600_SUMO, "sumo"), \
- ENUM_ENT(EF_AMDGPU_MACH_R600_BARTS, "barts"), \
- ENUM_ENT(EF_AMDGPU_MACH_R600_CAICOS, "caicos"), \
- ENUM_ENT(EF_AMDGPU_MACH_R600_CAYMAN, "cayman"), \
- ENUM_ENT(EF_AMDGPU_MACH_R600_TURKS, "turks"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX600, "gfx600"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX601, "gfx601"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX602, "gfx602"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX700, "gfx700"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX701, "gfx701"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX702, "gfx702"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX703, "gfx703"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX704, "gfx704"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX705, "gfx705"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX801, "gfx801"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX802, "gfx802"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX803, "gfx803"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX805, "gfx805"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX810, "gfx810"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX900, "gfx900"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX902, "gfx902"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX904, "gfx904"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX906, "gfx906"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX908, "gfx908"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX909, "gfx909"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX90A, "gfx90a"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX90C, "gfx90c"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX940, "gfx940"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX941, "gfx941"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX942, "gfx942"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1010, "gfx1010"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1011, "gfx1011"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1012, "gfx1012"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1013, "gfx1013"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1030, "gfx1030"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1031, "gfx1031"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1032, "gfx1032"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1033, "gfx1033"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1034, "gfx1034"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1035, "gfx1035"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1036, "gfx1036"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1100, "gfx1100"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1101, "gfx1101"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1102, "gfx1102"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1103, "gfx1103"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1150, "gfx1150"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1151, "gfx1151"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1200, "gfx1200"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1201, "gfx1201")
+#define AMDGPU_MACH_ENUM_ENTS \
+ ENUM_ENT(EF_AMDGPU_MACH_NONE, "none"), \
+ ENUM_ENT(EF_AMDGPU_MACH_R600_R600, "r600"), \
+ ENUM_ENT(EF_AMDGPU_MACH_R600_R630, "r630"), \
+ ENUM_ENT(EF_AMDGPU_MACH_R600_RS880, "rs880"), \
+ ENUM_ENT(EF_AMDGPU_MACH_R600_RV670, "rv670"), \
+ ENUM_ENT(EF_AMDGPU_MACH_R600_RV710, "rv710"), \
+ ENUM_ENT(EF_AMDGPU_MACH_R600_RV730, "rv730"), \
+ ENUM_ENT(EF_AMDGPU_MACH_R600_RV770, "rv770"), \
+ ENUM_ENT(EF_AMDGPU_MACH_R600_CEDAR, "cedar"), \
+ ENUM_ENT(EF_AMDGPU_MACH_R600_CYPRESS, "cypress"), \
+ ENUM_ENT(EF_AMDGPU_MACH_R600_JUNIPER, "juniper"), \
+ ENUM_ENT(EF_AMDGPU_MACH_R600_REDWOOD, "redwood"), \
+ ENUM_ENT(EF_AMDGPU_MACH_R600_SUMO, "sumo"), \
+ ENUM_ENT(EF_AMDGPU_MACH_R600_BARTS, "barts"), \
+ ENUM_ENT(EF_AMDGPU_MACH_R600_CAICOS, "caicos"), \
+ ENUM_ENT(EF_AMDGPU_MACH_R600_CAYMAN, "cayman"), \
+ ENUM_ENT(EF_AMDGPU_MACH_R600_TURKS, "turks"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX600, "gfx600"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX601, "gfx601"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX602, "gfx602"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX700, "gfx700"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX701, "gfx701"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX702, "gfx702"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX703, "gfx703"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX704, "gfx704"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX705, "gfx705"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX801, "gfx801"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX802, "gfx802"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX803, "gfx803"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX805, "gfx805"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX810, "gfx810"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX900, "gfx900"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX902, "gfx902"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX904, "gfx904"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX906, "gfx906"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX908, "gfx908"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX909, "gfx909"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX90A, "gfx90a"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX90C, "gfx90c"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX940, "gfx940"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX941, "gfx941"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX942, "gfx942"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1010, "gfx1010"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1011, "gfx1011"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1012, "gfx1012"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1013, "gfx1013"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1030, "gfx1030"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1031, "gfx1031"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1032, "gfx1032"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1033, "gfx1033"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1034, "gfx1034"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1035, "gfx1035"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1036, "gfx1036"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1100, "gfx1100"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1101, "gfx1101"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1102, "gfx1102"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1103, "gfx1103"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1150, "gfx1150"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1151, "gfx1151"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1200, "gfx1200"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1201, "gfx1201"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC, "gfx9-generic"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC, "gfx10.1-generic"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC, "gfx10.3-generic"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC, "gfx11-generic")
// clang-format on
const EnumEntry<unsigned> ElfHeaderAMDGPUFlagsABIVersion3[] = {
>From 81ee87d83d9fa95bb391397da9f77f8e5017bcaf Mon Sep 17 00:00:00 2001
From: pvanhout <pierre.vanhoutryve at amd.com>
Date: Thu, 18 Jan 2024 13:28:34 +0100
Subject: [PATCH 3/3] fix ref
---
llvm/docs/AMDGPUUsage.rst | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst
index 345f3137bedc5c..a30a717d18df65 100644
--- a/llvm/docs/AMDGPUUsage.rst
+++ b/llvm/docs/AMDGPUUsage.rst
@@ -631,7 +631,7 @@ generating the code. A mismatch of features may result in incorrect
execution, or a reduction in performance.
The target features supported by each processor is listed in
-:ref:`amdgpu-processor-table`.
+:ref:`amdgpu-processors`.
Target features are controlled by exactly one of the following Clang
options:
More information about the cfe-commits
mailing list