[clang] 544d912 - [AMDGPU] Remove Code Object V3 (#67118)
via cfe-commits
cfe-commits at lists.llvm.org
Sun Oct 15 23:21:54 PDT 2023
Author: Pierre van Houtryve
Date: 2023-10-16T08:21:48+02:00
New Revision: 544d91280c26fd5f7acd70eac4d667863562f4cc
URL: https://github.com/llvm/llvm-project/commit/544d91280c26fd5f7acd70eac4d667863562f4cc
DIFF: https://github.com/llvm/llvm-project/commit/544d91280c26fd5f7acd70eac4d667863562f4cc.diff
LOG: [AMDGPU] Remove Code Object V3 (#67118)
V3 has been deprecated for a while as well, so it can safely be removed
like V2 was removed.
- [Clang] Set minimum code object version to 4
- [lld] Fix tests using code object v3
- Remove code object V3 from the AMDGPU backend, and delete or port v3
tests to v4.
- Update docs to make it clear V3 can no longer be emitted.
Added:
llvm/test/CodeGen/AMDGPU/hsa-metadata-enqueue-kernel-.ll
llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full.ll
llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args-v4.ll
llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-present-asan.ll
llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-v4.ll
llvm/test/CodeGen/AMDGPU/hsa-metadata-images.ll
llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-1.ll
llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-3.ll
llvm/test/CodeGen/AMDGPU/hsa-metadata-kernel-code-props.ll
llvm/test/MC/AMDGPU/hsa-diag-v4.s
Modified:
clang/include/clang/Basic/TargetOptions.h
clang/include/clang/Driver/Options.td
clang/lib/Driver/ToolChains/CommonArgs.cpp
clang/test/CodeGenCUDA/amdgpu-code-object-version.cu
clang/test/Driver/hip-code-object-version.hip
clang/test/Driver/hip-device-libs.hip
lld/test/ELF/amdgpu-abi-version.s
llvm/docs/AMDGPUUsage.rst
llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h
llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
llvm/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
llvm/test/CodeGen/AMDGPU/GlobalISel/implicit-kernarg-backend-usage-global-isel.ll
llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ctor-dtor-list.ll
llvm/test/CodeGen/AMDGPU/implicit-kernarg-backend-usage.ll
llvm/test/CodeGen/AMDGPU/kernarg-size.ll
llvm/test/CodeGen/AMDGPU/stack-realign-kernel.ll
llvm/test/CodeGen/AMDGPU/trap-abis.ll
llvm/test/MC/AMDGPU/user-sgpr-count-diag.s
llvm/test/MC/AMDGPU/user-sgpr-count.s
Removed:
llvm/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size-v3.ll
llvm/test/CodeGen/AMDGPU/directive-amdgcn-target-v3.ll
llvm/test/CodeGen/AMDGPU/hsa-metadata-enqueue-kernel-v3.ll
llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full-v3.ll
llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args-v3.ll
llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-present-v3-asan.ll
llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-v3.ll
llvm/test/CodeGen/AMDGPU/hsa-metadata-images-v3.ll
llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-1-v3.ll
llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-3-v3.ll
llvm/test/CodeGen/AMDGPU/hsa-metadata-kernel-code-props-v3.ll
llvm/test/MC/AMDGPU/hsa-diag-v3.s
llvm/test/MC/AMDGPU/hsa-gfx10-v3.s
llvm/test/MC/AMDGPU/hsa-gfx11-v3.s
llvm/test/MC/AMDGPU/hsa-gfx90a-v3.s
llvm/test/MC/AMDGPU/hsa-gfx940-v3.s
llvm/test/MC/AMDGPU/hsa-v3.s
################################################################################
diff --git a/clang/include/clang/Basic/TargetOptions.h b/clang/include/clang/Basic/TargetOptions.h
index 8bb03249b7f8308..ba3acd029587160 100644
--- a/clang/include/clang/Basic/TargetOptions.h
+++ b/clang/include/clang/Basic/TargetOptions.h
@@ -83,7 +83,7 @@ class TargetOptions {
enum CodeObjectVersionKind {
COV_None,
COV_2 = 200, // Unsupported.
- COV_3 = 300,
+ COV_3 = 300, // Unsupported.
COV_4 = 400,
COV_5 = 500,
};
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 640044622fc09ee..a89d6b6579f1176 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -4682,9 +4682,9 @@ defm amdgpu_ieee : BoolOption<"m", "amdgpu-ieee",
def mcode_object_version_EQ : Joined<["-"], "mcode-object-version=">, Group<m_Group>,
HelpText<"Specify code object ABI version. Defaults to 4. (AMDGPU only)">,
Visibility<[ClangOption, CC1Option]>,
- Values<"none,3,4,5">,
+ Values<"none,4,5">,
NormalizedValuesScope<"TargetOptions">,
- NormalizedValues<["COV_None", "COV_3", "COV_4", "COV_5"]>,
+ NormalizedValues<["COV_None", "COV_4", "COV_5"]>,
MarshallingInfoEnum<TargetOpts<"CodeObjectVersion">, "COV_4">;
defm cumode : SimpleMFlag<"cumode",
diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp
index 25fd940584624ee..f104ec5a881cb96 100644
--- a/clang/lib/Driver/ToolChains/CommonArgs.cpp
+++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp
@@ -2338,7 +2338,7 @@ getAMDGPUCodeObjectArgument(const Driver &D, const llvm::opt::ArgList &Args) {
void tools::checkAMDGPUCodeObjectVersion(const Driver &D,
const llvm::opt::ArgList &Args) {
- const unsigned MinCodeObjVer = 3;
+ const unsigned MinCodeObjVer = 4;
const unsigned MaxCodeObjVer = 5;
if (auto *CodeObjArg = getAMDGPUCodeObjectArgument(D, Args)) {
diff --git a/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu b/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu
index 0ddd63faf46f28f..ff5deaf9ab850d2 100644
--- a/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu
+++ b/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu
@@ -3,9 +3,6 @@
// RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm \
// RUN: -o - %s | FileCheck %s -check-prefix=V4
-// RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm \
-// RUN: -mcode-object-version=3 -o - %s | FileCheck -check-prefix=V3 %s
-
// RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm \
// RUN: -mcode-object-version=4 -o - %s | FileCheck -check-prefix=V4 %s
@@ -18,7 +15,6 @@
// RUN: not %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm \
// RUN: -mcode-object-version=4.1 -o - %s 2>&1| FileCheck %s -check-prefix=INV
-// V3: !{{.*}} = !{i32 1, !"amdgpu_code_object_version", i32 300}
// V4: !{{.*}} = !{i32 1, !"amdgpu_code_object_version", i32 400}
// V5: !{{.*}} = !{i32 1, !"amdgpu_code_object_version", i32 500}
// NONE-NOT: !{{.*}} = !{i32 1, !"amdgpu_code_object_version",
diff --git a/clang/test/Driver/hip-code-object-version.hip b/clang/test/Driver/hip-code-object-version.hip
index 33559b6576e7d30..af5f9a3da21dfd3 100644
--- a/clang/test/Driver/hip-code-object-version.hip
+++ b/clang/test/Driver/hip-code-object-version.hip
@@ -1,20 +1,5 @@
// REQUIRES: amdgpu-registered-target
-// Check bundle ID for code object v3.
-
-// RUN: not %clang -### --target=x86_64-linux-gnu \
-// RUN: -mcode-object-version=3 \
-// RUN: --offload-arch=gfx906 --rocm-path=%S/Inputs/rocm \
-// RUN: %s 2>&1 | FileCheck -check-prefix=V3 %s
-
-// RUN: not %clang -### --target=x86_64-linux-gnu \
-// RUN: -mcode-object-version=4 -mcode-object-version=3 \
-// RUN: --offload-arch=gfx906 --rocm-path=%S/Inputs/rocm \
-// RUN: %s 2>&1 | FileCheck -check-prefix=V3 %s
-
-// V3: "-mcode-object-version=3"
-// V3: "-mllvm" "--amdhsa-code-object-version=3"
-// V3: "-targets=host-x86_64-unknown-linux,hip-amdgcn-amd-amdhsa--gfx906"
// Check bundle ID for code object version 4.
@@ -62,6 +47,13 @@
// INVALID_2: error: invalid integral value '2' in '-mcode-object-version=2'
// INVALID_2-NOT: error: invalid integral value
+// RUN: not %clang -### --target=x86_64-linux-gnu \
+// RUN: -mcode-object-version=3 \
+// RUN: --offload-arch=gfx906 --rocm-path=%S/Inputs/rocm \
+// RUN: %s 2>&1 | FileCheck -check-prefix=INVALID_3 %s
+// INVALID_3: error: invalid integral value '3' in '-mcode-object-version=3'
+// INVALID_3-NOT: error: invalid integral value
+
// Check LLVM code object version option --amdhsa-code-object-version
// is passed to -cc1 and -cc1as, and -mcode-object-version is passed
// to -cc1 but not -cc1as.
diff --git a/clang/test/Driver/hip-device-libs.hip b/clang/test/Driver/hip-device-libs.hip
index 71d9554da696b42..6ac5778721ba5b7 100644
--- a/clang/test/Driver/hip-device-libs.hip
+++ b/clang/test/Driver/hip-device-libs.hip
@@ -168,12 +168,6 @@
// RUN: --rocm-path=%S/Inputs/rocm %S/Inputs/hip_multiple_inputs/b.hip \
// RUN: 2>&1 | FileCheck %s --check-prefixes=NOABI4
-// Test -mcode-object-version=3
-// RUN: %clang -### --target=x86_64-linux-gnu --offload-arch=gfx900 \
-// RUN: -mcode-object-version=3 \
-// RUN: --rocm-path=%S/Inputs/rocm %S/Inputs/hip_multiple_inputs/b.hip \
-// RUN: 2>&1 | FileCheck %s --check-prefixes=ABI4
-
// Test -mcode-object-version=4
// RUN: %clang -### --target=x86_64-linux-gnu --offload-arch=gfx900 \
// RUN: -mcode-object-version=4 \
diff --git a/lld/test/ELF/amdgpu-abi-version.s b/lld/test/ELF/amdgpu-abi-version.s
index 455a52aec921092..72b67fdaeb1a1b6 100644
--- a/lld/test/ELF/amdgpu-abi-version.s
+++ b/lld/test/ELF/amdgpu-abi-version.s
@@ -1,11 +1,3 @@
-# REQUIRES: amdgpu
-# RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=3 -filetype=obj %s -o %t.o
-# RUN: ld.lld -shared %t.o -o %t.so
-# RUN: llvm-readobj --file-headers %t.so | FileCheck --check-prefix=COV3 %s
-
-# COV3: OS/ABI: AMDGPU_HSA (0x40)
-# COV3: ABIVersion: 1
-
# RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 -filetype=obj %s -o %t.o
# RUN: ld.lld -shared %t.o -o %t.so
# RUN: llvm-readobj --file-headers %t.so | FileCheck --check-prefix=COV4 %s
diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst
index 8022816d7e616d3..ed9581ccc93dfac 100644
--- a/llvm/docs/AMDGPUUsage.rst
+++ b/llvm/docs/AMDGPUUsage.rst
@@ -1409,12 +1409,10 @@ The AMDGPU backend uses the following ELF header:
object conforms:
* ``ELFABIVERSION_AMDGPU_HSA_V2`` is used to specify the version of AMD HSA
- runtime ABI for code object V2. Specify using the Clang option
- ``-mcode-object-version=2``.
+ runtime ABI for code object V2. Can no longer be emitted by this version of LLVM.
* ``ELFABIVERSION_AMDGPU_HSA_V3`` is used to specify the version of AMD HSA
- runtime ABI for code object V3. Specify using the Clang option
- ``-mcode-object-version=3``.
+ runtime ABI for code object V3. Can no longer be emitted by this version of LLVM.
* ``ELFABIVERSION_AMDGPU_HSA_V4`` is used to specify the version of AMD HSA
runtime ABI for code object V4. Specify using the Clang option
@@ -3402,8 +3400,7 @@ Code Object V3 Metadata
+++++++++++++++++++++++
.. warning::
- Code object V3 is not the default code object version emitted by this version
- of LLVM.
+ Code object V3 generation is no longer supported by this version of LLVM.
Code object V3 and above metadata is specified by the ``NT_AMDGPU_METADATA`` note
record (see :ref:`amdgpu-note-records-v3-onwards`).
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index aadc4a68ea13278..8d0ef67a615dfc6 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -341,9 +341,6 @@ bool AMDGPUAsmPrinter::doInitialization(Module &M) {
if (TM.getTargetTriple().getOS() == Triple::AMDHSA) {
switch (CodeObjectVersion) {
- case AMDGPU::AMDHSA_COV3:
- HSAMetadataStream.reset(new HSAMD::MetadataStreamerMsgPackV3());
- break;
case AMDGPU::AMDHSA_COV4:
HSAMetadataStream.reset(new HSAMD::MetadataStreamerMsgPackV4());
break;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
index 5060cd3aec581ce..b51a876750b58b0 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
@@ -49,14 +49,14 @@ namespace AMDGPU {
namespace HSAMD {
//===----------------------------------------------------------------------===//
-// HSAMetadataStreamerV3
+// HSAMetadataStreamerV4
//===----------------------------------------------------------------------===//
-void MetadataStreamerMsgPackV3::dump(StringRef HSAMetadataString) const {
+void MetadataStreamerMsgPackV4::dump(StringRef HSAMetadataString) const {
errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n';
}
-void MetadataStreamerMsgPackV3::verify(StringRef HSAMetadataString) const {
+void MetadataStreamerMsgPackV4::verify(StringRef HSAMetadataString) const {
errs() << "AMDGPU HSA Metadata Parser Test: ";
msgpack::Document FromHSAMetadataString;
@@ -78,7 +78,7 @@ void MetadataStreamerMsgPackV3::verify(StringRef HSAMetadataString) const {
}
std::optional<StringRef>
-MetadataStreamerMsgPackV3::getAccessQualifier(StringRef AccQual) const {
+MetadataStreamerMsgPackV4::getAccessQualifier(StringRef AccQual) const {
return StringSwitch<std::optional<StringRef>>(AccQual)
.Case("read_only", StringRef("read_only"))
.Case("write_only", StringRef("write_only"))
@@ -86,7 +86,7 @@ MetadataStreamerMsgPackV3::getAccessQualifier(StringRef AccQual) const {
.Default(std::nullopt);
}
-std::optional<StringRef> MetadataStreamerMsgPackV3::getAddressSpaceQualifier(
+std::optional<StringRef> MetadataStreamerMsgPackV4::getAddressSpaceQualifier(
unsigned AddressSpace) const {
switch (AddressSpace) {
case AMDGPUAS::PRIVATE_ADDRESS:
@@ -107,7 +107,7 @@ std::optional<StringRef> MetadataStreamerMsgPackV3::getAddressSpaceQualifier(
}
StringRef
-MetadataStreamerMsgPackV3::getValueKind(Type *Ty, StringRef TypeQual,
+MetadataStreamerMsgPackV4::getValueKind(Type *Ty, StringRef TypeQual,
StringRef BaseTypeName) const {
if (TypeQual.contains("pipe"))
return "pipe";
@@ -134,7 +134,7 @@ MetadataStreamerMsgPackV3::getValueKind(Type *Ty, StringRef TypeQual,
: "by_value");
}
-std::string MetadataStreamerMsgPackV3::getTypeName(Type *Ty,
+std::string MetadataStreamerMsgPackV4::getTypeName(Type *Ty,
bool Signed) const {
switch (Ty->getTypeID()) {
case Type::IntegerTyID: {
@@ -173,7 +173,7 @@ std::string MetadataStreamerMsgPackV3::getTypeName(Type *Ty,
}
msgpack::ArrayDocNode
-MetadataStreamerMsgPackV3::getWorkGroupDimensions(MDNode *Node) const {
+MetadataStreamerMsgPackV4::getWorkGroupDimensions(MDNode *Node) const {
auto Dims = HSAMetadataDoc->getArrayNode();
if (Node->getNumOperands() != 3)
return Dims;
@@ -184,14 +184,20 @@ MetadataStreamerMsgPackV3::getWorkGroupDimensions(MDNode *Node) const {
return Dims;
}
-void MetadataStreamerMsgPackV3::emitVersion() {
+void MetadataStreamerMsgPackV4::emitVersion() {
auto Version = HSAMetadataDoc->getArrayNode();
- Version.push_back(Version.getDocument()->getNode(VersionMajorV3));
- Version.push_back(Version.getDocument()->getNode(VersionMinorV3));
+ Version.push_back(Version.getDocument()->getNode(VersionMajorV4));
+ Version.push_back(Version.getDocument()->getNode(VersionMinorV4));
getRootMetadata("amdhsa.version") = Version;
}
-void MetadataStreamerMsgPackV3::emitPrintf(const Module &Mod) {
+void MetadataStreamerMsgPackV4::emitTargetID(
+ const IsaInfo::AMDGPUTargetID &TargetID) {
+ getRootMetadata("amdhsa.target") =
+ HSAMetadataDoc->getNode(TargetID.toString(), /*Copy=*/true);
+}
+
+void MetadataStreamerMsgPackV4::emitPrintf(const Module &Mod) {
auto Node = Mod.getNamedMetadata("llvm.printf.fmts");
if (!Node)
return;
@@ -204,7 +210,7 @@ void MetadataStreamerMsgPackV3::emitPrintf(const Module &Mod) {
getRootMetadata("amdhsa.printf") = Printf;
}
-void MetadataStreamerMsgPackV3::emitKernelLanguage(const Function &Func,
+void MetadataStreamerMsgPackV4::emitKernelLanguage(const Function &Func,
msgpack::MapDocNode Kern) {
// TODO: What about other languages?
auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version");
@@ -223,7 +229,7 @@ void MetadataStreamerMsgPackV3::emitKernelLanguage(const Function &Func,
Kern[".language_version"] = LanguageVersion;
}
-void MetadataStreamerMsgPackV3::emitKernelAttrs(const Function &Func,
+void MetadataStreamerMsgPackV4::emitKernelAttrs(const Function &Func,
msgpack::MapDocNode Kern) {
if (auto Node = Func.getMetadata("reqd_work_group_size"))
@@ -248,7 +254,7 @@ void MetadataStreamerMsgPackV3::emitKernelAttrs(const Function &Func,
Kern[".kind"] = Kern.getDocument()->getNode("fini");
}
-void MetadataStreamerMsgPackV3::emitKernelArgs(const MachineFunction &MF,
+void MetadataStreamerMsgPackV4::emitKernelArgs(const MachineFunction &MF,
msgpack::MapDocNode Kern) {
auto &Func = MF.getFunction();
unsigned Offset = 0;
@@ -261,7 +267,7 @@ void MetadataStreamerMsgPackV3::emitKernelArgs(const MachineFunction &MF,
Kern[".args"] = Args;
}
-void MetadataStreamerMsgPackV3::emitKernelArg(const Argument &Arg,
+void MetadataStreamerMsgPackV4::emitKernelArg(const Argument &Arg,
unsigned &Offset,
msgpack::ArrayDocNode Args) {
auto Func = Arg.getParent();
@@ -326,7 +332,7 @@ void MetadataStreamerMsgPackV3::emitKernelArg(const Argument &Arg,
AccQual, TypeQual);
}
-void MetadataStreamerMsgPackV3::emitKernelArg(
+void MetadataStreamerMsgPackV4::emitKernelArg(
const DataLayout &DL, Type *Ty, Align Alignment, StringRef ValueKind,
unsigned &Offset, msgpack::ArrayDocNode Args, MaybeAlign PointeeAlign,
StringRef Name, StringRef TypeName, StringRef BaseTypeName,
@@ -375,7 +381,7 @@ void MetadataStreamerMsgPackV3::emitKernelArg(
Args.push_back(Arg);
}
-void MetadataStreamerMsgPackV3::emitHiddenKernelArgs(
+void MetadataStreamerMsgPackV4::emitHiddenKernelArgs(
const MachineFunction &MF, unsigned &Offset, msgpack::ArrayDocNode Args) {
auto &Func = MF.getFunction();
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
@@ -448,9 +454,10 @@ void MetadataStreamerMsgPackV3::emitHiddenKernelArgs(
}
}
-msgpack::MapDocNode MetadataStreamerMsgPackV3::getHSAKernelProps(
- const MachineFunction &MF, const SIProgramInfo &ProgramInfo,
- unsigned CodeObjectVersion) const {
+msgpack::MapDocNode
+MetadataStreamerMsgPackV4::getHSAKernelProps(const MachineFunction &MF,
+ const SIProgramInfo &ProgramInfo,
+ unsigned CodeObjectVersion) const {
const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
const Function &F = MF.getFunction();
@@ -495,18 +502,19 @@ msgpack::MapDocNode MetadataStreamerMsgPackV3::getHSAKernelProps(
return Kern;
}
-bool MetadataStreamerMsgPackV3::emitTo(AMDGPUTargetStreamer &TargetStreamer) {
+bool MetadataStreamerMsgPackV4::emitTo(AMDGPUTargetStreamer &TargetStreamer) {
return TargetStreamer.EmitHSAMetadata(*HSAMetadataDoc, true);
}
-void MetadataStreamerMsgPackV3::begin(const Module &Mod,
+void MetadataStreamerMsgPackV4::begin(const Module &Mod,
const IsaInfo::AMDGPUTargetID &TargetID) {
emitVersion();
+ emitTargetID(TargetID);
emitPrintf(Mod);
getRootMetadata("amdhsa.kernels") = HSAMetadataDoc->getArrayNode();
}
-void MetadataStreamerMsgPackV3::end() {
+void MetadataStreamerMsgPackV4::end() {
std::string HSAMetadataString;
raw_string_ostream StrOS(HSAMetadataString);
HSAMetadataDoc->toYAML(StrOS);
@@ -517,7 +525,7 @@ void MetadataStreamerMsgPackV3::end() {
verify(StrOS.str());
}
-void MetadataStreamerMsgPackV3::emitKernel(const MachineFunction &MF,
+void MetadataStreamerMsgPackV4::emitKernel(const MachineFunction &MF,
const SIProgramInfo &ProgramInfo) {
auto &Func = MF.getFunction();
if (Func.getCallingConv() != CallingConv::AMDGPU_KERNEL &&
@@ -542,31 +550,6 @@ void MetadataStreamerMsgPackV3::emitKernel(const MachineFunction &MF,
Kernels.push_back(Kern);
}
-//===----------------------------------------------------------------------===//
-// HSAMetadataStreamerV4
-//===----------------------------------------------------------------------===//
-
-void MetadataStreamerMsgPackV4::emitVersion() {
- auto Version = HSAMetadataDoc->getArrayNode();
- Version.push_back(Version.getDocument()->getNode(VersionMajorV4));
- Version.push_back(Version.getDocument()->getNode(VersionMinorV4));
- getRootMetadata("amdhsa.version") = Version;
-}
-
-void MetadataStreamerMsgPackV4::emitTargetID(
- const IsaInfo::AMDGPUTargetID &TargetID) {
- getRootMetadata("amdhsa.target") =
- HSAMetadataDoc->getNode(TargetID.toString(), /*Copy=*/true);
-}
-
-void MetadataStreamerMsgPackV4::begin(const Module &Mod,
- const IsaInfo::AMDGPUTargetID &TargetID) {
- emitVersion();
- emitTargetID(TargetID);
- emitPrintf(Mod);
- getRootMetadata("amdhsa.kernels") = HSAMetadataDoc->getArrayNode();
-}
-
//===----------------------------------------------------------------------===//
// HSAMetadataStreamerV5
//===----------------------------------------------------------------------===//
@@ -680,7 +663,7 @@ void MetadataStreamerMsgPackV5::emitHiddenKernelArgs(
void MetadataStreamerMsgPackV5::emitKernelAttrs(const Function &Func,
msgpack::MapDocNode Kern) {
- MetadataStreamerMsgPackV3::emitKernelAttrs(Func, Kern);
+ MetadataStreamerMsgPackV4::emitKernelAttrs(Func, Kern);
if (Func.getFnAttribute("uniform-work-group-size").getValueAsBool())
Kern[".uniform_work_group_size"] = Kern.getDocument()->getNode(1);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h
index d2b3b8917ce0f70..18a7b5d7a9633e8 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h
@@ -62,7 +62,7 @@ class MetadataStreamer {
msgpack::MapDocNode Kern) = 0;
};
-class MetadataStreamerMsgPackV3 : public MetadataStreamer {
+class MetadataStreamerMsgPackV4 : public MetadataStreamer {
protected:
std::unique_ptr<msgpack::Document> HSAMetadataDoc =
std::make_unique<msgpack::Document>();
@@ -89,6 +89,8 @@ class MetadataStreamerMsgPackV3 : public MetadataStreamer {
void emitVersion() override;
+ void emitTargetID(const IsaInfo::AMDGPUTargetID &TargetID);
+
void emitPrintf(const Module &Mod);
void emitKernelLanguage(const Function &Func, msgpack::MapDocNode Kern);
@@ -120,8 +122,8 @@ class MetadataStreamerMsgPackV3 : public MetadataStreamer {
}
public:
- MetadataStreamerMsgPackV3() = default;
- ~MetadataStreamerMsgPackV3() = default;
+ MetadataStreamerMsgPackV4() = default;
+ ~MetadataStreamerMsgPackV4() = default;
bool emitTo(AMDGPUTargetStreamer &TargetStreamer) override;
@@ -134,19 +136,6 @@ class MetadataStreamerMsgPackV3 : public MetadataStreamer {
const SIProgramInfo &ProgramInfo) override;
};
-class MetadataStreamerMsgPackV4 : public MetadataStreamerMsgPackV3 {
-protected:
- void emitVersion() override;
- void emitTargetID(const IsaInfo::AMDGPUTargetID &TargetID);
-
-public:
- MetadataStreamerMsgPackV4() = default;
- ~MetadataStreamerMsgPackV4() = default;
-
- void begin(const Module &Mod,
- const IsaInfo::AMDGPUTargetID &TargetID) override;
-};
-
class MetadataStreamerMsgPackV5 final : public MetadataStreamerMsgPackV4 {
protected:
void emitVersion() override;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 02cb77f6ecaca4e..d6717c998bec8be 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -6489,11 +6489,6 @@ bool AMDGPULegalizerInfo::legalizeTrapIntrinsic(MachineInstr &MI,
ST.getTrapHandlerAbi() != GCNSubtarget::TrapHandlerAbi::AMDHSA)
return legalizeTrapEndpgm(MI, MRI, B);
- const Module *M = B.getMF().getFunction().getParent();
- unsigned CodeObjectVersion = AMDGPU::getCodeObjectVersion(*M);
- if (CodeObjectVersion <= AMDGPU::AMDHSA_COV3)
- return legalizeTrapHsaQueuePtr(MI, MRI, B);
-
return ST.supportsGetDoorbellID() ?
legalizeTrapHsa(MI, MRI, B) : legalizeTrapHsaQueuePtr(MI, MRI, B);
}
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
index 6b8c03c1620d26b..42af09e27e471e8 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
@@ -424,7 +424,6 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
switch (CodeObjectVersion) {
default:
break;
- case AMDGPU::AMDHSA_COV3:
case AMDGPU::AMDHSA_COV4:
case AMDGPU::AMDHSA_COV5:
if (getTargetID()->isXnackSupported())
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 33f65ab786584fd..cd849560feac22b 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -5990,11 +5990,6 @@ SDValue SITargetLowering::lowerTRAP(SDValue Op, SelectionDAG &DAG) const {
Subtarget->getTrapHandlerAbi() != GCNSubtarget::TrapHandlerAbi::AMDHSA)
return lowerTrapEndpgm(Op, DAG);
- const Module *M = DAG.getMachineFunction().getFunction().getParent();
- unsigned CodeObjectVersion = AMDGPU::getCodeObjectVersion(*M);
- if (CodeObjectVersion <= AMDGPU::AMDHSA_COV3)
- return lowerTrapHsaQueuePtr(Op, DAG);
-
return Subtarget->supportsGetDoorbellID() ? lowerTrapHsa(Op, DAG) :
lowerTrapHsaQueuePtr(Op, DAG);
}
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index d123b384a27d4cc..5fff19eada75dd8 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -128,8 +128,6 @@ std::optional<uint8_t> getHsaAbiVersion(const MCSubtargetInfo *STI) {
return std::nullopt;
switch (AmdhsaCodeObjectVersion) {
- case 3:
- return ELF::ELFABIVERSION_AMDGPU_HSA_V3;
case 4:
return ELF::ELFABIVERSION_AMDGPU_HSA_V4;
case 5:
@@ -140,12 +138,6 @@ std::optional<uint8_t> getHsaAbiVersion(const MCSubtargetInfo *STI) {
}
}
-bool isHsaAbiVersion3(const MCSubtargetInfo *STI) {
- if (std::optional<uint8_t> HsaAbiVer = getHsaAbiVersion(STI))
- return *HsaAbiVer == ELF::ELFABIVERSION_AMDGPU_HSA_V3;
- return false;
-}
-
bool isHsaAbiVersion4(const MCSubtargetInfo *STI) {
if (std::optional<uint8_t> HsaAbiVer = getHsaAbiVersion(STI))
return *HsaAbiVer == ELF::ELFABIVERSION_AMDGPU_HSA_V4;
@@ -174,7 +166,6 @@ unsigned getCodeObjectVersion(const Module &M) {
unsigned getMultigridSyncArgImplicitArgPosition(unsigned CodeObjectVersion) {
switch (CodeObjectVersion) {
- case AMDHSA_COV3:
case AMDHSA_COV4:
return 48;
case AMDHSA_COV5:
@@ -188,7 +179,6 @@ unsigned getMultigridSyncArgImplicitArgPosition(unsigned CodeObjectVersion) {
// central TD file.
unsigned getHostcallImplicitArgPosition(unsigned CodeObjectVersion) {
switch (CodeObjectVersion) {
- case AMDHSA_COV3:
case AMDHSA_COV4:
return 24;
case AMDHSA_COV5:
@@ -199,7 +189,6 @@ unsigned getHostcallImplicitArgPosition(unsigned CodeObjectVersion) {
unsigned getDefaultQueueImplicitArgPosition(unsigned CodeObjectVersion) {
switch (CodeObjectVersion) {
- case AMDHSA_COV3:
case AMDHSA_COV4:
return 32;
case AMDHSA_COV5:
@@ -210,7 +199,6 @@ unsigned getDefaultQueueImplicitArgPosition(unsigned CodeObjectVersion) {
unsigned getCompletionActionImplicitArgPosition(unsigned CodeObjectVersion) {
switch (CodeObjectVersion) {
- case AMDHSA_COV3:
case AMDHSA_COV4:
return 40;
case AMDHSA_COV5:
@@ -774,15 +762,6 @@ std::string AMDGPUTargetID::toString() const {
std::string Features;
if (STI.getTargetTriple().getOS() == Triple::AMDHSA) {
switch (CodeObjectVersion) {
- case AMDGPU::AMDHSA_COV3:
- // xnack.
- if (isXnackOnOrAny())
- Features += "+xnack";
- // In code object v2 and v3, "sramecc" feature was spelled with a
- // hyphen ("sram-ecc").
- if (isSramEccOnOrAny())
- Features += "+sram-ecc";
- break;
case AMDGPU::AMDHSA_COV4:
case AMDGPU::AMDHSA_COV5:
// sramecc.
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index bb2964f592f66bf..1e0994d0862cf5d 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -42,19 +42,12 @@ namespace AMDGPU {
struct IsaVersion;
-enum {
- AMDHSA_COV3 = 3,
- AMDHSA_COV4 = 4,
- AMDHSA_COV5 = 5
-};
+enum { AMDHSA_COV4 = 4, AMDHSA_COV5 = 5 };
/// \returns True if \p STI is AMDHSA.
bool isHsaAbi(const MCSubtargetInfo &STI);
/// \returns HSA OS ABI Version identification.
std::optional<uint8_t> getHsaAbiVersion(const MCSubtargetInfo *STI);
-/// \returns True if HSA OS ABI Version identification is 3,
-/// false otherwise.
-bool isHsaAbiVersion3(const MCSubtargetInfo *STI);
/// \returns True if HSA OS ABI Version identification is 4,
/// false otherwise.
bool isHsaAbiVersion4(const MCSubtargetInfo *STI);
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/implicit-kernarg-backend-usage-global-isel.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/implicit-kernarg-backend-usage-global-isel.ll
index c25ecafa1f7c074..4bdbe6604782a8b 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/implicit-kernarg-backend-usage-global-isel.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/implicit-kernarg-backend-usage-global-isel.ll
@@ -1,38 +1,11 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: sed 's/CODE_OBJECT_VERSION/300/g' %s | llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 | FileCheck --check-prefix=GFX8V3 %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 | FileCheck --check-prefix=GFX8V4 %s
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 | FileCheck --check-prefix=GFX8V5 %s
-; RUN: sed 's/CODE_OBJECT_VERSION/300/g' %s | llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 | FileCheck --check-prefixes=GFX9V3 %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 | FileCheck --check-prefixes=GFX9V4 %s
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 | FileCheck --check-prefixes=GFX9V5 %s
define amdgpu_kernel void @addrspacecast(ptr addrspace(5) %ptr.private, ptr addrspace(3) %ptr.local) {
-; GFX8V3-LABEL: addrspacecast:
-; GFX8V3: ; %bb.0:
-; GFX8V3-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
-; GFX8V3-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x40
-; GFX8V3-NEXT: v_mov_b32_e32 v2, 1
-; GFX8V3-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8V3-NEXT: s_mov_b32 s4, s0
-; GFX8V3-NEXT: s_mov_b32 s5, s3
-; GFX8V3-NEXT: s_cmp_lg_u32 s0, -1
-; GFX8V3-NEXT: s_cselect_b64 s[4:5], s[4:5], 0
-; GFX8V3-NEXT: s_mov_b32 s6, s1
-; GFX8V3-NEXT: s_mov_b32 s7, s2
-; GFX8V3-NEXT: s_cmp_lg_u32 s1, -1
-; GFX8V3-NEXT: v_mov_b32_e32 v0, s4
-; GFX8V3-NEXT: s_cselect_b64 s[0:1], s[6:7], 0
-; GFX8V3-NEXT: v_mov_b32_e32 v1, s5
-; GFX8V3-NEXT: flat_store_dword v[0:1], v2
-; GFX8V3-NEXT: s_waitcnt vmcnt(0)
-; GFX8V3-NEXT: v_mov_b32_e32 v0, s0
-; GFX8V3-NEXT: v_mov_b32_e32 v2, 2
-; GFX8V3-NEXT: v_mov_b32_e32 v1, s1
-; GFX8V3-NEXT: flat_store_dword v[0:1], v2
-; GFX8V3-NEXT: s_waitcnt vmcnt(0)
-; GFX8V3-NEXT: s_endpgm
-;
; GFX8V4-LABEL: addrspacecast:
; GFX8V4: ; %bb.0:
; GFX8V4-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
@@ -82,30 +55,6 @@ define amdgpu_kernel void @addrspacecast(ptr addrspace(5) %ptr.private, ptr addr
; GFX8V5-NEXT: s_waitcnt vmcnt(0)
; GFX8V5-NEXT: s_endpgm
;
-; GFX9V3-LABEL: addrspacecast:
-; GFX9V3: ; %bb.0:
-; GFX9V3-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
-; GFX9V3-NEXT: s_mov_b64 s[2:3], src_private_base
-; GFX9V3-NEXT: s_mov_b64 s[4:5], src_shared_base
-; GFX9V3-NEXT: v_mov_b32_e32 v2, 1
-; GFX9V3-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9V3-NEXT: s_mov_b32 s2, s0
-; GFX9V3-NEXT: s_cmp_lg_u32 s0, -1
-; GFX9V3-NEXT: s_cselect_b64 s[2:3], s[2:3], 0
-; GFX9V3-NEXT: s_mov_b32 s4, s1
-; GFX9V3-NEXT: s_cmp_lg_u32 s1, -1
-; GFX9V3-NEXT: v_mov_b32_e32 v0, s2
-; GFX9V3-NEXT: s_cselect_b64 s[0:1], s[4:5], 0
-; GFX9V3-NEXT: v_mov_b32_e32 v1, s3
-; GFX9V3-NEXT: flat_store_dword v[0:1], v2
-; GFX9V3-NEXT: s_waitcnt vmcnt(0)
-; GFX9V3-NEXT: v_mov_b32_e32 v0, s0
-; GFX9V3-NEXT: v_mov_b32_e32 v2, 2
-; GFX9V3-NEXT: v_mov_b32_e32 v1, s1
-; GFX9V3-NEXT: flat_store_dword v[0:1], v2
-; GFX9V3-NEXT: s_waitcnt vmcnt(0)
-; GFX9V3-NEXT: s_endpgm
-;
; GFX9V4-LABEL: addrspacecast:
; GFX9V4: ; %bb.0:
; GFX9V4-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -161,19 +110,6 @@ define amdgpu_kernel void @addrspacecast(ptr addrspace(5) %ptr.private, ptr addr
}
define amdgpu_kernel void @llvm_amdgcn_is_shared(ptr %ptr) {
-; GFX8V3-LABEL: llvm_amdgcn_is_shared:
-; GFX8V3: ; %bb.0:
-; GFX8V3-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
-; GFX8V3-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8V3-NEXT: s_load_dword s0, s[4:5], 0x40
-; GFX8V3-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8V3-NEXT: s_cmp_eq_u32 s1, s0
-; GFX8V3-NEXT: s_cselect_b32 s0, 1, 0
-; GFX8V3-NEXT: v_mov_b32_e32 v0, s0
-; GFX8V3-NEXT: flat_store_dword v[0:1], v0
-; GFX8V3-NEXT: s_waitcnt vmcnt(0)
-; GFX8V3-NEXT: s_endpgm
-;
; GFX8V4-LABEL: llvm_amdgcn_is_shared:
; GFX8V4: ; %bb.0:
; GFX8V4-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
@@ -200,18 +136,6 @@ define amdgpu_kernel void @llvm_amdgcn_is_shared(ptr %ptr) {
; GFX8V5-NEXT: s_waitcnt vmcnt(0)
; GFX8V5-NEXT: s_endpgm
;
-; GFX9V3-LABEL: llvm_amdgcn_is_shared:
-; GFX9V3: ; %bb.0:
-; GFX9V3-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
-; GFX9V3-NEXT: s_mov_b64 s[2:3], src_shared_base
-; GFX9V3-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9V3-NEXT: s_cmp_eq_u32 s1, s3
-; GFX9V3-NEXT: s_cselect_b32 s0, 1, 0
-; GFX9V3-NEXT: v_mov_b32_e32 v0, s0
-; GFX9V3-NEXT: global_store_dword v[0:1], v0, off
-; GFX9V3-NEXT: s_waitcnt vmcnt(0)
-; GFX9V3-NEXT: s_endpgm
-;
; GFX9V4-LABEL: llvm_amdgcn_is_shared:
; GFX9V4: ; %bb.0:
; GFX9V4-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -242,19 +166,6 @@ define amdgpu_kernel void @llvm_amdgcn_is_shared(ptr %ptr) {
}
define amdgpu_kernel void @llvm_amdgcn_is_private(ptr %ptr) {
-; GFX8V3-LABEL: llvm_amdgcn_is_private:
-; GFX8V3: ; %bb.0:
-; GFX8V3-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
-; GFX8V3-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8V3-NEXT: s_load_dword s0, s[4:5], 0x44
-; GFX8V3-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8V3-NEXT: s_cmp_eq_u32 s1, s0
-; GFX8V3-NEXT: s_cselect_b32 s0, 1, 0
-; GFX8V3-NEXT: v_mov_b32_e32 v0, s0
-; GFX8V3-NEXT: flat_store_dword v[0:1], v0
-; GFX8V3-NEXT: s_waitcnt vmcnt(0)
-; GFX8V3-NEXT: s_endpgm
-;
; GFX8V4-LABEL: llvm_amdgcn_is_private:
; GFX8V4: ; %bb.0:
; GFX8V4-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
@@ -281,18 +192,6 @@ define amdgpu_kernel void @llvm_amdgcn_is_private(ptr %ptr) {
; GFX8V5-NEXT: s_waitcnt vmcnt(0)
; GFX8V5-NEXT: s_endpgm
;
-; GFX9V3-LABEL: llvm_amdgcn_is_private:
-; GFX9V3: ; %bb.0:
-; GFX9V3-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
-; GFX9V3-NEXT: s_mov_b64 s[2:3], src_private_base
-; GFX9V3-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9V3-NEXT: s_cmp_eq_u32 s1, s3
-; GFX9V3-NEXT: s_cselect_b32 s0, 1, 0
-; GFX9V3-NEXT: v_mov_b32_e32 v0, s0
-; GFX9V3-NEXT: global_store_dword v[0:1], v0, off
-; GFX9V3-NEXT: s_waitcnt vmcnt(0)
-; GFX9V3-NEXT: s_endpgm
-;
; GFX9V4-LABEL: llvm_amdgcn_is_private:
; GFX9V4: ; %bb.0:
; GFX9V4-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -323,11 +222,6 @@ define amdgpu_kernel void @llvm_amdgcn_is_private(ptr %ptr) {
}
define amdgpu_kernel void @llvm_trap() {
-; GFX8V3-LABEL: llvm_trap:
-; GFX8V3: ; %bb.0:
-; GFX8V3-NEXT: s_mov_b64 s[0:1], s[4:5]
-; GFX8V3-NEXT: s_trap 2
-;
; GFX8V4-LABEL: llvm_trap:
; GFX8V4: ; %bb.0:
; GFX8V4-NEXT: s_mov_b64 s[0:1], s[4:5]
@@ -339,11 +233,6 @@ define amdgpu_kernel void @llvm_trap() {
; GFX8V5-NEXT: s_waitcnt lgkmcnt(0)
; GFX8V5-NEXT: s_trap 2
;
-; GFX9V3-LABEL: llvm_trap:
-; GFX9V3: ; %bb.0:
-; GFX9V3-NEXT: s_mov_b64 s[0:1], s[4:5]
-; GFX9V3-NEXT: s_trap 2
-;
; GFX9V4-LABEL: llvm_trap:
; GFX9V4: ; %bb.0:
; GFX9V4-NEXT: s_trap 2
@@ -356,10 +245,6 @@ define amdgpu_kernel void @llvm_trap() {
}
define amdgpu_kernel void @llvm_debugtrap() {
-; GFX8V3-LABEL: llvm_debugtrap:
-; GFX8V3: ; %bb.0:
-; GFX8V3-NEXT: s_trap 3
-;
; GFX8V4-LABEL: llvm_debugtrap:
; GFX8V4: ; %bb.0:
; GFX8V4-NEXT: s_trap 3
@@ -368,10 +253,6 @@ define amdgpu_kernel void @llvm_debugtrap() {
; GFX8V5: ; %bb.0:
; GFX8V5-NEXT: s_trap 3
;
-; GFX9V3-LABEL: llvm_debugtrap:
-; GFX9V3: ; %bb.0:
-; GFX9V3-NEXT: s_trap 3
-;
; GFX9V4-LABEL: llvm_debugtrap:
; GFX9V4: ; %bb.0:
; GFX9V4-NEXT: s_trap 3
@@ -384,32 +265,6 @@ define amdgpu_kernel void @llvm_debugtrap() {
}
define amdgpu_kernel void @llvm_amdgcn_queue_ptr(ptr addrspace(1) %ptr) {
-; GFX8V3-LABEL: llvm_amdgcn_queue_ptr:
-; GFX8V3: ; %bb.0:
-; GFX8V3-NEXT: v_mov_b32_e32 v0, s6
-; GFX8V3-NEXT: v_mov_b32_e32 v1, s7
-; GFX8V3-NEXT: s_add_u32 s0, s8, 8
-; GFX8V3-NEXT: flat_load_ubyte v0, v[0:1] glc
-; GFX8V3-NEXT: s_addc_u32 s1, s9, 0
-; GFX8V3-NEXT: s_waitcnt vmcnt(0)
-; GFX8V3-NEXT: v_mov_b32_e32 v0, s0
-; GFX8V3-NEXT: v_mov_b32_e32 v1, s1
-; GFX8V3-NEXT: flat_load_ubyte v0, v[0:1] glc
-; GFX8V3-NEXT: s_waitcnt vmcnt(0)
-; GFX8V3-NEXT: v_mov_b32_e32 v0, s4
-; GFX8V3-NEXT: v_mov_b32_e32 v1, s5
-; GFX8V3-NEXT: flat_load_ubyte v0, v[0:1] glc
-; GFX8V3-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
-; GFX8V3-NEXT: s_waitcnt vmcnt(0)
-; GFX8V3-NEXT: v_mov_b32_e32 v0, s10
-; GFX8V3-NEXT: v_mov_b32_e32 v1, s11
-; GFX8V3-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8V3-NEXT: v_mov_b32_e32 v3, s1
-; GFX8V3-NEXT: v_mov_b32_e32 v2, s0
-; GFX8V3-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
-; GFX8V3-NEXT: s_waitcnt vmcnt(0)
-; GFX8V3-NEXT: s_endpgm
-;
; GFX8V4-LABEL: llvm_amdgcn_queue_ptr:
; GFX8V4: ; %bb.0:
; GFX8V4-NEXT: v_mov_b32_e32 v0, s6
@@ -460,23 +315,6 @@ define amdgpu_kernel void @llvm_amdgcn_queue_ptr(ptr addrspace(1) %ptr) {
; GFX8V5-NEXT: s_waitcnt vmcnt(0)
; GFX8V5-NEXT: s_endpgm
;
-; GFX9V3-LABEL: llvm_amdgcn_queue_ptr:
-; GFX9V3: ; %bb.0:
-; GFX9V3-NEXT: v_mov_b32_e32 v2, 0
-; GFX9V3-NEXT: global_load_ubyte v0, v2, s[6:7] glc
-; GFX9V3-NEXT: global_load_ubyte v0, v2, s[8:9] offset:8 glc
-; GFX9V3-NEXT: global_load_ubyte v0, v2, s[4:5] glc
-; GFX9V3-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
-; GFX9V3-NEXT: s_waitcnt vmcnt(0)
-; GFX9V3-NEXT: v_mov_b32_e32 v0, s10
-; GFX9V3-NEXT: v_mov_b32_e32 v1, s11
-; GFX9V3-NEXT: ; kill: killed $sgpr6_sgpr7
-; GFX9V3-NEXT: ; kill: killed $sgpr4_sgpr5
-; GFX9V3-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9V3-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX9V3-NEXT: s_waitcnt vmcnt(0)
-; GFX9V3-NEXT: s_endpgm
-;
; GFX9V4-LABEL: llvm_amdgcn_queue_ptr:
; GFX9V4: ; %bb.0:
; GFX9V4-NEXT: v_mov_b32_e32 v2, 0
diff --git a/llvm/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size-v3.ll b/llvm/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size-v3.ll
deleted file mode 100644
index 20d0aea61f27688..000000000000000
--- a/llvm/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size-v3.ll
+++ /dev/null
@@ -1,148 +0,0 @@
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck --check-prefix=CHECK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs -amdgpu-verify-hsa-metadata -filetype=obj -o /dev/null < %s 2>&1 | FileCheck --check-prefix=PARSER %s
-
-; CHECK-LABEL: {{^}}min_64_max_64:
-; CHECK: SGPRBlocks: 0
-; CHECK: VGPRBlocks: 0
-; CHECK: NumSGPRsForWavesPerEU: 1
-; CHECK: NumVGPRsForWavesPerEU: 1
-define amdgpu_kernel void @min_64_max_64() #0 {
-entry:
- ret void
-}
-attributes #0 = {"amdgpu-flat-work-group-size"="64,64"}
-
-; CHECK-LABEL: {{^}}min_64_max_128:
-; CHECK: SGPRBlocks: 0
-; CHECK: VGPRBlocks: 0
-; CHECK: NumSGPRsForWavesPerEU: 1
-; CHECK: NumVGPRsForWavesPerEU: 1
-define amdgpu_kernel void @min_64_max_128() #1 {
-entry:
- ret void
-}
-attributes #1 = {"amdgpu-flat-work-group-size"="64,128"}
-
-; CHECK-LABEL: {{^}}min_128_max_128:
-; CHECK: SGPRBlocks: 0
-; CHECK: VGPRBlocks: 0
-; CHECK: NumSGPRsForWavesPerEU: 1
-; CHECK: NumVGPRsForWavesPerEU: 1
-define amdgpu_kernel void @min_128_max_128() #2 {
-entry:
- ret void
-}
-attributes #2 = {"amdgpu-flat-work-group-size"="128,128"}
-
-; CHECK-LABEL: {{^}}min_1024_max_1024
-; CHECK: SGPRBlocks: 0
-; CHECK: VGPRBlocks: 10
-; CHECK: NumSGPRsForWavesPerEU: 2{{$}}
-; CHECK: NumVGPRsForWavesPerEU: 43
- at var = addrspace(1) global float 0.0
-define amdgpu_kernel void @min_1024_max_1024() #3 {
- %val0 = load volatile float, ptr addrspace(1) @var
- %val1 = load volatile float, ptr addrspace(1) @var
- %val2 = load volatile float, ptr addrspace(1) @var
- %val3 = load volatile float, ptr addrspace(1) @var
- %val4 = load volatile float, ptr addrspace(1) @var
- %val5 = load volatile float, ptr addrspace(1) @var
- %val6 = load volatile float, ptr addrspace(1) @var
- %val7 = load volatile float, ptr addrspace(1) @var
- %val8 = load volatile float, ptr addrspace(1) @var
- %val9 = load volatile float, ptr addrspace(1) @var
- %val10 = load volatile float, ptr addrspace(1) @var
- %val11 = load volatile float, ptr addrspace(1) @var
- %val12 = load volatile float, ptr addrspace(1) @var
- %val13 = load volatile float, ptr addrspace(1) @var
- %val14 = load volatile float, ptr addrspace(1) @var
- %val15 = load volatile float, ptr addrspace(1) @var
- %val16 = load volatile float, ptr addrspace(1) @var
- %val17 = load volatile float, ptr addrspace(1) @var
- %val18 = load volatile float, ptr addrspace(1) @var
- %val19 = load volatile float, ptr addrspace(1) @var
- %val20 = load volatile float, ptr addrspace(1) @var
- %val21 = load volatile float, ptr addrspace(1) @var
- %val22 = load volatile float, ptr addrspace(1) @var
- %val23 = load volatile float, ptr addrspace(1) @var
- %val24 = load volatile float, ptr addrspace(1) @var
- %val25 = load volatile float, ptr addrspace(1) @var
- %val26 = load volatile float, ptr addrspace(1) @var
- %val27 = load volatile float, ptr addrspace(1) @var
- %val28 = load volatile float, ptr addrspace(1) @var
- %val29 = load volatile float, ptr addrspace(1) @var
- %val30 = load volatile float, ptr addrspace(1) @var
- %val31 = load volatile float, ptr addrspace(1) @var
- %val32 = load volatile float, ptr addrspace(1) @var
- %val33 = load volatile float, ptr addrspace(1) @var
- %val34 = load volatile float, ptr addrspace(1) @var
- %val35 = load volatile float, ptr addrspace(1) @var
- %val36 = load volatile float, ptr addrspace(1) @var
- %val37 = load volatile float, ptr addrspace(1) @var
- %val38 = load volatile float, ptr addrspace(1) @var
- %val39 = load volatile float, ptr addrspace(1) @var
- %val40 = load volatile float, ptr addrspace(1) @var
-
- store volatile float %val0, ptr addrspace(1) @var
- store volatile float %val1, ptr addrspace(1) @var
- store volatile float %val2, ptr addrspace(1) @var
- store volatile float %val3, ptr addrspace(1) @var
- store volatile float %val4, ptr addrspace(1) @var
- store volatile float %val5, ptr addrspace(1) @var
- store volatile float %val6, ptr addrspace(1) @var
- store volatile float %val7, ptr addrspace(1) @var
- store volatile float %val8, ptr addrspace(1) @var
- store volatile float %val9, ptr addrspace(1) @var
- store volatile float %val10, ptr addrspace(1) @var
- store volatile float %val11, ptr addrspace(1) @var
- store volatile float %val12, ptr addrspace(1) @var
- store volatile float %val13, ptr addrspace(1) @var
- store volatile float %val14, ptr addrspace(1) @var
- store volatile float %val15, ptr addrspace(1) @var
- store volatile float %val16, ptr addrspace(1) @var
- store volatile float %val17, ptr addrspace(1) @var
- store volatile float %val18, ptr addrspace(1) @var
- store volatile float %val19, ptr addrspace(1) @var
- store volatile float %val20, ptr addrspace(1) @var
- store volatile float %val21, ptr addrspace(1) @var
- store volatile float %val22, ptr addrspace(1) @var
- store volatile float %val23, ptr addrspace(1) @var
- store volatile float %val24, ptr addrspace(1) @var
- store volatile float %val25, ptr addrspace(1) @var
- store volatile float %val26, ptr addrspace(1) @var
- store volatile float %val27, ptr addrspace(1) @var
- store volatile float %val28, ptr addrspace(1) @var
- store volatile float %val29, ptr addrspace(1) @var
- store volatile float %val30, ptr addrspace(1) @var
- store volatile float %val31, ptr addrspace(1) @var
- store volatile float %val32, ptr addrspace(1) @var
- store volatile float %val33, ptr addrspace(1) @var
- store volatile float %val34, ptr addrspace(1) @var
- store volatile float %val35, ptr addrspace(1) @var
- store volatile float %val36, ptr addrspace(1) @var
- store volatile float %val37, ptr addrspace(1) @var
- store volatile float %val38, ptr addrspace(1) @var
- store volatile float %val39, ptr addrspace(1) @var
- store volatile float %val40, ptr addrspace(1) @var
-
- ret void
-}
-attributes #3 = {"amdgpu-flat-work-group-size"="1024,1024"}
-
-!llvm.module.flags = !{!0}
-!0 = !{i32 1, !"amdgpu_code_object_version", i32 300}
-
-; CHECK: amdhsa.kernels:
-; CHECK: .max_flat_workgroup_size: 64
-; CHECK: .name: min_64_max_64
-; CHECK: .max_flat_workgroup_size: 128
-; CHECK: .name: min_64_max_128
-; CHECK: .max_flat_workgroup_size: 128
-; CHECK: .name: min_128_max_128
-; CHECK: .max_flat_workgroup_size: 1024
-; CHECK: .name: min_1024_max_1024
-; CHECK: amdhsa.version:
-; CHECK: - 1
-; CHECK: - 0
-
-; PARSER: AMDGPU HSA Metadata Parser Test: PASS
diff --git a/llvm/test/CodeGen/AMDGPU/directive-amdgcn-target-v3.ll b/llvm/test/CodeGen/AMDGPU/directive-amdgcn-target-v3.ll
deleted file mode 100644
index 6c553e3726abf90..000000000000000
--- a/llvm/test/CodeGen/AMDGPU/directive-amdgcn-target-v3.ll
+++ /dev/null
@@ -1,168 +0,0 @@
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx600 < %s | FileCheck --check-prefixes=V3-GFX600 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tahiti < %s | FileCheck --check-prefixes=V3-GFX600 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx601 < %s | FileCheck --check-prefixes=V3-GFX601 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=pitcairn < %s | FileCheck --check-prefixes=V3-GFX601 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=verde < %s | FileCheck --check-prefixes=V3-GFX601 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx602 < %s | FileCheck --check-prefixes=V3-GFX602 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hainan < %s | FileCheck --check-prefixes=V3-GFX602 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=oland < %s | FileCheck --check-prefixes=V3-GFX602 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 < %s | FileCheck --check-prefixes=V3-GFX700 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri < %s | FileCheck --check-prefixes=V3-GFX700 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx701 < %s | FileCheck --check-prefixes=V3-GFX701 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii < %s | FileCheck --check-prefixes=V3-GFX701 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx702 < %s | FileCheck --check-prefixes=V3-GFX702 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx703 < %s | FileCheck --check-prefixes=V3-GFX703 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=kabini < %s | FileCheck --check-prefixes=V3-GFX703 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=mullins < %s | FileCheck --check-prefixes=V3-GFX703 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx704 < %s | FileCheck --check-prefixes=V3-GFX704 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=bonaire < %s | FileCheck --check-prefixes=V3-GFX704 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx705 < %s | FileCheck --check-prefixes=V3-GFX705 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx801 < %s | FileCheck --check-prefixes=V3-GFX801-XNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx801 -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX801-NOXNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx801 -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX801-XNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=carrizo < %s | FileCheck --check-prefixes=V3-GFX801-XNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=carrizo -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX801-NOXNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=carrizo -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX801-XNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx802 < %s | FileCheck --check-prefixes=V3-GFX802 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=iceland < %s | FileCheck --check-prefixes=V3-GFX802 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tonga < %s | FileCheck --check-prefixes=V3-GFX802 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck --check-prefixes=V3-GFX803 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji < %s | FileCheck --check-prefixes=V3-GFX803 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=polaris10 < %s | FileCheck --check-prefixes=V3-GFX803 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=polaris11 < %s | FileCheck --check-prefixes=V3-GFX803 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx805 < %s | FileCheck --check-prefixes=V3-GFX805 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tongapro < %s | FileCheck --check-prefixes=V3-GFX805 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx810 < %s | FileCheck --check-prefixes=V3-GFX810-XNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx810 -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX810-NOXNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx810 -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX810-XNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=stoney < %s | FileCheck --check-prefixes=V3-GFX810-XNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=stoney -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX810-NOXNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=stoney -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX810-XNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck --check-prefixes=V3-GFX900-XNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX900-NOXNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX900-XNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx902 < %s | FileCheck --check-prefixes=V3-GFX902-XNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx902 -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX902-NOXNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx902 -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX902-XNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx904 < %s | FileCheck --check-prefixes=V3-GFX904-XNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx904 -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX904-NOXNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx904 -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX904-XNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 < %s | FileCheck --check-prefixes=V3-GFX906-SRAMECC-XNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -mattr=-sramecc < %s | FileCheck --check-prefixes=V3-GFX906-NOSRAMECC-XNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -mattr=+sramecc < %s | FileCheck --check-prefixes=V3-GFX906-SRAMECC-XNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX906-SRAMECC-NOXNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX906-SRAMECC-XNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -mattr=-sramecc,-xnack < %s | FileCheck --check-prefixes=V3-GFX906-NOSRAMECC-NOXNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -mattr=+sramecc,-xnack < %s | FileCheck --check-prefixes=V3-GFX906-SRAMECC-NOXNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -mattr=-sramecc,+xnack < %s | FileCheck --check-prefixes=V3-GFX906-NOSRAMECC-XNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -mattr=+sramecc,+xnack < %s | FileCheck --check-prefixes=V3-GFX906-SRAMECC-XNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 < %s | FileCheck --check-prefixes=V3-GFX908-SRAMECC-XNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-sramecc < %s | FileCheck --check-prefixes=V3-GFX908-NOSRAMECC-XNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=+sramecc < %s | FileCheck --check-prefixes=V3-GFX908-SRAMECC-XNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX908-SRAMECC-NOXNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX908-SRAMECC-XNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-sramecc,-xnack < %s | FileCheck --check-prefixes=V3-GFX908-NOSRAMECC-NOXNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=+sramecc,-xnack < %s | FileCheck --check-prefixes=V3-GFX908-SRAMECC-NOXNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-sramecc,+xnack < %s | FileCheck --check-prefixes=V3-GFX908-NOSRAMECC-XNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=+sramecc,+xnack < %s | FileCheck --check-prefixes=V3-GFX908-SRAMECC-XNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx909 < %s | FileCheck --check-prefixes=V3-GFX909-XNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx909 -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX909-NOXNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx909 -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX909-XNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90c < %s | FileCheck --check-prefixes=V3-GFX90C-XNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90c -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX90C-NOXNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90c -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX90C-XNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck --check-prefixes=V3-GFX940-XNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX940-NOXNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX940-XNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 < %s | FileCheck --check-prefixes=V3-GFX1010-XNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX1010-NOXNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX1010-XNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1011 < %s | FileCheck --check-prefixes=V3-GFX1011-XNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1011 -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX1011-NOXNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1011 -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX1011-XNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1012 < %s | FileCheck --check-prefixes=V3-GFX1012-XNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1012 -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX1012-NOXNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1012 -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX1012-XNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1013 < %s | FileCheck --check-prefixes=V3-GFX1013-XNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1013 -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX1013-NOXNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1013 -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX1013-XNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck --check-prefixes=V3-GFX1030 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1031 < %s | FileCheck --check-prefixes=V3-GFX1031 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1032 < %s | FileCheck --check-prefixes=V3-GFX1032 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1033 < %s | FileCheck --check-prefixes=V3-GFX1033 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1034 < %s | FileCheck --check-prefixes=V3-GFX1034 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1035 < %s | FileCheck --check-prefixes=V3-GFX1035 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1036 < %s | FileCheck --check-prefixes=V3-GFX1036 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck --check-prefixes=V3-GFX1100 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1101 < %s | FileCheck --check-prefixes=V3-GFX1101 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1102 < %s | FileCheck --check-prefixes=V3-GFX1102 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1103 < %s | FileCheck --check-prefixes=V3-GFX1103 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1150 < %s | FileCheck --check-prefixes=V3-GFX1150 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1151 < %s | FileCheck --check-prefixes=V3-GFX1151 %s
-
-; V3-GFX600: .amdgcn_target "amdgcn-amd-amdhsa--gfx600"
-; V3-GFX601: .amdgcn_target "amdgcn-amd-amdhsa--gfx601"
-; V3-GFX602: .amdgcn_target "amdgcn-amd-amdhsa--gfx602"
-; V3-GFX700: .amdgcn_target "amdgcn-amd-amdhsa--gfx700"
-; V3-GFX701: .amdgcn_target "amdgcn-amd-amdhsa--gfx701"
-; V3-GFX702: .amdgcn_target "amdgcn-amd-amdhsa--gfx702"
-; V3-GFX703: .amdgcn_target "amdgcn-amd-amdhsa--gfx703"
-; V3-GFX704: .amdgcn_target "amdgcn-amd-amdhsa--gfx704"
-; V3-GFX705: .amdgcn_target "amdgcn-amd-amdhsa--gfx705"
-; V3-GFX801-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx801"
-; V3-GFX801-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx801+xnack"
-; V3-GFX802: .amdgcn_target "amdgcn-amd-amdhsa--gfx802"
-; V3-GFX803: .amdgcn_target "amdgcn-amd-amdhsa--gfx803"
-; V3-GFX805: .amdgcn_target "amdgcn-amd-amdhsa--gfx805"
-; V3-GFX810-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx810"
-; V3-GFX810-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx810+xnack"
-; V3-GFX900-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx900"
-; V3-GFX900-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx900+xnack"
-; V3-GFX902-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx902"
-; V3-GFX902-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx902+xnack"
-; V3-GFX904-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx904"
-; V3-GFX904-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx904+xnack"
-; V3-GFX906-NOSRAMECC-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx906"
-; V3-GFX906-SRAMECC-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx906+sram-ecc"
-; V3-GFX906-NOSRAMECC-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx906+xnack"
-; V3-GFX906-SRAMECC-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx906+xnack+sram-ecc"
-; V3-GFX908-NOSRAMECC-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx908"
-; V3-GFX908-SRAMECC-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx908+sram-ecc"
-; V3-GFX908-NOSRAMECC-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx908+xnack"
-; V3-GFX908-SRAMECC-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx908+xnack+sram-ecc"
-; V3-GFX909-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx909"
-; V3-GFX909-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx909+xnack"
-; V3-GFX90C-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx90c"
-; V3-GFX90C-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx90c+xnack"
-; V3-GFX940-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx940+sram-ecc"
-; V3-GFX940-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx940+xnack+sram-ecc"
-; V3-GFX1010-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1010"
-; V3-GFX1010-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1010+xnack"
-; V3-GFX1011-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1011"
-; V3-GFX1011-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1011+xnack"
-; V3-GFX1012-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1012"
-; V3-GFX1012-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1012+xnack"
-; V3-GFX1013-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1013"
-; V3-GFX1013-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1013+xnack"
-; V3-GFX1030: .amdgcn_target "amdgcn-amd-amdhsa--gfx1030"
-; V3-GFX1031: .amdgcn_target "amdgcn-amd-amdhsa--gfx1031"
-; V3-GFX1032: .amdgcn_target "amdgcn-amd-amdhsa--gfx1032"
-; V3-GFX1033: .amdgcn_target "amdgcn-amd-amdhsa--gfx1033"
-; V3-GFX1034: .amdgcn_target "amdgcn-amd-amdhsa--gfx1034"
-; V3-GFX1035: .amdgcn_target "amdgcn-amd-amdhsa--gfx1035"
-; V3-GFX1036: .amdgcn_target "amdgcn-amd-amdhsa--gfx1036"
-; V3-GFX1100: .amdgcn_target "amdgcn-amd-amdhsa--gfx1100"
-; V3-GFX1101: .amdgcn_target "amdgcn-amd-amdhsa--gfx1101"
-; V3-GFX1102: .amdgcn_target "amdgcn-amd-amdhsa--gfx1102"
-; V3-GFX1103: .amdgcn_target "amdgcn-amd-amdhsa--gfx1103"
-; V3-GFX1150: .amdgcn_target "amdgcn-amd-amdhsa--gfx1150"
-; V3-GFX1151: .amdgcn_target "amdgcn-amd-amdhsa--gfx1151"
-
-
-
-define amdgpu_kernel void @directive_amdgcn_target() {
- ret void
-}
-
-!llvm.module.flags = !{!0}
-!0 = !{i32 1, !"amdgpu_code_object_version", i32 300}
diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-enqueue-kernel-v3.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-enqueue-kernel-.ll
similarity index 98%
rename from llvm/test/CodeGen/AMDGPU/hsa-metadata-enqueue-kernel-v3.ll
rename to llvm/test/CodeGen/AMDGPU/hsa-metadata-enqueue-kernel-.ll
index 37b124e7f59a09f..042abe382283a65 100644
--- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-enqueue-kernel-v3.ll
+++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-enqueue-kernel-.ll
@@ -142,7 +142,7 @@ define amdgpu_kernel void @test_no_default_queue(i8 %a) #3
; CHECK: amdhsa.version:
; CHECK-NEXT: - 1
-; CHECK-NEXT: - 0
+; CHECK-NEXT: - 1
; CHECK-NOT: amdhsa.printf:
attributes #0 = { optnone noinline "amdgpu-no-default-queue" "amdgpu-no-completion-action" "amdgpu-implicitarg-num-bytes"="48" }
@@ -151,7 +151,7 @@ attributes #2 = { optnone noinline "amdgpu-no-completion-action" "amdgpu-implici
attributes #3 = { optnone noinline "amdgpu-no-default-queue" "amdgpu-implicitarg-num-bytes"="48" }
!llvm.module.flags = !{!0}
-!0 = !{i32 1, !"amdgpu_code_object_version", i32 300}
+!0 = !{i32 1, !"amdgpu_code_object_version", i32 400}
!1 = !{i32 0}
!2 = !{!"none"}
diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ctor-dtor-list.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ctor-dtor-list.ll
index 8e8023aa16f1303..fb08fd2c45085a1 100644
--- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ctor-dtor-list.ll
+++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ctor-dtor-list.ll
@@ -39,4 +39,4 @@ define internal void @bar.5() {
; PARSER: AMDGPU HSA Metadata Parser Test: PASS
!llvm.module.flags = !{!0}
-!0 = !{i32 1, !"amdgpu_code_object_version", i32 300}
+!0 = !{i32 1, !"amdgpu_code_object_version", i32 400}
diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full-v3.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full.ll
similarity index 99%
rename from llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full-v3.ll
rename to llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full.ll
index 69efc47008e6aad..dc3a6e8b633b233 100644
--- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full-v3.ll
+++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full.ll
@@ -1739,14 +1739,14 @@ define amdgpu_kernel void @unknown_addrspace_kernarg(ptr addrspace(12345) %ptr)
; CHECK-NEXT: - '2:1:8:%g\n'
; CHECK: amdhsa.version:
; CHECK-NEXT: - 1
-; CHECK-NEXT: - 0
+; CHECK-NEXT: - 1
attributes #0 = { optnone noinline "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-implicitarg-num-bytes"="56" }
attributes #1 = { optnone noinline "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-implicitarg-num-bytes"="56" "runtime-handle"="__test_block_invoke_kernel_runtime_handle" }
attributes #2 = { optnone noinline "amdgpu-implicitarg-num-bytes"="56" }
!llvm.module.flags = !{!0}
-!0 = !{i32 1, !"amdgpu_code_object_version", i32 300}
+!0 = !{i32 1, !"amdgpu_code_object_version", i32 400}
!llvm.printf.fmts = !{!100, !101}
diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args-v3.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args-v4.ll
similarity index 99%
rename from llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args-v3.ll
rename to llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args-v4.ll
index 47b882494c9191a..f4892ebdc9c9374 100644
--- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args-v3.ll
+++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args-v4.ll
@@ -286,7 +286,7 @@ entry:
; CHECK: amdhsa.version:
; CHECK-NEXT: - 1
-; CHECK-NEXT: - 0
+; CHECK-NEXT: - 1
; We don't have a use of llvm.amdgcn.implicitarg.ptr, so optnone to
; avoid optimizing out the implicit argument allocation.
@@ -298,4 +298,4 @@ attributes #4 = { optnone noinline "amdgpu-implicitarg-num-bytes"="48" }
attributes #5 = { optnone noinline "amdgpu-implicitarg-num-bytes"="56" }
!llvm.module.flags = !{!0}
-!0 = !{i32 1, !"amdgpu_code_object_version", i32 300}
+!0 = !{i32 1, !"amdgpu_code_object_version", i32 400}
diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-present-v3-asan.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-present-asan.ll
similarity index 96%
rename from llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-present-v3-asan.ll
rename to llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-present-asan.ll
index cb3ae289721bc17..22c6e147762200d 100644
--- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-present-v3-asan.ll
+++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-present-asan.ll
@@ -35,12 +35,12 @@ define amdgpu_kernel void @test_kernel(i8 %a) #0
; CHECK: amdhsa.version:
; CHECK-NEXT: - 1
-; CHECK-NEXT: - 0
+; CHECK-NEXT: - 1
attributes #0 = { sanitize_address "amdgpu-implicitarg-num-bytes"="48" }
!llvm.module.flags = !{!0}
-!0 = !{i32 1, !"amdgpu_code_object_version", i32 300}
+!0 = !{i32 1, !"amdgpu_code_object_version", i32 400}
!1 = !{i32 0}
!2 = !{!"none"}
!3 = !{!"char"}
diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-v3.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-v4.ll
similarity index 99%
rename from llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-v3.ll
rename to llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-v4.ll
index a3f8c5cff95df85..8f90025fe8e29c1 100644
--- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-v3.ll
+++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-v4.ll
@@ -296,4 +296,4 @@ attributes #3 = { "amdgpu-implicitarg-num-bytes"="48" "amdgpu-no-hostcall-ptr" }
attributes #4 = { noinline }
!llvm.module.flags = !{!0}
-!0 = !{i32 1, !"amdgpu_code_object_version", i32 300}
+!0 = !{i32 1, !"amdgpu_code_object_version", i32 400}
diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-images-v3.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-images.ll
similarity index 98%
rename from llvm/test/CodeGen/AMDGPU/hsa-metadata-images-v3.ll
rename to llvm/test/CodeGen/AMDGPU/hsa-metadata-images.ll
index b7f58bbb51bb29d..6d49f22eb429b24 100644
--- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-images-v3.ll
+++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-images.ll
@@ -96,10 +96,10 @@ define amdgpu_kernel void @test(ptr addrspace(1) %a,
; CHECK: amdhsa.version:
; CHECK-NEXT: - 1
-; CHECK-NEXT: - 0
+; CHECK-NEXT: - 1
!llvm.module.flags = !{!0}
-!0 = !{i32 1, !"amdgpu_code_object_version", i32 300}
+!0 = !{i32 1, !"amdgpu_code_object_version", i32 400}
!1 = !{!"image1d_t", !"image1d_array_t", !"image1d_buffer_t",
!"image2d_t", !"image2d_array_t", !"image2d_array_depth_t",
diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-1-v3.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-1.ll
similarity index 80%
rename from llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-1-v3.ll
rename to llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-1.ll
index 8117037baaffc9d..fc5e6e273125334 100644
--- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-1-v3.ll
+++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-1.ll
@@ -5,9 +5,9 @@
; CHECK: ---
; CHECK: amdhsa.version:
; CHECK-NEXT: - 1
-; CHECK-NEXT: - 0
+; CHECK-NEXT: - 1
; CHECK: ...
!opencl.ocl.version = !{}
!llvm.module.flags = !{!0}
-!0 = !{i32 1, !"amdgpu_code_object_version", i32 300}
+!0 = !{i32 1, !"amdgpu_code_object_version", i32 400}
diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-3-v3.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-3.ll
similarity index 81%
rename from llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-3-v3.ll
rename to llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-3.ll
index ea744863a9b8875..1ec79c95bc2a308 100644
--- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-3-v3.ll
+++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-3.ll
@@ -5,10 +5,10 @@
; CHECK: ---
; CHECK: amdhsa.version:
; CHECK-NEXT: - 1
-; CHECK-NEXT: - 0
+; CHECK-NEXT: - 1
; CHECK: ...
!opencl.ocl.version = !{!0}
!llvm.module.flags = !{!1}
!0 = !{i32 1}
-!1 = !{i32 1, !"amdgpu_code_object_version", i32 300}
+!1 = !{i32 1, !"amdgpu_code_object_version", i32 400}
diff --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-kernel-code-props-v3.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-kernel-code-props.ll
similarity index 99%
rename from llvm/test/CodeGen/AMDGPU/hsa-metadata-kernel-code-props-v3.ll
rename to llvm/test/CodeGen/AMDGPU/hsa-metadata-kernel-code-props.ll
index d6f7a92af9dcb6f..e45c4d1786faf67 100644
--- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-kernel-code-props-v3.ll
+++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-kernel-code-props.ll
@@ -158,11 +158,11 @@ define amdgpu_kernel void @num_spilled_vgprs() #1 {
; CHECK: amdhsa.version:
; CHECK-NEXT: - 1
-; CHECK-NEXT: - 0
+; CHECK-NEXT: - 1
attributes #0 = { "amdgpu-num-sgpr"="14" }
attributes #1 = { "amdgpu-num-vgpr"="20" }
attributes #2 = { "amdgpu-flat-work-group-size"="1,256" }
!llvm.module.flags = !{!0}
-!0 = !{i32 1, !"amdgpu_code_object_version", i32 300}
+!0 = !{i32 1, !"amdgpu_code_object_version", i32 400}
diff --git a/llvm/test/CodeGen/AMDGPU/implicit-kernarg-backend-usage.ll b/llvm/test/CodeGen/AMDGPU/implicit-kernarg-backend-usage.ll
index 9760e93eb48e6bd..9e6c0ef86906ddb 100644
--- a/llvm/test/CodeGen/AMDGPU/implicit-kernarg-backend-usage.ll
+++ b/llvm/test/CodeGen/AMDGPU/implicit-kernarg-backend-usage.ll
@@ -1,36 +1,11 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: sed 's/CODE_OBJECT_VERSION/300/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 | FileCheck --check-prefix=GFX8V3 %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 | FileCheck --check-prefix=GFX8V4 %s
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 | FileCheck --check-prefix=GFX8V5 %s
-; RUN: sed 's/CODE_OBJECT_VERSION/300/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 | FileCheck --check-prefixes=GFX9V3 %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 | FileCheck --check-prefixes=GFX9V4 %s
; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 | FileCheck --check-prefixes=GFX9V5 %s
define amdgpu_kernel void @addrspacecast(ptr addrspace(5) %ptr.private, ptr addrspace(3) %ptr.local) {
-; GFX8V3-LABEL: addrspacecast:
-; GFX8V3: ; %bb.0:
-; GFX8V3-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
-; GFX8V3-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x40
-; GFX8V3-NEXT: v_mov_b32_e32 v4, 1
-; GFX8V3-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8V3-NEXT: s_cmp_lg_u32 s0, -1
-; GFX8V3-NEXT: s_cselect_b32 s3, s3, 0
-; GFX8V3-NEXT: s_cselect_b32 s0, s0, 0
-; GFX8V3-NEXT: s_cmp_lg_u32 s1, -1
-; GFX8V3-NEXT: v_mov_b32_e32 v0, s0
-; GFX8V3-NEXT: v_mov_b32_e32 v1, s3
-; GFX8V3-NEXT: s_cselect_b32 s0, s2, 0
-; GFX8V3-NEXT: s_cselect_b32 s1, s1, 0
-; GFX8V3-NEXT: v_mov_b32_e32 v2, s1
-; GFX8V3-NEXT: v_mov_b32_e32 v3, s0
-; GFX8V3-NEXT: flat_store_dword v[0:1], v4
-; GFX8V3-NEXT: s_waitcnt vmcnt(0)
-; GFX8V3-NEXT: v_mov_b32_e32 v0, 2
-; GFX8V3-NEXT: flat_store_dword v[2:3], v0
-; GFX8V3-NEXT: s_waitcnt vmcnt(0)
-; GFX8V3-NEXT: s_endpgm
-;
; GFX8V4-LABEL: addrspacecast:
; GFX8V4: ; %bb.0:
; GFX8V4-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
@@ -77,30 +52,6 @@ define amdgpu_kernel void @addrspacecast(ptr addrspace(5) %ptr.private, ptr addr
; GFX8V5-NEXT: s_waitcnt vmcnt(0)
; GFX8V5-NEXT: s_endpgm
;
-; GFX9V3-LABEL: addrspacecast:
-; GFX9V3: ; %bb.0:
-; GFX9V3-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
-; GFX9V3-NEXT: s_mov_b64 s[2:3], src_private_base
-; GFX9V3-NEXT: s_mov_b64 s[4:5], src_shared_base
-; GFX9V3-NEXT: v_mov_b32_e32 v4, 1
-; GFX9V3-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9V3-NEXT: s_cmp_lg_u32 s0, -1
-; GFX9V3-NEXT: s_cselect_b32 s2, s3, 0
-; GFX9V3-NEXT: s_cselect_b32 s0, s0, 0
-; GFX9V3-NEXT: s_cmp_lg_u32 s1, -1
-; GFX9V3-NEXT: v_mov_b32_e32 v0, s0
-; GFX9V3-NEXT: v_mov_b32_e32 v1, s2
-; GFX9V3-NEXT: s_cselect_b32 s0, s5, 0
-; GFX9V3-NEXT: s_cselect_b32 s1, s1, 0
-; GFX9V3-NEXT: v_mov_b32_e32 v2, s1
-; GFX9V3-NEXT: v_mov_b32_e32 v3, s0
-; GFX9V3-NEXT: flat_store_dword v[0:1], v4
-; GFX9V3-NEXT: s_waitcnt vmcnt(0)
-; GFX9V3-NEXT: v_mov_b32_e32 v0, 2
-; GFX9V3-NEXT: flat_store_dword v[2:3], v0
-; GFX9V3-NEXT: s_waitcnt vmcnt(0)
-; GFX9V3-NEXT: s_endpgm
-;
; GFX9V4-LABEL: addrspacecast:
; GFX9V4: ; %bb.0:
; GFX9V4-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -156,18 +107,6 @@ define amdgpu_kernel void @addrspacecast(ptr addrspace(5) %ptr.private, ptr addr
}
define amdgpu_kernel void @llvm_amdgcn_is_shared(ptr %ptr) {
-; GFX8V3-LABEL: llvm_amdgcn_is_shared:
-; GFX8V3: ; %bb.0:
-; GFX8V3-NEXT: s_load_dword s0, s[4:5], 0x40
-; GFX8V3-NEXT: s_load_dword s1, s[6:7], 0x4
-; GFX8V3-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8V3-NEXT: s_cmp_eq_u32 s1, s0
-; GFX8V3-NEXT: s_cselect_b64 s[0:1], -1, 0
-; GFX8V3-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
-; GFX8V3-NEXT: flat_store_dword v[0:1], v0
-; GFX8V3-NEXT: s_waitcnt vmcnt(0)
-; GFX8V3-NEXT: s_endpgm
-;
; GFX8V4-LABEL: llvm_amdgcn_is_shared:
; GFX8V4: ; %bb.0:
; GFX8V4-NEXT: s_load_dword s0, s[4:5], 0x40
@@ -192,18 +131,6 @@ define amdgpu_kernel void @llvm_amdgcn_is_shared(ptr %ptr) {
; GFX8V5-NEXT: s_waitcnt vmcnt(0)
; GFX8V5-NEXT: s_endpgm
;
-; GFX9V3-LABEL: llvm_amdgcn_is_shared:
-; GFX9V3: ; %bb.0:
-; GFX9V3-NEXT: s_load_dword s2, s[4:5], 0x4
-; GFX9V3-NEXT: s_mov_b64 s[0:1], src_shared_base
-; GFX9V3-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9V3-NEXT: s_cmp_eq_u32 s2, s1
-; GFX9V3-NEXT: s_cselect_b64 s[0:1], -1, 0
-; GFX9V3-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
-; GFX9V3-NEXT: global_store_dword v[0:1], v0, off
-; GFX9V3-NEXT: s_waitcnt vmcnt(0)
-; GFX9V3-NEXT: s_endpgm
-;
; GFX9V4-LABEL: llvm_amdgcn_is_shared:
; GFX9V4: ; %bb.0:
; GFX9V4-NEXT: s_load_dword s2, s[4:5], 0x4
@@ -234,18 +161,6 @@ define amdgpu_kernel void @llvm_amdgcn_is_shared(ptr %ptr) {
}
define amdgpu_kernel void @llvm_amdgcn_is_private(ptr %ptr) {
-; GFX8V3-LABEL: llvm_amdgcn_is_private:
-; GFX8V3: ; %bb.0:
-; GFX8V3-NEXT: s_load_dword s0, s[4:5], 0x44
-; GFX8V3-NEXT: s_load_dword s1, s[6:7], 0x4
-; GFX8V3-NEXT: s_waitcnt lgkmcnt(0)
-; GFX8V3-NEXT: s_cmp_eq_u32 s1, s0
-; GFX8V3-NEXT: s_cselect_b64 s[0:1], -1, 0
-; GFX8V3-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
-; GFX8V3-NEXT: flat_store_dword v[0:1], v0
-; GFX8V3-NEXT: s_waitcnt vmcnt(0)
-; GFX8V3-NEXT: s_endpgm
-;
; GFX8V4-LABEL: llvm_amdgcn_is_private:
; GFX8V4: ; %bb.0:
; GFX8V4-NEXT: s_load_dword s0, s[4:5], 0x44
@@ -270,18 +185,6 @@ define amdgpu_kernel void @llvm_amdgcn_is_private(ptr %ptr) {
; GFX8V5-NEXT: s_waitcnt vmcnt(0)
; GFX8V5-NEXT: s_endpgm
;
-; GFX9V3-LABEL: llvm_amdgcn_is_private:
-; GFX9V3: ; %bb.0:
-; GFX9V3-NEXT: s_load_dword s2, s[4:5], 0x4
-; GFX9V3-NEXT: s_mov_b64 s[0:1], src_private_base
-; GFX9V3-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9V3-NEXT: s_cmp_eq_u32 s2, s1
-; GFX9V3-NEXT: s_cselect_b64 s[0:1], -1, 0
-; GFX9V3-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
-; GFX9V3-NEXT: global_store_dword v[0:1], v0, off
-; GFX9V3-NEXT: s_waitcnt vmcnt(0)
-; GFX9V3-NEXT: s_endpgm
-;
; GFX9V4-LABEL: llvm_amdgcn_is_private:
; GFX9V4: ; %bb.0:
; GFX9V4-NEXT: s_load_dword s2, s[4:5], 0x4
@@ -312,11 +215,6 @@ define amdgpu_kernel void @llvm_amdgcn_is_private(ptr %ptr) {
}
define amdgpu_kernel void @llvm_trap() {
-; GFX8V3-LABEL: llvm_trap:
-; GFX8V3: ; %bb.0:
-; GFX8V3-NEXT: s_mov_b64 s[0:1], s[4:5]
-; GFX8V3-NEXT: s_trap 2
-;
; GFX8V4-LABEL: llvm_trap:
; GFX8V4: ; %bb.0:
; GFX8V4-NEXT: s_mov_b64 s[0:1], s[4:5]
@@ -328,11 +226,6 @@ define amdgpu_kernel void @llvm_trap() {
; GFX8V5-NEXT: s_waitcnt lgkmcnt(0)
; GFX8V5-NEXT: s_trap 2
;
-; GFX9V3-LABEL: llvm_trap:
-; GFX9V3: ; %bb.0:
-; GFX9V3-NEXT: s_mov_b64 s[0:1], s[4:5]
-; GFX9V3-NEXT: s_trap 2
-;
; GFX9V4-LABEL: llvm_trap:
; GFX9V4: ; %bb.0:
; GFX9V4-NEXT: s_trap 2
@@ -345,10 +238,6 @@ define amdgpu_kernel void @llvm_trap() {
}
define amdgpu_kernel void @llvm_debugtrap() {
-; GFX8V3-LABEL: llvm_debugtrap:
-; GFX8V3: ; %bb.0:
-; GFX8V3-NEXT: s_trap 3
-;
; GFX8V4-LABEL: llvm_debugtrap:
; GFX8V4: ; %bb.0:
; GFX8V4-NEXT: s_trap 3
@@ -357,10 +246,6 @@ define amdgpu_kernel void @llvm_debugtrap() {
; GFX8V5: ; %bb.0:
; GFX8V5-NEXT: s_trap 3
;
-; GFX9V3-LABEL: llvm_debugtrap:
-; GFX9V3: ; %bb.0:
-; GFX9V3-NEXT: s_trap 3
-;
; GFX9V4-LABEL: llvm_debugtrap:
; GFX9V4: ; %bb.0:
; GFX9V4-NEXT: s_trap 3
@@ -373,31 +258,6 @@ define amdgpu_kernel void @llvm_debugtrap() {
}
define amdgpu_kernel void @llvm_amdgcn_queue_ptr(ptr addrspace(1) %ptr) {
-; GFX8V3-LABEL: llvm_amdgcn_queue_ptr:
-; GFX8V3: ; %bb.0:
-; GFX8V3-NEXT: v_mov_b32_e32 v0, s6
-; GFX8V3-NEXT: v_mov_b32_e32 v1, s7
-; GFX8V3-NEXT: s_add_u32 s0, s8, 8
-; GFX8V3-NEXT: flat_load_ubyte v0, v[0:1] glc
-; GFX8V3-NEXT: s_addc_u32 s1, s9, 0
-; GFX8V3-NEXT: s_waitcnt vmcnt(0)
-; GFX8V3-NEXT: v_mov_b32_e32 v0, s0
-; GFX8V3-NEXT: v_mov_b32_e32 v1, s1
-; GFX8V3-NEXT: flat_load_ubyte v0, v[0:1] glc
-; GFX8V3-NEXT: s_waitcnt vmcnt(0)
-; GFX8V3-NEXT: v_mov_b32_e32 v0, s4
-; GFX8V3-NEXT: v_mov_b32_e32 v1, s5
-; GFX8V3-NEXT: flat_load_ubyte v0, v[0:1] glc
-; GFX8V3-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
-; GFX8V3-NEXT: v_mov_b32_e32 v2, s10
-; GFX8V3-NEXT: v_mov_b32_e32 v3, s11
-; GFX8V3-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX8V3-NEXT: v_mov_b32_e32 v0, s0
-; GFX8V3-NEXT: v_mov_b32_e32 v1, s1
-; GFX8V3-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
-; GFX8V3-NEXT: s_waitcnt vmcnt(0)
-; GFX8V3-NEXT: s_endpgm
-;
; GFX8V4-LABEL: llvm_amdgcn_queue_ptr:
; GFX8V4: ; %bb.0:
; GFX8V4-NEXT: v_mov_b32_e32 v0, s6
@@ -446,23 +306,6 @@ define amdgpu_kernel void @llvm_amdgcn_queue_ptr(ptr addrspace(1) %ptr) {
; GFX8V5-NEXT: s_waitcnt vmcnt(0)
; GFX8V5-NEXT: s_endpgm
;
-; GFX9V3-LABEL: llvm_amdgcn_queue_ptr:
-; GFX9V3: ; %bb.0:
-; GFX9V3-NEXT: v_mov_b32_e32 v2, 0
-; GFX9V3-NEXT: global_load_ubyte v0, v2, s[6:7] glc
-; GFX9V3-NEXT: global_load_ubyte v0, v2, s[8:9] offset:8 glc
-; GFX9V3-NEXT: global_load_ubyte v0, v2, s[4:5] glc
-; GFX9V3-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0
-; GFX9V3-NEXT: s_waitcnt vmcnt(0)
-; GFX9V3-NEXT: v_mov_b32_e32 v0, s10
-; GFX9V3-NEXT: v_mov_b32_e32 v1, s11
-; GFX9V3-NEXT: ; kill: killed $sgpr6_sgpr7
-; GFX9V3-NEXT: ; kill: killed $sgpr4_sgpr5
-; GFX9V3-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9V3-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX9V3-NEXT: s_waitcnt vmcnt(0)
-; GFX9V3-NEXT: s_endpgm
-;
; GFX9V4-LABEL: llvm_amdgcn_queue_ptr:
; GFX9V4: ; %bb.0:
; GFX9V4-NEXT: v_mov_b32_e32 v2, 0
diff --git a/llvm/test/CodeGen/AMDGPU/kernarg-size.ll b/llvm/test/CodeGen/AMDGPU/kernarg-size.ll
index 0353e7ee49ab953..a04fe28dbffffab 100644
--- a/llvm/test/CodeGen/AMDGPU/kernarg-size.ll
+++ b/llvm/test/CodeGen/AMDGPU/kernarg-size.ll
@@ -1,17 +1,8 @@
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck --check-prefix=DOORBELL %s
; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck --check-prefix=DOORBELL %s
-; RUN: sed 's/CODE_OBJECT_VERSION/300/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck --check-prefix=HSA %s
declare void @llvm.trap() #0
-; HSA: .amdhsa_kernel trap
-; HSA-NEXT: .amdhsa_group_segment_fixed_size 0
-; HSA-NEXT: .amdhsa_private_segment_fixed_size 0
-; HSA-NEXT: .amdhsa_kernarg_size 8
-; HSA-NEXT: .amdhsa_user_sgpr_count 8
-; HSA-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1
-; HSA: .end_amdhsa_kernel
-
; DOORBELL: .amdhsa_kernel trap
; DOORBELL-NEXT: .amdhsa_group_segment_fixed_size 0
; DOORBELL-NEXT: .amdhsa_private_segment_fixed_size 0
diff --git a/llvm/test/CodeGen/AMDGPU/stack-realign-kernel.ll b/llvm/test/CodeGen/AMDGPU/stack-realign-kernel.ll
index 792ec2675247f61..9ed896c148e6485 100644
--- a/llvm/test/CodeGen/AMDGPU/stack-realign-kernel.ll
+++ b/llvm/test/CodeGen/AMDGPU/stack-realign-kernel.ll
@@ -317,4 +317,4 @@ attributes #1 = { nounwind "stackrealign" }
attributes #2 = { nounwind alignstack=128 }
!llvm.module.flags = !{!0}
-!0 = !{i32 1, !"amdgpu_code_object_version", i32 300}
+!0 = !{i32 1, !"amdgpu_code_object_version", i32 400}
diff --git a/llvm/test/CodeGen/AMDGPU/trap-abis.ll b/llvm/test/CodeGen/AMDGPU/trap-abis.ll
index 03ea582698486ce..54a15513cf0a506 100644
--- a/llvm/test/CodeGen/AMDGPU/trap-abis.ll
+++ b/llvm/test/CodeGen/AMDGPU/trap-abis.ll
@@ -1,101 +1,54 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: sed 's/CODE_OBJECT_VERSION/300/g' %s | llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck --check-prefix=NOHSA-TRAP-GFX900-V3 %s
-; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck --check-prefix=NOHSA-TRAP-GFX900-V4 %s
-; RUN: sed 's/CODE_OBJECT_VERSION/300/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs | FileCheck --check-prefix=HSA-TRAP-GFX803-V3 %s
-; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs | FileCheck --check-prefix=HSA-TRAP-GFX803-V4 %s
-; RUN: sed 's/CODE_OBJECT_VERSION/300/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs | FileCheck --check-prefix=HSA-TRAP-GFX900-V3 %s
-; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs | FileCheck --check-prefix=HSA-TRAP-GFX900-V4 %s
-; RUN: sed 's/CODE_OBJECT_VERSION/300/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-trap-handler -verify-machineinstrs | FileCheck --check-prefix=HSA-NOTRAP-GFX900-V3 %s
-; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-trap-handler -verify-machineinstrs | FileCheck --check-prefix=HSA-NOTRAP-GFX900-V4 %s
+; RUN: llc %s -o - -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck --check-prefix=NOHSA-TRAP-GFX900 %s
+; RUN: llc %s -o - -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs | FileCheck --check-prefix=HSA-TRAP-GFX803 %s
+; RUN: llc %s -o - -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs | FileCheck --check-prefix=HSA-TRAP-GFX900 %s
+; RUN: llc %s -o - -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-trap-handler -verify-machineinstrs | FileCheck --check-prefix=HSA-NOTRAP-GFX900 %s
declare void @llvm.trap() #0
declare void @llvm.debugtrap() #1
define amdgpu_kernel void @trap(ptr addrspace(1) nocapture readonly %arg0) {
-; NOHSA-TRAP-GFX900-V3-LABEL: trap:
-; NOHSA-TRAP-GFX900-V3: ; %bb.0:
-; NOHSA-TRAP-GFX900-V3-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24
-; NOHSA-TRAP-GFX900-V3-NEXT: v_mov_b32_e32 v0, 0
-; NOHSA-TRAP-GFX900-V3-NEXT: v_mov_b32_e32 v1, 1
-; NOHSA-TRAP-GFX900-V3-NEXT: s_waitcnt lgkmcnt(0)
-; NOHSA-TRAP-GFX900-V3-NEXT: global_store_dword v0, v1, s[0:1]
-; NOHSA-TRAP-GFX900-V3-NEXT: s_waitcnt vmcnt(0)
-; NOHSA-TRAP-GFX900-V3-NEXT: s_endpgm
-;
-; NOHSA-TRAP-GFX900-V4-LABEL: trap:
-; NOHSA-TRAP-GFX900-V4: ; %bb.0:
-; NOHSA-TRAP-GFX900-V4-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
-; NOHSA-TRAP-GFX900-V4-NEXT: v_mov_b32_e32 v0, 0
-; NOHSA-TRAP-GFX900-V4-NEXT: v_mov_b32_e32 v1, 1
-; NOHSA-TRAP-GFX900-V4-NEXT: s_waitcnt lgkmcnt(0)
-; NOHSA-TRAP-GFX900-V4-NEXT: global_store_dword v0, v1, s[0:1]
-; NOHSA-TRAP-GFX900-V4-NEXT: s_waitcnt vmcnt(0)
-; NOHSA-TRAP-GFX900-V4-NEXT: s_endpgm
-;
-; HSA-TRAP-GFX803-V3-LABEL: trap:
-; HSA-TRAP-GFX803-V3: ; %bb.0:
-; HSA-TRAP-GFX803-V3-NEXT: s_load_dwordx2 s[2:3], s[6:7], 0x0
-; HSA-TRAP-GFX803-V3-NEXT: v_mov_b32_e32 v2, 1
-; HSA-TRAP-GFX803-V3-NEXT: s_mov_b64 s[0:1], s[4:5]
-; HSA-TRAP-GFX803-V3-NEXT: s_waitcnt lgkmcnt(0)
-; HSA-TRAP-GFX803-V3-NEXT: v_mov_b32_e32 v0, s2
-; HSA-TRAP-GFX803-V3-NEXT: v_mov_b32_e32 v1, s3
-; HSA-TRAP-GFX803-V3-NEXT: flat_store_dword v[0:1], v2
-; HSA-TRAP-GFX803-V3-NEXT: s_waitcnt vmcnt(0)
-; HSA-TRAP-GFX803-V3-NEXT: s_trap 2
-;
-; HSA-TRAP-GFX803-V4-LABEL: trap:
-; HSA-TRAP-GFX803-V4: ; %bb.0:
-; HSA-TRAP-GFX803-V4-NEXT: s_load_dwordx2 s[2:3], s[6:7], 0x0
-; HSA-TRAP-GFX803-V4-NEXT: v_mov_b32_e32 v2, 1
-; HSA-TRAP-GFX803-V4-NEXT: s_mov_b64 s[0:1], s[4:5]
-; HSA-TRAP-GFX803-V4-NEXT: s_waitcnt lgkmcnt(0)
-; HSA-TRAP-GFX803-V4-NEXT: v_mov_b32_e32 v0, s2
-; HSA-TRAP-GFX803-V4-NEXT: v_mov_b32_e32 v1, s3
-; HSA-TRAP-GFX803-V4-NEXT: flat_store_dword v[0:1], v2
-; HSA-TRAP-GFX803-V4-NEXT: s_waitcnt vmcnt(0)
-; HSA-TRAP-GFX803-V4-NEXT: s_trap 2
-;
-; HSA-TRAP-GFX900-V3-LABEL: trap:
-; HSA-TRAP-GFX900-V3: ; %bb.0:
-; HSA-TRAP-GFX900-V3-NEXT: s_load_dwordx2 s[2:3], s[6:7], 0x0
-; HSA-TRAP-GFX900-V3-NEXT: v_mov_b32_e32 v0, 0
-; HSA-TRAP-GFX900-V3-NEXT: v_mov_b32_e32 v1, 1
-; HSA-TRAP-GFX900-V3-NEXT: s_mov_b64 s[0:1], s[4:5]
-; HSA-TRAP-GFX900-V3-NEXT: s_waitcnt lgkmcnt(0)
-; HSA-TRAP-GFX900-V3-NEXT: global_store_dword v0, v1, s[2:3]
-; HSA-TRAP-GFX900-V3-NEXT: s_waitcnt vmcnt(0)
-; HSA-TRAP-GFX900-V3-NEXT: s_trap 2
-;
-; HSA-TRAP-GFX900-V4-LABEL: trap:
-; HSA-TRAP-GFX900-V4: ; %bb.0:
-; HSA-TRAP-GFX900-V4-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
-; HSA-TRAP-GFX900-V4-NEXT: v_mov_b32_e32 v0, 0
-; HSA-TRAP-GFX900-V4-NEXT: v_mov_b32_e32 v1, 1
-; HSA-TRAP-GFX900-V4-NEXT: s_waitcnt lgkmcnt(0)
-; HSA-TRAP-GFX900-V4-NEXT: global_store_dword v0, v1, s[0:1]
-; HSA-TRAP-GFX900-V4-NEXT: s_waitcnt vmcnt(0)
-; HSA-TRAP-GFX900-V4-NEXT: s_trap 2
-;
-; HSA-NOTRAP-GFX900-V3-LABEL: trap:
-; HSA-NOTRAP-GFX900-V3: ; %bb.0:
-; HSA-NOTRAP-GFX900-V3-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
-; HSA-NOTRAP-GFX900-V3-NEXT: v_mov_b32_e32 v0, 0
-; HSA-NOTRAP-GFX900-V3-NEXT: v_mov_b32_e32 v1, 1
-; HSA-NOTRAP-GFX900-V3-NEXT: s_waitcnt lgkmcnt(0)
-; HSA-NOTRAP-GFX900-V3-NEXT: global_store_dword v0, v1, s[0:1]
-; HSA-NOTRAP-GFX900-V3-NEXT: s_waitcnt vmcnt(0)
-; HSA-NOTRAP-GFX900-V3-NEXT: s_endpgm
-;
-; HSA-NOTRAP-GFX900-V4-LABEL: trap:
-; HSA-NOTRAP-GFX900-V4: ; %bb.0:
-; HSA-NOTRAP-GFX900-V4-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
-; HSA-NOTRAP-GFX900-V4-NEXT: v_mov_b32_e32 v0, 0
-; HSA-NOTRAP-GFX900-V4-NEXT: v_mov_b32_e32 v1, 1
-; HSA-NOTRAP-GFX900-V4-NEXT: s_waitcnt lgkmcnt(0)
-; HSA-NOTRAP-GFX900-V4-NEXT: global_store_dword v0, v1, s[0:1]
-; HSA-NOTRAP-GFX900-V4-NEXT: s_waitcnt vmcnt(0)
-; HSA-NOTRAP-GFX900-V4-NEXT: s_endpgm
+; NOHSA-TRAP-GFX900-LABEL: trap:
+; NOHSA-TRAP-GFX900: ; %bb.0:
+; NOHSA-TRAP-GFX900-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; NOHSA-TRAP-GFX900-NEXT: v_mov_b32_e32 v0, 0
+; NOHSA-TRAP-GFX900-NEXT: v_mov_b32_e32 v1, 1
+; NOHSA-TRAP-GFX900-NEXT: s_waitcnt lgkmcnt(0)
+; NOHSA-TRAP-GFX900-NEXT: global_store_dword v0, v1, s[0:1]
+; NOHSA-TRAP-GFX900-NEXT: s_waitcnt vmcnt(0)
+; NOHSA-TRAP-GFX900-NEXT: s_endpgm
+;
+; HSA-TRAP-GFX803-LABEL: trap:
+; HSA-TRAP-GFX803: ; %bb.0:
+; HSA-TRAP-GFX803-NEXT: s_load_dwordx2 s[2:3], s[6:7], 0x0
+; HSA-TRAP-GFX803-NEXT: v_mov_b32_e32 v2, 1
+; HSA-TRAP-GFX803-NEXT: s_mov_b64 s[0:1], s[4:5]
+; HSA-TRAP-GFX803-NEXT: s_waitcnt lgkmcnt(0)
+; HSA-TRAP-GFX803-NEXT: v_mov_b32_e32 v0, s2
+; HSA-TRAP-GFX803-NEXT: v_mov_b32_e32 v1, s3
+; HSA-TRAP-GFX803-NEXT: flat_store_dword v[0:1], v2
+; HSA-TRAP-GFX803-NEXT: s_waitcnt vmcnt(0)
+; HSA-TRAP-GFX803-NEXT: s_trap 2
+;
+; HSA-TRAP-GFX900-LABEL: trap:
+; HSA-TRAP-GFX900: ; %bb.0:
+; HSA-TRAP-GFX900-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; HSA-TRAP-GFX900-NEXT: v_mov_b32_e32 v0, 0
+; HSA-TRAP-GFX900-NEXT: v_mov_b32_e32 v1, 1
+; HSA-TRAP-GFX900-NEXT: s_waitcnt lgkmcnt(0)
+; HSA-TRAP-GFX900-NEXT: global_store_dword v0, v1, s[0:1]
+; HSA-TRAP-GFX900-NEXT: s_waitcnt vmcnt(0)
+; HSA-TRAP-GFX900-NEXT: s_trap 2
+;
+; HSA-NOTRAP-GFX900-LABEL: trap:
+; HSA-NOTRAP-GFX900: ; %bb.0:
+; HSA-NOTRAP-GFX900-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; HSA-NOTRAP-GFX900-NEXT: v_mov_b32_e32 v0, 0
+; HSA-NOTRAP-GFX900-NEXT: v_mov_b32_e32 v1, 1
+; HSA-NOTRAP-GFX900-NEXT: s_waitcnt lgkmcnt(0)
+; HSA-NOTRAP-GFX900-NEXT: global_store_dword v0, v1, s[0:1]
+; HSA-NOTRAP-GFX900-NEXT: s_waitcnt vmcnt(0)
+; HSA-NOTRAP-GFX900-NEXT: s_endpgm
store volatile i32 1, ptr addrspace(1) %arg0
call void @llvm.trap()
unreachable
@@ -104,150 +57,77 @@ define amdgpu_kernel void @trap(ptr addrspace(1) nocapture readonly %arg0) {
}
define amdgpu_kernel void @non_entry_trap(ptr addrspace(1) nocapture readonly %arg0) local_unnamed_addr {
-; NOHSA-TRAP-GFX900-V3-LABEL: non_entry_trap:
-; NOHSA-TRAP-GFX900-V3: ; %bb.0: ; %entry
-; NOHSA-TRAP-GFX900-V3-NEXT: s_load_dwordx2 s[0:1], s[2:3], 0x24
-; NOHSA-TRAP-GFX900-V3-NEXT: v_mov_b32_e32 v0, 0
-; NOHSA-TRAP-GFX900-V3-NEXT: s_waitcnt lgkmcnt(0)
-; NOHSA-TRAP-GFX900-V3-NEXT: global_load_dword v1, v0, s[0:1] glc
-; NOHSA-TRAP-GFX900-V3-NEXT: s_waitcnt vmcnt(0)
-; NOHSA-TRAP-GFX900-V3-NEXT: v_cmp_eq_u32_e32 vcc, -1, v1
-; NOHSA-TRAP-GFX900-V3-NEXT: s_cbranch_vccz .LBB1_2
-; NOHSA-TRAP-GFX900-V3-NEXT: ; %bb.1: ; %ret
-; NOHSA-TRAP-GFX900-V3-NEXT: v_mov_b32_e32 v1, 3
-; NOHSA-TRAP-GFX900-V3-NEXT: global_store_dword v0, v1, s[0:1]
-; NOHSA-TRAP-GFX900-V3-NEXT: s_waitcnt vmcnt(0)
-; NOHSA-TRAP-GFX900-V3-NEXT: s_endpgm
-; NOHSA-TRAP-GFX900-V3-NEXT: .LBB1_2: ; %trap
-; NOHSA-TRAP-GFX900-V3-NEXT: s_endpgm
-;
-; NOHSA-TRAP-GFX900-V4-LABEL: non_entry_trap:
-; NOHSA-TRAP-GFX900-V4: ; %bb.0: ; %entry
-; NOHSA-TRAP-GFX900-V4-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
-; NOHSA-TRAP-GFX900-V4-NEXT: v_mov_b32_e32 v0, 0
-; NOHSA-TRAP-GFX900-V4-NEXT: s_waitcnt lgkmcnt(0)
-; NOHSA-TRAP-GFX900-V4-NEXT: global_load_dword v1, v0, s[0:1] glc
-; NOHSA-TRAP-GFX900-V4-NEXT: s_waitcnt vmcnt(0)
-; NOHSA-TRAP-GFX900-V4-NEXT: v_cmp_eq_u32_e32 vcc, -1, v1
-; NOHSA-TRAP-GFX900-V4-NEXT: s_cbranch_vccz .LBB1_2
-; NOHSA-TRAP-GFX900-V4-NEXT: ; %bb.1: ; %ret
-; NOHSA-TRAP-GFX900-V4-NEXT: v_mov_b32_e32 v1, 3
-; NOHSA-TRAP-GFX900-V4-NEXT: global_store_dword v0, v1, s[0:1]
-; NOHSA-TRAP-GFX900-V4-NEXT: s_waitcnt vmcnt(0)
-; NOHSA-TRAP-GFX900-V4-NEXT: s_endpgm
-; NOHSA-TRAP-GFX900-V4-NEXT: .LBB1_2: ; %trap
-; NOHSA-TRAP-GFX900-V4-NEXT: s_endpgm
-;
-; HSA-TRAP-GFX803-V3-LABEL: non_entry_trap:
-; HSA-TRAP-GFX803-V3: ; %bb.0: ; %entry
-; HSA-TRAP-GFX803-V3-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
-; HSA-TRAP-GFX803-V3-NEXT: s_waitcnt lgkmcnt(0)
-; HSA-TRAP-GFX803-V3-NEXT: v_mov_b32_e32 v0, s0
-; HSA-TRAP-GFX803-V3-NEXT: v_mov_b32_e32 v1, s1
-; HSA-TRAP-GFX803-V3-NEXT: flat_load_dword v0, v[0:1] glc
-; HSA-TRAP-GFX803-V3-NEXT: s_waitcnt vmcnt(0)
-; HSA-TRAP-GFX803-V3-NEXT: v_cmp_eq_u32_e32 vcc, -1, v0
-; HSA-TRAP-GFX803-V3-NEXT: s_cbranch_vccz .LBB1_2
-; HSA-TRAP-GFX803-V3-NEXT: ; %bb.1: ; %ret
-; HSA-TRAP-GFX803-V3-NEXT: v_mov_b32_e32 v0, s0
-; HSA-TRAP-GFX803-V3-NEXT: v_mov_b32_e32 v2, 3
-; HSA-TRAP-GFX803-V3-NEXT: v_mov_b32_e32 v1, s1
-; HSA-TRAP-GFX803-V3-NEXT: flat_store_dword v[0:1], v2
-; HSA-TRAP-GFX803-V3-NEXT: s_waitcnt vmcnt(0)
-; HSA-TRAP-GFX803-V3-NEXT: s_endpgm
-; HSA-TRAP-GFX803-V3-NEXT: .LBB1_2: ; %trap
-; HSA-TRAP-GFX803-V3-NEXT: s_mov_b64 s[0:1], s[4:5]
-; HSA-TRAP-GFX803-V3-NEXT: s_trap 2
-;
-; HSA-TRAP-GFX803-V4-LABEL: non_entry_trap:
-; HSA-TRAP-GFX803-V4: ; %bb.0: ; %entry
-; HSA-TRAP-GFX803-V4-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
-; HSA-TRAP-GFX803-V4-NEXT: s_waitcnt lgkmcnt(0)
-; HSA-TRAP-GFX803-V4-NEXT: v_mov_b32_e32 v0, s0
-; HSA-TRAP-GFX803-V4-NEXT: v_mov_b32_e32 v1, s1
-; HSA-TRAP-GFX803-V4-NEXT: flat_load_dword v0, v[0:1] glc
-; HSA-TRAP-GFX803-V4-NEXT: s_waitcnt vmcnt(0)
-; HSA-TRAP-GFX803-V4-NEXT: v_cmp_eq_u32_e32 vcc, -1, v0
-; HSA-TRAP-GFX803-V4-NEXT: s_cbranch_vccz .LBB1_2
-; HSA-TRAP-GFX803-V4-NEXT: ; %bb.1: ; %ret
-; HSA-TRAP-GFX803-V4-NEXT: v_mov_b32_e32 v0, s0
-; HSA-TRAP-GFX803-V4-NEXT: v_mov_b32_e32 v2, 3
-; HSA-TRAP-GFX803-V4-NEXT: v_mov_b32_e32 v1, s1
-; HSA-TRAP-GFX803-V4-NEXT: flat_store_dword v[0:1], v2
-; HSA-TRAP-GFX803-V4-NEXT: s_waitcnt vmcnt(0)
-; HSA-TRAP-GFX803-V4-NEXT: s_endpgm
-; HSA-TRAP-GFX803-V4-NEXT: .LBB1_2: ; %trap
-; HSA-TRAP-GFX803-V4-NEXT: s_mov_b64 s[0:1], s[4:5]
-; HSA-TRAP-GFX803-V4-NEXT: s_trap 2
-;
-; HSA-TRAP-GFX900-V3-LABEL: non_entry_trap:
-; HSA-TRAP-GFX900-V3: ; %bb.0: ; %entry
-; HSA-TRAP-GFX900-V3-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
-; HSA-TRAP-GFX900-V3-NEXT: v_mov_b32_e32 v0, 0
-; HSA-TRAP-GFX900-V3-NEXT: s_waitcnt lgkmcnt(0)
-; HSA-TRAP-GFX900-V3-NEXT: global_load_dword v1, v0, s[0:1] glc
-; HSA-TRAP-GFX900-V3-NEXT: s_waitcnt vmcnt(0)
-; HSA-TRAP-GFX900-V3-NEXT: v_cmp_eq_u32_e32 vcc, -1, v1
-; HSA-TRAP-GFX900-V3-NEXT: s_cbranch_vccz .LBB1_2
-; HSA-TRAP-GFX900-V3-NEXT: ; %bb.1: ; %ret
-; HSA-TRAP-GFX900-V3-NEXT: v_mov_b32_e32 v1, 3
-; HSA-TRAP-GFX900-V3-NEXT: global_store_dword v0, v1, s[0:1]
-; HSA-TRAP-GFX900-V3-NEXT: s_waitcnt vmcnt(0)
-; HSA-TRAP-GFX900-V3-NEXT: s_endpgm
-; HSA-TRAP-GFX900-V3-NEXT: .LBB1_2: ; %trap
-; HSA-TRAP-GFX900-V3-NEXT: s_mov_b64 s[0:1], s[4:5]
-; HSA-TRAP-GFX900-V3-NEXT: s_trap 2
-;
-; HSA-TRAP-GFX900-V4-LABEL: non_entry_trap:
-; HSA-TRAP-GFX900-V4: ; %bb.0: ; %entry
-; HSA-TRAP-GFX900-V4-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
-; HSA-TRAP-GFX900-V4-NEXT: v_mov_b32_e32 v0, 0
-; HSA-TRAP-GFX900-V4-NEXT: s_waitcnt lgkmcnt(0)
-; HSA-TRAP-GFX900-V4-NEXT: global_load_dword v1, v0, s[0:1] glc
-; HSA-TRAP-GFX900-V4-NEXT: s_waitcnt vmcnt(0)
-; HSA-TRAP-GFX900-V4-NEXT: v_cmp_eq_u32_e32 vcc, -1, v1
-; HSA-TRAP-GFX900-V4-NEXT: s_cbranch_vccz .LBB1_2
-; HSA-TRAP-GFX900-V4-NEXT: ; %bb.1: ; %ret
-; HSA-TRAP-GFX900-V4-NEXT: v_mov_b32_e32 v1, 3
-; HSA-TRAP-GFX900-V4-NEXT: global_store_dword v0, v1, s[0:1]
-; HSA-TRAP-GFX900-V4-NEXT: s_waitcnt vmcnt(0)
-; HSA-TRAP-GFX900-V4-NEXT: s_endpgm
-; HSA-TRAP-GFX900-V4-NEXT: .LBB1_2: ; %trap
-; HSA-TRAP-GFX900-V4-NEXT: s_trap 2
-;
-; HSA-NOTRAP-GFX900-V3-LABEL: non_entry_trap:
-; HSA-NOTRAP-GFX900-V3: ; %bb.0: ; %entry
-; HSA-NOTRAP-GFX900-V3-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
-; HSA-NOTRAP-GFX900-V3-NEXT: v_mov_b32_e32 v0, 0
-; HSA-NOTRAP-GFX900-V3-NEXT: s_waitcnt lgkmcnt(0)
-; HSA-NOTRAP-GFX900-V3-NEXT: global_load_dword v1, v0, s[0:1] glc
-; HSA-NOTRAP-GFX900-V3-NEXT: s_waitcnt vmcnt(0)
-; HSA-NOTRAP-GFX900-V3-NEXT: v_cmp_eq_u32_e32 vcc, -1, v1
-; HSA-NOTRAP-GFX900-V3-NEXT: s_cbranch_vccz .LBB1_2
-; HSA-NOTRAP-GFX900-V3-NEXT: ; %bb.1: ; %ret
-; HSA-NOTRAP-GFX900-V3-NEXT: v_mov_b32_e32 v1, 3
-; HSA-NOTRAP-GFX900-V3-NEXT: global_store_dword v0, v1, s[0:1]
-; HSA-NOTRAP-GFX900-V3-NEXT: s_waitcnt vmcnt(0)
-; HSA-NOTRAP-GFX900-V3-NEXT: s_endpgm
-; HSA-NOTRAP-GFX900-V3-NEXT: .LBB1_2: ; %trap
-; HSA-NOTRAP-GFX900-V3-NEXT: s_endpgm
-;
-; HSA-NOTRAP-GFX900-V4-LABEL: non_entry_trap:
-; HSA-NOTRAP-GFX900-V4: ; %bb.0: ; %entry
-; HSA-NOTRAP-GFX900-V4-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
-; HSA-NOTRAP-GFX900-V4-NEXT: v_mov_b32_e32 v0, 0
-; HSA-NOTRAP-GFX900-V4-NEXT: s_waitcnt lgkmcnt(0)
-; HSA-NOTRAP-GFX900-V4-NEXT: global_load_dword v1, v0, s[0:1] glc
-; HSA-NOTRAP-GFX900-V4-NEXT: s_waitcnt vmcnt(0)
-; HSA-NOTRAP-GFX900-V4-NEXT: v_cmp_eq_u32_e32 vcc, -1, v1
-; HSA-NOTRAP-GFX900-V4-NEXT: s_cbranch_vccz .LBB1_2
-; HSA-NOTRAP-GFX900-V4-NEXT: ; %bb.1: ; %ret
-; HSA-NOTRAP-GFX900-V4-NEXT: v_mov_b32_e32 v1, 3
-; HSA-NOTRAP-GFX900-V4-NEXT: global_store_dword v0, v1, s[0:1]
-; HSA-NOTRAP-GFX900-V4-NEXT: s_waitcnt vmcnt(0)
-; HSA-NOTRAP-GFX900-V4-NEXT: s_endpgm
-; HSA-NOTRAP-GFX900-V4-NEXT: .LBB1_2: ; %trap
-; HSA-NOTRAP-GFX900-V4-NEXT: s_endpgm
+; NOHSA-TRAP-GFX900-LABEL: non_entry_trap:
+; NOHSA-TRAP-GFX900: ; %bb.0: ; %entry
+; NOHSA-TRAP-GFX900-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; NOHSA-TRAP-GFX900-NEXT: v_mov_b32_e32 v0, 0
+; NOHSA-TRAP-GFX900-NEXT: s_waitcnt lgkmcnt(0)
+; NOHSA-TRAP-GFX900-NEXT: global_load_dword v1, v0, s[0:1] glc
+; NOHSA-TRAP-GFX900-NEXT: s_waitcnt vmcnt(0)
+; NOHSA-TRAP-GFX900-NEXT: v_cmp_eq_u32_e32 vcc, -1, v1
+; NOHSA-TRAP-GFX900-NEXT: s_cbranch_vccz .LBB1_2
+; NOHSA-TRAP-GFX900-NEXT: ; %bb.1: ; %ret
+; NOHSA-TRAP-GFX900-NEXT: v_mov_b32_e32 v1, 3
+; NOHSA-TRAP-GFX900-NEXT: global_store_dword v0, v1, s[0:1]
+; NOHSA-TRAP-GFX900-NEXT: s_waitcnt vmcnt(0)
+; NOHSA-TRAP-GFX900-NEXT: s_endpgm
+; NOHSA-TRAP-GFX900-NEXT: .LBB1_2: ; %trap
+; NOHSA-TRAP-GFX900-NEXT: s_endpgm
+;
+; HSA-TRAP-GFX803-LABEL: non_entry_trap:
+; HSA-TRAP-GFX803: ; %bb.0: ; %entry
+; HSA-TRAP-GFX803-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0
+; HSA-TRAP-GFX803-NEXT: s_waitcnt lgkmcnt(0)
+; HSA-TRAP-GFX803-NEXT: v_mov_b32_e32 v0, s0
+; HSA-TRAP-GFX803-NEXT: v_mov_b32_e32 v1, s1
+; HSA-TRAP-GFX803-NEXT: flat_load_dword v0, v[0:1] glc
+; HSA-TRAP-GFX803-NEXT: s_waitcnt vmcnt(0)
+; HSA-TRAP-GFX803-NEXT: v_cmp_eq_u32_e32 vcc, -1, v0
+; HSA-TRAP-GFX803-NEXT: s_cbranch_vccz .LBB1_2
+; HSA-TRAP-GFX803-NEXT: ; %bb.1: ; %ret
+; HSA-TRAP-GFX803-NEXT: v_mov_b32_e32 v0, s0
+; HSA-TRAP-GFX803-NEXT: v_mov_b32_e32 v2, 3
+; HSA-TRAP-GFX803-NEXT: v_mov_b32_e32 v1, s1
+; HSA-TRAP-GFX803-NEXT: flat_store_dword v[0:1], v2
+; HSA-TRAP-GFX803-NEXT: s_waitcnt vmcnt(0)
+; HSA-TRAP-GFX803-NEXT: s_endpgm
+; HSA-TRAP-GFX803-NEXT: .LBB1_2: ; %trap
+; HSA-TRAP-GFX803-NEXT: s_mov_b64 s[0:1], s[4:5]
+; HSA-TRAP-GFX803-NEXT: s_trap 2
+;
+; HSA-TRAP-GFX900-LABEL: non_entry_trap:
+; HSA-TRAP-GFX900: ; %bb.0: ; %entry
+; HSA-TRAP-GFX900-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; HSA-TRAP-GFX900-NEXT: v_mov_b32_e32 v0, 0
+; HSA-TRAP-GFX900-NEXT: s_waitcnt lgkmcnt(0)
+; HSA-TRAP-GFX900-NEXT: global_load_dword v1, v0, s[0:1] glc
+; HSA-TRAP-GFX900-NEXT: s_waitcnt vmcnt(0)
+; HSA-TRAP-GFX900-NEXT: v_cmp_eq_u32_e32 vcc, -1, v1
+; HSA-TRAP-GFX900-NEXT: s_cbranch_vccz .LBB1_2
+; HSA-TRAP-GFX900-NEXT: ; %bb.1: ; %ret
+; HSA-TRAP-GFX900-NEXT: v_mov_b32_e32 v1, 3
+; HSA-TRAP-GFX900-NEXT: global_store_dword v0, v1, s[0:1]
+; HSA-TRAP-GFX900-NEXT: s_waitcnt vmcnt(0)
+; HSA-TRAP-GFX900-NEXT: s_endpgm
+; HSA-TRAP-GFX900-NEXT: .LBB1_2: ; %trap
+; HSA-TRAP-GFX900-NEXT: s_trap 2
+;
+; HSA-NOTRAP-GFX900-LABEL: non_entry_trap:
+; HSA-NOTRAP-GFX900: ; %bb.0: ; %entry
+; HSA-NOTRAP-GFX900-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; HSA-NOTRAP-GFX900-NEXT: v_mov_b32_e32 v0, 0
+; HSA-NOTRAP-GFX900-NEXT: s_waitcnt lgkmcnt(0)
+; HSA-NOTRAP-GFX900-NEXT: global_load_dword v1, v0, s[0:1] glc
+; HSA-NOTRAP-GFX900-NEXT: s_waitcnt vmcnt(0)
+; HSA-NOTRAP-GFX900-NEXT: v_cmp_eq_u32_e32 vcc, -1, v1
+; HSA-NOTRAP-GFX900-NEXT: s_cbranch_vccz .LBB1_2
+; HSA-NOTRAP-GFX900-NEXT: ; %bb.1: ; %ret
+; HSA-NOTRAP-GFX900-NEXT: v_mov_b32_e32 v1, 3
+; HSA-NOTRAP-GFX900-NEXT: global_store_dword v0, v1, s[0:1]
+; HSA-NOTRAP-GFX900-NEXT: s_waitcnt vmcnt(0)
+; HSA-NOTRAP-GFX900-NEXT: s_endpgm
+; HSA-NOTRAP-GFX900-NEXT: .LBB1_2: ; %trap
+; HSA-NOTRAP-GFX900-NEXT: s_endpgm
entry:
%tmp29 = load volatile i32, ptr addrspace(1) %arg0
%cmp = icmp eq i32 %tmp29, -1
@@ -263,115 +143,60 @@ ret:
}
define amdgpu_kernel void @debugtrap(ptr addrspace(1) nocapture readonly %arg0) {
-; NOHSA-TRAP-GFX900-V3-LABEL: debugtrap:
-; NOHSA-TRAP-GFX900-V3: ; %bb.0:
-; NOHSA-TRAP-GFX900-V3-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
-; NOHSA-TRAP-GFX900-V3-NEXT: v_mov_b32_e32 v0, 0
-; NOHSA-TRAP-GFX900-V3-NEXT: v_mov_b32_e32 v1, 1
-; NOHSA-TRAP-GFX900-V3-NEXT: v_mov_b32_e32 v2, 2
-; NOHSA-TRAP-GFX900-V3-NEXT: s_waitcnt lgkmcnt(0)
-; NOHSA-TRAP-GFX900-V3-NEXT: global_store_dword v0, v1, s[0:1]
-; NOHSA-TRAP-GFX900-V3-NEXT: s_waitcnt vmcnt(0)
-; NOHSA-TRAP-GFX900-V3-NEXT: global_store_dword v0, v2, s[0:1]
-; NOHSA-TRAP-GFX900-V3-NEXT: s_waitcnt vmcnt(0)
-; NOHSA-TRAP-GFX900-V3-NEXT: s_endpgm
-;
-; NOHSA-TRAP-GFX900-V4-LABEL: debugtrap:
-; NOHSA-TRAP-GFX900-V4: ; %bb.0:
-; NOHSA-TRAP-GFX900-V4-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
-; NOHSA-TRAP-GFX900-V4-NEXT: v_mov_b32_e32 v0, 0
-; NOHSA-TRAP-GFX900-V4-NEXT: v_mov_b32_e32 v1, 1
-; NOHSA-TRAP-GFX900-V4-NEXT: v_mov_b32_e32 v2, 2
-; NOHSA-TRAP-GFX900-V4-NEXT: s_waitcnt lgkmcnt(0)
-; NOHSA-TRAP-GFX900-V4-NEXT: global_store_dword v0, v1, s[0:1]
-; NOHSA-TRAP-GFX900-V4-NEXT: s_waitcnt vmcnt(0)
-; NOHSA-TRAP-GFX900-V4-NEXT: global_store_dword v0, v2, s[0:1]
-; NOHSA-TRAP-GFX900-V4-NEXT: s_waitcnt vmcnt(0)
-; NOHSA-TRAP-GFX900-V4-NEXT: s_endpgm
-;
-; HSA-TRAP-GFX803-V3-LABEL: debugtrap:
-; HSA-TRAP-GFX803-V3: ; %bb.0:
-; HSA-TRAP-GFX803-V3-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
-; HSA-TRAP-GFX803-V3-NEXT: v_mov_b32_e32 v2, 1
-; HSA-TRAP-GFX803-V3-NEXT: v_mov_b32_e32 v3, 2
-; HSA-TRAP-GFX803-V3-NEXT: s_waitcnt lgkmcnt(0)
-; HSA-TRAP-GFX803-V3-NEXT: v_mov_b32_e32 v0, s0
-; HSA-TRAP-GFX803-V3-NEXT: v_mov_b32_e32 v1, s1
-; HSA-TRAP-GFX803-V3-NEXT: flat_store_dword v[0:1], v2
-; HSA-TRAP-GFX803-V3-NEXT: s_waitcnt vmcnt(0)
-; HSA-TRAP-GFX803-V3-NEXT: s_trap 3
-; HSA-TRAP-GFX803-V3-NEXT: flat_store_dword v[0:1], v3
-; HSA-TRAP-GFX803-V3-NEXT: s_waitcnt vmcnt(0)
-; HSA-TRAP-GFX803-V3-NEXT: s_endpgm
-;
-; HSA-TRAP-GFX803-V4-LABEL: debugtrap:
-; HSA-TRAP-GFX803-V4: ; %bb.0:
-; HSA-TRAP-GFX803-V4-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
-; HSA-TRAP-GFX803-V4-NEXT: v_mov_b32_e32 v2, 1
-; HSA-TRAP-GFX803-V4-NEXT: v_mov_b32_e32 v3, 2
-; HSA-TRAP-GFX803-V4-NEXT: s_waitcnt lgkmcnt(0)
-; HSA-TRAP-GFX803-V4-NEXT: v_mov_b32_e32 v0, s0
-; HSA-TRAP-GFX803-V4-NEXT: v_mov_b32_e32 v1, s1
-; HSA-TRAP-GFX803-V4-NEXT: flat_store_dword v[0:1], v2
-; HSA-TRAP-GFX803-V4-NEXT: s_waitcnt vmcnt(0)
-; HSA-TRAP-GFX803-V4-NEXT: s_trap 3
-; HSA-TRAP-GFX803-V4-NEXT: flat_store_dword v[0:1], v3
-; HSA-TRAP-GFX803-V4-NEXT: s_waitcnt vmcnt(0)
-; HSA-TRAP-GFX803-V4-NEXT: s_endpgm
-;
-; HSA-TRAP-GFX900-V3-LABEL: debugtrap:
-; HSA-TRAP-GFX900-V3: ; %bb.0:
-; HSA-TRAP-GFX900-V3-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
-; HSA-TRAP-GFX900-V3-NEXT: v_mov_b32_e32 v0, 0
-; HSA-TRAP-GFX900-V3-NEXT: v_mov_b32_e32 v1, 1
-; HSA-TRAP-GFX900-V3-NEXT: v_mov_b32_e32 v2, 2
-; HSA-TRAP-GFX900-V3-NEXT: s_waitcnt lgkmcnt(0)
-; HSA-TRAP-GFX900-V3-NEXT: global_store_dword v0, v1, s[0:1]
-; HSA-TRAP-GFX900-V3-NEXT: s_waitcnt vmcnt(0)
-; HSA-TRAP-GFX900-V3-NEXT: s_trap 3
-; HSA-TRAP-GFX900-V3-NEXT: global_store_dword v0, v2, s[0:1]
-; HSA-TRAP-GFX900-V3-NEXT: s_waitcnt vmcnt(0)
-; HSA-TRAP-GFX900-V3-NEXT: s_endpgm
-;
-; HSA-TRAP-GFX900-V4-LABEL: debugtrap:
-; HSA-TRAP-GFX900-V4: ; %bb.0:
-; HSA-TRAP-GFX900-V4-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
-; HSA-TRAP-GFX900-V4-NEXT: v_mov_b32_e32 v0, 0
-; HSA-TRAP-GFX900-V4-NEXT: v_mov_b32_e32 v1, 1
-; HSA-TRAP-GFX900-V4-NEXT: v_mov_b32_e32 v2, 2
-; HSA-TRAP-GFX900-V4-NEXT: s_waitcnt lgkmcnt(0)
-; HSA-TRAP-GFX900-V4-NEXT: global_store_dword v0, v1, s[0:1]
-; HSA-TRAP-GFX900-V4-NEXT: s_waitcnt vmcnt(0)
-; HSA-TRAP-GFX900-V4-NEXT: s_trap 3
-; HSA-TRAP-GFX900-V4-NEXT: global_store_dword v0, v2, s[0:1]
-; HSA-TRAP-GFX900-V4-NEXT: s_waitcnt vmcnt(0)
-; HSA-TRAP-GFX900-V4-NEXT: s_endpgm
-;
-; HSA-NOTRAP-GFX900-V3-LABEL: debugtrap:
-; HSA-NOTRAP-GFX900-V3: ; %bb.0:
-; HSA-NOTRAP-GFX900-V3-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
-; HSA-NOTRAP-GFX900-V3-NEXT: v_mov_b32_e32 v0, 0
-; HSA-NOTRAP-GFX900-V3-NEXT: v_mov_b32_e32 v1, 1
-; HSA-NOTRAP-GFX900-V3-NEXT: v_mov_b32_e32 v2, 2
-; HSA-NOTRAP-GFX900-V3-NEXT: s_waitcnt lgkmcnt(0)
-; HSA-NOTRAP-GFX900-V3-NEXT: global_store_dword v0, v1, s[0:1]
-; HSA-NOTRAP-GFX900-V3-NEXT: s_waitcnt vmcnt(0)
-; HSA-NOTRAP-GFX900-V3-NEXT: global_store_dword v0, v2, s[0:1]
-; HSA-NOTRAP-GFX900-V3-NEXT: s_waitcnt vmcnt(0)
-; HSA-NOTRAP-GFX900-V3-NEXT: s_endpgm
-;
-; HSA-NOTRAP-GFX900-V4-LABEL: debugtrap:
-; HSA-NOTRAP-GFX900-V4: ; %bb.0:
-; HSA-NOTRAP-GFX900-V4-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
-; HSA-NOTRAP-GFX900-V4-NEXT: v_mov_b32_e32 v0, 0
-; HSA-NOTRAP-GFX900-V4-NEXT: v_mov_b32_e32 v1, 1
-; HSA-NOTRAP-GFX900-V4-NEXT: v_mov_b32_e32 v2, 2
-; HSA-NOTRAP-GFX900-V4-NEXT: s_waitcnt lgkmcnt(0)
-; HSA-NOTRAP-GFX900-V4-NEXT: global_store_dword v0, v1, s[0:1]
-; HSA-NOTRAP-GFX900-V4-NEXT: s_waitcnt vmcnt(0)
-; HSA-NOTRAP-GFX900-V4-NEXT: global_store_dword v0, v2, s[0:1]
-; HSA-NOTRAP-GFX900-V4-NEXT: s_waitcnt vmcnt(0)
-; HSA-NOTRAP-GFX900-V4-NEXT: s_endpgm
+; NOHSA-TRAP-GFX900-LABEL: debugtrap:
+; NOHSA-TRAP-GFX900: ; %bb.0:
+; NOHSA-TRAP-GFX900-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
+; NOHSA-TRAP-GFX900-NEXT: v_mov_b32_e32 v0, 0
+; NOHSA-TRAP-GFX900-NEXT: v_mov_b32_e32 v1, 1
+; NOHSA-TRAP-GFX900-NEXT: v_mov_b32_e32 v2, 2
+; NOHSA-TRAP-GFX900-NEXT: s_waitcnt lgkmcnt(0)
+; NOHSA-TRAP-GFX900-NEXT: global_store_dword v0, v1, s[0:1]
+; NOHSA-TRAP-GFX900-NEXT: s_waitcnt vmcnt(0)
+; NOHSA-TRAP-GFX900-NEXT: global_store_dword v0, v2, s[0:1]
+; NOHSA-TRAP-GFX900-NEXT: s_waitcnt vmcnt(0)
+; NOHSA-TRAP-GFX900-NEXT: s_endpgm
+;
+; HSA-TRAP-GFX803-LABEL: debugtrap:
+; HSA-TRAP-GFX803: ; %bb.0:
+; HSA-TRAP-GFX803-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; HSA-TRAP-GFX803-NEXT: v_mov_b32_e32 v2, 1
+; HSA-TRAP-GFX803-NEXT: v_mov_b32_e32 v3, 2
+; HSA-TRAP-GFX803-NEXT: s_waitcnt lgkmcnt(0)
+; HSA-TRAP-GFX803-NEXT: v_mov_b32_e32 v0, s0
+; HSA-TRAP-GFX803-NEXT: v_mov_b32_e32 v1, s1
+; HSA-TRAP-GFX803-NEXT: flat_store_dword v[0:1], v2
+; HSA-TRAP-GFX803-NEXT: s_waitcnt vmcnt(0)
+; HSA-TRAP-GFX803-NEXT: s_trap 3
+; HSA-TRAP-GFX803-NEXT: flat_store_dword v[0:1], v3
+; HSA-TRAP-GFX803-NEXT: s_waitcnt vmcnt(0)
+; HSA-TRAP-GFX803-NEXT: s_endpgm
+;
+; HSA-TRAP-GFX900-LABEL: debugtrap:
+; HSA-TRAP-GFX900: ; %bb.0:
+; HSA-TRAP-GFX900-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; HSA-TRAP-GFX900-NEXT: v_mov_b32_e32 v0, 0
+; HSA-TRAP-GFX900-NEXT: v_mov_b32_e32 v1, 1
+; HSA-TRAP-GFX900-NEXT: v_mov_b32_e32 v2, 2
+; HSA-TRAP-GFX900-NEXT: s_waitcnt lgkmcnt(0)
+; HSA-TRAP-GFX900-NEXT: global_store_dword v0, v1, s[0:1]
+; HSA-TRAP-GFX900-NEXT: s_waitcnt vmcnt(0)
+; HSA-TRAP-GFX900-NEXT: s_trap 3
+; HSA-TRAP-GFX900-NEXT: global_store_dword v0, v2, s[0:1]
+; HSA-TRAP-GFX900-NEXT: s_waitcnt vmcnt(0)
+; HSA-TRAP-GFX900-NEXT: s_endpgm
+;
+; HSA-NOTRAP-GFX900-LABEL: debugtrap:
+; HSA-NOTRAP-GFX900: ; %bb.0:
+; HSA-NOTRAP-GFX900-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0
+; HSA-NOTRAP-GFX900-NEXT: v_mov_b32_e32 v0, 0
+; HSA-NOTRAP-GFX900-NEXT: v_mov_b32_e32 v1, 1
+; HSA-NOTRAP-GFX900-NEXT: v_mov_b32_e32 v2, 2
+; HSA-NOTRAP-GFX900-NEXT: s_waitcnt lgkmcnt(0)
+; HSA-NOTRAP-GFX900-NEXT: global_store_dword v0, v1, s[0:1]
+; HSA-NOTRAP-GFX900-NEXT: s_waitcnt vmcnt(0)
+; HSA-NOTRAP-GFX900-NEXT: global_store_dword v0, v2, s[0:1]
+; HSA-NOTRAP-GFX900-NEXT: s_waitcnt vmcnt(0)
+; HSA-NOTRAP-GFX900-NEXT: s_endpgm
store volatile i32 1, ptr addrspace(1) %arg0
call void @llvm.debugtrap()
store volatile i32 2, ptr addrspace(1) %arg0
@@ -382,4 +207,4 @@ attributes #0 = { nounwind noreturn }
attributes #1 = { nounwind }
!llvm.module.flags = !{!0}
-!0 = !{i32 1, !"amdgpu_code_object_version", i32 CODE_OBJECT_VERSION}
+!0 = !{i32 1, !"amdgpu_code_object_version", i32 400}
diff --git a/llvm/test/MC/AMDGPU/hsa-diag-v3.s b/llvm/test/MC/AMDGPU/hsa-diag-v4.s
similarity index 94%
rename from llvm/test/MC/AMDGPU/hsa-diag-v3.s
rename to llvm/test/MC/AMDGPU/hsa-diag-v4.s
index 369ac905ad2b27b..f7a554aedb746b0 100644
--- a/llvm/test/MC/AMDGPU/hsa-diag-v3.s
+++ b/llvm/test/MC/AMDGPU/hsa-diag-v4.s
@@ -1,18 +1,18 @@
-// RUN: not llvm-mc --amdhsa-code-object-version=3 -triple amdgcn-amd-amdhsa -mcpu=gfx810 -mattr=+xnack -show-encoding %s 2>&1 >/dev/null | FileCheck %s --check-prefixes=GCN,GFX8,PREGFX10,AMDHSA
-// RUN: not llvm-mc --amdhsa-code-object-version=3 -triple amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+xnack -show-encoding %s 2>&1 >/dev/null | FileCheck %s --check-prefixes=GCN,GFX10PLUS,GFX10,AMDHSA
-// RUN: not llvm-mc --amdhsa-code-object-version=3 -triple amdgcn-amd-amdhsa -mcpu=gfx1100 -show-encoding %s 2>&1 >/dev/null | FileCheck %s --check-prefixes=GCN,GFX10PLUS,GFX11,AMDHSA
-// RUN: not llvm-mc --amdhsa-code-object-version=3 -triple amdgcn-amd- -mcpu=gfx810 -mattr=+xnack -show-encoding %s 2>&1 >/dev/null | FileCheck %s --check-prefixes=GCN,NONAMDHSA
-// RUN: not llvm-mc --amdhsa-code-object-version=3 -triple amdgcn-amd-amdhsa -mcpu=gfx90a -mattr=+xnack -show-encoding %s 2>&1 >/dev/null | FileCheck %s --check-prefixes=GFX90A,PREGFX10,AMDHSA,ALL
+// RUN: not llvm-mc --amdhsa-code-object-version=4 -triple amdgcn-amd-amdhsa -mcpu=gfx810 -mattr=+xnack -show-encoding %s 2>&1 >/dev/null | FileCheck %s --check-prefixes=GCN,GFX8,PREGFX10,AMDHSA
+// RUN: not llvm-mc --amdhsa-code-object-version=4 -triple amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+xnack -show-encoding %s 2>&1 >/dev/null | FileCheck %s --check-prefixes=GCN,GFX10PLUS,GFX10,AMDHSA
+// RUN: not llvm-mc --amdhsa-code-object-version=4 -triple amdgcn-amd-amdhsa -mcpu=gfx1100 -show-encoding %s 2>&1 >/dev/null | FileCheck %s --check-prefixes=GCN,GFX10PLUS,GFX11,AMDHSA
+// RUN: not llvm-mc --amdhsa-code-object-version=4 -triple amdgcn-amd- -mcpu=gfx810 -mattr=+xnack -show-encoding %s 2>&1 >/dev/null | FileCheck %s --check-prefixes=GCN,NONAMDHSA
+// RUN: not llvm-mc --amdhsa-code-object-version=4 -triple amdgcn-amd-amdhsa -mcpu=gfx90a -mattr=+xnack -show-encoding %s 2>&1 >/dev/null | FileCheck %s --check-prefixes=GFX90A,PREGFX10,AMDHSA,ALL
.text
// GCN-LABEL: warning: test_target
// GFX8-NOT: error:
-// GFX10: error: .amdgcn_target directive's target id amdgcn-amd-amdhsa--gfx810+xnack does not match the specified target id amdgcn-amd-amdhsa--gfx1010+xnack
-// GFX11: error: .amdgcn_target directive's target id amdgcn-amd-amdhsa--gfx810+xnack does not match the specified target id amdgcn-amd-amdhsa--gfx1100
-// NONAMDHSA: error: .amdgcn_target directive's target id amdgcn-amd-amdhsa--gfx810+xnack does not match the specified target id amdgcn-amd-unknown--gfx810
+// GFX10: error: .amdgcn_target directive's target id amdgcn-amd-amdhsa--gfx810:xnack+ does not match the specified target id amdgcn-amd-amdhsa--gfx1010:xnack+
+// GFX11: error: .amdgcn_target directive's target id amdgcn-amd-amdhsa--gfx810:xnack+ does not match the specified target id amdgcn-amd-amdhsa--gfx1100
+// NONAMDHSA: error: .amdgcn_target directive's target id amdgcn-amd-amdhsa--gfx810:xnack+ does not match the specified target id amdgcn-amd-unknown--gfx810
.warning "test_target"
-.amdgcn_target "amdgcn-amd-amdhsa--gfx810+xnack"
+.amdgcn_target "amdgcn-amd-amdhsa--gfx810:xnack+"
// GCN-LABEL: warning: test_amdhsa_kernel_no_name
// GCN: error: unknown directive
diff --git a/llvm/test/MC/AMDGPU/hsa-gfx10-v3.s b/llvm/test/MC/AMDGPU/hsa-gfx10-v3.s
deleted file mode 100644
index ba60000837cdc06..000000000000000
--- a/llvm/test/MC/AMDGPU/hsa-gfx10-v3.s
+++ /dev/null
@@ -1,226 +0,0 @@
-// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx1010 --amdhsa-code-object-version=3 -mattr=+xnack < %s | FileCheck --check-prefix=ASM %s
-// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx1010 --amdhsa-code-object-version=3 -mattr=+xnack -filetype=obj < %s > %t
-// RUN: llvm-readelf -S -r -s %t | FileCheck --check-prefix=READOBJ %s
-// RUN: llvm-objdump -s -j .rodata %t | FileCheck --check-prefix=OBJDUMP %s
-
-// READOBJ: Section Headers
-// READOBJ: .text PROGBITS {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9]+}} AX {{[0-9]+}} {{[0-9]+}} 256
-// READOBJ: .rodata PROGBITS {{[0-9a-f]+}} {{[0-9a-f]+}} 0000c0 {{[0-9]+}} A {{[0-9]+}} {{[0-9]+}} 64
-
-// READOBJ: Relocation section '.rela.rodata' at offset
-// READOBJ: 0000000000000010 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 10
-// READOBJ: 0000000000000050 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 110
-// READOBJ: 0000000000000090 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 210
-
-// READOBJ: Symbol table '.symtab' contains {{[0-9]+}} entries:
-// READOBJ: 0000000000000000 0 FUNC LOCAL PROTECTED 2 minimal
-// READOBJ-NEXT: 0000000000000100 0 FUNC LOCAL PROTECTED 2 complete
-// READOBJ-NEXT: 0000000000000200 0 FUNC LOCAL PROTECTED 2 special_sgpr
-// READOBJ-NEXT: 0000000000000000 64 OBJECT LOCAL DEFAULT 3 minimal.kd
-// READOBJ-NEXT: 0000000000000040 64 OBJECT LOCAL DEFAULT 3 complete.kd
-// READOBJ-NEXT: 0000000000000080 64 OBJECT LOCAL DEFAULT 3 special_sgpr.kd
-
-// OBJDUMP: Contents of section .rodata
-// Note, relocation for KERNEL_CODE_ENTRY_BYTE_OFFSET is not resolved here.
-// minimal
-// OBJDUMP-NEXT: 0000 00000000 00000000 00000000 00000000
-// OBJDUMP-NEXT: 0010 00000000 00000000 00000000 00000000
-// OBJDUMP-NEXT: 0020 00000000 00000000 00000000 00000000
-// OBJDUMP-NEXT: 0030 0000ac60 80000000 00000000 00000000
-// complete
-// OBJDUMP-NEXT: 0040 01000000 01000000 08000000 00000000
-// OBJDUMP-NEXT: 0050 00000000 00000000 00000000 00000000
-// OBJDUMP-NEXT: 0060 00000000 00000000 00000000 00000000
-// OBJDUMP-NEXT: 0070 015001e4 1f0f007f 7f040000 00000000
-// special_sgpr
-// OBJDUMP-NEXT: 0080 00000000 00000000 00000000 00000000
-// OBJDUMP-NEXT: 0090 00000000 00000000 00000000 00000000
-// OBJDUMP-NEXT: 00a0 00000000 00000000 00000000 00000000
-// OBJDUMP-NEXT: 00b0 00000060 80000000 00000000 00000000
-
-.text
-// ASM: .text
-
-.amdgcn_target "amdgcn-amd-amdhsa--gfx1010+xnack"
-// ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx1010+xnack"
-
-.p2align 8
-.type minimal, at function
-minimal:
- s_endpgm
-
-.p2align 8
-.type complete, at function
-complete:
- s_endpgm
-
-.p2align 8
-.type special_sgpr, at function
-special_sgpr:
- s_endpgm
-
-.rodata
-// ASM: .rodata
-
-// Test that only specifying required directives is allowed, and that defaulted
-// values are omitted.
-.p2align 6
-.amdhsa_kernel minimal
- .amdhsa_next_free_vgpr 0
- .amdhsa_next_free_sgpr 0
- .amdhsa_shared_vgpr_count 0
-.end_amdhsa_kernel
-
-// ASM: .amdhsa_kernel minimal
-// ASM: .amdhsa_next_free_vgpr 0
-// ASM-NEXT: .amdhsa_next_free_sgpr 0
-// ASM: .amdhsa_shared_vgpr_count 0
-// ASM: .end_amdhsa_kernel
-
-// Test that we can specify all available directives with non-default values.
-.p2align 6
-.amdhsa_kernel complete
- .amdhsa_group_segment_fixed_size 1
- .amdhsa_private_segment_fixed_size 1
- .amdhsa_kernarg_size 8
- .amdhsa_user_sgpr_private_segment_buffer 1
- .amdhsa_user_sgpr_dispatch_ptr 1
- .amdhsa_user_sgpr_queue_ptr 1
- .amdhsa_user_sgpr_kernarg_segment_ptr 1
- .amdhsa_user_sgpr_dispatch_id 1
- .amdhsa_user_sgpr_flat_scratch_init 1
- .amdhsa_user_sgpr_private_segment_size 1
- .amdhsa_wavefront_size32 1
- .amdhsa_system_sgpr_private_segment_wavefront_offset 1
- .amdhsa_system_sgpr_workgroup_id_x 0
- .amdhsa_system_sgpr_workgroup_id_y 1
- .amdhsa_system_sgpr_workgroup_id_z 1
- .amdhsa_system_sgpr_workgroup_info 1
- .amdhsa_system_vgpr_workitem_id 1
- .amdhsa_next_free_vgpr 9
- .amdhsa_next_free_sgpr 27
- .amdhsa_reserve_vcc 0
- .amdhsa_reserve_flat_scratch 0
- .amdhsa_reserve_xnack_mask 1
- .amdhsa_float_round_mode_32 1
- .amdhsa_float_round_mode_16_64 1
- .amdhsa_float_denorm_mode_32 1
- .amdhsa_float_denorm_mode_16_64 0
- .amdhsa_dx10_clamp 0
- .amdhsa_ieee_mode 0
- .amdhsa_fp16_overflow 1
- .amdhsa_workgroup_processor_mode 1
- .amdhsa_memory_ordered 1
- .amdhsa_forward_progress 1
- .amdhsa_exception_fp_ieee_invalid_op 1
- .amdhsa_exception_fp_denorm_src 1
- .amdhsa_exception_fp_ieee_div_zero 1
- .amdhsa_exception_fp_ieee_overflow 1
- .amdhsa_exception_fp_ieee_underflow 1
- .amdhsa_exception_fp_ieee_inexact 1
- .amdhsa_exception_int_div_zero 1
-.end_amdhsa_kernel
-
-// ASM: .amdhsa_kernel complete
-// ASM-NEXT: .amdhsa_group_segment_fixed_size 1
-// ASM-NEXT: .amdhsa_private_segment_fixed_size 1
-// ASM-NEXT: .amdhsa_kernarg_size 8
-// ASM-NEXT: .amdhsa_user_sgpr_count 15
-// ASM-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1
-// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr 1
-// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr 1
-// ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 1
-// ASM-NEXT: .amdhsa_user_sgpr_dispatch_id 1
-// ASM-NEXT: .amdhsa_user_sgpr_flat_scratch_init 1
-// ASM-NEXT: .amdhsa_user_sgpr_private_segment_size 1
-// ASM-NEXT: .amdhsa_wavefront_size32 1
-// ASM-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 1
-// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x 0
-// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y 1
-// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_z 1
-// ASM-NEXT: .amdhsa_system_sgpr_workgroup_info 1
-// ASM-NEXT: .amdhsa_system_vgpr_workitem_id 1
-// ASM-NEXT: .amdhsa_next_free_vgpr 9
-// ASM-NEXT: .amdhsa_next_free_sgpr 27
-// ASM-NEXT: .amdhsa_reserve_vcc 0
-// ASM-NEXT: .amdhsa_reserve_flat_scratch 0
-// ASM-NEXT: .amdhsa_reserve_xnack_mask 1
-// ASM-NEXT: .amdhsa_float_round_mode_32 1
-// ASM-NEXT: .amdhsa_float_round_mode_16_64 1
-// ASM-NEXT: .amdhsa_float_denorm_mode_32 1
-// ASM-NEXT: .amdhsa_float_denorm_mode_16_64 0
-// ASM-NEXT: .amdhsa_dx10_clamp 0
-// ASM-NEXT: .amdhsa_ieee_mode 0
-// ASM-NEXT: .amdhsa_fp16_overflow 1
-// ASM-NEXT: .amdhsa_workgroup_processor_mode 1
-// ASM-NEXT: .amdhsa_memory_ordered 1
-// ASM-NEXT: .amdhsa_forward_progress 1
-// ASM-NEXT: .amdhsa_shared_vgpr_count 0
-// ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op 1
-// ASM-NEXT: .amdhsa_exception_fp_denorm_src 1
-// ASM-NEXT: .amdhsa_exception_fp_ieee_div_zero 1
-// ASM-NEXT: .amdhsa_exception_fp_ieee_overflow 1
-// ASM-NEXT: .amdhsa_exception_fp_ieee_underflow 1
-// ASM-NEXT: .amdhsa_exception_fp_ieee_inexact 1
-// ASM-NEXT: .amdhsa_exception_int_div_zero 1
-// ASM-NEXT: .end_amdhsa_kernel
-
-// Test that we are including special SGPR usage in the granulated count.
-.p2align 6
-.amdhsa_kernel special_sgpr
- // Same next_free_sgpr as "complete", but...
- .amdhsa_next_free_sgpr 27
- // ...on GFX10+ this should require an additional 6 SGPRs, pushing us from
- // 3 granules to 4
- .amdhsa_reserve_flat_scratch 1
-
- .amdhsa_reserve_vcc 0
- .amdhsa_reserve_xnack_mask 1
-
- .amdhsa_float_denorm_mode_16_64 0
- .amdhsa_dx10_clamp 0
- .amdhsa_ieee_mode 0
- .amdhsa_next_free_vgpr 0
-.end_amdhsa_kernel
-
-// ASM: .amdhsa_kernel special_sgpr
-// ASM: .amdhsa_next_free_vgpr 0
-// ASM-NEXT: .amdhsa_next_free_sgpr 27
-// ASM-NEXT: .amdhsa_reserve_vcc 0
-// ASM-NEXT: .amdhsa_reserve_xnack_mask 1
-// ASM: .amdhsa_float_denorm_mode_16_64 0
-// ASM-NEXT: .amdhsa_dx10_clamp 0
-// ASM-NEXT: .amdhsa_ieee_mode 0
-// ASM: .end_amdhsa_kernel
-
-.section .foo
-
-.byte .amdgcn.gfx_generation_number
-// ASM: .byte 10
-
-.byte .amdgcn.next_free_vgpr
-// ASM: .byte 0
-.byte .amdgcn.next_free_sgpr
-// ASM: .byte 0
-
-v_mov_b32_e32 v7, s10
-
-.byte .amdgcn.next_free_vgpr
-// ASM: .byte 8
-.byte .amdgcn.next_free_sgpr
-// ASM: .byte 11
-
-.set .amdgcn.next_free_vgpr, 0
-.set .amdgcn.next_free_sgpr, 0
-
-.byte .amdgcn.next_free_vgpr
-// ASM: .byte 0
-.byte .amdgcn.next_free_sgpr
-// ASM: .byte 0
-
-v_mov_b32_e32 v16, s3
-
-.byte .amdgcn.next_free_vgpr
-// ASM: .byte 17
-.byte .amdgcn.next_free_sgpr
-// ASM: .byte 4
diff --git a/llvm/test/MC/AMDGPU/hsa-gfx11-v3.s b/llvm/test/MC/AMDGPU/hsa-gfx11-v3.s
deleted file mode 100644
index 7f885b457aa63a3..000000000000000
--- a/llvm/test/MC/AMDGPU/hsa-gfx11-v3.s
+++ /dev/null
@@ -1,213 +0,0 @@
-// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx1100 --amdhsa-code-object-version=3 < %s | FileCheck --check-prefix=ASM %s
-// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx1100 --amdhsa-code-object-version=3 -filetype=obj < %s > %t
-// RUN: llvm-readelf -S -r -s %t | FileCheck --check-prefix=READOBJ %s
-// RUN: llvm-objdump -s -j .rodata %t | FileCheck --check-prefix=OBJDUMP %s
-
-// READOBJ: Section Headers
-// READOBJ: .text PROGBITS {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9]+}} AX {{[0-9]+}} {{[0-9]+}} 256
-// READOBJ: .rodata PROGBITS {{[0-9a-f]+}} {{[0-9a-f]+}} 0000c0 {{[0-9]+}} A {{[0-9]+}} {{[0-9]+}} 64
-
-// READOBJ: Relocation section '.rela.rodata' at offset
-// READOBJ: 0000000000000010 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 10
-// READOBJ: 0000000000000050 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 110
-// READOBJ: 0000000000000090 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 210
-
-// READOBJ: Symbol table '.symtab' contains {{[0-9]+}} entries:
-// READOBJ: 0000000000000000 0 FUNC LOCAL PROTECTED 2 minimal
-// READOBJ-NEXT: 0000000000000100 0 FUNC LOCAL PROTECTED 2 complete
-// READOBJ-NEXT: 0000000000000200 0 FUNC LOCAL PROTECTED 2 special_sgpr
-// READOBJ-NEXT: 0000000000000000 64 OBJECT LOCAL DEFAULT 3 minimal.kd
-// READOBJ-NEXT: 0000000000000040 64 OBJECT LOCAL DEFAULT 3 complete.kd
-// READOBJ-NEXT: 0000000000000080 64 OBJECT LOCAL DEFAULT 3 special_sgpr.kd
-
-// OBJDUMP: Contents of section .rodata
-// Note, relocation for KERNEL_CODE_ENTRY_BYTE_OFFSET is not resolved here.
-// minimal
-// OBJDUMP-NEXT: 0000 00000000 00000000 00000000 00000000
-// OBJDUMP-NEXT: 0010 00000000 00000000 00000000 00000000
-// OBJDUMP-NEXT: 0020 00000000 00000000 00000000 00000000
-// OBJDUMP-NEXT: 0030 0000ac60 80000000 00000000 00000000
-// complete
-// OBJDUMP-NEXT: 0040 01000000 01000000 08000000 00000000
-// OBJDUMP-NEXT: 0050 00000000 00000000 00000000 00000000
-// OBJDUMP-NEXT: 0060 00000000 00000000 00000000 00000000
-// OBJDUMP-NEXT: 0070 015001e4 130f007f 5e040000 00000000
-// special_sgpr
-// OBJDUMP-NEXT: 0080 00000000 00000000 00000000 00000000
-// OBJDUMP-NEXT: 0090 00000000 00000000 00000000 00000000
-// OBJDUMP-NEXT: 00a0 00000000 00000000 00000000 00000000
-// OBJDUMP-NEXT: 00b0 00000060 80000000 00000000 00000000
-
-.text
-// ASM: .text
-
-.amdgcn_target "amdgcn-amd-amdhsa--gfx1100"
-// ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx1100"
-
-.p2align 8
-.type minimal, at function
-minimal:
- s_endpgm
-
-.p2align 8
-.type complete, at function
-complete:
- s_endpgm
-
-.p2align 8
-.type special_sgpr, at function
-special_sgpr:
- s_endpgm
-
-.rodata
-// ASM: .rodata
-
-// Test that only specifying required directives is allowed, and that defaulted
-// values are omitted.
-.p2align 6
-.amdhsa_kernel minimal
- .amdhsa_next_free_vgpr 0
- .amdhsa_next_free_sgpr 0
-.end_amdhsa_kernel
-
-// ASM: .amdhsa_kernel minimal
-// ASM: .amdhsa_next_free_vgpr 0
-// ASM-NEXT: .amdhsa_next_free_sgpr 0
-// ASM: .end_amdhsa_kernel
-
-// Test that we can specify all available directives with non-default values.
-.p2align 6
-.amdhsa_kernel complete
- .amdhsa_group_segment_fixed_size 1
- .amdhsa_private_segment_fixed_size 1
- .amdhsa_kernarg_size 8
- .amdhsa_user_sgpr_dispatch_ptr 1
- .amdhsa_user_sgpr_queue_ptr 1
- .amdhsa_user_sgpr_kernarg_segment_ptr 1
- .amdhsa_user_sgpr_dispatch_id 1
- .amdhsa_user_sgpr_private_segment_size 1
- .amdhsa_wavefront_size32 1
- .amdhsa_enable_private_segment 1
- .amdhsa_system_sgpr_workgroup_id_x 0
- .amdhsa_system_sgpr_workgroup_id_y 1
- .amdhsa_system_sgpr_workgroup_id_z 1
- .amdhsa_system_sgpr_workgroup_info 1
- .amdhsa_system_vgpr_workitem_id 1
- .amdhsa_next_free_vgpr 9
- .amdhsa_next_free_sgpr 27
- .amdhsa_reserve_vcc 0
- .amdhsa_float_round_mode_32 1
- .amdhsa_float_round_mode_16_64 1
- .amdhsa_float_denorm_mode_32 1
- .amdhsa_float_denorm_mode_16_64 0
- .amdhsa_dx10_clamp 0
- .amdhsa_ieee_mode 0
- .amdhsa_fp16_overflow 1
- .amdhsa_workgroup_processor_mode 1
- .amdhsa_memory_ordered 1
- .amdhsa_forward_progress 1
- .amdhsa_exception_fp_ieee_invalid_op 1
- .amdhsa_exception_fp_denorm_src 1
- .amdhsa_exception_fp_ieee_div_zero 1
- .amdhsa_exception_fp_ieee_overflow 1
- .amdhsa_exception_fp_ieee_underflow 1
- .amdhsa_exception_fp_ieee_inexact 1
- .amdhsa_exception_int_div_zero 1
-.end_amdhsa_kernel
-
-// ASM: .amdhsa_kernel complete
-// ASM-NEXT: .amdhsa_group_segment_fixed_size 1
-// ASM-NEXT: .amdhsa_private_segment_fixed_size 1
-// ASM-NEXT: .amdhsa_kernarg_size 8
-// ASM-NEXT: .amdhsa_user_sgpr_count 9
-// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr 1
-// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr 1
-// ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 1
-// ASM-NEXT: .amdhsa_user_sgpr_dispatch_id 1
-// ASM-NEXT: .amdhsa_user_sgpr_private_segment_size 1
-// ASM-NEXT: .amdhsa_wavefront_size32 1
-// ASM-NEXT: .amdhsa_enable_private_segment 1
-// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x 0
-// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y 1
-// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_z 1
-// ASM-NEXT: .amdhsa_system_sgpr_workgroup_info 1
-// ASM-NEXT: .amdhsa_system_vgpr_workitem_id 1
-// ASM-NEXT: .amdhsa_next_free_vgpr 9
-// ASM-NEXT: .amdhsa_next_free_sgpr 27
-// ASM-NEXT: .amdhsa_reserve_vcc 0
-// ASM-NEXT: .amdhsa_float_round_mode_32 1
-// ASM-NEXT: .amdhsa_float_round_mode_16_64 1
-// ASM-NEXT: .amdhsa_float_denorm_mode_32 1
-// ASM-NEXT: .amdhsa_float_denorm_mode_16_64 0
-// ASM-NEXT: .amdhsa_dx10_clamp 0
-// ASM-NEXT: .amdhsa_ieee_mode 0
-// ASM-NEXT: .amdhsa_fp16_overflow 1
-// ASM-NEXT: .amdhsa_workgroup_processor_mode 1
-// ASM-NEXT: .amdhsa_memory_ordered 1
-// ASM-NEXT: .amdhsa_forward_progress 1
-// ASM-NEXT: .amdhsa_shared_vgpr_count 0
-// ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op 1
-// ASM-NEXT: .amdhsa_exception_fp_denorm_src 1
-// ASM-NEXT: .amdhsa_exception_fp_ieee_div_zero 1
-// ASM-NEXT: .amdhsa_exception_fp_ieee_overflow 1
-// ASM-NEXT: .amdhsa_exception_fp_ieee_underflow 1
-// ASM-NEXT: .amdhsa_exception_fp_ieee_inexact 1
-// ASM-NEXT: .amdhsa_exception_int_div_zero 1
-// ASM-NEXT: .end_amdhsa_kernel
-
-// Test that we are including special SGPR usage in the granulated count.
-.p2align 6
-.amdhsa_kernel special_sgpr
- // Same next_free_sgpr as "complete", but...
- .amdhsa_next_free_sgpr 27
- // ...on GFX10+ this should require an additional 6 SGPRs, pushing us from
- // 3 granules to 4
-
- .amdhsa_reserve_vcc 0
-
- .amdhsa_float_denorm_mode_16_64 0
- .amdhsa_dx10_clamp 0
- .amdhsa_ieee_mode 0
- .amdhsa_next_free_vgpr 0
-.end_amdhsa_kernel
-
-// ASM: .amdhsa_kernel special_sgpr
-// ASM: .amdhsa_next_free_vgpr 0
-// ASM-NEXT: .amdhsa_next_free_sgpr 27
-// ASM-NEXT: .amdhsa_reserve_vcc 0
-// ASM: .amdhsa_float_denorm_mode_16_64 0
-// ASM-NEXT: .amdhsa_dx10_clamp 0
-// ASM-NEXT: .amdhsa_ieee_mode 0
-// ASM: .end_amdhsa_kernel
-
-.section .foo
-
-.byte .amdgcn.gfx_generation_number
-// ASM: .byte 11
-
-.byte .amdgcn.next_free_vgpr
-// ASM: .byte 0
-.byte .amdgcn.next_free_sgpr
-// ASM: .byte 0
-
-v_mov_b32_e32 v7, s10
-
-.byte .amdgcn.next_free_vgpr
-// ASM: .byte 8
-.byte .amdgcn.next_free_sgpr
-// ASM: .byte 11
-
-.set .amdgcn.next_free_vgpr, 0
-.set .amdgcn.next_free_sgpr, 0
-
-.byte .amdgcn.next_free_vgpr
-// ASM: .byte 0
-.byte .amdgcn.next_free_sgpr
-// ASM: .byte 0
-
-v_mov_b32_e32 v16, s3
-
-.byte .amdgcn.next_free_vgpr
-// ASM: .byte 17
-.byte .amdgcn.next_free_sgpr
-// ASM: .byte 4
diff --git a/llvm/test/MC/AMDGPU/hsa-gfx90a-v3.s b/llvm/test/MC/AMDGPU/hsa-gfx90a-v3.s
deleted file mode 100644
index fd84fab8af81685..000000000000000
--- a/llvm/test/MC/AMDGPU/hsa-gfx90a-v3.s
+++ /dev/null
@@ -1,184 +0,0 @@
-// RUN: llvm-mc --amdhsa-code-object-version=3 -triple amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck --check-prefix=ASM %s
-// RUN: llvm-mc --amdhsa-code-object-version=3 -triple amdgcn-amd-amdhsa -mcpu=gfx90a -filetype=obj < %s > %t
-// RUN: llvm-readobj --elf-output-style=GNU --sections --symbols --relocations %t | FileCheck --check-prefix=READOBJ %s
-// RUN: llvm-objdump -s -j .rodata %t | FileCheck --check-prefix=OBJDUMP %s
-
-// READOBJ: Section Headers
-// READOBJ: .text PROGBITS {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9]+}} AX {{[0-9]+}} {{[0-9]+}} 256
-// READOBJ: .rodata PROGBITS {{[0-9a-f]+}} {{[0-9a-f]+}} 000080 {{[0-9]+}} A {{[0-9]+}} {{[0-9]+}} 64
-
-// READOBJ: Relocation section '.rela.rodata' at offset
-// READOBJ: 0000000000000010 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 10
-// READOBJ: 0000000000000050 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 110
-
-// READOBJ: Symbol table '.symtab' contains {{[0-9]+}} entries:
-// READOBJ-DAG: {{[0-9]+}}: 0000000000000100 0 FUNC LOCAL PROTECTED 2 complete
-// READOBJ-DAG: {{[0-9]+}}: 0000000000000040 64 OBJECT LOCAL DEFAULT 3 complete.kd
-// READOBJ-DAG: {{[0-9]+}}: 0000000000000000 0 FUNC LOCAL PROTECTED 2 minimal
-// READOBJ-DAG: {{[0-9]+}}: 0000000000000000 64 OBJECT LOCAL DEFAULT 3 minimal.kd
-
-// OBJDUMP: Contents of section .rodata
-// Note, relocation for KERNEL_CODE_ENTRY_BYTE_OFFSET is not resolved here.
-// minimal
-// OBJDUMP-NEXT: 0000 00000000 00000000 00000000 00000000
-// OBJDUMP-NEXT: 0010 00000000 00000000 00000000 00000000
-// OBJDUMP-NEXT: 0020 00000000 00000000 00000000 00000000
-// OBJDUMP-NEXT: 0030 0000ac00 80000000 00000000 00000000
-// complete
-// OBJDUMP-NEXT: 0040 01000000 01000000 08000000 00000000
-// OBJDUMP-NEXT: 0050 00000000 00000000 00000000 00000000
-// OBJDUMP-NEXT: 0060 00000000 00000000 00000000 00000100
-// OBJDUMP-NEXT: 0070 c1500104 210f007f 7f008100 00000000
-
-.text
-// ASM: .text
-
-.amdgcn_target "amdgcn-amd-amdhsa--gfx90a+xnack+sram-ecc"
-// ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx90a+xnack+sram-ecc"
-
-.p2align 8
-.type minimal, at function
-minimal:
- s_endpgm
-
-.p2align 8
-.type complete, at function
-complete:
- s_endpgm
-
-.rodata
-// ASM: .rodata
-
-// Test that only specifying required directives is allowed, and that defaulted
-// values are omitted.
-.p2align 6
-.amdhsa_kernel minimal
- .amdhsa_next_free_vgpr 0
- .amdhsa_next_free_sgpr 0
- .amdhsa_accum_offset 4
-.end_amdhsa_kernel
-
-// ASM: .amdhsa_kernel minimal
-// ASM: .amdhsa_next_free_vgpr 0
-// ASM-NEXT: .amdhsa_next_free_sgpr 0
-// ASM-NEXT: .amdhsa_accum_offset 4
-// ASM: .amdhsa_tg_split 0
-// ASM: .end_amdhsa_kernel
-
-// Test that we can specify all available directives with non-default values.
-.p2align 6
-.amdhsa_kernel complete
- .amdhsa_group_segment_fixed_size 1
- .amdhsa_private_segment_fixed_size 1
- .amdhsa_user_sgpr_private_segment_buffer 1
- .amdhsa_user_sgpr_dispatch_ptr 1
- .amdhsa_user_sgpr_queue_ptr 1
- .amdhsa_user_sgpr_kernarg_segment_ptr 1
- .amdhsa_user_sgpr_dispatch_id 1
- .amdhsa_user_sgpr_flat_scratch_init 1
- .amdhsa_kernarg_size 8
- .amdhsa_user_sgpr_kernarg_preload_length 1
- .amdhsa_user_sgpr_kernarg_preload_offset 1
- .amdhsa_user_sgpr_private_segment_size 1
- .amdhsa_system_sgpr_private_segment_wavefront_offset 1
- .amdhsa_system_sgpr_workgroup_id_x 0
- .amdhsa_system_sgpr_workgroup_id_y 1
- .amdhsa_system_sgpr_workgroup_id_z 1
- .amdhsa_system_sgpr_workgroup_info 1
- .amdhsa_system_vgpr_workitem_id 1
- .amdhsa_next_free_vgpr 9
- .amdhsa_next_free_sgpr 27
- .amdhsa_accum_offset 4
- .amdhsa_reserve_vcc 0
- .amdhsa_reserve_flat_scratch 0
- .amdhsa_float_round_mode_32 1
- .amdhsa_float_round_mode_16_64 1
- .amdhsa_float_denorm_mode_32 1
- .amdhsa_float_denorm_mode_16_64 0
- .amdhsa_dx10_clamp 0
- .amdhsa_ieee_mode 0
- .amdhsa_fp16_overflow 1
- .amdhsa_tg_split 1
- .amdhsa_exception_fp_ieee_invalid_op 1
- .amdhsa_exception_fp_denorm_src 1
- .amdhsa_exception_fp_ieee_div_zero 1
- .amdhsa_exception_fp_ieee_overflow 1
- .amdhsa_exception_fp_ieee_underflow 1
- .amdhsa_exception_fp_ieee_inexact 1
- .amdhsa_exception_int_div_zero 1
-.end_amdhsa_kernel
-
-// ASM: .amdhsa_kernel complete
-// ASM-NEXT: .amdhsa_group_segment_fixed_size 1
-// ASM-NEXT: .amdhsa_private_segment_fixed_size 1
-// ASM-NEXT: .amdhsa_kernarg_size 8
-// ASM-NEXT: .amdhsa_user_sgpr_count 16
-// ASM-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1
-// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr 1
-// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr 1
-// ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 1
-// ASM-NEXT: .amdhsa_user_sgpr_dispatch_id 1
-// ASM-NEXT: .amdhsa_user_sgpr_flat_scratch_init 1
-// ASM-NEXT: .amdhsa_user_sgpr_kernarg_preload_length 1
-// ASM-NEXT: .amdhsa_user_sgpr_kernarg_preload_offset 1
-// ASM-NEXT: .amdhsa_user_sgpr_private_segment_size 1
-// ASM-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 1
-// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x 0
-// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y 1
-// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_z 1
-// ASM-NEXT: .amdhsa_system_sgpr_workgroup_info 1
-// ASM-NEXT: .amdhsa_system_vgpr_workitem_id 1
-// ASM-NEXT: .amdhsa_next_free_vgpr 9
-// ASM-NEXT: .amdhsa_next_free_sgpr 27
-// ASM-NEXT: .amdhsa_accum_offset 4
-// ASM-NEXT: .amdhsa_reserve_vcc 0
-// ASM-NEXT: .amdhsa_reserve_flat_scratch 0
-// ASM-NEXT: .amdhsa_reserve_xnack_mask 1
-// ASM-NEXT: .amdhsa_float_round_mode_32 1
-// ASM-NEXT: .amdhsa_float_round_mode_16_64 1
-// ASM-NEXT: .amdhsa_float_denorm_mode_32 1
-// ASM-NEXT: .amdhsa_float_denorm_mode_16_64 0
-// ASM-NEXT: .amdhsa_dx10_clamp 0
-// ASM-NEXT: .amdhsa_ieee_mode 0
-// ASM-NEXT: .amdhsa_fp16_overflow 1
-// ASM-NEXT: .amdhsa_tg_split 1
-// ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op 1
-// ASM-NEXT: .amdhsa_exception_fp_denorm_src 1
-// ASM-NEXT: .amdhsa_exception_fp_ieee_div_zero 1
-// ASM-NEXT: .amdhsa_exception_fp_ieee_overflow 1
-// ASM-NEXT: .amdhsa_exception_fp_ieee_underflow 1
-// ASM-NEXT: .amdhsa_exception_fp_ieee_inexact 1
-// ASM-NEXT: .amdhsa_exception_int_div_zero 1
-// ASM-NEXT: .end_amdhsa_kernel
-
-.section .foo
-
-.byte .amdgcn.gfx_generation_number
-// ASM: .byte 9
-
-.byte .amdgcn.next_free_vgpr
-// ASM: .byte 0
-.byte .amdgcn.next_free_sgpr
-// ASM: .byte 0
-
-v_mov_b32_e32 v7, s10
-
-.byte .amdgcn.next_free_vgpr
-// ASM: .byte 8
-.byte .amdgcn.next_free_sgpr
-// ASM: .byte 11
-
-.set .amdgcn.next_free_vgpr, 0
-.set .amdgcn.next_free_sgpr, 0
-
-.byte .amdgcn.next_free_vgpr
-// ASM: .byte 0
-.byte .amdgcn.next_free_sgpr
-// ASM: .byte 0
-
-v_mov_b32_e32 v16, s3
-
-.byte .amdgcn.next_free_vgpr
-// ASM: .byte 17
-.byte .amdgcn.next_free_sgpr
-// ASM: .byte 4
diff --git a/llvm/test/MC/AMDGPU/hsa-gfx940-v3.s b/llvm/test/MC/AMDGPU/hsa-gfx940-v3.s
deleted file mode 100644
index 9624515ecd6fb90..000000000000000
--- a/llvm/test/MC/AMDGPU/hsa-gfx940-v3.s
+++ /dev/null
@@ -1,178 +0,0 @@
-// RUN: llvm-mc --amdhsa-code-object-version=3 -triple amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck --check-prefix=ASM %s
-// RUN: llvm-mc --amdhsa-code-object-version=3 -triple amdgcn-amd-amdhsa -mcpu=gfx940 -filetype=obj < %s > %t
-// RUN: llvm-readelf -S -r -s %t | FileCheck --check-prefix=READOBJ %s
-// RUN: llvm-objdump -s -j .rodata %t | FileCheck --check-prefix=OBJDUMP %s
-
-// READOBJ: Section Headers
-// READOBJ: .text PROGBITS {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9]+}} AX {{[0-9]+}} {{[0-9]+}} 256
-// READOBJ: .rodata PROGBITS {{[0-9a-f]+}} {{[0-9a-f]+}} 000080 {{[0-9]+}} A {{[0-9]+}} {{[0-9]+}} 64
-
-// READOBJ: Relocation section '.rela.rodata' at offset
-// READOBJ: 0000000000000010 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 10
-// READOBJ: 0000000000000050 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 110
-
-// READOBJ: Symbol table '.symtab' contains {{[0-9]+}} entries:
-// READOBJ-DAG: {{[0-9]+}}: 0000000000000100 0 FUNC LOCAL PROTECTED 2 complete
-// READOBJ-DAG: {{[0-9]+}}: 0000000000000040 64 OBJECT LOCAL DEFAULT 3 complete.kd
-// READOBJ-DAG: {{[0-9]+}}: 0000000000000000 0 FUNC LOCAL PROTECTED 2 minimal
-// READOBJ-DAG: {{[0-9]+}}: 0000000000000000 64 OBJECT LOCAL DEFAULT 3 minimal.kd
-
-// OBJDUMP: Contents of section .rodata
-// Note, relocation for KERNEL_CODE_ENTRY_BYTE_OFFSET is not resolved here.
-// minimal
-// OBJDUMP-NEXT: 0000 00000000 00000000 00000000 00000000
-// OBJDUMP-NEXT: 0010 00000000 00000000 00000000 00000000
-// OBJDUMP-NEXT: 0020 00000000 00000000 00000000 00000000
-// OBJDUMP-NEXT: 0030 0000ac00 80000000 00000000 00000000
-// complete
-// OBJDUMP-NEXT: 0040 01000000 01000000 08000000 00000000
-// OBJDUMP-NEXT: 0050 00000000 00000000 00000000 00000000
-// OBJDUMP-NEXT: 0060 00000000 00000000 00000000 00000100
-// OBJDUMP-NEXT: 0070 01510104 150f007f 5e008100 00000000
-
-.text
-// ASM: .text
-
-.amdgcn_target "amdgcn-amd-amdhsa--gfx940+xnack+sram-ecc"
-// ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx940+xnack+sram-ecc"
-
-.p2align 8
-.type minimal, at function
-minimal:
- s_endpgm
-
-.p2align 8
-.type complete, at function
-complete:
- s_endpgm
-
-.rodata
-// ASM: .rodata
-
-// Test that only specifying required directives is allowed, and that defaulted
-// values are omitted.
-.p2align 6
-.amdhsa_kernel minimal
- .amdhsa_next_free_vgpr 0
- .amdhsa_next_free_sgpr 0
- .amdhsa_accum_offset 4
-.end_amdhsa_kernel
-
-// ASM: .amdhsa_kernel minimal
-// ASM: .amdhsa_next_free_vgpr 0
-// ASM-NEXT: .amdhsa_next_free_sgpr 0
-// ASM-NEXT: .amdhsa_accum_offset 4
-// ASM: .amdhsa_tg_split 0
-// ASM: .end_amdhsa_kernel
-
-// Test that we can specify all available directives with non-default values.
-.p2align 6
-.amdhsa_kernel complete
- .amdhsa_group_segment_fixed_size 1
- .amdhsa_private_segment_fixed_size 1
- .amdhsa_user_sgpr_dispatch_ptr 1
- .amdhsa_user_sgpr_queue_ptr 1
- .amdhsa_user_sgpr_kernarg_segment_ptr 1
- .amdhsa_user_sgpr_dispatch_id 1
- .amdhsa_kernarg_size 8
- .amdhsa_user_sgpr_kernarg_preload_length 1
- .amdhsa_user_sgpr_kernarg_preload_offset 1
- .amdhsa_user_sgpr_private_segment_size 1
- .amdhsa_enable_private_segment 1
- .amdhsa_system_sgpr_workgroup_id_x 0
- .amdhsa_system_sgpr_workgroup_id_y 1
- .amdhsa_system_sgpr_workgroup_id_z 1
- .amdhsa_system_sgpr_workgroup_info 1
- .amdhsa_system_vgpr_workitem_id 1
- .amdhsa_next_free_vgpr 9
- .amdhsa_next_free_sgpr 27
- .amdhsa_accum_offset 4
- .amdhsa_reserve_vcc 0
- .amdhsa_float_round_mode_32 1
- .amdhsa_float_round_mode_16_64 1
- .amdhsa_float_denorm_mode_32 1
- .amdhsa_float_denorm_mode_16_64 0
- .amdhsa_dx10_clamp 0
- .amdhsa_ieee_mode 0
- .amdhsa_fp16_overflow 1
- .amdhsa_tg_split 1
- .amdhsa_exception_fp_ieee_invalid_op 1
- .amdhsa_exception_fp_denorm_src 1
- .amdhsa_exception_fp_ieee_div_zero 1
- .amdhsa_exception_fp_ieee_overflow 1
- .amdhsa_exception_fp_ieee_underflow 1
- .amdhsa_exception_fp_ieee_inexact 1
- .amdhsa_exception_int_div_zero 1
-.end_amdhsa_kernel
-
-// ASM: .amdhsa_kernel complete
-// ASM-NEXT: .amdhsa_group_segment_fixed_size 1
-// ASM-NEXT: .amdhsa_private_segment_fixed_size 1
-// ASM-NEXT: .amdhsa_kernarg_size 8
-// ASM-NEXT: .amdhsa_user_sgpr_count 10
-// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr 1
-// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr 1
-// ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 1
-// ASM-NEXT: .amdhsa_user_sgpr_dispatch_id 1
-// ASM-NEXT: .amdhsa_user_sgpr_kernarg_preload_length 1
-// ASM-NEXT: .amdhsa_user_sgpr_kernarg_preload_offset 1
-// ASM-NEXT: .amdhsa_user_sgpr_private_segment_size 1
-// ASM-NEXT: .amdhsa_enable_private_segment 1
-// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x 0
-// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y 1
-// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_z 1
-// ASM-NEXT: .amdhsa_system_sgpr_workgroup_info 1
-// ASM-NEXT: .amdhsa_system_vgpr_workitem_id 1
-// ASM-NEXT: .amdhsa_next_free_vgpr 9
-// ASM-NEXT: .amdhsa_next_free_sgpr 27
-// ASM-NEXT: .amdhsa_accum_offset 4
-// ASM-NEXT: .amdhsa_reserve_vcc 0
-// ASM-NEXT: .amdhsa_reserve_xnack_mask 1
-// ASM-NEXT: .amdhsa_float_round_mode_32 1
-// ASM-NEXT: .amdhsa_float_round_mode_16_64 1
-// ASM-NEXT: .amdhsa_float_denorm_mode_32 1
-// ASM-NEXT: .amdhsa_float_denorm_mode_16_64 0
-// ASM-NEXT: .amdhsa_dx10_clamp 0
-// ASM-NEXT: .amdhsa_ieee_mode 0
-// ASM-NEXT: .amdhsa_fp16_overflow 1
-// ASM-NEXT: .amdhsa_tg_split 1
-// ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op 1
-// ASM-NEXT: .amdhsa_exception_fp_denorm_src 1
-// ASM-NEXT: .amdhsa_exception_fp_ieee_div_zero 1
-// ASM-NEXT: .amdhsa_exception_fp_ieee_overflow 1
-// ASM-NEXT: .amdhsa_exception_fp_ieee_underflow 1
-// ASM-NEXT: .amdhsa_exception_fp_ieee_inexact 1
-// ASM-NEXT: .amdhsa_exception_int_div_zero 1
-// ASM-NEXT: .end_amdhsa_kernel
-
-.section .foo
-
-.byte .amdgcn.gfx_generation_number
-// ASM: .byte 9
-
-.byte .amdgcn.next_free_vgpr
-// ASM: .byte 0
-.byte .amdgcn.next_free_sgpr
-// ASM: .byte 0
-
-v_mov_b32_e32 v7, s10
-
-.byte .amdgcn.next_free_vgpr
-// ASM: .byte 8
-.byte .amdgcn.next_free_sgpr
-// ASM: .byte 11
-
-.set .amdgcn.next_free_vgpr, 0
-.set .amdgcn.next_free_sgpr, 0
-
-.byte .amdgcn.next_free_vgpr
-// ASM: .byte 0
-.byte .amdgcn.next_free_sgpr
-// ASM: .byte 0
-
-v_mov_b32_e32 v16, s3
-
-.byte .amdgcn.next_free_vgpr
-// ASM: .byte 17
-.byte .amdgcn.next_free_sgpr
-// ASM: .byte 4
diff --git a/llvm/test/MC/AMDGPU/hsa-v3.s b/llvm/test/MC/AMDGPU/hsa-v3.s
deleted file mode 100644
index 9f854986d7bc447..000000000000000
--- a/llvm/test/MC/AMDGPU/hsa-v3.s
+++ /dev/null
@@ -1,304 +0,0 @@
-// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx904 --amdhsa-code-object-version=3 -mattr=+xnack < %s | FileCheck --check-prefix=ASM %s
-// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx904 --amdhsa-code-object-version=3 -mattr=+xnack -filetype=obj < %s > %t
-// RUN: llvm-readelf -S -r -s %t | FileCheck --check-prefix=READOBJ %s
-// RUN: llvm-objdump -s -j .rodata %t | FileCheck --check-prefix=OBJDUMP %s
-
-// READOBJ: Section Headers
-// READOBJ: .text PROGBITS {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9]+}} AX {{[0-9]+}} {{[0-9]+}} 256
-// READOBJ: .rodata PROGBITS {{[0-9a-f]+}} {{[0-9a-f]+}} 000100 {{[0-9]+}} A {{[0-9]+}} {{[0-9]+}} 64
-
-// READOBJ: Relocation section '.rela.rodata' at offset
-// READOBJ: 0000000000000010 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 10
-// READOBJ: 0000000000000050 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 110
-// READOBJ: 0000000000000090 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 210
-// READOBJ: 00000000000000d0 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 310
-
-// READOBJ: Symbol table '.symtab' contains {{[0-9]+}} entries:
-// READOBJ: 0000000000000000 0 FUNC LOCAL PROTECTED 2 minimal
-// READOBJ-NEXT: 0000000000000100 0 FUNC LOCAL PROTECTED 2 complete
-// READOBJ-NEXT: 0000000000000200 0 FUNC LOCAL PROTECTED 2 special_sgpr
-// READOBJ-NEXT: 0000000000000300 0 FUNC LOCAL PROTECTED 2 disabled_user_sgpr
-// READOBJ-NEXT: 0000000000000000 64 OBJECT LOCAL DEFAULT 3 minimal.kd
-// READOBJ-NEXT: 0000000000000040 64 OBJECT LOCAL DEFAULT 3 complete.kd
-// READOBJ-NEXT: 0000000000000080 64 OBJECT LOCAL DEFAULT 3 special_sgpr.kd
-// READOBJ-NEXT: 00000000000000c0 64 OBJECT LOCAL DEFAULT 3 disabled_user_sgpr.kd
-
-// OBJDUMP: Contents of section .rodata
-// Note, relocation for KERNEL_CODE_ENTRY_BYTE_OFFSET is not resolved here.
-// minimal
-// OBJDUMP-NEXT: 0000 00000000 00000000 00000000 00000000
-// OBJDUMP-NEXT: 0010 00000000 00000000 00000000 00000000
-// OBJDUMP-NEXT: 0020 00000000 00000000 00000000 00000000
-// OBJDUMP-NEXT: 0030 0000ac00 80000000 00000000 00000000
-// complete
-// OBJDUMP-NEXT: 0040 01000000 01000000 08000000 00000000
-// OBJDUMP-NEXT: 0050 00000000 00000000 00000000 00000000
-// OBJDUMP-NEXT: 0060 00000000 00000000 00000000 00000000
-// OBJDUMP-NEXT: 0070 c2500104 1f0f007f 7f000000 00000000
-// special_sgpr
-// OBJDUMP-NEXT: 0080 00000000 00000000 00000000 00000000
-// OBJDUMP-NEXT: 0090 00000000 00000000 00000000 00000000
-// OBJDUMP-NEXT: 00a0 00000000 00000000 00000000 00000000
-// OBJDUMP-NEXT: 00b0 00010000 80000000 00000000 00000000
-// disabled_user_sgpr
-// OBJDUMP-NEXT: 00c0 00000000 00000000 00000000 00000000
-// OBJDUMP-NEXT: 00d0 00000000 00000000 00000000 00000000
-// OBJDUMP-NEXT: 00e0 00000000 00000000 00000000 00000000
-// OBJDUMP-NEXT: 00f0 0000ac00 80000000 00000000 00000000
-
-.text
-// ASM: .text
-
-.amdgcn_target "amdgcn-amd-amdhsa--gfx904+xnack"
-// ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx904+xnack"
-
-.p2align 8
-.type minimal, at function
-minimal:
- s_endpgm
-
-.p2align 8
-.type complete, at function
-complete:
- s_endpgm
-
-.p2align 8
-.type special_sgpr, at function
-special_sgpr:
- s_endpgm
-
-.p2align 8
-.type disabled_user_sgpr, at function
-disabled_user_sgpr:
- s_endpgm
-
-.rodata
-// ASM: .rodata
-
-// Test that only specifying required directives is allowed, and that defaulted
-// values are omitted.
-.p2align 6
-.amdhsa_kernel minimal
- .amdhsa_next_free_vgpr 0
- .amdhsa_next_free_sgpr 0
-.end_amdhsa_kernel
-
-// ASM: .amdhsa_kernel minimal
-// ASM: .amdhsa_next_free_vgpr 0
-// ASM-NEXT: .amdhsa_next_free_sgpr 0
-// ASM: .end_amdhsa_kernel
-
-// Test that we can specify all available directives with non-default values.
-.p2align 6
-.amdhsa_kernel complete
- .amdhsa_group_segment_fixed_size 1
- .amdhsa_private_segment_fixed_size 1
- .amdhsa_kernarg_size 8
- .amdhsa_user_sgpr_private_segment_buffer 1
- .amdhsa_user_sgpr_dispatch_ptr 1
- .amdhsa_user_sgpr_queue_ptr 1
- .amdhsa_user_sgpr_kernarg_segment_ptr 1
- .amdhsa_user_sgpr_dispatch_id 1
- .amdhsa_user_sgpr_flat_scratch_init 1
- .amdhsa_user_sgpr_private_segment_size 1
- .amdhsa_system_sgpr_private_segment_wavefront_offset 1
- .amdhsa_system_sgpr_workgroup_id_x 0
- .amdhsa_system_sgpr_workgroup_id_y 1
- .amdhsa_system_sgpr_workgroup_id_z 1
- .amdhsa_system_sgpr_workgroup_info 1
- .amdhsa_system_vgpr_workitem_id 1
- .amdhsa_next_free_vgpr 9
- .amdhsa_next_free_sgpr 27
- .amdhsa_reserve_vcc 0
- .amdhsa_reserve_flat_scratch 0
- .amdhsa_reserve_xnack_mask 1
- .amdhsa_float_round_mode_32 1
- .amdhsa_float_round_mode_16_64 1
- .amdhsa_float_denorm_mode_32 1
- .amdhsa_float_denorm_mode_16_64 0
- .amdhsa_dx10_clamp 0
- .amdhsa_ieee_mode 0
- .amdhsa_fp16_overflow 1
- .amdhsa_exception_fp_ieee_invalid_op 1
- .amdhsa_exception_fp_denorm_src 1
- .amdhsa_exception_fp_ieee_div_zero 1
- .amdhsa_exception_fp_ieee_overflow 1
- .amdhsa_exception_fp_ieee_underflow 1
- .amdhsa_exception_fp_ieee_inexact 1
- .amdhsa_exception_int_div_zero 1
-.end_amdhsa_kernel
-
-// ASM: .amdhsa_kernel complete
-// ASM-NEXT: .amdhsa_group_segment_fixed_size 1
-// ASM-NEXT: .amdhsa_private_segment_fixed_size 1
-// ASM-NEXT: .amdhsa_kernarg_size 8
-// ASM-NEXT: .amdhsa_user_sgpr_count 15
-// ASM-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1
-// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr 1
-// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr 1
-// ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 1
-// ASM-NEXT: .amdhsa_user_sgpr_dispatch_id 1
-// ASM-NEXT: .amdhsa_user_sgpr_flat_scratch_init 1
-// ASM-NEXT: .amdhsa_user_sgpr_private_segment_size 1
-// ASM-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 1
-// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x 0
-// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y 1
-// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_z 1
-// ASM-NEXT: .amdhsa_system_sgpr_workgroup_info 1
-// ASM-NEXT: .amdhsa_system_vgpr_workitem_id 1
-// ASM-NEXT: .amdhsa_next_free_vgpr 9
-// ASM-NEXT: .amdhsa_next_free_sgpr 27
-// ASM-NEXT: .amdhsa_reserve_vcc 0
-// ASM-NEXT: .amdhsa_reserve_flat_scratch 0
-// ASM-NEXT: .amdhsa_reserve_xnack_mask 1
-// ASM-NEXT: .amdhsa_float_round_mode_32 1
-// ASM-NEXT: .amdhsa_float_round_mode_16_64 1
-// ASM-NEXT: .amdhsa_float_denorm_mode_32 1
-// ASM-NEXT: .amdhsa_float_denorm_mode_16_64 0
-// ASM-NEXT: .amdhsa_dx10_clamp 0
-// ASM-NEXT: .amdhsa_ieee_mode 0
-// ASM-NEXT: .amdhsa_fp16_overflow 1
-// ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op 1
-// ASM-NEXT: .amdhsa_exception_fp_denorm_src 1
-// ASM-NEXT: .amdhsa_exception_fp_ieee_div_zero 1
-// ASM-NEXT: .amdhsa_exception_fp_ieee_overflow 1
-// ASM-NEXT: .amdhsa_exception_fp_ieee_underflow 1
-// ASM-NEXT: .amdhsa_exception_fp_ieee_inexact 1
-// ASM-NEXT: .amdhsa_exception_int_div_zero 1
-// ASM-NEXT: .end_amdhsa_kernel
-
-// Test that we are including special SGPR usage in the granulated count.
-.p2align 6
-.amdhsa_kernel special_sgpr
- // Same next_free_sgpr as "complete", but...
- .amdhsa_next_free_sgpr 27
- // ...on GFX9 this should require an additional 6 SGPRs, pushing us from
- // 3 granules to 4
- .amdhsa_reserve_flat_scratch 1
-
- .amdhsa_reserve_vcc 0
- .amdhsa_reserve_xnack_mask 1
-
- .amdhsa_float_denorm_mode_16_64 0
- .amdhsa_dx10_clamp 0
- .amdhsa_ieee_mode 0
- .amdhsa_next_free_vgpr 0
-.end_amdhsa_kernel
-
-// ASM: .amdhsa_kernel special_sgpr
-// ASM: .amdhsa_next_free_vgpr 0
-// ASM-NEXT: .amdhsa_next_free_sgpr 27
-// ASM-NEXT: .amdhsa_reserve_vcc 0
-// ASM-NEXT: .amdhsa_reserve_xnack_mask 1
-// ASM: .amdhsa_float_denorm_mode_16_64 0
-// ASM-NEXT: .amdhsa_dx10_clamp 0
-// ASM-NEXT: .amdhsa_ieee_mode 0
-// ASM: .end_amdhsa_kernel
-
-// Test that explicitly disabling user_sgpr's does not affect the user_sgpr
-// count, i.e. this should produce the same descriptor as minimal.
-.p2align 6
-.amdhsa_kernel disabled_user_sgpr
- .amdhsa_user_sgpr_private_segment_buffer 0
- .amdhsa_next_free_vgpr 0
- .amdhsa_next_free_sgpr 0
-.end_amdhsa_kernel
-
-// ASM: .amdhsa_kernel disabled_user_sgpr
-// ASM: .amdhsa_next_free_vgpr 0
-// ASM-NEXT: .amdhsa_next_free_sgpr 0
-// ASM: .end_amdhsa_kernel
-
-.section .foo
-
-.byte .amdgcn.gfx_generation_number
-// ASM: .byte 9
-
-.byte .amdgcn.gfx_generation_minor
-// ASM: .byte 0
-
-.byte .amdgcn.gfx_generation_stepping
-// ASM: .byte 4
-
-.byte .amdgcn.next_free_vgpr
-// ASM: .byte 0
-.byte .amdgcn.next_free_sgpr
-// ASM: .byte 0
-
-v_mov_b32_e32 v7, s10
-
-.byte .amdgcn.next_free_vgpr
-// ASM: .byte 8
-.byte .amdgcn.next_free_sgpr
-// ASM: .byte 11
-
-.set .amdgcn.next_free_vgpr, 0
-.set .amdgcn.next_free_sgpr, 0
-
-.byte .amdgcn.next_free_vgpr
-// ASM: .byte 0
-.byte .amdgcn.next_free_sgpr
-// ASM: .byte 0
-
-v_mov_b32_e32 v16, s3
-
-.byte .amdgcn.next_free_vgpr
-// ASM: .byte 17
-.byte .amdgcn.next_free_sgpr
-// ASM: .byte 4
-
-// Metadata
-
-.amdgpu_metadata
- amdhsa.version:
- - 3
- - 0
- amdhsa.kernels:
- - .name: amd_kernel_code_t_test_all
- .symbol: amd_kernel_code_t_test_all at kd
- .kernarg_segment_size: 8
- .group_segment_fixed_size: 16
- .private_segment_fixed_size: 32
- .kernarg_segment_align: 64
- .wavefront_size: 128
- .sgpr_count: 14
- .vgpr_count: 40
- .max_flat_workgroup_size: 256
- - .name: amd_kernel_code_t_minimal
- .symbol: amd_kernel_code_t_minimal at kd
- .kernarg_segment_size: 8
- .group_segment_fixed_size: 16
- .private_segment_fixed_size: 32
- .kernarg_segment_align: 64
- .wavefront_size: 128
- .sgpr_count: 14
- .vgpr_count: 40
- .max_flat_workgroup_size: 256
-.end_amdgpu_metadata
-
-// ASM: .amdgpu_metadata
-// ASM: amdhsa.kernels:
-// ASM: - .group_segment_fixed_size: 16
-// ASM: .kernarg_segment_align: 64
-// ASM: .kernarg_segment_size: 8
-// ASM: .max_flat_workgroup_size: 256
-// ASM: .name: amd_kernel_code_t_test_all
-// ASM: .private_segment_fixed_size: 32
-// ASM: .sgpr_count: 14
-// ASM: .symbol: 'amd_kernel_code_t_test_all at kd'
-// ASM: .vgpr_count: 40
-// ASM: .wavefront_size: 128
-// ASM: - .group_segment_fixed_size: 16
-// ASM: .kernarg_segment_align: 64
-// ASM: .kernarg_segment_size: 8
-// ASM: .max_flat_workgroup_size: 256
-// ASM: .name: amd_kernel_code_t_minimal
-// ASM: .private_segment_fixed_size: 32
-// ASM: .sgpr_count: 14
-// ASM: .symbol: 'amd_kernel_code_t_minimal at kd'
-// ASM: .vgpr_count: 40
-// ASM: .wavefront_size: 128
-// ASM: amdhsa.version:
-// ASM-NEXT: - 3
-// ASM-NEXT: - 0
-// ASM: .end_amdgpu_metadata
diff --git a/llvm/test/MC/AMDGPU/user-sgpr-count-diag.s b/llvm/test/MC/AMDGPU/user-sgpr-count-diag.s
index 63e532e0ffa3768..7e3ae8424cc7bdc 100644
--- a/llvm/test/MC/AMDGPU/user-sgpr-count-diag.s
+++ b/llvm/test/MC/AMDGPU/user-sgpr-count-diag.s
@@ -1,4 +1,4 @@
-// RUN: not llvm-mc --amdhsa-code-object-version=3 -triple amdgcn-amd-amdhsa -mcpu=gfx90a %s 2>&1 >/dev/null | FileCheck -check-prefix=ERR %s
+// RUN: not llvm-mc --amdhsa-code-object-version=4 -triple amdgcn-amd-amdhsa -mcpu=gfx90a %s 2>&1 >/dev/null | FileCheck -check-prefix=ERR %s
.amdhsa_kernel implied_count_too_low_0
.amdhsa_user_sgpr_count 0
diff --git a/llvm/test/MC/AMDGPU/user-sgpr-count.s b/llvm/test/MC/AMDGPU/user-sgpr-count.s
index aa8970185eb04bc..950c514f786b254 100644
--- a/llvm/test/MC/AMDGPU/user-sgpr-count.s
+++ b/llvm/test/MC/AMDGPU/user-sgpr-count.s
@@ -1,10 +1,10 @@
-// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx90a --amdhsa-code-object-version=3 -mattr=+xnack < %s | FileCheck --check-prefix=ASM %s
+// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx90a --amdhsa-code-object-version=4 -mattr=+xnack < %s | FileCheck --check-prefix=ASM %s
.text
// ASM: .text
-.amdgcn_target "amdgcn-amd-amdhsa--gfx90a+xnack+sram-ecc"
-// ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx90a+xnack+sram-ecc"
+.amdgcn_target "amdgcn-amd-amdhsa--gfx90a:xnack+"
+// ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx90a:xnack+"
// ASM-LABEL: .amdhsa_kernel user_sgprs_implied_count
More information about the cfe-commits
mailing list