[clang] 544d912 - [AMDGPU] Remove Code Object V3 (#67118)

via cfe-commits cfe-commits at lists.llvm.org
Sun Oct 15 23:21:54 PDT 2023


Author: Pierre van Houtryve
Date: 2023-10-16T08:21:48+02:00
New Revision: 544d91280c26fd5f7acd70eac4d667863562f4cc

URL: https://github.com/llvm/llvm-project/commit/544d91280c26fd5f7acd70eac4d667863562f4cc
DIFF: https://github.com/llvm/llvm-project/commit/544d91280c26fd5f7acd70eac4d667863562f4cc.diff

LOG: [AMDGPU] Remove Code Object V3 (#67118)

V3 has been deprecated for a while as well, so it can safely be removed
like V2 was removed.

- [Clang] Set minimum code object version to 4
- [lld] Fix tests using code object v3
- Remove code object V3 from the AMDGPU backend, and delete or port v3
tests to v4.
- Update docs to make it clear V3 can no longer be emitted.

Added: 
    llvm/test/CodeGen/AMDGPU/hsa-metadata-enqueue-kernel-.ll
    llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full.ll
    llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args-v4.ll
    llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-present-asan.ll
    llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-v4.ll
    llvm/test/CodeGen/AMDGPU/hsa-metadata-images.ll
    llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-1.ll
    llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-3.ll
    llvm/test/CodeGen/AMDGPU/hsa-metadata-kernel-code-props.ll
    llvm/test/MC/AMDGPU/hsa-diag-v4.s

Modified: 
    clang/include/clang/Basic/TargetOptions.h
    clang/include/clang/Driver/Options.td
    clang/lib/Driver/ToolChains/CommonArgs.cpp
    clang/test/CodeGenCUDA/amdgpu-code-object-version.cu
    clang/test/Driver/hip-code-object-version.hip
    clang/test/Driver/hip-device-libs.hip
    lld/test/ELF/amdgpu-abi-version.s
    llvm/docs/AMDGPUUsage.rst
    llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
    llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
    llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h
    llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
    llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
    llvm/lib/Target/AMDGPU/SIISelLowering.cpp
    llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
    llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
    llvm/test/CodeGen/AMDGPU/GlobalISel/implicit-kernarg-backend-usage-global-isel.ll
    llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ctor-dtor-list.ll
    llvm/test/CodeGen/AMDGPU/implicit-kernarg-backend-usage.ll
    llvm/test/CodeGen/AMDGPU/kernarg-size.ll
    llvm/test/CodeGen/AMDGPU/stack-realign-kernel.ll
    llvm/test/CodeGen/AMDGPU/trap-abis.ll
    llvm/test/MC/AMDGPU/user-sgpr-count-diag.s
    llvm/test/MC/AMDGPU/user-sgpr-count.s

Removed: 
    llvm/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size-v3.ll
    llvm/test/CodeGen/AMDGPU/directive-amdgcn-target-v3.ll
    llvm/test/CodeGen/AMDGPU/hsa-metadata-enqueue-kernel-v3.ll
    llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full-v3.ll
    llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args-v3.ll
    llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-present-v3-asan.ll
    llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-v3.ll
    llvm/test/CodeGen/AMDGPU/hsa-metadata-images-v3.ll
    llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-1-v3.ll
    llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-3-v3.ll
    llvm/test/CodeGen/AMDGPU/hsa-metadata-kernel-code-props-v3.ll
    llvm/test/MC/AMDGPU/hsa-diag-v3.s
    llvm/test/MC/AMDGPU/hsa-gfx10-v3.s
    llvm/test/MC/AMDGPU/hsa-gfx11-v3.s
    llvm/test/MC/AMDGPU/hsa-gfx90a-v3.s
    llvm/test/MC/AMDGPU/hsa-gfx940-v3.s
    llvm/test/MC/AMDGPU/hsa-v3.s


################################################################################
diff  --git a/clang/include/clang/Basic/TargetOptions.h b/clang/include/clang/Basic/TargetOptions.h
index 8bb03249b7f8308..ba3acd029587160 100644
--- a/clang/include/clang/Basic/TargetOptions.h
+++ b/clang/include/clang/Basic/TargetOptions.h
@@ -83,7 +83,7 @@ class TargetOptions {
   enum CodeObjectVersionKind {
     COV_None,
     COV_2 = 200, // Unsupported.
-    COV_3 = 300,
+    COV_3 = 300, // Unsupported.
     COV_4 = 400,
     COV_5 = 500,
   };

diff  --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 640044622fc09ee..a89d6b6579f1176 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -4682,9 +4682,9 @@ defm amdgpu_ieee : BoolOption<"m", "amdgpu-ieee",
 def mcode_object_version_EQ : Joined<["-"], "mcode-object-version=">, Group<m_Group>,
   HelpText<"Specify code object ABI version. Defaults to 4. (AMDGPU only)">,
   Visibility<[ClangOption, CC1Option]>,
-  Values<"none,3,4,5">,
+  Values<"none,4,5">,
   NormalizedValuesScope<"TargetOptions">,
-  NormalizedValues<["COV_None", "COV_3", "COV_4", "COV_5"]>,
+  NormalizedValues<["COV_None", "COV_4", "COV_5"]>,
   MarshallingInfoEnum<TargetOpts<"CodeObjectVersion">, "COV_4">;
 
 defm cumode : SimpleMFlag<"cumode",

diff  --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp
index 25fd940584624ee..f104ec5a881cb96 100644
--- a/clang/lib/Driver/ToolChains/CommonArgs.cpp
+++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp
@@ -2338,7 +2338,7 @@ getAMDGPUCodeObjectArgument(const Driver &D, const llvm::opt::ArgList &Args) {
 
 void tools::checkAMDGPUCodeObjectVersion(const Driver &D,
                                          const llvm::opt::ArgList &Args) {
-  const unsigned MinCodeObjVer = 3;
+  const unsigned MinCodeObjVer = 4;
   const unsigned MaxCodeObjVer = 5;
 
   if (auto *CodeObjArg = getAMDGPUCodeObjectArgument(D, Args)) {

diff  --git a/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu b/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu
index 0ddd63faf46f28f..ff5deaf9ab850d2 100644
--- a/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu
+++ b/clang/test/CodeGenCUDA/amdgpu-code-object-version.cu
@@ -3,9 +3,6 @@
 // RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm \
 // RUN:   -o - %s | FileCheck %s -check-prefix=V4
 
-// RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm \
-// RUN:   -mcode-object-version=3 -o - %s | FileCheck -check-prefix=V3 %s
-
 // RUN: %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm \
 // RUN:   -mcode-object-version=4 -o - %s | FileCheck -check-prefix=V4 %s
 
@@ -18,7 +15,6 @@
 // RUN: not %clang_cc1 -fcuda-is-device -triple amdgcn-amd-amdhsa -emit-llvm \
 // RUN:   -mcode-object-version=4.1 -o - %s 2>&1| FileCheck %s -check-prefix=INV
 
-// V3: !{{.*}} = !{i32 1, !"amdgpu_code_object_version", i32 300}
 // V4: !{{.*}} = !{i32 1, !"amdgpu_code_object_version", i32 400}
 // V5: !{{.*}} = !{i32 1, !"amdgpu_code_object_version", i32 500}
 // NONE-NOT: !{{.*}} = !{i32 1, !"amdgpu_code_object_version",

diff  --git a/clang/test/Driver/hip-code-object-version.hip b/clang/test/Driver/hip-code-object-version.hip
index 33559b6576e7d30..af5f9a3da21dfd3 100644
--- a/clang/test/Driver/hip-code-object-version.hip
+++ b/clang/test/Driver/hip-code-object-version.hip
@@ -1,20 +1,5 @@
 // REQUIRES: amdgpu-registered-target
 
-// Check bundle ID for code object v3.
-
-// RUN: not %clang -### --target=x86_64-linux-gnu \
-// RUN:   -mcode-object-version=3 \
-// RUN:   --offload-arch=gfx906 --rocm-path=%S/Inputs/rocm \
-// RUN:   %s 2>&1 | FileCheck -check-prefix=V3 %s
-
-// RUN: not %clang -### --target=x86_64-linux-gnu \
-// RUN:   -mcode-object-version=4 -mcode-object-version=3 \
-// RUN:   --offload-arch=gfx906 --rocm-path=%S/Inputs/rocm \
-// RUN:   %s 2>&1 | FileCheck -check-prefix=V3 %s
-
-// V3: "-mcode-object-version=3"
-// V3: "-mllvm" "--amdhsa-code-object-version=3"
-// V3: "-targets=host-x86_64-unknown-linux,hip-amdgcn-amd-amdhsa--gfx906"
 
 // Check bundle ID for code object version 4.
 
@@ -62,6 +47,13 @@
 // INVALID_2: error: invalid integral value '2' in '-mcode-object-version=2'
 // INVALID_2-NOT: error: invalid integral value
 
+// RUN: not %clang -### --target=x86_64-linux-gnu \
+// RUN:   -mcode-object-version=3 \
+// RUN:   --offload-arch=gfx906 --rocm-path=%S/Inputs/rocm \
+// RUN:   %s 2>&1 | FileCheck -check-prefix=INVALID_3 %s
+// INVALID_3: error: invalid integral value '3' in '-mcode-object-version=3'
+// INVALID_3-NOT: error: invalid integral value
+
 // Check LLVM code object version option --amdhsa-code-object-version
 // is passed to -cc1 and -cc1as, and -mcode-object-version is passed
 // to -cc1 but not -cc1as.

diff  --git a/clang/test/Driver/hip-device-libs.hip b/clang/test/Driver/hip-device-libs.hip
index 71d9554da696b42..6ac5778721ba5b7 100644
--- a/clang/test/Driver/hip-device-libs.hip
+++ b/clang/test/Driver/hip-device-libs.hip
@@ -168,12 +168,6 @@
 // RUN:   --rocm-path=%S/Inputs/rocm %S/Inputs/hip_multiple_inputs/b.hip \
 // RUN: 2>&1 | FileCheck %s --check-prefixes=NOABI4
 
-// Test -mcode-object-version=3
-// RUN: %clang -### --target=x86_64-linux-gnu --offload-arch=gfx900 \
-// RUN:   -mcode-object-version=3 \
-// RUN:   --rocm-path=%S/Inputs/rocm %S/Inputs/hip_multiple_inputs/b.hip \
-// RUN: 2>&1 | FileCheck %s --check-prefixes=ABI4
-
 // Test -mcode-object-version=4
 // RUN: %clang -### --target=x86_64-linux-gnu --offload-arch=gfx900 \
 // RUN:   -mcode-object-version=4 \

diff  --git a/lld/test/ELF/amdgpu-abi-version.s b/lld/test/ELF/amdgpu-abi-version.s
index 455a52aec921092..72b67fdaeb1a1b6 100644
--- a/lld/test/ELF/amdgpu-abi-version.s
+++ b/lld/test/ELF/amdgpu-abi-version.s
@@ -1,11 +1,3 @@
-# REQUIRES: amdgpu
-# RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=3 -filetype=obj %s -o %t.o
-# RUN: ld.lld -shared %t.o -o %t.so
-# RUN: llvm-readobj --file-headers %t.so | FileCheck --check-prefix=COV3 %s
-
-# COV3: OS/ABI: AMDGPU_HSA (0x40)
-# COV3: ABIVersion: 1
-
 # RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx900 --amdhsa-code-object-version=4 -filetype=obj %s -o %t.o
 # RUN: ld.lld -shared %t.o -o %t.so
 # RUN: llvm-readobj --file-headers %t.so | FileCheck --check-prefix=COV4 %s

diff  --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst
index 8022816d7e616d3..ed9581ccc93dfac 100644
--- a/llvm/docs/AMDGPUUsage.rst
+++ b/llvm/docs/AMDGPUUsage.rst
@@ -1409,12 +1409,10 @@ The AMDGPU backend uses the following ELF header:
   object conforms:
 
   * ``ELFABIVERSION_AMDGPU_HSA_V2`` is used to specify the version of AMD HSA
-    runtime ABI for code object V2. Specify using the Clang option
-    ``-mcode-object-version=2``.
+    runtime ABI for code object V2. Can no longer be emitted by this version of LLVM.
 
   * ``ELFABIVERSION_AMDGPU_HSA_V3`` is used to specify the version of AMD HSA
-    runtime ABI for code object V3. Specify using the Clang option
-    ``-mcode-object-version=3``.
+    runtime ABI for code object V3. Can no longer be emitted by this version of LLVM.
 
   * ``ELFABIVERSION_AMDGPU_HSA_V4`` is used to specify the version of AMD HSA
     runtime ABI for code object V4. Specify using the Clang option
@@ -3402,8 +3400,7 @@ Code Object V3 Metadata
 +++++++++++++++++++++++
 
 .. warning::
-  Code object V3 is not the default code object version emitted by this version
-  of LLVM.
+  Code object V3 generation is no longer supported by this version of LLVM.
 
 Code object V3 and above metadata is specified by the ``NT_AMDGPU_METADATA`` note
 record (see :ref:`amdgpu-note-records-v3-onwards`).

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index aadc4a68ea13278..8d0ef67a615dfc6 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -341,9 +341,6 @@ bool AMDGPUAsmPrinter::doInitialization(Module &M) {
 
   if (TM.getTargetTriple().getOS() == Triple::AMDHSA) {
     switch (CodeObjectVersion) {
-    case AMDGPU::AMDHSA_COV3:
-      HSAMetadataStream.reset(new HSAMD::MetadataStreamerMsgPackV3());
-      break;
     case AMDGPU::AMDHSA_COV4:
       HSAMetadataStream.reset(new HSAMD::MetadataStreamerMsgPackV4());
       break;

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
index 5060cd3aec581ce..b51a876750b58b0 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.cpp
@@ -49,14 +49,14 @@ namespace AMDGPU {
 namespace HSAMD {
 
 //===----------------------------------------------------------------------===//
-// HSAMetadataStreamerV3
+// HSAMetadataStreamerV4
 //===----------------------------------------------------------------------===//
 
-void MetadataStreamerMsgPackV3::dump(StringRef HSAMetadataString) const {
+void MetadataStreamerMsgPackV4::dump(StringRef HSAMetadataString) const {
   errs() << "AMDGPU HSA Metadata:\n" << HSAMetadataString << '\n';
 }
 
-void MetadataStreamerMsgPackV3::verify(StringRef HSAMetadataString) const {
+void MetadataStreamerMsgPackV4::verify(StringRef HSAMetadataString) const {
   errs() << "AMDGPU HSA Metadata Parser Test: ";
 
   msgpack::Document FromHSAMetadataString;
@@ -78,7 +78,7 @@ void MetadataStreamerMsgPackV3::verify(StringRef HSAMetadataString) const {
 }
 
 std::optional<StringRef>
-MetadataStreamerMsgPackV3::getAccessQualifier(StringRef AccQual) const {
+MetadataStreamerMsgPackV4::getAccessQualifier(StringRef AccQual) const {
   return StringSwitch<std::optional<StringRef>>(AccQual)
       .Case("read_only", StringRef("read_only"))
       .Case("write_only", StringRef("write_only"))
@@ -86,7 +86,7 @@ MetadataStreamerMsgPackV3::getAccessQualifier(StringRef AccQual) const {
       .Default(std::nullopt);
 }
 
-std::optional<StringRef> MetadataStreamerMsgPackV3::getAddressSpaceQualifier(
+std::optional<StringRef> MetadataStreamerMsgPackV4::getAddressSpaceQualifier(
     unsigned AddressSpace) const {
   switch (AddressSpace) {
   case AMDGPUAS::PRIVATE_ADDRESS:
@@ -107,7 +107,7 @@ std::optional<StringRef> MetadataStreamerMsgPackV3::getAddressSpaceQualifier(
 }
 
 StringRef
-MetadataStreamerMsgPackV3::getValueKind(Type *Ty, StringRef TypeQual,
+MetadataStreamerMsgPackV4::getValueKind(Type *Ty, StringRef TypeQual,
                                         StringRef BaseTypeName) const {
   if (TypeQual.contains("pipe"))
     return "pipe";
@@ -134,7 +134,7 @@ MetadataStreamerMsgPackV3::getValueKind(Type *Ty, StringRef TypeQual,
                    : "by_value");
 }
 
-std::string MetadataStreamerMsgPackV3::getTypeName(Type *Ty,
+std::string MetadataStreamerMsgPackV4::getTypeName(Type *Ty,
                                                    bool Signed) const {
   switch (Ty->getTypeID()) {
   case Type::IntegerTyID: {
@@ -173,7 +173,7 @@ std::string MetadataStreamerMsgPackV3::getTypeName(Type *Ty,
 }
 
 msgpack::ArrayDocNode
-MetadataStreamerMsgPackV3::getWorkGroupDimensions(MDNode *Node) const {
+MetadataStreamerMsgPackV4::getWorkGroupDimensions(MDNode *Node) const {
   auto Dims = HSAMetadataDoc->getArrayNode();
   if (Node->getNumOperands() != 3)
     return Dims;
@@ -184,14 +184,20 @@ MetadataStreamerMsgPackV3::getWorkGroupDimensions(MDNode *Node) const {
   return Dims;
 }
 
-void MetadataStreamerMsgPackV3::emitVersion() {
+void MetadataStreamerMsgPackV4::emitVersion() {
   auto Version = HSAMetadataDoc->getArrayNode();
-  Version.push_back(Version.getDocument()->getNode(VersionMajorV3));
-  Version.push_back(Version.getDocument()->getNode(VersionMinorV3));
+  Version.push_back(Version.getDocument()->getNode(VersionMajorV4));
+  Version.push_back(Version.getDocument()->getNode(VersionMinorV4));
   getRootMetadata("amdhsa.version") = Version;
 }
 
-void MetadataStreamerMsgPackV3::emitPrintf(const Module &Mod) {
+void MetadataStreamerMsgPackV4::emitTargetID(
+    const IsaInfo::AMDGPUTargetID &TargetID) {
+  getRootMetadata("amdhsa.target") =
+      HSAMetadataDoc->getNode(TargetID.toString(), /*Copy=*/true);
+}
+
+void MetadataStreamerMsgPackV4::emitPrintf(const Module &Mod) {
   auto Node = Mod.getNamedMetadata("llvm.printf.fmts");
   if (!Node)
     return;
@@ -204,7 +210,7 @@ void MetadataStreamerMsgPackV3::emitPrintf(const Module &Mod) {
   getRootMetadata("amdhsa.printf") = Printf;
 }
 
-void MetadataStreamerMsgPackV3::emitKernelLanguage(const Function &Func,
+void MetadataStreamerMsgPackV4::emitKernelLanguage(const Function &Func,
                                                    msgpack::MapDocNode Kern) {
   // TODO: What about other languages?
   auto Node = Func.getParent()->getNamedMetadata("opencl.ocl.version");
@@ -223,7 +229,7 @@ void MetadataStreamerMsgPackV3::emitKernelLanguage(const Function &Func,
   Kern[".language_version"] = LanguageVersion;
 }
 
-void MetadataStreamerMsgPackV3::emitKernelAttrs(const Function &Func,
+void MetadataStreamerMsgPackV4::emitKernelAttrs(const Function &Func,
                                                 msgpack::MapDocNode Kern) {
 
   if (auto Node = Func.getMetadata("reqd_work_group_size"))
@@ -248,7 +254,7 @@ void MetadataStreamerMsgPackV3::emitKernelAttrs(const Function &Func,
     Kern[".kind"] = Kern.getDocument()->getNode("fini");
 }
 
-void MetadataStreamerMsgPackV3::emitKernelArgs(const MachineFunction &MF,
+void MetadataStreamerMsgPackV4::emitKernelArgs(const MachineFunction &MF,
                                                msgpack::MapDocNode Kern) {
   auto &Func = MF.getFunction();
   unsigned Offset = 0;
@@ -261,7 +267,7 @@ void MetadataStreamerMsgPackV3::emitKernelArgs(const MachineFunction &MF,
   Kern[".args"] = Args;
 }
 
-void MetadataStreamerMsgPackV3::emitKernelArg(const Argument &Arg,
+void MetadataStreamerMsgPackV4::emitKernelArg(const Argument &Arg,
                                               unsigned &Offset,
                                               msgpack::ArrayDocNode Args) {
   auto Func = Arg.getParent();
@@ -326,7 +332,7 @@ void MetadataStreamerMsgPackV3::emitKernelArg(const Argument &Arg,
                 AccQual, TypeQual);
 }
 
-void MetadataStreamerMsgPackV3::emitKernelArg(
+void MetadataStreamerMsgPackV4::emitKernelArg(
     const DataLayout &DL, Type *Ty, Align Alignment, StringRef ValueKind,
     unsigned &Offset, msgpack::ArrayDocNode Args, MaybeAlign PointeeAlign,
     StringRef Name, StringRef TypeName, StringRef BaseTypeName,
@@ -375,7 +381,7 @@ void MetadataStreamerMsgPackV3::emitKernelArg(
   Args.push_back(Arg);
 }
 
-void MetadataStreamerMsgPackV3::emitHiddenKernelArgs(
+void MetadataStreamerMsgPackV4::emitHiddenKernelArgs(
     const MachineFunction &MF, unsigned &Offset, msgpack::ArrayDocNode Args) {
   auto &Func = MF.getFunction();
   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
@@ -448,9 +454,10 @@ void MetadataStreamerMsgPackV3::emitHiddenKernelArgs(
   }
 }
 
-msgpack::MapDocNode MetadataStreamerMsgPackV3::getHSAKernelProps(
-    const MachineFunction &MF, const SIProgramInfo &ProgramInfo,
-    unsigned CodeObjectVersion) const {
+msgpack::MapDocNode
+MetadataStreamerMsgPackV4::getHSAKernelProps(const MachineFunction &MF,
+                                             const SIProgramInfo &ProgramInfo,
+                                             unsigned CodeObjectVersion) const {
   const GCNSubtarget &STM = MF.getSubtarget<GCNSubtarget>();
   const SIMachineFunctionInfo &MFI = *MF.getInfo<SIMachineFunctionInfo>();
   const Function &F = MF.getFunction();
@@ -495,18 +502,19 @@ msgpack::MapDocNode MetadataStreamerMsgPackV3::getHSAKernelProps(
   return Kern;
 }
 
-bool MetadataStreamerMsgPackV3::emitTo(AMDGPUTargetStreamer &TargetStreamer) {
+bool MetadataStreamerMsgPackV4::emitTo(AMDGPUTargetStreamer &TargetStreamer) {
   return TargetStreamer.EmitHSAMetadata(*HSAMetadataDoc, true);
 }
 
-void MetadataStreamerMsgPackV3::begin(const Module &Mod,
+void MetadataStreamerMsgPackV4::begin(const Module &Mod,
                                       const IsaInfo::AMDGPUTargetID &TargetID) {
   emitVersion();
+  emitTargetID(TargetID);
   emitPrintf(Mod);
   getRootMetadata("amdhsa.kernels") = HSAMetadataDoc->getArrayNode();
 }
 
-void MetadataStreamerMsgPackV3::end() {
+void MetadataStreamerMsgPackV4::end() {
   std::string HSAMetadataString;
   raw_string_ostream StrOS(HSAMetadataString);
   HSAMetadataDoc->toYAML(StrOS);
@@ -517,7 +525,7 @@ void MetadataStreamerMsgPackV3::end() {
     verify(StrOS.str());
 }
 
-void MetadataStreamerMsgPackV3::emitKernel(const MachineFunction &MF,
+void MetadataStreamerMsgPackV4::emitKernel(const MachineFunction &MF,
                                            const SIProgramInfo &ProgramInfo) {
   auto &Func = MF.getFunction();
   if (Func.getCallingConv() != CallingConv::AMDGPU_KERNEL &&
@@ -542,31 +550,6 @@ void MetadataStreamerMsgPackV3::emitKernel(const MachineFunction &MF,
   Kernels.push_back(Kern);
 }
 
-//===----------------------------------------------------------------------===//
-// HSAMetadataStreamerV4
-//===----------------------------------------------------------------------===//
-
-void MetadataStreamerMsgPackV4::emitVersion() {
-  auto Version = HSAMetadataDoc->getArrayNode();
-  Version.push_back(Version.getDocument()->getNode(VersionMajorV4));
-  Version.push_back(Version.getDocument()->getNode(VersionMinorV4));
-  getRootMetadata("amdhsa.version") = Version;
-}
-
-void MetadataStreamerMsgPackV4::emitTargetID(
-    const IsaInfo::AMDGPUTargetID &TargetID) {
-  getRootMetadata("amdhsa.target") =
-      HSAMetadataDoc->getNode(TargetID.toString(), /*Copy=*/true);
-}
-
-void MetadataStreamerMsgPackV4::begin(const Module &Mod,
-                                      const IsaInfo::AMDGPUTargetID &TargetID) {
-  emitVersion();
-  emitTargetID(TargetID);
-  emitPrintf(Mod);
-  getRootMetadata("amdhsa.kernels") = HSAMetadataDoc->getArrayNode();
-}
-
 //===----------------------------------------------------------------------===//
 // HSAMetadataStreamerV5
 //===----------------------------------------------------------------------===//
@@ -680,7 +663,7 @@ void MetadataStreamerMsgPackV5::emitHiddenKernelArgs(
 
 void MetadataStreamerMsgPackV5::emitKernelAttrs(const Function &Func,
                                                 msgpack::MapDocNode Kern) {
-  MetadataStreamerMsgPackV3::emitKernelAttrs(Func, Kern);
+  MetadataStreamerMsgPackV4::emitKernelAttrs(Func, Kern);
 
   if (Func.getFnAttribute("uniform-work-group-size").getValueAsBool())
     Kern[".uniform_work_group_size"] = Kern.getDocument()->getNode(1);

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h
index d2b3b8917ce0f70..18a7b5d7a9633e8 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUHSAMetadataStreamer.h
@@ -62,7 +62,7 @@ class MetadataStreamer {
                                msgpack::MapDocNode Kern) = 0;
 };
 
-class MetadataStreamerMsgPackV3 : public MetadataStreamer {
+class MetadataStreamerMsgPackV4 : public MetadataStreamer {
 protected:
   std::unique_ptr<msgpack::Document> HSAMetadataDoc =
       std::make_unique<msgpack::Document>();
@@ -89,6 +89,8 @@ class MetadataStreamerMsgPackV3 : public MetadataStreamer {
 
   void emitVersion() override;
 
+  void emitTargetID(const IsaInfo::AMDGPUTargetID &TargetID);
+
   void emitPrintf(const Module &Mod);
 
   void emitKernelLanguage(const Function &Func, msgpack::MapDocNode Kern);
@@ -120,8 +122,8 @@ class MetadataStreamerMsgPackV3 : public MetadataStreamer {
   }
 
 public:
-  MetadataStreamerMsgPackV3() = default;
-  ~MetadataStreamerMsgPackV3() = default;
+  MetadataStreamerMsgPackV4() = default;
+  ~MetadataStreamerMsgPackV4() = default;
 
   bool emitTo(AMDGPUTargetStreamer &TargetStreamer) override;
 
@@ -134,19 +136,6 @@ class MetadataStreamerMsgPackV3 : public MetadataStreamer {
                   const SIProgramInfo &ProgramInfo) override;
 };
 
-class MetadataStreamerMsgPackV4 : public MetadataStreamerMsgPackV3 {
-protected:
-  void emitVersion() override;
-  void emitTargetID(const IsaInfo::AMDGPUTargetID &TargetID);
-
-public:
-  MetadataStreamerMsgPackV4() = default;
-  ~MetadataStreamerMsgPackV4() = default;
-
-  void begin(const Module &Mod,
-             const IsaInfo::AMDGPUTargetID &TargetID) override;
-};
-
 class MetadataStreamerMsgPackV5 final : public MetadataStreamerMsgPackV4 {
 protected:
   void emitVersion() override;

diff  --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 02cb77f6ecaca4e..d6717c998bec8be 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -6489,11 +6489,6 @@ bool AMDGPULegalizerInfo::legalizeTrapIntrinsic(MachineInstr &MI,
       ST.getTrapHandlerAbi() != GCNSubtarget::TrapHandlerAbi::AMDHSA)
     return legalizeTrapEndpgm(MI, MRI, B);
 
-  const Module *M = B.getMF().getFunction().getParent();
-  unsigned CodeObjectVersion = AMDGPU::getCodeObjectVersion(*M);
-  if (CodeObjectVersion <= AMDGPU::AMDHSA_COV3)
-    return legalizeTrapHsaQueuePtr(MI, MRI, B);
-
   return ST.supportsGetDoorbellID() ?
          legalizeTrapHsa(MI, MRI, B) : legalizeTrapHsaQueuePtr(MI, MRI, B);
 }

diff  --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
index 6b8c03c1620d26b..42af09e27e471e8 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
@@ -424,7 +424,6 @@ void AMDGPUTargetAsmStreamer::EmitAmdhsaKernelDescriptor(
   switch (CodeObjectVersion) {
   default:
     break;
-  case AMDGPU::AMDHSA_COV3:
   case AMDGPU::AMDHSA_COV4:
   case AMDGPU::AMDHSA_COV5:
     if (getTargetID()->isXnackSupported())

diff  --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 33f65ab786584fd..cd849560feac22b 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -5990,11 +5990,6 @@ SDValue SITargetLowering::lowerTRAP(SDValue Op, SelectionDAG &DAG) const {
       Subtarget->getTrapHandlerAbi() != GCNSubtarget::TrapHandlerAbi::AMDHSA)
     return lowerTrapEndpgm(Op, DAG);
 
-  const Module *M = DAG.getMachineFunction().getFunction().getParent();
-  unsigned CodeObjectVersion = AMDGPU::getCodeObjectVersion(*M);
-  if (CodeObjectVersion <= AMDGPU::AMDHSA_COV3)
-    return lowerTrapHsaQueuePtr(Op, DAG);
-
   return Subtarget->supportsGetDoorbellID() ? lowerTrapHsa(Op, DAG) :
          lowerTrapHsaQueuePtr(Op, DAG);
 }

diff  --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index d123b384a27d4cc..5fff19eada75dd8 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -128,8 +128,6 @@ std::optional<uint8_t> getHsaAbiVersion(const MCSubtargetInfo *STI) {
     return std::nullopt;
 
   switch (AmdhsaCodeObjectVersion) {
-  case 3:
-    return ELF::ELFABIVERSION_AMDGPU_HSA_V3;
   case 4:
     return ELF::ELFABIVERSION_AMDGPU_HSA_V4;
   case 5:
@@ -140,12 +138,6 @@ std::optional<uint8_t> getHsaAbiVersion(const MCSubtargetInfo *STI) {
   }
 }
 
-bool isHsaAbiVersion3(const MCSubtargetInfo *STI) {
-  if (std::optional<uint8_t> HsaAbiVer = getHsaAbiVersion(STI))
-    return *HsaAbiVer == ELF::ELFABIVERSION_AMDGPU_HSA_V3;
-  return false;
-}
-
 bool isHsaAbiVersion4(const MCSubtargetInfo *STI) {
   if (std::optional<uint8_t> HsaAbiVer = getHsaAbiVersion(STI))
     return *HsaAbiVer == ELF::ELFABIVERSION_AMDGPU_HSA_V4;
@@ -174,7 +166,6 @@ unsigned getCodeObjectVersion(const Module &M) {
 
 unsigned getMultigridSyncArgImplicitArgPosition(unsigned CodeObjectVersion) {
   switch (CodeObjectVersion) {
-  case AMDHSA_COV3:
   case AMDHSA_COV4:
     return 48;
   case AMDHSA_COV5:
@@ -188,7 +179,6 @@ unsigned getMultigridSyncArgImplicitArgPosition(unsigned CodeObjectVersion) {
 // central TD file.
 unsigned getHostcallImplicitArgPosition(unsigned CodeObjectVersion) {
   switch (CodeObjectVersion) {
-  case AMDHSA_COV3:
   case AMDHSA_COV4:
     return 24;
   case AMDHSA_COV5:
@@ -199,7 +189,6 @@ unsigned getHostcallImplicitArgPosition(unsigned CodeObjectVersion) {
 
 unsigned getDefaultQueueImplicitArgPosition(unsigned CodeObjectVersion) {
   switch (CodeObjectVersion) {
-  case AMDHSA_COV3:
   case AMDHSA_COV4:
     return 32;
   case AMDHSA_COV5:
@@ -210,7 +199,6 @@ unsigned getDefaultQueueImplicitArgPosition(unsigned CodeObjectVersion) {
 
 unsigned getCompletionActionImplicitArgPosition(unsigned CodeObjectVersion) {
   switch (CodeObjectVersion) {
-  case AMDHSA_COV3:
   case AMDHSA_COV4:
     return 40;
   case AMDHSA_COV5:
@@ -774,15 +762,6 @@ std::string AMDGPUTargetID::toString() const {
   std::string Features;
   if (STI.getTargetTriple().getOS() == Triple::AMDHSA) {
     switch (CodeObjectVersion) {
-    case AMDGPU::AMDHSA_COV3:
-      // xnack.
-      if (isXnackOnOrAny())
-        Features += "+xnack";
-      // In code object v2 and v3, "sramecc" feature was spelled with a
-      // hyphen ("sram-ecc").
-      if (isSramEccOnOrAny())
-        Features += "+sram-ecc";
-      break;
     case AMDGPU::AMDHSA_COV4:
     case AMDGPU::AMDHSA_COV5:
       // sramecc.

diff  --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index bb2964f592f66bf..1e0994d0862cf5d 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -42,19 +42,12 @@ namespace AMDGPU {
 
 struct IsaVersion;
 
-enum {
-  AMDHSA_COV3 = 3,
-  AMDHSA_COV4 = 4,
-  AMDHSA_COV5 = 5
-};
+enum { AMDHSA_COV4 = 4, AMDHSA_COV5 = 5 };
 
 /// \returns True if \p STI is AMDHSA.
 bool isHsaAbi(const MCSubtargetInfo &STI);
 /// \returns HSA OS ABI Version identification.
 std::optional<uint8_t> getHsaAbiVersion(const MCSubtargetInfo *STI);
-/// \returns True if HSA OS ABI Version identification is 3,
-/// false otherwise.
-bool isHsaAbiVersion3(const MCSubtargetInfo *STI);
 /// \returns True if HSA OS ABI Version identification is 4,
 /// false otherwise.
 bool isHsaAbiVersion4(const MCSubtargetInfo *STI);

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/implicit-kernarg-backend-usage-global-isel.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/implicit-kernarg-backend-usage-global-isel.ll
index c25ecafa1f7c074..4bdbe6604782a8b 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/implicit-kernarg-backend-usage-global-isel.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/implicit-kernarg-backend-usage-global-isel.ll
@@ -1,38 +1,11 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: sed 's/CODE_OBJECT_VERSION/300/g' %s | llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 | FileCheck --check-prefix=GFX8V3 %s
 ; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 | FileCheck --check-prefix=GFX8V4 %s
 ; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 | FileCheck --check-prefix=GFX8V5 %s
 
-; RUN: sed 's/CODE_OBJECT_VERSION/300/g' %s | llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 | FileCheck --check-prefixes=GFX9V3 %s
 ; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 | FileCheck --check-prefixes=GFX9V4 %s
 ; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 | FileCheck --check-prefixes=GFX9V5 %s
 
 define amdgpu_kernel void @addrspacecast(ptr addrspace(5) %ptr.private, ptr addrspace(3) %ptr.local) {
-; GFX8V3-LABEL: addrspacecast:
-; GFX8V3:       ; %bb.0:
-; GFX8V3-NEXT:    s_load_dwordx2 s[0:1], s[6:7], 0x0
-; GFX8V3-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x40
-; GFX8V3-NEXT:    v_mov_b32_e32 v2, 1
-; GFX8V3-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8V3-NEXT:    s_mov_b32 s4, s0
-; GFX8V3-NEXT:    s_mov_b32 s5, s3
-; GFX8V3-NEXT:    s_cmp_lg_u32 s0, -1
-; GFX8V3-NEXT:    s_cselect_b64 s[4:5], s[4:5], 0
-; GFX8V3-NEXT:    s_mov_b32 s6, s1
-; GFX8V3-NEXT:    s_mov_b32 s7, s2
-; GFX8V3-NEXT:    s_cmp_lg_u32 s1, -1
-; GFX8V3-NEXT:    v_mov_b32_e32 v0, s4
-; GFX8V3-NEXT:    s_cselect_b64 s[0:1], s[6:7], 0
-; GFX8V3-NEXT:    v_mov_b32_e32 v1, s5
-; GFX8V3-NEXT:    flat_store_dword v[0:1], v2
-; GFX8V3-NEXT:    s_waitcnt vmcnt(0)
-; GFX8V3-NEXT:    v_mov_b32_e32 v0, s0
-; GFX8V3-NEXT:    v_mov_b32_e32 v2, 2
-; GFX8V3-NEXT:    v_mov_b32_e32 v1, s1
-; GFX8V3-NEXT:    flat_store_dword v[0:1], v2
-; GFX8V3-NEXT:    s_waitcnt vmcnt(0)
-; GFX8V3-NEXT:    s_endpgm
-;
 ; GFX8V4-LABEL: addrspacecast:
 ; GFX8V4:       ; %bb.0:
 ; GFX8V4-NEXT:    s_load_dwordx2 s[0:1], s[6:7], 0x0
@@ -82,30 +55,6 @@ define amdgpu_kernel void @addrspacecast(ptr addrspace(5) %ptr.private, ptr addr
 ; GFX8V5-NEXT:    s_waitcnt vmcnt(0)
 ; GFX8V5-NEXT:    s_endpgm
 ;
-; GFX9V3-LABEL: addrspacecast:
-; GFX9V3:       ; %bb.0:
-; GFX9V3-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
-; GFX9V3-NEXT:    s_mov_b64 s[2:3], src_private_base
-; GFX9V3-NEXT:    s_mov_b64 s[4:5], src_shared_base
-; GFX9V3-NEXT:    v_mov_b32_e32 v2, 1
-; GFX9V3-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9V3-NEXT:    s_mov_b32 s2, s0
-; GFX9V3-NEXT:    s_cmp_lg_u32 s0, -1
-; GFX9V3-NEXT:    s_cselect_b64 s[2:3], s[2:3], 0
-; GFX9V3-NEXT:    s_mov_b32 s4, s1
-; GFX9V3-NEXT:    s_cmp_lg_u32 s1, -1
-; GFX9V3-NEXT:    v_mov_b32_e32 v0, s2
-; GFX9V3-NEXT:    s_cselect_b64 s[0:1], s[4:5], 0
-; GFX9V3-NEXT:    v_mov_b32_e32 v1, s3
-; GFX9V3-NEXT:    flat_store_dword v[0:1], v2
-; GFX9V3-NEXT:    s_waitcnt vmcnt(0)
-; GFX9V3-NEXT:    v_mov_b32_e32 v0, s0
-; GFX9V3-NEXT:    v_mov_b32_e32 v2, 2
-; GFX9V3-NEXT:    v_mov_b32_e32 v1, s1
-; GFX9V3-NEXT:    flat_store_dword v[0:1], v2
-; GFX9V3-NEXT:    s_waitcnt vmcnt(0)
-; GFX9V3-NEXT:    s_endpgm
-;
 ; GFX9V4-LABEL: addrspacecast:
 ; GFX9V4:       ; %bb.0:
 ; GFX9V4-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -161,19 +110,6 @@ define amdgpu_kernel void @addrspacecast(ptr addrspace(5) %ptr.private, ptr addr
 }
 
 define amdgpu_kernel void @llvm_amdgcn_is_shared(ptr %ptr) {
-; GFX8V3-LABEL: llvm_amdgcn_is_shared:
-; GFX8V3:       ; %bb.0:
-; GFX8V3-NEXT:    s_load_dwordx2 s[0:1], s[6:7], 0x0
-; GFX8V3-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8V3-NEXT:    s_load_dword s0, s[4:5], 0x40
-; GFX8V3-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8V3-NEXT:    s_cmp_eq_u32 s1, s0
-; GFX8V3-NEXT:    s_cselect_b32 s0, 1, 0
-; GFX8V3-NEXT:    v_mov_b32_e32 v0, s0
-; GFX8V3-NEXT:    flat_store_dword v[0:1], v0
-; GFX8V3-NEXT:    s_waitcnt vmcnt(0)
-; GFX8V3-NEXT:    s_endpgm
-;
 ; GFX8V4-LABEL: llvm_amdgcn_is_shared:
 ; GFX8V4:       ; %bb.0:
 ; GFX8V4-NEXT:    s_load_dwordx2 s[0:1], s[6:7], 0x0
@@ -200,18 +136,6 @@ define amdgpu_kernel void @llvm_amdgcn_is_shared(ptr %ptr) {
 ; GFX8V5-NEXT:    s_waitcnt vmcnt(0)
 ; GFX8V5-NEXT:    s_endpgm
 ;
-; GFX9V3-LABEL: llvm_amdgcn_is_shared:
-; GFX9V3:       ; %bb.0:
-; GFX9V3-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
-; GFX9V3-NEXT:    s_mov_b64 s[2:3], src_shared_base
-; GFX9V3-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9V3-NEXT:    s_cmp_eq_u32 s1, s3
-; GFX9V3-NEXT:    s_cselect_b32 s0, 1, 0
-; GFX9V3-NEXT:    v_mov_b32_e32 v0, s0
-; GFX9V3-NEXT:    global_store_dword v[0:1], v0, off
-; GFX9V3-NEXT:    s_waitcnt vmcnt(0)
-; GFX9V3-NEXT:    s_endpgm
-;
 ; GFX9V4-LABEL: llvm_amdgcn_is_shared:
 ; GFX9V4:       ; %bb.0:
 ; GFX9V4-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -242,19 +166,6 @@ define amdgpu_kernel void @llvm_amdgcn_is_shared(ptr %ptr) {
 }
 
 define amdgpu_kernel void @llvm_amdgcn_is_private(ptr %ptr) {
-; GFX8V3-LABEL: llvm_amdgcn_is_private:
-; GFX8V3:       ; %bb.0:
-; GFX8V3-NEXT:    s_load_dwordx2 s[0:1], s[6:7], 0x0
-; GFX8V3-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8V3-NEXT:    s_load_dword s0, s[4:5], 0x44
-; GFX8V3-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8V3-NEXT:    s_cmp_eq_u32 s1, s0
-; GFX8V3-NEXT:    s_cselect_b32 s0, 1, 0
-; GFX8V3-NEXT:    v_mov_b32_e32 v0, s0
-; GFX8V3-NEXT:    flat_store_dword v[0:1], v0
-; GFX8V3-NEXT:    s_waitcnt vmcnt(0)
-; GFX8V3-NEXT:    s_endpgm
-;
 ; GFX8V4-LABEL: llvm_amdgcn_is_private:
 ; GFX8V4:       ; %bb.0:
 ; GFX8V4-NEXT:    s_load_dwordx2 s[0:1], s[6:7], 0x0
@@ -281,18 +192,6 @@ define amdgpu_kernel void @llvm_amdgcn_is_private(ptr %ptr) {
 ; GFX8V5-NEXT:    s_waitcnt vmcnt(0)
 ; GFX8V5-NEXT:    s_endpgm
 ;
-; GFX9V3-LABEL: llvm_amdgcn_is_private:
-; GFX9V3:       ; %bb.0:
-; GFX9V3-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
-; GFX9V3-NEXT:    s_mov_b64 s[2:3], src_private_base
-; GFX9V3-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9V3-NEXT:    s_cmp_eq_u32 s1, s3
-; GFX9V3-NEXT:    s_cselect_b32 s0, 1, 0
-; GFX9V3-NEXT:    v_mov_b32_e32 v0, s0
-; GFX9V3-NEXT:    global_store_dword v[0:1], v0, off
-; GFX9V3-NEXT:    s_waitcnt vmcnt(0)
-; GFX9V3-NEXT:    s_endpgm
-;
 ; GFX9V4-LABEL: llvm_amdgcn_is_private:
 ; GFX9V4:       ; %bb.0:
 ; GFX9V4-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -323,11 +222,6 @@ define amdgpu_kernel void @llvm_amdgcn_is_private(ptr %ptr) {
 }
 
 define amdgpu_kernel void @llvm_trap() {
-; GFX8V3-LABEL: llvm_trap:
-; GFX8V3:       ; %bb.0:
-; GFX8V3-NEXT:    s_mov_b64 s[0:1], s[4:5]
-; GFX8V3-NEXT:    s_trap 2
-;
 ; GFX8V4-LABEL: llvm_trap:
 ; GFX8V4:       ; %bb.0:
 ; GFX8V4-NEXT:    s_mov_b64 s[0:1], s[4:5]
@@ -339,11 +233,6 @@ define amdgpu_kernel void @llvm_trap() {
 ; GFX8V5-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8V5-NEXT:    s_trap 2
 ;
-; GFX9V3-LABEL: llvm_trap:
-; GFX9V3:       ; %bb.0:
-; GFX9V3-NEXT:    s_mov_b64 s[0:1], s[4:5]
-; GFX9V3-NEXT:    s_trap 2
-;
 ; GFX9V4-LABEL: llvm_trap:
 ; GFX9V4:       ; %bb.0:
 ; GFX9V4-NEXT:    s_trap 2
@@ -356,10 +245,6 @@ define amdgpu_kernel void @llvm_trap() {
 }
 
 define amdgpu_kernel void @llvm_debugtrap() {
-; GFX8V3-LABEL: llvm_debugtrap:
-; GFX8V3:       ; %bb.0:
-; GFX8V3-NEXT:    s_trap 3
-;
 ; GFX8V4-LABEL: llvm_debugtrap:
 ; GFX8V4:       ; %bb.0:
 ; GFX8V4-NEXT:    s_trap 3
@@ -368,10 +253,6 @@ define amdgpu_kernel void @llvm_debugtrap() {
 ; GFX8V5:       ; %bb.0:
 ; GFX8V5-NEXT:    s_trap 3
 ;
-; GFX9V3-LABEL: llvm_debugtrap:
-; GFX9V3:       ; %bb.0:
-; GFX9V3-NEXT:    s_trap 3
-;
 ; GFX9V4-LABEL: llvm_debugtrap:
 ; GFX9V4:       ; %bb.0:
 ; GFX9V4-NEXT:    s_trap 3
@@ -384,32 +265,6 @@ define amdgpu_kernel void @llvm_debugtrap() {
 }
 
 define amdgpu_kernel void @llvm_amdgcn_queue_ptr(ptr addrspace(1) %ptr)  {
-; GFX8V3-LABEL: llvm_amdgcn_queue_ptr:
-; GFX8V3:       ; %bb.0:
-; GFX8V3-NEXT:    v_mov_b32_e32 v0, s6
-; GFX8V3-NEXT:    v_mov_b32_e32 v1, s7
-; GFX8V3-NEXT:    s_add_u32 s0, s8, 8
-; GFX8V3-NEXT:    flat_load_ubyte v0, v[0:1] glc
-; GFX8V3-NEXT:    s_addc_u32 s1, s9, 0
-; GFX8V3-NEXT:    s_waitcnt vmcnt(0)
-; GFX8V3-NEXT:    v_mov_b32_e32 v0, s0
-; GFX8V3-NEXT:    v_mov_b32_e32 v1, s1
-; GFX8V3-NEXT:    flat_load_ubyte v0, v[0:1] glc
-; GFX8V3-NEXT:    s_waitcnt vmcnt(0)
-; GFX8V3-NEXT:    v_mov_b32_e32 v0, s4
-; GFX8V3-NEXT:    v_mov_b32_e32 v1, s5
-; GFX8V3-NEXT:    flat_load_ubyte v0, v[0:1] glc
-; GFX8V3-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
-; GFX8V3-NEXT:    s_waitcnt vmcnt(0)
-; GFX8V3-NEXT:    v_mov_b32_e32 v0, s10
-; GFX8V3-NEXT:    v_mov_b32_e32 v1, s11
-; GFX8V3-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8V3-NEXT:    v_mov_b32_e32 v3, s1
-; GFX8V3-NEXT:    v_mov_b32_e32 v2, s0
-; GFX8V3-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
-; GFX8V3-NEXT:    s_waitcnt vmcnt(0)
-; GFX8V3-NEXT:    s_endpgm
-;
 ; GFX8V4-LABEL: llvm_amdgcn_queue_ptr:
 ; GFX8V4:       ; %bb.0:
 ; GFX8V4-NEXT:    v_mov_b32_e32 v0, s6
@@ -460,23 +315,6 @@ define amdgpu_kernel void @llvm_amdgcn_queue_ptr(ptr addrspace(1) %ptr)  {
 ; GFX8V5-NEXT:    s_waitcnt vmcnt(0)
 ; GFX8V5-NEXT:    s_endpgm
 ;
-; GFX9V3-LABEL: llvm_amdgcn_queue_ptr:
-; GFX9V3:       ; %bb.0:
-; GFX9V3-NEXT:    v_mov_b32_e32 v2, 0
-; GFX9V3-NEXT:    global_load_ubyte v0, v2, s[6:7] glc
-; GFX9V3-NEXT:    global_load_ubyte v0, v2, s[8:9] offset:8 glc
-; GFX9V3-NEXT:    global_load_ubyte v0, v2, s[4:5] glc
-; GFX9V3-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
-; GFX9V3-NEXT:    s_waitcnt vmcnt(0)
-; GFX9V3-NEXT:    v_mov_b32_e32 v0, s10
-; GFX9V3-NEXT:    v_mov_b32_e32 v1, s11
-; GFX9V3-NEXT:    ; kill: killed $sgpr6_sgpr7
-; GFX9V3-NEXT:    ; kill: killed $sgpr4_sgpr5
-; GFX9V3-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9V3-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX9V3-NEXT:    s_waitcnt vmcnt(0)
-; GFX9V3-NEXT:    s_endpgm
-;
 ; GFX9V4-LABEL: llvm_amdgcn_queue_ptr:
 ; GFX9V4:       ; %bb.0:
 ; GFX9V4-NEXT:    v_mov_b32_e32 v2, 0

diff  --git a/llvm/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size-v3.ll b/llvm/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size-v3.ll
deleted file mode 100644
index 20d0aea61f27688..000000000000000
--- a/llvm/test/CodeGen/AMDGPU/attr-amdgpu-flat-work-group-size-v3.ll
+++ /dev/null
@@ -1,148 +0,0 @@
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck --check-prefix=CHECK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs -amdgpu-verify-hsa-metadata -filetype=obj -o /dev/null < %s 2>&1 | FileCheck --check-prefix=PARSER %s
-
-; CHECK-LABEL: {{^}}min_64_max_64:
-; CHECK: SGPRBlocks: 0
-; CHECK: VGPRBlocks: 0
-; CHECK: NumSGPRsForWavesPerEU: 1
-; CHECK: NumVGPRsForWavesPerEU: 1
-define amdgpu_kernel void @min_64_max_64() #0 {
-entry:
-  ret void
-}
-attributes #0 = {"amdgpu-flat-work-group-size"="64,64"}
-
-; CHECK-LABEL: {{^}}min_64_max_128:
-; CHECK: SGPRBlocks: 0
-; CHECK: VGPRBlocks: 0
-; CHECK: NumSGPRsForWavesPerEU: 1
-; CHECK: NumVGPRsForWavesPerEU: 1
-define amdgpu_kernel void @min_64_max_128() #1 {
-entry:
-  ret void
-}
-attributes #1 = {"amdgpu-flat-work-group-size"="64,128"}
-
-; CHECK-LABEL: {{^}}min_128_max_128:
-; CHECK: SGPRBlocks: 0
-; CHECK: VGPRBlocks: 0
-; CHECK: NumSGPRsForWavesPerEU: 1
-; CHECK: NumVGPRsForWavesPerEU: 1
-define amdgpu_kernel void @min_128_max_128() #2 {
-entry:
-  ret void
-}
-attributes #2 = {"amdgpu-flat-work-group-size"="128,128"}
-
-; CHECK-LABEL: {{^}}min_1024_max_1024
-; CHECK: SGPRBlocks: 0
-; CHECK: VGPRBlocks: 10
-; CHECK: NumSGPRsForWavesPerEU: 2{{$}}
-; CHECK: NumVGPRsForWavesPerEU: 43
- at var = addrspace(1) global float 0.0
-define amdgpu_kernel void @min_1024_max_1024() #3 {
-  %val0 = load volatile float, ptr addrspace(1) @var
-  %val1 = load volatile float, ptr addrspace(1) @var
-  %val2 = load volatile float, ptr addrspace(1) @var
-  %val3 = load volatile float, ptr addrspace(1) @var
-  %val4 = load volatile float, ptr addrspace(1) @var
-  %val5 = load volatile float, ptr addrspace(1) @var
-  %val6 = load volatile float, ptr addrspace(1) @var
-  %val7 = load volatile float, ptr addrspace(1) @var
-  %val8 = load volatile float, ptr addrspace(1) @var
-  %val9 = load volatile float, ptr addrspace(1) @var
-  %val10 = load volatile float, ptr addrspace(1) @var
-  %val11 = load volatile float, ptr addrspace(1) @var
-  %val12 = load volatile float, ptr addrspace(1) @var
-  %val13 = load volatile float, ptr addrspace(1) @var
-  %val14 = load volatile float, ptr addrspace(1) @var
-  %val15 = load volatile float, ptr addrspace(1) @var
-  %val16 = load volatile float, ptr addrspace(1) @var
-  %val17 = load volatile float, ptr addrspace(1) @var
-  %val18 = load volatile float, ptr addrspace(1) @var
-  %val19 = load volatile float, ptr addrspace(1) @var
-  %val20 = load volatile float, ptr addrspace(1) @var
-  %val21 = load volatile float, ptr addrspace(1) @var
-  %val22 = load volatile float, ptr addrspace(1) @var
-  %val23 = load volatile float, ptr addrspace(1) @var
-  %val24 = load volatile float, ptr addrspace(1) @var
-  %val25 = load volatile float, ptr addrspace(1) @var
-  %val26 = load volatile float, ptr addrspace(1) @var
-  %val27 = load volatile float, ptr addrspace(1) @var
-  %val28 = load volatile float, ptr addrspace(1) @var
-  %val29 = load volatile float, ptr addrspace(1) @var
-  %val30 = load volatile float, ptr addrspace(1) @var
-  %val31 = load volatile float, ptr addrspace(1) @var
-  %val32 = load volatile float, ptr addrspace(1) @var
-  %val33 = load volatile float, ptr addrspace(1) @var
-  %val34 = load volatile float, ptr addrspace(1) @var
-  %val35 = load volatile float, ptr addrspace(1) @var
-  %val36 = load volatile float, ptr addrspace(1) @var
-  %val37 = load volatile float, ptr addrspace(1) @var
-  %val38 = load volatile float, ptr addrspace(1) @var
-  %val39 = load volatile float, ptr addrspace(1) @var
-  %val40 = load volatile float, ptr addrspace(1) @var
-
-  store volatile float %val0, ptr addrspace(1) @var
-  store volatile float %val1, ptr addrspace(1) @var
-  store volatile float %val2, ptr addrspace(1) @var
-  store volatile float %val3, ptr addrspace(1) @var
-  store volatile float %val4, ptr addrspace(1) @var
-  store volatile float %val5, ptr addrspace(1) @var
-  store volatile float %val6, ptr addrspace(1) @var
-  store volatile float %val7, ptr addrspace(1) @var
-  store volatile float %val8, ptr addrspace(1) @var
-  store volatile float %val9, ptr addrspace(1) @var
-  store volatile float %val10, ptr addrspace(1) @var
-  store volatile float %val11, ptr addrspace(1) @var
-  store volatile float %val12, ptr addrspace(1) @var
-  store volatile float %val13, ptr addrspace(1) @var
-  store volatile float %val14, ptr addrspace(1) @var
-  store volatile float %val15, ptr addrspace(1) @var
-  store volatile float %val16, ptr addrspace(1) @var
-  store volatile float %val17, ptr addrspace(1) @var
-  store volatile float %val18, ptr addrspace(1) @var
-  store volatile float %val19, ptr addrspace(1) @var
-  store volatile float %val20, ptr addrspace(1) @var
-  store volatile float %val21, ptr addrspace(1) @var
-  store volatile float %val22, ptr addrspace(1) @var
-  store volatile float %val23, ptr addrspace(1) @var
-  store volatile float %val24, ptr addrspace(1) @var
-  store volatile float %val25, ptr addrspace(1) @var
-  store volatile float %val26, ptr addrspace(1) @var
-  store volatile float %val27, ptr addrspace(1) @var
-  store volatile float %val28, ptr addrspace(1) @var
-  store volatile float %val29, ptr addrspace(1) @var
-  store volatile float %val30, ptr addrspace(1) @var
-  store volatile float %val31, ptr addrspace(1) @var
-  store volatile float %val32, ptr addrspace(1) @var
-  store volatile float %val33, ptr addrspace(1) @var
-  store volatile float %val34, ptr addrspace(1) @var
-  store volatile float %val35, ptr addrspace(1) @var
-  store volatile float %val36, ptr addrspace(1) @var
-  store volatile float %val37, ptr addrspace(1) @var
-  store volatile float %val38, ptr addrspace(1) @var
-  store volatile float %val39, ptr addrspace(1) @var
-  store volatile float %val40, ptr addrspace(1) @var
-
-  ret void
-}
-attributes #3 = {"amdgpu-flat-work-group-size"="1024,1024"}
-
-!llvm.module.flags = !{!0}
-!0 = !{i32 1, !"amdgpu_code_object_version", i32 300}
-
-; CHECK: amdhsa.kernels:
-; CHECK:   .max_flat_workgroup_size: 64
-; CHECK:   .name:                 min_64_max_64
-; CHECK:   .max_flat_workgroup_size: 128
-; CHECK:   .name:                 min_64_max_128
-; CHECK:   .max_flat_workgroup_size: 128
-; CHECK:   .name:                 min_128_max_128
-; CHECK:   .max_flat_workgroup_size: 1024
-; CHECK:   .name:                 min_1024_max_1024
-; CHECK: amdhsa.version:
-; CHECK:   - 1
-; CHECK:   - 0
-
-; PARSER: AMDGPU HSA Metadata Parser Test: PASS

diff  --git a/llvm/test/CodeGen/AMDGPU/directive-amdgcn-target-v3.ll b/llvm/test/CodeGen/AMDGPU/directive-amdgcn-target-v3.ll
deleted file mode 100644
index 6c553e3726abf90..000000000000000
--- a/llvm/test/CodeGen/AMDGPU/directive-amdgcn-target-v3.ll
+++ /dev/null
@@ -1,168 +0,0 @@
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx600 < %s | FileCheck --check-prefixes=V3-GFX600 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tahiti < %s | FileCheck --check-prefixes=V3-GFX600 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx601 < %s | FileCheck --check-prefixes=V3-GFX601 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=pitcairn < %s | FileCheck --check-prefixes=V3-GFX601 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=verde < %s | FileCheck --check-prefixes=V3-GFX601 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx602 < %s | FileCheck --check-prefixes=V3-GFX602 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hainan < %s | FileCheck --check-prefixes=V3-GFX602 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=oland < %s | FileCheck --check-prefixes=V3-GFX602 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 < %s | FileCheck --check-prefixes=V3-GFX700 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri < %s | FileCheck --check-prefixes=V3-GFX700 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx701 < %s | FileCheck --check-prefixes=V3-GFX701 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii < %s | FileCheck --check-prefixes=V3-GFX701 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx702 < %s | FileCheck --check-prefixes=V3-GFX702 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx703 < %s | FileCheck --check-prefixes=V3-GFX703 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=kabini < %s | FileCheck --check-prefixes=V3-GFX703 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=mullins < %s | FileCheck --check-prefixes=V3-GFX703 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx704 < %s | FileCheck --check-prefixes=V3-GFX704 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=bonaire < %s | FileCheck --check-prefixes=V3-GFX704 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx705 < %s | FileCheck --check-prefixes=V3-GFX705 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx801 < %s | FileCheck --check-prefixes=V3-GFX801-XNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx801 -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX801-NOXNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx801 -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX801-XNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=carrizo < %s | FileCheck --check-prefixes=V3-GFX801-XNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=carrizo -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX801-NOXNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=carrizo -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX801-XNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx802 < %s | FileCheck --check-prefixes=V3-GFX802 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=iceland < %s | FileCheck --check-prefixes=V3-GFX802 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tonga < %s | FileCheck --check-prefixes=V3-GFX802 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 < %s | FileCheck --check-prefixes=V3-GFX803 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji < %s | FileCheck --check-prefixes=V3-GFX803 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=polaris10 < %s | FileCheck --check-prefixes=V3-GFX803 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=polaris11 < %s | FileCheck --check-prefixes=V3-GFX803 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx805 < %s | FileCheck --check-prefixes=V3-GFX805 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tongapro < %s | FileCheck --check-prefixes=V3-GFX805 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx810 < %s | FileCheck --check-prefixes=V3-GFX810-XNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx810 -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX810-NOXNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx810 -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX810-XNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=stoney < %s | FileCheck --check-prefixes=V3-GFX810-XNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=stoney -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX810-NOXNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=stoney -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX810-XNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck --check-prefixes=V3-GFX900-XNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX900-NOXNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX900-XNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx902 < %s | FileCheck --check-prefixes=V3-GFX902-XNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx902 -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX902-NOXNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx902 -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX902-XNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx904 < %s | FileCheck --check-prefixes=V3-GFX904-XNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx904 -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX904-NOXNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx904 -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX904-XNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 < %s | FileCheck --check-prefixes=V3-GFX906-SRAMECC-XNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -mattr=-sramecc < %s | FileCheck --check-prefixes=V3-GFX906-NOSRAMECC-XNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -mattr=+sramecc < %s | FileCheck --check-prefixes=V3-GFX906-SRAMECC-XNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX906-SRAMECC-NOXNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX906-SRAMECC-XNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -mattr=-sramecc,-xnack < %s | FileCheck --check-prefixes=V3-GFX906-NOSRAMECC-NOXNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -mattr=+sramecc,-xnack < %s | FileCheck --check-prefixes=V3-GFX906-SRAMECC-NOXNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -mattr=-sramecc,+xnack < %s | FileCheck --check-prefixes=V3-GFX906-NOSRAMECC-XNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -mattr=+sramecc,+xnack < %s | FileCheck --check-prefixes=V3-GFX906-SRAMECC-XNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 < %s | FileCheck --check-prefixes=V3-GFX908-SRAMECC-XNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-sramecc < %s | FileCheck --check-prefixes=V3-GFX908-NOSRAMECC-XNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=+sramecc < %s | FileCheck --check-prefixes=V3-GFX908-SRAMECC-XNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX908-SRAMECC-NOXNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX908-SRAMECC-XNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-sramecc,-xnack < %s | FileCheck --check-prefixes=V3-GFX908-NOSRAMECC-NOXNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=+sramecc,-xnack < %s | FileCheck --check-prefixes=V3-GFX908-SRAMECC-NOXNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=-sramecc,+xnack < %s | FileCheck --check-prefixes=V3-GFX908-NOSRAMECC-XNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -mattr=+sramecc,+xnack < %s | FileCheck --check-prefixes=V3-GFX908-SRAMECC-XNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx909 < %s | FileCheck --check-prefixes=V3-GFX909-XNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx909 -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX909-NOXNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx909 -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX909-XNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90c < %s | FileCheck --check-prefixes=V3-GFX90C-XNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90c -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX90C-NOXNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90c -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX90C-XNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck --check-prefixes=V3-GFX940-XNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX940-NOXNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX940-XNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 < %s | FileCheck --check-prefixes=V3-GFX1010-XNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX1010-NOXNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX1010-XNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1011 < %s | FileCheck --check-prefixes=V3-GFX1011-XNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1011 -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX1011-NOXNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1011 -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX1011-XNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1012 < %s | FileCheck --check-prefixes=V3-GFX1012-XNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1012 -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX1012-NOXNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1012 -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX1012-XNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1013 < %s | FileCheck --check-prefixes=V3-GFX1013-XNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1013 -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX1013-NOXNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1013 -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX1013-XNACK %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1030 < %s | FileCheck --check-prefixes=V3-GFX1030 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1031 < %s | FileCheck --check-prefixes=V3-GFX1031 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1032 < %s | FileCheck --check-prefixes=V3-GFX1032 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1033 < %s | FileCheck --check-prefixes=V3-GFX1033 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1034 < %s | FileCheck --check-prefixes=V3-GFX1034 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1035 < %s | FileCheck --check-prefixes=V3-GFX1035 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1036 < %s | FileCheck --check-prefixes=V3-GFX1036 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck --check-prefixes=V3-GFX1100 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1101 < %s | FileCheck --check-prefixes=V3-GFX1101 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1102 < %s | FileCheck --check-prefixes=V3-GFX1102 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1103 < %s | FileCheck --check-prefixes=V3-GFX1103 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1150 < %s | FileCheck --check-prefixes=V3-GFX1150 %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1151 < %s | FileCheck --check-prefixes=V3-GFX1151 %s
-
-; V3-GFX600: .amdgcn_target "amdgcn-amd-amdhsa--gfx600"
-; V3-GFX601: .amdgcn_target "amdgcn-amd-amdhsa--gfx601"
-; V3-GFX602: .amdgcn_target "amdgcn-amd-amdhsa--gfx602"
-; V3-GFX700: .amdgcn_target "amdgcn-amd-amdhsa--gfx700"
-; V3-GFX701: .amdgcn_target "amdgcn-amd-amdhsa--gfx701"
-; V3-GFX702: .amdgcn_target "amdgcn-amd-amdhsa--gfx702"
-; V3-GFX703: .amdgcn_target "amdgcn-amd-amdhsa--gfx703"
-; V3-GFX704: .amdgcn_target "amdgcn-amd-amdhsa--gfx704"
-; V3-GFX705: .amdgcn_target "amdgcn-amd-amdhsa--gfx705"
-; V3-GFX801-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx801"
-; V3-GFX801-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx801+xnack"
-; V3-GFX802: .amdgcn_target "amdgcn-amd-amdhsa--gfx802"
-; V3-GFX803: .amdgcn_target "amdgcn-amd-amdhsa--gfx803"
-; V3-GFX805: .amdgcn_target "amdgcn-amd-amdhsa--gfx805"
-; V3-GFX810-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx810"
-; V3-GFX810-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx810+xnack"
-; V3-GFX900-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx900"
-; V3-GFX900-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx900+xnack"
-; V3-GFX902-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx902"
-; V3-GFX902-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx902+xnack"
-; V3-GFX904-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx904"
-; V3-GFX904-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx904+xnack"
-; V3-GFX906-NOSRAMECC-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx906"
-; V3-GFX906-SRAMECC-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx906+sram-ecc"
-; V3-GFX906-NOSRAMECC-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx906+xnack"
-; V3-GFX906-SRAMECC-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx906+xnack+sram-ecc"
-; V3-GFX908-NOSRAMECC-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx908"
-; V3-GFX908-SRAMECC-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx908+sram-ecc"
-; V3-GFX908-NOSRAMECC-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx908+xnack"
-; V3-GFX908-SRAMECC-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx908+xnack+sram-ecc"
-; V3-GFX909-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx909"
-; V3-GFX909-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx909+xnack"
-; V3-GFX90C-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx90c"
-; V3-GFX90C-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx90c+xnack"
-; V3-GFX940-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx940+sram-ecc"
-; V3-GFX940-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx940+xnack+sram-ecc"
-; V3-GFX1010-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1010"
-; V3-GFX1010-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1010+xnack"
-; V3-GFX1011-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1011"
-; V3-GFX1011-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1011+xnack"
-; V3-GFX1012-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1012"
-; V3-GFX1012-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1012+xnack"
-; V3-GFX1013-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1013"
-; V3-GFX1013-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1013+xnack"
-; V3-GFX1030: .amdgcn_target "amdgcn-amd-amdhsa--gfx1030"
-; V3-GFX1031: .amdgcn_target "amdgcn-amd-amdhsa--gfx1031"
-; V3-GFX1032: .amdgcn_target "amdgcn-amd-amdhsa--gfx1032"
-; V3-GFX1033: .amdgcn_target "amdgcn-amd-amdhsa--gfx1033"
-; V3-GFX1034: .amdgcn_target "amdgcn-amd-amdhsa--gfx1034"
-; V3-GFX1035: .amdgcn_target "amdgcn-amd-amdhsa--gfx1035"
-; V3-GFX1036: .amdgcn_target "amdgcn-amd-amdhsa--gfx1036"
-; V3-GFX1100: .amdgcn_target "amdgcn-amd-amdhsa--gfx1100"
-; V3-GFX1101: .amdgcn_target "amdgcn-amd-amdhsa--gfx1101"
-; V3-GFX1102: .amdgcn_target "amdgcn-amd-amdhsa--gfx1102"
-; V3-GFX1103: .amdgcn_target "amdgcn-amd-amdhsa--gfx1103"
-; V3-GFX1150: .amdgcn_target "amdgcn-amd-amdhsa--gfx1150"
-; V3-GFX1151: .amdgcn_target "amdgcn-amd-amdhsa--gfx1151"
-
-
-
-define amdgpu_kernel void @directive_amdgcn_target() {
-  ret void
-}
-
-!llvm.module.flags = !{!0}
-!0 = !{i32 1, !"amdgpu_code_object_version", i32 300}

diff  --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-enqueue-kernel-v3.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-enqueue-kernel-.ll
similarity index 98%
rename from llvm/test/CodeGen/AMDGPU/hsa-metadata-enqueue-kernel-v3.ll
rename to llvm/test/CodeGen/AMDGPU/hsa-metadata-enqueue-kernel-.ll
index 37b124e7f59a09f..042abe382283a65 100644
--- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-enqueue-kernel-v3.ll
+++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-enqueue-kernel-.ll
@@ -142,7 +142,7 @@ define amdgpu_kernel void @test_no_default_queue(i8 %a) #3
 
 ; CHECK:  amdhsa.version:
 ; CHECK-NEXT: - 1
-; CHECK-NEXT: - 0
+; CHECK-NEXT: - 1
 ; CHECK-NOT:  amdhsa.printf:
 
 attributes #0 = { optnone noinline "amdgpu-no-default-queue" "amdgpu-no-completion-action" "amdgpu-implicitarg-num-bytes"="48" }
@@ -151,7 +151,7 @@ attributes #2 = { optnone noinline "amdgpu-no-completion-action" "amdgpu-implici
 attributes #3 = { optnone noinline "amdgpu-no-default-queue" "amdgpu-implicitarg-num-bytes"="48" }
 
 !llvm.module.flags = !{!0}
-!0 = !{i32 1, !"amdgpu_code_object_version", i32 300}
+!0 = !{i32 1, !"amdgpu_code_object_version", i32 400}
 
 !1 = !{i32 0}
 !2 = !{!"none"}

diff  --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ctor-dtor-list.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ctor-dtor-list.ll
index 8e8023aa16f1303..fb08fd2c45085a1 100644
--- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ctor-dtor-list.ll
+++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ctor-dtor-list.ll
@@ -39,4 +39,4 @@ define internal void @bar.5() {
 ; PARSER: AMDGPU HSA Metadata Parser Test: PASS
 
 !llvm.module.flags = !{!0}
-!0 = !{i32 1, !"amdgpu_code_object_version", i32 300}
+!0 = !{i32 1, !"amdgpu_code_object_version", i32 400}

diff  --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full-v3.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full.ll
similarity index 99%
rename from llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full-v3.ll
rename to llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full.ll
index 69efc47008e6aad..dc3a6e8b633b233 100644
--- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full-v3.ll
+++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-from-llvm-ir-full.ll
@@ -1739,14 +1739,14 @@ define amdgpu_kernel void @unknown_addrspace_kernarg(ptr addrspace(12345) %ptr)
 ; CHECK-NEXT: - '2:1:8:%g\n'
 ; CHECK:  amdhsa.version:
 ; CHECK-NEXT: - 1
-; CHECK-NEXT: - 0
+; CHECK-NEXT: - 1
 
 attributes #0 = { optnone noinline "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-implicitarg-num-bytes"="56" }
 attributes #1 = { optnone noinline "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-implicitarg-num-bytes"="56" "runtime-handle"="__test_block_invoke_kernel_runtime_handle" }
 attributes #2 = { optnone noinline "amdgpu-implicitarg-num-bytes"="56" }
 
 !llvm.module.flags = !{!0}
-!0 = !{i32 1, !"amdgpu_code_object_version", i32 300}
+!0 = !{i32 1, !"amdgpu_code_object_version", i32 400}
 
 !llvm.printf.fmts = !{!100, !101}
 

diff  --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args-v3.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args-v4.ll
similarity index 99%
rename from llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args-v3.ll
rename to llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args-v4.ll
index 47b882494c9191a..f4892ebdc9c9374 100644
--- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args-v3.ll
+++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-hidden-args-v4.ll
@@ -286,7 +286,7 @@ entry:
 
 ; CHECK:  amdhsa.version:
 ; CHECK-NEXT: - 1
-; CHECK-NEXT: - 0
+; CHECK-NEXT: - 1
 
 ; We don't have a use of llvm.amdgcn.implicitarg.ptr, so optnone to
 ; avoid optimizing out the implicit argument allocation.
@@ -298,4 +298,4 @@ attributes #4 = { optnone noinline "amdgpu-implicitarg-num-bytes"="48" }
 attributes #5 = { optnone noinline "amdgpu-implicitarg-num-bytes"="56" }
 
 !llvm.module.flags = !{!0}
-!0 = !{i32 1, !"amdgpu_code_object_version", i32 300}
+!0 = !{i32 1, !"amdgpu_code_object_version", i32 400}

diff  --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-present-v3-asan.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-present-asan.ll
similarity index 96%
rename from llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-present-v3-asan.ll
rename to llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-present-asan.ll
index cb3ae289721bc17..22c6e147762200d 100644
--- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-present-v3-asan.ll
+++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-present-asan.ll
@@ -35,12 +35,12 @@ define amdgpu_kernel void @test_kernel(i8 %a) #0
 
 ; CHECK:  amdhsa.version:
 ; CHECK-NEXT: - 1
-; CHECK-NEXT: - 0
+; CHECK-NEXT: - 1
 
 attributes #0 = { sanitize_address "amdgpu-implicitarg-num-bytes"="48" }
 
 !llvm.module.flags = !{!0}
-!0 = !{i32 1, !"amdgpu_code_object_version", i32 300}
+!0 = !{i32 1, !"amdgpu_code_object_version", i32 400}
 !1 = !{i32 0}
 !2 = !{!"none"}
 !3 = !{!"char"}

diff  --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-v3.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-v4.ll
similarity index 99%
rename from llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-v3.ll
rename to llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-v4.ll
index a3f8c5cff95df85..8f90025fe8e29c1 100644
--- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-v3.ll
+++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-hostcall-v4.ll
@@ -296,4 +296,4 @@ attributes #3 = { "amdgpu-implicitarg-num-bytes"="48" "amdgpu-no-hostcall-ptr" }
 attributes #4 = { noinline }
 
 !llvm.module.flags = !{!0}
-!0 = !{i32 1, !"amdgpu_code_object_version", i32 300}
+!0 = !{i32 1, !"amdgpu_code_object_version", i32 400}

diff  --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-images-v3.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-images.ll
similarity index 98%
rename from llvm/test/CodeGen/AMDGPU/hsa-metadata-images-v3.ll
rename to llvm/test/CodeGen/AMDGPU/hsa-metadata-images.ll
index b7f58bbb51bb29d..6d49f22eb429b24 100644
--- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-images-v3.ll
+++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-images.ll
@@ -96,10 +96,10 @@ define amdgpu_kernel void @test(ptr addrspace(1) %a,
 
 ; CHECK:  amdhsa.version:
 ; CHECK-NEXT: - 1
-; CHECK-NEXT: - 0
+; CHECK-NEXT: - 1
 
 !llvm.module.flags = !{!0}
-!0 = !{i32 1, !"amdgpu_code_object_version", i32 300}
+!0 = !{i32 1, !"amdgpu_code_object_version", i32 400}
 
 !1 = !{!"image1d_t", !"image1d_array_t", !"image1d_buffer_t",
        !"image2d_t", !"image2d_array_t", !"image2d_array_depth_t",

diff  --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-1-v3.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-1.ll
similarity index 80%
rename from llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-1-v3.ll
rename to llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-1.ll
index 8117037baaffc9d..fc5e6e273125334 100644
--- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-1-v3.ll
+++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-1.ll
@@ -5,9 +5,9 @@
 ; CHECK: ---
 ; CHECK: amdhsa.version:
 ; CHECK-NEXT: - 1
-; CHECK-NEXT: - 0
+; CHECK-NEXT: - 1
 ; CHECK: ...
 
 !opencl.ocl.version = !{}
 !llvm.module.flags = !{!0}
-!0 = !{i32 1, !"amdgpu_code_object_version", i32 300}
+!0 = !{i32 1, !"amdgpu_code_object_version", i32 400}

diff  --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-3-v3.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-3.ll
similarity index 81%
rename from llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-3-v3.ll
rename to llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-3.ll
index ea744863a9b8875..1ec79c95bc2a308 100644
--- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-3-v3.ll
+++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-invalid-ocl-version-3.ll
@@ -5,10 +5,10 @@
 ; CHECK: ---
 ; CHECK: amdhsa.version:
 ; CHECK-NEXT: - 1
-; CHECK-NEXT: - 0
+; CHECK-NEXT: - 1
 ; CHECK: ...
 
 !opencl.ocl.version = !{!0}
 !llvm.module.flags = !{!1}
 !0 = !{i32 1}
-!1 = !{i32 1, !"amdgpu_code_object_version", i32 300}
+!1 = !{i32 1, !"amdgpu_code_object_version", i32 400}

diff  --git a/llvm/test/CodeGen/AMDGPU/hsa-metadata-kernel-code-props-v3.ll b/llvm/test/CodeGen/AMDGPU/hsa-metadata-kernel-code-props.ll
similarity index 99%
rename from llvm/test/CodeGen/AMDGPU/hsa-metadata-kernel-code-props-v3.ll
rename to llvm/test/CodeGen/AMDGPU/hsa-metadata-kernel-code-props.ll
index d6f7a92af9dcb6f..e45c4d1786faf67 100644
--- a/llvm/test/CodeGen/AMDGPU/hsa-metadata-kernel-code-props-v3.ll
+++ b/llvm/test/CodeGen/AMDGPU/hsa-metadata-kernel-code-props.ll
@@ -158,11 +158,11 @@ define amdgpu_kernel void @num_spilled_vgprs() #1 {
 
 ; CHECK:  amdhsa.version:
 ; CHECK-NEXT: - 1
-; CHECK-NEXT: - 0
+; CHECK-NEXT: - 1
 
 attributes #0 = { "amdgpu-num-sgpr"="14" }
 attributes #1 = { "amdgpu-num-vgpr"="20" }
 attributes #2 = { "amdgpu-flat-work-group-size"="1,256" }
 
 !llvm.module.flags = !{!0}
-!0 = !{i32 1, !"amdgpu_code_object_version", i32 300}
+!0 = !{i32 1, !"amdgpu_code_object_version", i32 400}

diff  --git a/llvm/test/CodeGen/AMDGPU/implicit-kernarg-backend-usage.ll b/llvm/test/CodeGen/AMDGPU/implicit-kernarg-backend-usage.ll
index 9760e93eb48e6bd..9e6c0ef86906ddb 100644
--- a/llvm/test/CodeGen/AMDGPU/implicit-kernarg-backend-usage.ll
+++ b/llvm/test/CodeGen/AMDGPU/implicit-kernarg-backend-usage.ll
@@ -1,36 +1,11 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: sed 's/CODE_OBJECT_VERSION/300/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 | FileCheck --check-prefix=GFX8V3 %s
 ; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 | FileCheck --check-prefix=GFX8V4 %s
 ; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 | FileCheck --check-prefix=GFX8V5 %s
 
-; RUN: sed 's/CODE_OBJECT_VERSION/300/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 | FileCheck --check-prefixes=GFX9V3 %s
 ; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 | FileCheck --check-prefixes=GFX9V4 %s
 ; RUN: sed 's/CODE_OBJECT_VERSION/500/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 | FileCheck --check-prefixes=GFX9V5 %s
 
 define amdgpu_kernel void @addrspacecast(ptr addrspace(5) %ptr.private, ptr addrspace(3) %ptr.local) {
-; GFX8V3-LABEL: addrspacecast:
-; GFX8V3:       ; %bb.0:
-; GFX8V3-NEXT:    s_load_dwordx2 s[0:1], s[6:7], 0x0
-; GFX8V3-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x40
-; GFX8V3-NEXT:    v_mov_b32_e32 v4, 1
-; GFX8V3-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8V3-NEXT:    s_cmp_lg_u32 s0, -1
-; GFX8V3-NEXT:    s_cselect_b32 s3, s3, 0
-; GFX8V3-NEXT:    s_cselect_b32 s0, s0, 0
-; GFX8V3-NEXT:    s_cmp_lg_u32 s1, -1
-; GFX8V3-NEXT:    v_mov_b32_e32 v0, s0
-; GFX8V3-NEXT:    v_mov_b32_e32 v1, s3
-; GFX8V3-NEXT:    s_cselect_b32 s0, s2, 0
-; GFX8V3-NEXT:    s_cselect_b32 s1, s1, 0
-; GFX8V3-NEXT:    v_mov_b32_e32 v2, s1
-; GFX8V3-NEXT:    v_mov_b32_e32 v3, s0
-; GFX8V3-NEXT:    flat_store_dword v[0:1], v4
-; GFX8V3-NEXT:    s_waitcnt vmcnt(0)
-; GFX8V3-NEXT:    v_mov_b32_e32 v0, 2
-; GFX8V3-NEXT:    flat_store_dword v[2:3], v0
-; GFX8V3-NEXT:    s_waitcnt vmcnt(0)
-; GFX8V3-NEXT:    s_endpgm
-;
 ; GFX8V4-LABEL: addrspacecast:
 ; GFX8V4:       ; %bb.0:
 ; GFX8V4-NEXT:    s_load_dwordx2 s[0:1], s[6:7], 0x0
@@ -77,30 +52,6 @@ define amdgpu_kernel void @addrspacecast(ptr addrspace(5) %ptr.private, ptr addr
 ; GFX8V5-NEXT:    s_waitcnt vmcnt(0)
 ; GFX8V5-NEXT:    s_endpgm
 ;
-; GFX9V3-LABEL: addrspacecast:
-; GFX9V3:       ; %bb.0:
-; GFX9V3-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
-; GFX9V3-NEXT:    s_mov_b64 s[2:3], src_private_base
-; GFX9V3-NEXT:    s_mov_b64 s[4:5], src_shared_base
-; GFX9V3-NEXT:    v_mov_b32_e32 v4, 1
-; GFX9V3-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9V3-NEXT:    s_cmp_lg_u32 s0, -1
-; GFX9V3-NEXT:    s_cselect_b32 s2, s3, 0
-; GFX9V3-NEXT:    s_cselect_b32 s0, s0, 0
-; GFX9V3-NEXT:    s_cmp_lg_u32 s1, -1
-; GFX9V3-NEXT:    v_mov_b32_e32 v0, s0
-; GFX9V3-NEXT:    v_mov_b32_e32 v1, s2
-; GFX9V3-NEXT:    s_cselect_b32 s0, s5, 0
-; GFX9V3-NEXT:    s_cselect_b32 s1, s1, 0
-; GFX9V3-NEXT:    v_mov_b32_e32 v2, s1
-; GFX9V3-NEXT:    v_mov_b32_e32 v3, s0
-; GFX9V3-NEXT:    flat_store_dword v[0:1], v4
-; GFX9V3-NEXT:    s_waitcnt vmcnt(0)
-; GFX9V3-NEXT:    v_mov_b32_e32 v0, 2
-; GFX9V3-NEXT:    flat_store_dword v[2:3], v0
-; GFX9V3-NEXT:    s_waitcnt vmcnt(0)
-; GFX9V3-NEXT:    s_endpgm
-;
 ; GFX9V4-LABEL: addrspacecast:
 ; GFX9V4:       ; %bb.0:
 ; GFX9V4-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
@@ -156,18 +107,6 @@ define amdgpu_kernel void @addrspacecast(ptr addrspace(5) %ptr.private, ptr addr
 }
 
 define amdgpu_kernel void @llvm_amdgcn_is_shared(ptr %ptr) {
-; GFX8V3-LABEL: llvm_amdgcn_is_shared:
-; GFX8V3:       ; %bb.0:
-; GFX8V3-NEXT:    s_load_dword s0, s[4:5], 0x40
-; GFX8V3-NEXT:    s_load_dword s1, s[6:7], 0x4
-; GFX8V3-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8V3-NEXT:    s_cmp_eq_u32 s1, s0
-; GFX8V3-NEXT:    s_cselect_b64 s[0:1], -1, 0
-; GFX8V3-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[0:1]
-; GFX8V3-NEXT:    flat_store_dword v[0:1], v0
-; GFX8V3-NEXT:    s_waitcnt vmcnt(0)
-; GFX8V3-NEXT:    s_endpgm
-;
 ; GFX8V4-LABEL: llvm_amdgcn_is_shared:
 ; GFX8V4:       ; %bb.0:
 ; GFX8V4-NEXT:    s_load_dword s0, s[4:5], 0x40
@@ -192,18 +131,6 @@ define amdgpu_kernel void @llvm_amdgcn_is_shared(ptr %ptr) {
 ; GFX8V5-NEXT:    s_waitcnt vmcnt(0)
 ; GFX8V5-NEXT:    s_endpgm
 ;
-; GFX9V3-LABEL: llvm_amdgcn_is_shared:
-; GFX9V3:       ; %bb.0:
-; GFX9V3-NEXT:    s_load_dword s2, s[4:5], 0x4
-; GFX9V3-NEXT:    s_mov_b64 s[0:1], src_shared_base
-; GFX9V3-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9V3-NEXT:    s_cmp_eq_u32 s2, s1
-; GFX9V3-NEXT:    s_cselect_b64 s[0:1], -1, 0
-; GFX9V3-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[0:1]
-; GFX9V3-NEXT:    global_store_dword v[0:1], v0, off
-; GFX9V3-NEXT:    s_waitcnt vmcnt(0)
-; GFX9V3-NEXT:    s_endpgm
-;
 ; GFX9V4-LABEL: llvm_amdgcn_is_shared:
 ; GFX9V4:       ; %bb.0:
 ; GFX9V4-NEXT:    s_load_dword s2, s[4:5], 0x4
@@ -234,18 +161,6 @@ define amdgpu_kernel void @llvm_amdgcn_is_shared(ptr %ptr) {
 }
 
 define amdgpu_kernel void @llvm_amdgcn_is_private(ptr %ptr) {
-; GFX8V3-LABEL: llvm_amdgcn_is_private:
-; GFX8V3:       ; %bb.0:
-; GFX8V3-NEXT:    s_load_dword s0, s[4:5], 0x44
-; GFX8V3-NEXT:    s_load_dword s1, s[6:7], 0x4
-; GFX8V3-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX8V3-NEXT:    s_cmp_eq_u32 s1, s0
-; GFX8V3-NEXT:    s_cselect_b64 s[0:1], -1, 0
-; GFX8V3-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[0:1]
-; GFX8V3-NEXT:    flat_store_dword v[0:1], v0
-; GFX8V3-NEXT:    s_waitcnt vmcnt(0)
-; GFX8V3-NEXT:    s_endpgm
-;
 ; GFX8V4-LABEL: llvm_amdgcn_is_private:
 ; GFX8V4:       ; %bb.0:
 ; GFX8V4-NEXT:    s_load_dword s0, s[4:5], 0x44
@@ -270,18 +185,6 @@ define amdgpu_kernel void @llvm_amdgcn_is_private(ptr %ptr) {
 ; GFX8V5-NEXT:    s_waitcnt vmcnt(0)
 ; GFX8V5-NEXT:    s_endpgm
 ;
-; GFX9V3-LABEL: llvm_amdgcn_is_private:
-; GFX9V3:       ; %bb.0:
-; GFX9V3-NEXT:    s_load_dword s2, s[4:5], 0x4
-; GFX9V3-NEXT:    s_mov_b64 s[0:1], src_private_base
-; GFX9V3-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9V3-NEXT:    s_cmp_eq_u32 s2, s1
-; GFX9V3-NEXT:    s_cselect_b64 s[0:1], -1, 0
-; GFX9V3-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[0:1]
-; GFX9V3-NEXT:    global_store_dword v[0:1], v0, off
-; GFX9V3-NEXT:    s_waitcnt vmcnt(0)
-; GFX9V3-NEXT:    s_endpgm
-;
 ; GFX9V4-LABEL: llvm_amdgcn_is_private:
 ; GFX9V4:       ; %bb.0:
 ; GFX9V4-NEXT:    s_load_dword s2, s[4:5], 0x4
@@ -312,11 +215,6 @@ define amdgpu_kernel void @llvm_amdgcn_is_private(ptr %ptr) {
 }
 
 define amdgpu_kernel void @llvm_trap() {
-; GFX8V3-LABEL: llvm_trap:
-; GFX8V3:       ; %bb.0:
-; GFX8V3-NEXT:    s_mov_b64 s[0:1], s[4:5]
-; GFX8V3-NEXT:    s_trap 2
-;
 ; GFX8V4-LABEL: llvm_trap:
 ; GFX8V4:       ; %bb.0:
 ; GFX8V4-NEXT:    s_mov_b64 s[0:1], s[4:5]
@@ -328,11 +226,6 @@ define amdgpu_kernel void @llvm_trap() {
 ; GFX8V5-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX8V5-NEXT:    s_trap 2
 ;
-; GFX9V3-LABEL: llvm_trap:
-; GFX9V3:       ; %bb.0:
-; GFX9V3-NEXT:    s_mov_b64 s[0:1], s[4:5]
-; GFX9V3-NEXT:    s_trap 2
-;
 ; GFX9V4-LABEL: llvm_trap:
 ; GFX9V4:       ; %bb.0:
 ; GFX9V4-NEXT:    s_trap 2
@@ -345,10 +238,6 @@ define amdgpu_kernel void @llvm_trap() {
 }
 
 define amdgpu_kernel void @llvm_debugtrap() {
-; GFX8V3-LABEL: llvm_debugtrap:
-; GFX8V3:       ; %bb.0:
-; GFX8V3-NEXT:    s_trap 3
-;
 ; GFX8V4-LABEL: llvm_debugtrap:
 ; GFX8V4:       ; %bb.0:
 ; GFX8V4-NEXT:    s_trap 3
@@ -357,10 +246,6 @@ define amdgpu_kernel void @llvm_debugtrap() {
 ; GFX8V5:       ; %bb.0:
 ; GFX8V5-NEXT:    s_trap 3
 ;
-; GFX9V3-LABEL: llvm_debugtrap:
-; GFX9V3:       ; %bb.0:
-; GFX9V3-NEXT:    s_trap 3
-;
 ; GFX9V4-LABEL: llvm_debugtrap:
 ; GFX9V4:       ; %bb.0:
 ; GFX9V4-NEXT:    s_trap 3
@@ -373,31 +258,6 @@ define amdgpu_kernel void @llvm_debugtrap() {
 }
 
 define amdgpu_kernel void @llvm_amdgcn_queue_ptr(ptr addrspace(1) %ptr)  {
-; GFX8V3-LABEL: llvm_amdgcn_queue_ptr:
-; GFX8V3:       ; %bb.0:
-; GFX8V3-NEXT:    v_mov_b32_e32 v0, s6
-; GFX8V3-NEXT:    v_mov_b32_e32 v1, s7
-; GFX8V3-NEXT:    s_add_u32 s0, s8, 8
-; GFX8V3-NEXT:    flat_load_ubyte v0, v[0:1] glc
-; GFX8V3-NEXT:    s_addc_u32 s1, s9, 0
-; GFX8V3-NEXT:    s_waitcnt vmcnt(0)
-; GFX8V3-NEXT:    v_mov_b32_e32 v0, s0
-; GFX8V3-NEXT:    v_mov_b32_e32 v1, s1
-; GFX8V3-NEXT:    flat_load_ubyte v0, v[0:1] glc
-; GFX8V3-NEXT:    s_waitcnt vmcnt(0)
-; GFX8V3-NEXT:    v_mov_b32_e32 v0, s4
-; GFX8V3-NEXT:    v_mov_b32_e32 v1, s5
-; GFX8V3-NEXT:    flat_load_ubyte v0, v[0:1] glc
-; GFX8V3-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
-; GFX8V3-NEXT:    v_mov_b32_e32 v2, s10
-; GFX8V3-NEXT:    v_mov_b32_e32 v3, s11
-; GFX8V3-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
-; GFX8V3-NEXT:    v_mov_b32_e32 v0, s0
-; GFX8V3-NEXT:    v_mov_b32_e32 v1, s1
-; GFX8V3-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
-; GFX8V3-NEXT:    s_waitcnt vmcnt(0)
-; GFX8V3-NEXT:    s_endpgm
-;
 ; GFX8V4-LABEL: llvm_amdgcn_queue_ptr:
 ; GFX8V4:       ; %bb.0:
 ; GFX8V4-NEXT:    v_mov_b32_e32 v0, s6
@@ -446,23 +306,6 @@ define amdgpu_kernel void @llvm_amdgcn_queue_ptr(ptr addrspace(1) %ptr)  {
 ; GFX8V5-NEXT:    s_waitcnt vmcnt(0)
 ; GFX8V5-NEXT:    s_endpgm
 ;
-; GFX9V3-LABEL: llvm_amdgcn_queue_ptr:
-; GFX9V3:       ; %bb.0:
-; GFX9V3-NEXT:    v_mov_b32_e32 v2, 0
-; GFX9V3-NEXT:    global_load_ubyte v0, v2, s[6:7] glc
-; GFX9V3-NEXT:    global_load_ubyte v0, v2, s[8:9] offset:8 glc
-; GFX9V3-NEXT:    global_load_ubyte v0, v2, s[4:5] glc
-; GFX9V3-NEXT:    s_load_dwordx2 s[0:1], s[8:9], 0x0
-; GFX9V3-NEXT:    s_waitcnt vmcnt(0)
-; GFX9V3-NEXT:    v_mov_b32_e32 v0, s10
-; GFX9V3-NEXT:    v_mov_b32_e32 v1, s11
-; GFX9V3-NEXT:    ; kill: killed $sgpr6_sgpr7
-; GFX9V3-NEXT:    ; kill: killed $sgpr4_sgpr5
-; GFX9V3-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9V3-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
-; GFX9V3-NEXT:    s_waitcnt vmcnt(0)
-; GFX9V3-NEXT:    s_endpgm
-;
 ; GFX9V4-LABEL: llvm_amdgcn_queue_ptr:
 ; GFX9V4:       ; %bb.0:
 ; GFX9V4-NEXT:    v_mov_b32_e32 v2, 0

diff  --git a/llvm/test/CodeGen/AMDGPU/kernarg-size.ll b/llvm/test/CodeGen/AMDGPU/kernarg-size.ll
index 0353e7ee49ab953..a04fe28dbffffab 100644
--- a/llvm/test/CodeGen/AMDGPU/kernarg-size.ll
+++ b/llvm/test/CodeGen/AMDGPU/kernarg-size.ll
@@ -1,17 +1,8 @@
 ; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck --check-prefix=DOORBELL %s
 ; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck --check-prefix=DOORBELL %s
-; RUN: sed 's/CODE_OBJECT_VERSION/300/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 | FileCheck --check-prefix=HSA %s
 
 declare void @llvm.trap() #0
 
-; HSA:      .amdhsa_kernel trap
-; HSA-NEXT:     .amdhsa_group_segment_fixed_size 0
-; HSA-NEXT:     .amdhsa_private_segment_fixed_size 0
-; HSA-NEXT:     .amdhsa_kernarg_size 8
-; HSA-NEXT:     .amdhsa_user_sgpr_count 8
-; HSA-NEXT:     .amdhsa_user_sgpr_private_segment_buffer 1
-; HSA:      .end_amdhsa_kernel
-
 ; DOORBELL:      .amdhsa_kernel trap
 ; DOORBELL-NEXT:     .amdhsa_group_segment_fixed_size 0
 ; DOORBELL-NEXT:     .amdhsa_private_segment_fixed_size 0

diff  --git a/llvm/test/CodeGen/AMDGPU/stack-realign-kernel.ll b/llvm/test/CodeGen/AMDGPU/stack-realign-kernel.ll
index 792ec2675247f61..9ed896c148e6485 100644
--- a/llvm/test/CodeGen/AMDGPU/stack-realign-kernel.ll
+++ b/llvm/test/CodeGen/AMDGPU/stack-realign-kernel.ll
@@ -317,4 +317,4 @@ attributes #1 = { nounwind "stackrealign" }
 attributes #2 = { nounwind alignstack=128 }
 
 !llvm.module.flags = !{!0}
-!0 = !{i32 1, !"amdgpu_code_object_version", i32 300}
+!0 = !{i32 1, !"amdgpu_code_object_version", i32 400}

diff  --git a/llvm/test/CodeGen/AMDGPU/trap-abis.ll b/llvm/test/CodeGen/AMDGPU/trap-abis.ll
index 03ea582698486ce..54a15513cf0a506 100644
--- a/llvm/test/CodeGen/AMDGPU/trap-abis.ll
+++ b/llvm/test/CodeGen/AMDGPU/trap-abis.ll
@@ -1,101 +1,54 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: sed 's/CODE_OBJECT_VERSION/300/g' %s | llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck --check-prefix=NOHSA-TRAP-GFX900-V3 %s
-; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck --check-prefix=NOHSA-TRAP-GFX900-V4 %s
-; RUN: sed 's/CODE_OBJECT_VERSION/300/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs | FileCheck --check-prefix=HSA-TRAP-GFX803-V3 %s
-; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs | FileCheck --check-prefix=HSA-TRAP-GFX803-V4 %s
-; RUN: sed 's/CODE_OBJECT_VERSION/300/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs | FileCheck --check-prefix=HSA-TRAP-GFX900-V3 %s
-; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs | FileCheck --check-prefix=HSA-TRAP-GFX900-V4 %s
-; RUN: sed 's/CODE_OBJECT_VERSION/300/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-trap-handler -verify-machineinstrs | FileCheck --check-prefix=HSA-NOTRAP-GFX900-V3 %s
-; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-trap-handler -verify-machineinstrs | FileCheck --check-prefix=HSA-NOTRAP-GFX900-V4 %s
+; RUN: llc %s -o - -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck --check-prefix=NOHSA-TRAP-GFX900 %s
+; RUN: llc %s -o - -mtriple=amdgcn-amd-amdhsa -mcpu=gfx803 -verify-machineinstrs | FileCheck --check-prefix=HSA-TRAP-GFX803 %s
+; RUN: llc %s -o - -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs | FileCheck --check-prefix=HSA-TRAP-GFX900 %s
+; RUN: llc %s -o - -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=-trap-handler -verify-machineinstrs | FileCheck --check-prefix=HSA-NOTRAP-GFX900 %s
 
 declare void @llvm.trap() #0
 declare void @llvm.debugtrap() #1
 
 define amdgpu_kernel void @trap(ptr addrspace(1) nocapture readonly %arg0) {
-; NOHSA-TRAP-GFX900-V3-LABEL: trap:
-; NOHSA-TRAP-GFX900-V3:       ; %bb.0:
-; NOHSA-TRAP-GFX900-V3-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x24
-; NOHSA-TRAP-GFX900-V3-NEXT:    v_mov_b32_e32 v0, 0
-; NOHSA-TRAP-GFX900-V3-NEXT:    v_mov_b32_e32 v1, 1
-; NOHSA-TRAP-GFX900-V3-NEXT:    s_waitcnt lgkmcnt(0)
-; NOHSA-TRAP-GFX900-V3-NEXT:    global_store_dword v0, v1, s[0:1]
-; NOHSA-TRAP-GFX900-V3-NEXT:    s_waitcnt vmcnt(0)
-; NOHSA-TRAP-GFX900-V3-NEXT:    s_endpgm
-;
-; NOHSA-TRAP-GFX900-V4-LABEL: trap:
-; NOHSA-TRAP-GFX900-V4:       ; %bb.0:
-; NOHSA-TRAP-GFX900-V4-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
-; NOHSA-TRAP-GFX900-V4-NEXT:    v_mov_b32_e32 v0, 0
-; NOHSA-TRAP-GFX900-V4-NEXT:    v_mov_b32_e32 v1, 1
-; NOHSA-TRAP-GFX900-V4-NEXT:    s_waitcnt lgkmcnt(0)
-; NOHSA-TRAP-GFX900-V4-NEXT:    global_store_dword v0, v1, s[0:1]
-; NOHSA-TRAP-GFX900-V4-NEXT:    s_waitcnt vmcnt(0)
-; NOHSA-TRAP-GFX900-V4-NEXT:    s_endpgm
-;
-; HSA-TRAP-GFX803-V3-LABEL: trap:
-; HSA-TRAP-GFX803-V3:       ; %bb.0:
-; HSA-TRAP-GFX803-V3-NEXT:    s_load_dwordx2 s[2:3], s[6:7], 0x0
-; HSA-TRAP-GFX803-V3-NEXT:    v_mov_b32_e32 v2, 1
-; HSA-TRAP-GFX803-V3-NEXT:    s_mov_b64 s[0:1], s[4:5]
-; HSA-TRAP-GFX803-V3-NEXT:    s_waitcnt lgkmcnt(0)
-; HSA-TRAP-GFX803-V3-NEXT:    v_mov_b32_e32 v0, s2
-; HSA-TRAP-GFX803-V3-NEXT:    v_mov_b32_e32 v1, s3
-; HSA-TRAP-GFX803-V3-NEXT:    flat_store_dword v[0:1], v2
-; HSA-TRAP-GFX803-V3-NEXT:    s_waitcnt vmcnt(0)
-; HSA-TRAP-GFX803-V3-NEXT:    s_trap 2
-;
-; HSA-TRAP-GFX803-V4-LABEL: trap:
-; HSA-TRAP-GFX803-V4:       ; %bb.0:
-; HSA-TRAP-GFX803-V4-NEXT:    s_load_dwordx2 s[2:3], s[6:7], 0x0
-; HSA-TRAP-GFX803-V4-NEXT:    v_mov_b32_e32 v2, 1
-; HSA-TRAP-GFX803-V4-NEXT:    s_mov_b64 s[0:1], s[4:5]
-; HSA-TRAP-GFX803-V4-NEXT:    s_waitcnt lgkmcnt(0)
-; HSA-TRAP-GFX803-V4-NEXT:    v_mov_b32_e32 v0, s2
-; HSA-TRAP-GFX803-V4-NEXT:    v_mov_b32_e32 v1, s3
-; HSA-TRAP-GFX803-V4-NEXT:    flat_store_dword v[0:1], v2
-; HSA-TRAP-GFX803-V4-NEXT:    s_waitcnt vmcnt(0)
-; HSA-TRAP-GFX803-V4-NEXT:    s_trap 2
-;
-; HSA-TRAP-GFX900-V3-LABEL: trap:
-; HSA-TRAP-GFX900-V3:       ; %bb.0:
-; HSA-TRAP-GFX900-V3-NEXT:    s_load_dwordx2 s[2:3], s[6:7], 0x0
-; HSA-TRAP-GFX900-V3-NEXT:    v_mov_b32_e32 v0, 0
-; HSA-TRAP-GFX900-V3-NEXT:    v_mov_b32_e32 v1, 1
-; HSA-TRAP-GFX900-V3-NEXT:    s_mov_b64 s[0:1], s[4:5]
-; HSA-TRAP-GFX900-V3-NEXT:    s_waitcnt lgkmcnt(0)
-; HSA-TRAP-GFX900-V3-NEXT:    global_store_dword v0, v1, s[2:3]
-; HSA-TRAP-GFX900-V3-NEXT:    s_waitcnt vmcnt(0)
-; HSA-TRAP-GFX900-V3-NEXT:    s_trap 2
-;
-; HSA-TRAP-GFX900-V4-LABEL: trap:
-; HSA-TRAP-GFX900-V4:       ; %bb.0:
-; HSA-TRAP-GFX900-V4-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
-; HSA-TRAP-GFX900-V4-NEXT:    v_mov_b32_e32 v0, 0
-; HSA-TRAP-GFX900-V4-NEXT:    v_mov_b32_e32 v1, 1
-; HSA-TRAP-GFX900-V4-NEXT:    s_waitcnt lgkmcnt(0)
-; HSA-TRAP-GFX900-V4-NEXT:    global_store_dword v0, v1, s[0:1]
-; HSA-TRAP-GFX900-V4-NEXT:    s_waitcnt vmcnt(0)
-; HSA-TRAP-GFX900-V4-NEXT:    s_trap 2
-;
-; HSA-NOTRAP-GFX900-V3-LABEL: trap:
-; HSA-NOTRAP-GFX900-V3:       ; %bb.0:
-; HSA-NOTRAP-GFX900-V3-NEXT:    s_load_dwordx2 s[0:1], s[6:7], 0x0
-; HSA-NOTRAP-GFX900-V3-NEXT:    v_mov_b32_e32 v0, 0
-; HSA-NOTRAP-GFX900-V3-NEXT:    v_mov_b32_e32 v1, 1
-; HSA-NOTRAP-GFX900-V3-NEXT:    s_waitcnt lgkmcnt(0)
-; HSA-NOTRAP-GFX900-V3-NEXT:    global_store_dword v0, v1, s[0:1]
-; HSA-NOTRAP-GFX900-V3-NEXT:    s_waitcnt vmcnt(0)
-; HSA-NOTRAP-GFX900-V3-NEXT:    s_endpgm
-;
-; HSA-NOTRAP-GFX900-V4-LABEL: trap:
-; HSA-NOTRAP-GFX900-V4:       ; %bb.0:
-; HSA-NOTRAP-GFX900-V4-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
-; HSA-NOTRAP-GFX900-V4-NEXT:    v_mov_b32_e32 v0, 0
-; HSA-NOTRAP-GFX900-V4-NEXT:    v_mov_b32_e32 v1, 1
-; HSA-NOTRAP-GFX900-V4-NEXT:    s_waitcnt lgkmcnt(0)
-; HSA-NOTRAP-GFX900-V4-NEXT:    global_store_dword v0, v1, s[0:1]
-; HSA-NOTRAP-GFX900-V4-NEXT:    s_waitcnt vmcnt(0)
-; HSA-NOTRAP-GFX900-V4-NEXT:    s_endpgm
+; NOHSA-TRAP-GFX900-LABEL: trap:
+; NOHSA-TRAP-GFX900:       ; %bb.0:
+; NOHSA-TRAP-GFX900-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; NOHSA-TRAP-GFX900-NEXT:    v_mov_b32_e32 v0, 0
+; NOHSA-TRAP-GFX900-NEXT:    v_mov_b32_e32 v1, 1
+; NOHSA-TRAP-GFX900-NEXT:    s_waitcnt lgkmcnt(0)
+; NOHSA-TRAP-GFX900-NEXT:    global_store_dword v0, v1, s[0:1]
+; NOHSA-TRAP-GFX900-NEXT:    s_waitcnt vmcnt(0)
+; NOHSA-TRAP-GFX900-NEXT:    s_endpgm
+;
+; HSA-TRAP-GFX803-LABEL: trap:
+; HSA-TRAP-GFX803:       ; %bb.0:
+; HSA-TRAP-GFX803-NEXT:    s_load_dwordx2 s[2:3], s[6:7], 0x0
+; HSA-TRAP-GFX803-NEXT:    v_mov_b32_e32 v2, 1
+; HSA-TRAP-GFX803-NEXT:    s_mov_b64 s[0:1], s[4:5]
+; HSA-TRAP-GFX803-NEXT:    s_waitcnt lgkmcnt(0)
+; HSA-TRAP-GFX803-NEXT:    v_mov_b32_e32 v0, s2
+; HSA-TRAP-GFX803-NEXT:    v_mov_b32_e32 v1, s3
+; HSA-TRAP-GFX803-NEXT:    flat_store_dword v[0:1], v2
+; HSA-TRAP-GFX803-NEXT:    s_waitcnt vmcnt(0)
+; HSA-TRAP-GFX803-NEXT:    s_trap 2
+;
+; HSA-TRAP-GFX900-LABEL: trap:
+; HSA-TRAP-GFX900:       ; %bb.0:
+; HSA-TRAP-GFX900-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; HSA-TRAP-GFX900-NEXT:    v_mov_b32_e32 v0, 0
+; HSA-TRAP-GFX900-NEXT:    v_mov_b32_e32 v1, 1
+; HSA-TRAP-GFX900-NEXT:    s_waitcnt lgkmcnt(0)
+; HSA-TRAP-GFX900-NEXT:    global_store_dword v0, v1, s[0:1]
+; HSA-TRAP-GFX900-NEXT:    s_waitcnt vmcnt(0)
+; HSA-TRAP-GFX900-NEXT:    s_trap 2
+;
+; HSA-NOTRAP-GFX900-LABEL: trap:
+; HSA-NOTRAP-GFX900:       ; %bb.0:
+; HSA-NOTRAP-GFX900-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; HSA-NOTRAP-GFX900-NEXT:    v_mov_b32_e32 v0, 0
+; HSA-NOTRAP-GFX900-NEXT:    v_mov_b32_e32 v1, 1
+; HSA-NOTRAP-GFX900-NEXT:    s_waitcnt lgkmcnt(0)
+; HSA-NOTRAP-GFX900-NEXT:    global_store_dword v0, v1, s[0:1]
+; HSA-NOTRAP-GFX900-NEXT:    s_waitcnt vmcnt(0)
+; HSA-NOTRAP-GFX900-NEXT:    s_endpgm
   store volatile i32 1, ptr addrspace(1) %arg0
   call void @llvm.trap()
   unreachable
@@ -104,150 +57,77 @@ define amdgpu_kernel void @trap(ptr addrspace(1) nocapture readonly %arg0) {
 }
 
 define amdgpu_kernel void @non_entry_trap(ptr addrspace(1) nocapture readonly %arg0) local_unnamed_addr {
-; NOHSA-TRAP-GFX900-V3-LABEL: non_entry_trap:
-; NOHSA-TRAP-GFX900-V3:       ; %bb.0: ; %entry
-; NOHSA-TRAP-GFX900-V3-NEXT:    s_load_dwordx2 s[0:1], s[2:3], 0x24
-; NOHSA-TRAP-GFX900-V3-NEXT:    v_mov_b32_e32 v0, 0
-; NOHSA-TRAP-GFX900-V3-NEXT:    s_waitcnt lgkmcnt(0)
-; NOHSA-TRAP-GFX900-V3-NEXT:    global_load_dword v1, v0, s[0:1] glc
-; NOHSA-TRAP-GFX900-V3-NEXT:    s_waitcnt vmcnt(0)
-; NOHSA-TRAP-GFX900-V3-NEXT:    v_cmp_eq_u32_e32 vcc, -1, v1
-; NOHSA-TRAP-GFX900-V3-NEXT:    s_cbranch_vccz .LBB1_2
-; NOHSA-TRAP-GFX900-V3-NEXT:  ; %bb.1: ; %ret
-; NOHSA-TRAP-GFX900-V3-NEXT:    v_mov_b32_e32 v1, 3
-; NOHSA-TRAP-GFX900-V3-NEXT:    global_store_dword v0, v1, s[0:1]
-; NOHSA-TRAP-GFX900-V3-NEXT:    s_waitcnt vmcnt(0)
-; NOHSA-TRAP-GFX900-V3-NEXT:    s_endpgm
-; NOHSA-TRAP-GFX900-V3-NEXT:  .LBB1_2: ; %trap
-; NOHSA-TRAP-GFX900-V3-NEXT:    s_endpgm
-;
-; NOHSA-TRAP-GFX900-V4-LABEL: non_entry_trap:
-; NOHSA-TRAP-GFX900-V4:       ; %bb.0: ; %entry
-; NOHSA-TRAP-GFX900-V4-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
-; NOHSA-TRAP-GFX900-V4-NEXT:    v_mov_b32_e32 v0, 0
-; NOHSA-TRAP-GFX900-V4-NEXT:    s_waitcnt lgkmcnt(0)
-; NOHSA-TRAP-GFX900-V4-NEXT:    global_load_dword v1, v0, s[0:1] glc
-; NOHSA-TRAP-GFX900-V4-NEXT:    s_waitcnt vmcnt(0)
-; NOHSA-TRAP-GFX900-V4-NEXT:    v_cmp_eq_u32_e32 vcc, -1, v1
-; NOHSA-TRAP-GFX900-V4-NEXT:    s_cbranch_vccz .LBB1_2
-; NOHSA-TRAP-GFX900-V4-NEXT:  ; %bb.1: ; %ret
-; NOHSA-TRAP-GFX900-V4-NEXT:    v_mov_b32_e32 v1, 3
-; NOHSA-TRAP-GFX900-V4-NEXT:    global_store_dword v0, v1, s[0:1]
-; NOHSA-TRAP-GFX900-V4-NEXT:    s_waitcnt vmcnt(0)
-; NOHSA-TRAP-GFX900-V4-NEXT:    s_endpgm
-; NOHSA-TRAP-GFX900-V4-NEXT:  .LBB1_2: ; %trap
-; NOHSA-TRAP-GFX900-V4-NEXT:    s_endpgm
-;
-; HSA-TRAP-GFX803-V3-LABEL: non_entry_trap:
-; HSA-TRAP-GFX803-V3:       ; %bb.0: ; %entry
-; HSA-TRAP-GFX803-V3-NEXT:    s_load_dwordx2 s[0:1], s[6:7], 0x0
-; HSA-TRAP-GFX803-V3-NEXT:    s_waitcnt lgkmcnt(0)
-; HSA-TRAP-GFX803-V3-NEXT:    v_mov_b32_e32 v0, s0
-; HSA-TRAP-GFX803-V3-NEXT:    v_mov_b32_e32 v1, s1
-; HSA-TRAP-GFX803-V3-NEXT:    flat_load_dword v0, v[0:1] glc
-; HSA-TRAP-GFX803-V3-NEXT:    s_waitcnt vmcnt(0)
-; HSA-TRAP-GFX803-V3-NEXT:    v_cmp_eq_u32_e32 vcc, -1, v0
-; HSA-TRAP-GFX803-V3-NEXT:    s_cbranch_vccz .LBB1_2
-; HSA-TRAP-GFX803-V3-NEXT:  ; %bb.1: ; %ret
-; HSA-TRAP-GFX803-V3-NEXT:    v_mov_b32_e32 v0, s0
-; HSA-TRAP-GFX803-V3-NEXT:    v_mov_b32_e32 v2, 3
-; HSA-TRAP-GFX803-V3-NEXT:    v_mov_b32_e32 v1, s1
-; HSA-TRAP-GFX803-V3-NEXT:    flat_store_dword v[0:1], v2
-; HSA-TRAP-GFX803-V3-NEXT:    s_waitcnt vmcnt(0)
-; HSA-TRAP-GFX803-V3-NEXT:    s_endpgm
-; HSA-TRAP-GFX803-V3-NEXT:  .LBB1_2: ; %trap
-; HSA-TRAP-GFX803-V3-NEXT:    s_mov_b64 s[0:1], s[4:5]
-; HSA-TRAP-GFX803-V3-NEXT:    s_trap 2
-;
-; HSA-TRAP-GFX803-V4-LABEL: non_entry_trap:
-; HSA-TRAP-GFX803-V4:       ; %bb.0: ; %entry
-; HSA-TRAP-GFX803-V4-NEXT:    s_load_dwordx2 s[0:1], s[6:7], 0x0
-; HSA-TRAP-GFX803-V4-NEXT:    s_waitcnt lgkmcnt(0)
-; HSA-TRAP-GFX803-V4-NEXT:    v_mov_b32_e32 v0, s0
-; HSA-TRAP-GFX803-V4-NEXT:    v_mov_b32_e32 v1, s1
-; HSA-TRAP-GFX803-V4-NEXT:    flat_load_dword v0, v[0:1] glc
-; HSA-TRAP-GFX803-V4-NEXT:    s_waitcnt vmcnt(0)
-; HSA-TRAP-GFX803-V4-NEXT:    v_cmp_eq_u32_e32 vcc, -1, v0
-; HSA-TRAP-GFX803-V4-NEXT:    s_cbranch_vccz .LBB1_2
-; HSA-TRAP-GFX803-V4-NEXT:  ; %bb.1: ; %ret
-; HSA-TRAP-GFX803-V4-NEXT:    v_mov_b32_e32 v0, s0
-; HSA-TRAP-GFX803-V4-NEXT:    v_mov_b32_e32 v2, 3
-; HSA-TRAP-GFX803-V4-NEXT:    v_mov_b32_e32 v1, s1
-; HSA-TRAP-GFX803-V4-NEXT:    flat_store_dword v[0:1], v2
-; HSA-TRAP-GFX803-V4-NEXT:    s_waitcnt vmcnt(0)
-; HSA-TRAP-GFX803-V4-NEXT:    s_endpgm
-; HSA-TRAP-GFX803-V4-NEXT:  .LBB1_2: ; %trap
-; HSA-TRAP-GFX803-V4-NEXT:    s_mov_b64 s[0:1], s[4:5]
-; HSA-TRAP-GFX803-V4-NEXT:    s_trap 2
-;
-; HSA-TRAP-GFX900-V3-LABEL: non_entry_trap:
-; HSA-TRAP-GFX900-V3:       ; %bb.0: ; %entry
-; HSA-TRAP-GFX900-V3-NEXT:    s_load_dwordx2 s[0:1], s[6:7], 0x0
-; HSA-TRAP-GFX900-V3-NEXT:    v_mov_b32_e32 v0, 0
-; HSA-TRAP-GFX900-V3-NEXT:    s_waitcnt lgkmcnt(0)
-; HSA-TRAP-GFX900-V3-NEXT:    global_load_dword v1, v0, s[0:1] glc
-; HSA-TRAP-GFX900-V3-NEXT:    s_waitcnt vmcnt(0)
-; HSA-TRAP-GFX900-V3-NEXT:    v_cmp_eq_u32_e32 vcc, -1, v1
-; HSA-TRAP-GFX900-V3-NEXT:    s_cbranch_vccz .LBB1_2
-; HSA-TRAP-GFX900-V3-NEXT:  ; %bb.1: ; %ret
-; HSA-TRAP-GFX900-V3-NEXT:    v_mov_b32_e32 v1, 3
-; HSA-TRAP-GFX900-V3-NEXT:    global_store_dword v0, v1, s[0:1]
-; HSA-TRAP-GFX900-V3-NEXT:    s_waitcnt vmcnt(0)
-; HSA-TRAP-GFX900-V3-NEXT:    s_endpgm
-; HSA-TRAP-GFX900-V3-NEXT:  .LBB1_2: ; %trap
-; HSA-TRAP-GFX900-V3-NEXT:    s_mov_b64 s[0:1], s[4:5]
-; HSA-TRAP-GFX900-V3-NEXT:    s_trap 2
-;
-; HSA-TRAP-GFX900-V4-LABEL: non_entry_trap:
-; HSA-TRAP-GFX900-V4:       ; %bb.0: ; %entry
-; HSA-TRAP-GFX900-V4-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
-; HSA-TRAP-GFX900-V4-NEXT:    v_mov_b32_e32 v0, 0
-; HSA-TRAP-GFX900-V4-NEXT:    s_waitcnt lgkmcnt(0)
-; HSA-TRAP-GFX900-V4-NEXT:    global_load_dword v1, v0, s[0:1] glc
-; HSA-TRAP-GFX900-V4-NEXT:    s_waitcnt vmcnt(0)
-; HSA-TRAP-GFX900-V4-NEXT:    v_cmp_eq_u32_e32 vcc, -1, v1
-; HSA-TRAP-GFX900-V4-NEXT:    s_cbranch_vccz .LBB1_2
-; HSA-TRAP-GFX900-V4-NEXT:  ; %bb.1: ; %ret
-; HSA-TRAP-GFX900-V4-NEXT:    v_mov_b32_e32 v1, 3
-; HSA-TRAP-GFX900-V4-NEXT:    global_store_dword v0, v1, s[0:1]
-; HSA-TRAP-GFX900-V4-NEXT:    s_waitcnt vmcnt(0)
-; HSA-TRAP-GFX900-V4-NEXT:    s_endpgm
-; HSA-TRAP-GFX900-V4-NEXT:  .LBB1_2: ; %trap
-; HSA-TRAP-GFX900-V4-NEXT:    s_trap 2
-;
-; HSA-NOTRAP-GFX900-V3-LABEL: non_entry_trap:
-; HSA-NOTRAP-GFX900-V3:       ; %bb.0: ; %entry
-; HSA-NOTRAP-GFX900-V3-NEXT:    s_load_dwordx2 s[0:1], s[6:7], 0x0
-; HSA-NOTRAP-GFX900-V3-NEXT:    v_mov_b32_e32 v0, 0
-; HSA-NOTRAP-GFX900-V3-NEXT:    s_waitcnt lgkmcnt(0)
-; HSA-NOTRAP-GFX900-V3-NEXT:    global_load_dword v1, v0, s[0:1] glc
-; HSA-NOTRAP-GFX900-V3-NEXT:    s_waitcnt vmcnt(0)
-; HSA-NOTRAP-GFX900-V3-NEXT:    v_cmp_eq_u32_e32 vcc, -1, v1
-; HSA-NOTRAP-GFX900-V3-NEXT:    s_cbranch_vccz .LBB1_2
-; HSA-NOTRAP-GFX900-V3-NEXT:  ; %bb.1: ; %ret
-; HSA-NOTRAP-GFX900-V3-NEXT:    v_mov_b32_e32 v1, 3
-; HSA-NOTRAP-GFX900-V3-NEXT:    global_store_dword v0, v1, s[0:1]
-; HSA-NOTRAP-GFX900-V3-NEXT:    s_waitcnt vmcnt(0)
-; HSA-NOTRAP-GFX900-V3-NEXT:    s_endpgm
-; HSA-NOTRAP-GFX900-V3-NEXT:  .LBB1_2: ; %trap
-; HSA-NOTRAP-GFX900-V3-NEXT:    s_endpgm
-;
-; HSA-NOTRAP-GFX900-V4-LABEL: non_entry_trap:
-; HSA-NOTRAP-GFX900-V4:       ; %bb.0: ; %entry
-; HSA-NOTRAP-GFX900-V4-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
-; HSA-NOTRAP-GFX900-V4-NEXT:    v_mov_b32_e32 v0, 0
-; HSA-NOTRAP-GFX900-V4-NEXT:    s_waitcnt lgkmcnt(0)
-; HSA-NOTRAP-GFX900-V4-NEXT:    global_load_dword v1, v0, s[0:1] glc
-; HSA-NOTRAP-GFX900-V4-NEXT:    s_waitcnt vmcnt(0)
-; HSA-NOTRAP-GFX900-V4-NEXT:    v_cmp_eq_u32_e32 vcc, -1, v1
-; HSA-NOTRAP-GFX900-V4-NEXT:    s_cbranch_vccz .LBB1_2
-; HSA-NOTRAP-GFX900-V4-NEXT:  ; %bb.1: ; %ret
-; HSA-NOTRAP-GFX900-V4-NEXT:    v_mov_b32_e32 v1, 3
-; HSA-NOTRAP-GFX900-V4-NEXT:    global_store_dword v0, v1, s[0:1]
-; HSA-NOTRAP-GFX900-V4-NEXT:    s_waitcnt vmcnt(0)
-; HSA-NOTRAP-GFX900-V4-NEXT:    s_endpgm
-; HSA-NOTRAP-GFX900-V4-NEXT:  .LBB1_2: ; %trap
-; HSA-NOTRAP-GFX900-V4-NEXT:    s_endpgm
+; NOHSA-TRAP-GFX900-LABEL: non_entry_trap:
+; NOHSA-TRAP-GFX900:       ; %bb.0: ; %entry
+; NOHSA-TRAP-GFX900-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; NOHSA-TRAP-GFX900-NEXT:    v_mov_b32_e32 v0, 0
+; NOHSA-TRAP-GFX900-NEXT:    s_waitcnt lgkmcnt(0)
+; NOHSA-TRAP-GFX900-NEXT:    global_load_dword v1, v0, s[0:1] glc
+; NOHSA-TRAP-GFX900-NEXT:    s_waitcnt vmcnt(0)
+; NOHSA-TRAP-GFX900-NEXT:    v_cmp_eq_u32_e32 vcc, -1, v1
+; NOHSA-TRAP-GFX900-NEXT:    s_cbranch_vccz .LBB1_2
+; NOHSA-TRAP-GFX900-NEXT:  ; %bb.1: ; %ret
+; NOHSA-TRAP-GFX900-NEXT:    v_mov_b32_e32 v1, 3
+; NOHSA-TRAP-GFX900-NEXT:    global_store_dword v0, v1, s[0:1]
+; NOHSA-TRAP-GFX900-NEXT:    s_waitcnt vmcnt(0)
+; NOHSA-TRAP-GFX900-NEXT:    s_endpgm
+; NOHSA-TRAP-GFX900-NEXT:  .LBB1_2: ; %trap
+; NOHSA-TRAP-GFX900-NEXT:    s_endpgm
+;
+; HSA-TRAP-GFX803-LABEL: non_entry_trap:
+; HSA-TRAP-GFX803:       ; %bb.0: ; %entry
+; HSA-TRAP-GFX803-NEXT:    s_load_dwordx2 s[0:1], s[6:7], 0x0
+; HSA-TRAP-GFX803-NEXT:    s_waitcnt lgkmcnt(0)
+; HSA-TRAP-GFX803-NEXT:    v_mov_b32_e32 v0, s0
+; HSA-TRAP-GFX803-NEXT:    v_mov_b32_e32 v1, s1
+; HSA-TRAP-GFX803-NEXT:    flat_load_dword v0, v[0:1] glc
+; HSA-TRAP-GFX803-NEXT:    s_waitcnt vmcnt(0)
+; HSA-TRAP-GFX803-NEXT:    v_cmp_eq_u32_e32 vcc, -1, v0
+; HSA-TRAP-GFX803-NEXT:    s_cbranch_vccz .LBB1_2
+; HSA-TRAP-GFX803-NEXT:  ; %bb.1: ; %ret
+; HSA-TRAP-GFX803-NEXT:    v_mov_b32_e32 v0, s0
+; HSA-TRAP-GFX803-NEXT:    v_mov_b32_e32 v2, 3
+; HSA-TRAP-GFX803-NEXT:    v_mov_b32_e32 v1, s1
+; HSA-TRAP-GFX803-NEXT:    flat_store_dword v[0:1], v2
+; HSA-TRAP-GFX803-NEXT:    s_waitcnt vmcnt(0)
+; HSA-TRAP-GFX803-NEXT:    s_endpgm
+; HSA-TRAP-GFX803-NEXT:  .LBB1_2: ; %trap
+; HSA-TRAP-GFX803-NEXT:    s_mov_b64 s[0:1], s[4:5]
+; HSA-TRAP-GFX803-NEXT:    s_trap 2
+;
+; HSA-TRAP-GFX900-LABEL: non_entry_trap:
+; HSA-TRAP-GFX900:       ; %bb.0: ; %entry
+; HSA-TRAP-GFX900-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; HSA-TRAP-GFX900-NEXT:    v_mov_b32_e32 v0, 0
+; HSA-TRAP-GFX900-NEXT:    s_waitcnt lgkmcnt(0)
+; HSA-TRAP-GFX900-NEXT:    global_load_dword v1, v0, s[0:1] glc
+; HSA-TRAP-GFX900-NEXT:    s_waitcnt vmcnt(0)
+; HSA-TRAP-GFX900-NEXT:    v_cmp_eq_u32_e32 vcc, -1, v1
+; HSA-TRAP-GFX900-NEXT:    s_cbranch_vccz .LBB1_2
+; HSA-TRAP-GFX900-NEXT:  ; %bb.1: ; %ret
+; HSA-TRAP-GFX900-NEXT:    v_mov_b32_e32 v1, 3
+; HSA-TRAP-GFX900-NEXT:    global_store_dword v0, v1, s[0:1]
+; HSA-TRAP-GFX900-NEXT:    s_waitcnt vmcnt(0)
+; HSA-TRAP-GFX900-NEXT:    s_endpgm
+; HSA-TRAP-GFX900-NEXT:  .LBB1_2: ; %trap
+; HSA-TRAP-GFX900-NEXT:    s_trap 2
+;
+; HSA-NOTRAP-GFX900-LABEL: non_entry_trap:
+; HSA-NOTRAP-GFX900:       ; %bb.0: ; %entry
+; HSA-NOTRAP-GFX900-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; HSA-NOTRAP-GFX900-NEXT:    v_mov_b32_e32 v0, 0
+; HSA-NOTRAP-GFX900-NEXT:    s_waitcnt lgkmcnt(0)
+; HSA-NOTRAP-GFX900-NEXT:    global_load_dword v1, v0, s[0:1] glc
+; HSA-NOTRAP-GFX900-NEXT:    s_waitcnt vmcnt(0)
+; HSA-NOTRAP-GFX900-NEXT:    v_cmp_eq_u32_e32 vcc, -1, v1
+; HSA-NOTRAP-GFX900-NEXT:    s_cbranch_vccz .LBB1_2
+; HSA-NOTRAP-GFX900-NEXT:  ; %bb.1: ; %ret
+; HSA-NOTRAP-GFX900-NEXT:    v_mov_b32_e32 v1, 3
+; HSA-NOTRAP-GFX900-NEXT:    global_store_dword v0, v1, s[0:1]
+; HSA-NOTRAP-GFX900-NEXT:    s_waitcnt vmcnt(0)
+; HSA-NOTRAP-GFX900-NEXT:    s_endpgm
+; HSA-NOTRAP-GFX900-NEXT:  .LBB1_2: ; %trap
+; HSA-NOTRAP-GFX900-NEXT:    s_endpgm
 entry:
   %tmp29 = load volatile i32, ptr addrspace(1) %arg0
   %cmp = icmp eq i32 %tmp29, -1
@@ -263,115 +143,60 @@ ret:
 }
 
 define amdgpu_kernel void @debugtrap(ptr addrspace(1) nocapture readonly %arg0) {
-; NOHSA-TRAP-GFX900-V3-LABEL: debugtrap:
-; NOHSA-TRAP-GFX900-V3:       ; %bb.0:
-; NOHSA-TRAP-GFX900-V3-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
-; NOHSA-TRAP-GFX900-V3-NEXT:    v_mov_b32_e32 v0, 0
-; NOHSA-TRAP-GFX900-V3-NEXT:    v_mov_b32_e32 v1, 1
-; NOHSA-TRAP-GFX900-V3-NEXT:    v_mov_b32_e32 v2, 2
-; NOHSA-TRAP-GFX900-V3-NEXT:    s_waitcnt lgkmcnt(0)
-; NOHSA-TRAP-GFX900-V3-NEXT:    global_store_dword v0, v1, s[0:1]
-; NOHSA-TRAP-GFX900-V3-NEXT:    s_waitcnt vmcnt(0)
-; NOHSA-TRAP-GFX900-V3-NEXT:    global_store_dword v0, v2, s[0:1]
-; NOHSA-TRAP-GFX900-V3-NEXT:    s_waitcnt vmcnt(0)
-; NOHSA-TRAP-GFX900-V3-NEXT:    s_endpgm
-;
-; NOHSA-TRAP-GFX900-V4-LABEL: debugtrap:
-; NOHSA-TRAP-GFX900-V4:       ; %bb.0:
-; NOHSA-TRAP-GFX900-V4-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
-; NOHSA-TRAP-GFX900-V4-NEXT:    v_mov_b32_e32 v0, 0
-; NOHSA-TRAP-GFX900-V4-NEXT:    v_mov_b32_e32 v1, 1
-; NOHSA-TRAP-GFX900-V4-NEXT:    v_mov_b32_e32 v2, 2
-; NOHSA-TRAP-GFX900-V4-NEXT:    s_waitcnt lgkmcnt(0)
-; NOHSA-TRAP-GFX900-V4-NEXT:    global_store_dword v0, v1, s[0:1]
-; NOHSA-TRAP-GFX900-V4-NEXT:    s_waitcnt vmcnt(0)
-; NOHSA-TRAP-GFX900-V4-NEXT:    global_store_dword v0, v2, s[0:1]
-; NOHSA-TRAP-GFX900-V4-NEXT:    s_waitcnt vmcnt(0)
-; NOHSA-TRAP-GFX900-V4-NEXT:    s_endpgm
-;
-; HSA-TRAP-GFX803-V3-LABEL: debugtrap:
-; HSA-TRAP-GFX803-V3:       ; %bb.0:
-; HSA-TRAP-GFX803-V3-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
-; HSA-TRAP-GFX803-V3-NEXT:    v_mov_b32_e32 v2, 1
-; HSA-TRAP-GFX803-V3-NEXT:    v_mov_b32_e32 v3, 2
-; HSA-TRAP-GFX803-V3-NEXT:    s_waitcnt lgkmcnt(0)
-; HSA-TRAP-GFX803-V3-NEXT:    v_mov_b32_e32 v0, s0
-; HSA-TRAP-GFX803-V3-NEXT:    v_mov_b32_e32 v1, s1
-; HSA-TRAP-GFX803-V3-NEXT:    flat_store_dword v[0:1], v2
-; HSA-TRAP-GFX803-V3-NEXT:    s_waitcnt vmcnt(0)
-; HSA-TRAP-GFX803-V3-NEXT:    s_trap 3
-; HSA-TRAP-GFX803-V3-NEXT:    flat_store_dword v[0:1], v3
-; HSA-TRAP-GFX803-V3-NEXT:    s_waitcnt vmcnt(0)
-; HSA-TRAP-GFX803-V3-NEXT:    s_endpgm
-;
-; HSA-TRAP-GFX803-V4-LABEL: debugtrap:
-; HSA-TRAP-GFX803-V4:       ; %bb.0:
-; HSA-TRAP-GFX803-V4-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
-; HSA-TRAP-GFX803-V4-NEXT:    v_mov_b32_e32 v2, 1
-; HSA-TRAP-GFX803-V4-NEXT:    v_mov_b32_e32 v3, 2
-; HSA-TRAP-GFX803-V4-NEXT:    s_waitcnt lgkmcnt(0)
-; HSA-TRAP-GFX803-V4-NEXT:    v_mov_b32_e32 v0, s0
-; HSA-TRAP-GFX803-V4-NEXT:    v_mov_b32_e32 v1, s1
-; HSA-TRAP-GFX803-V4-NEXT:    flat_store_dword v[0:1], v2
-; HSA-TRAP-GFX803-V4-NEXT:    s_waitcnt vmcnt(0)
-; HSA-TRAP-GFX803-V4-NEXT:    s_trap 3
-; HSA-TRAP-GFX803-V4-NEXT:    flat_store_dword v[0:1], v3
-; HSA-TRAP-GFX803-V4-NEXT:    s_waitcnt vmcnt(0)
-; HSA-TRAP-GFX803-V4-NEXT:    s_endpgm
-;
-; HSA-TRAP-GFX900-V3-LABEL: debugtrap:
-; HSA-TRAP-GFX900-V3:       ; %bb.0:
-; HSA-TRAP-GFX900-V3-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
-; HSA-TRAP-GFX900-V3-NEXT:    v_mov_b32_e32 v0, 0
-; HSA-TRAP-GFX900-V3-NEXT:    v_mov_b32_e32 v1, 1
-; HSA-TRAP-GFX900-V3-NEXT:    v_mov_b32_e32 v2, 2
-; HSA-TRAP-GFX900-V3-NEXT:    s_waitcnt lgkmcnt(0)
-; HSA-TRAP-GFX900-V3-NEXT:    global_store_dword v0, v1, s[0:1]
-; HSA-TRAP-GFX900-V3-NEXT:    s_waitcnt vmcnt(0)
-; HSA-TRAP-GFX900-V3-NEXT:    s_trap 3
-; HSA-TRAP-GFX900-V3-NEXT:    global_store_dword v0, v2, s[0:1]
-; HSA-TRAP-GFX900-V3-NEXT:    s_waitcnt vmcnt(0)
-; HSA-TRAP-GFX900-V3-NEXT:    s_endpgm
-;
-; HSA-TRAP-GFX900-V4-LABEL: debugtrap:
-; HSA-TRAP-GFX900-V4:       ; %bb.0:
-; HSA-TRAP-GFX900-V4-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
-; HSA-TRAP-GFX900-V4-NEXT:    v_mov_b32_e32 v0, 0
-; HSA-TRAP-GFX900-V4-NEXT:    v_mov_b32_e32 v1, 1
-; HSA-TRAP-GFX900-V4-NEXT:    v_mov_b32_e32 v2, 2
-; HSA-TRAP-GFX900-V4-NEXT:    s_waitcnt lgkmcnt(0)
-; HSA-TRAP-GFX900-V4-NEXT:    global_store_dword v0, v1, s[0:1]
-; HSA-TRAP-GFX900-V4-NEXT:    s_waitcnt vmcnt(0)
-; HSA-TRAP-GFX900-V4-NEXT:    s_trap 3
-; HSA-TRAP-GFX900-V4-NEXT:    global_store_dword v0, v2, s[0:1]
-; HSA-TRAP-GFX900-V4-NEXT:    s_waitcnt vmcnt(0)
-; HSA-TRAP-GFX900-V4-NEXT:    s_endpgm
-;
-; HSA-NOTRAP-GFX900-V3-LABEL: debugtrap:
-; HSA-NOTRAP-GFX900-V3:       ; %bb.0:
-; HSA-NOTRAP-GFX900-V3-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
-; HSA-NOTRAP-GFX900-V3-NEXT:    v_mov_b32_e32 v0, 0
-; HSA-NOTRAP-GFX900-V3-NEXT:    v_mov_b32_e32 v1, 1
-; HSA-NOTRAP-GFX900-V3-NEXT:    v_mov_b32_e32 v2, 2
-; HSA-NOTRAP-GFX900-V3-NEXT:    s_waitcnt lgkmcnt(0)
-; HSA-NOTRAP-GFX900-V3-NEXT:    global_store_dword v0, v1, s[0:1]
-; HSA-NOTRAP-GFX900-V3-NEXT:    s_waitcnt vmcnt(0)
-; HSA-NOTRAP-GFX900-V3-NEXT:    global_store_dword v0, v2, s[0:1]
-; HSA-NOTRAP-GFX900-V3-NEXT:    s_waitcnt vmcnt(0)
-; HSA-NOTRAP-GFX900-V3-NEXT:    s_endpgm
-;
-; HSA-NOTRAP-GFX900-V4-LABEL: debugtrap:
-; HSA-NOTRAP-GFX900-V4:       ; %bb.0:
-; HSA-NOTRAP-GFX900-V4-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
-; HSA-NOTRAP-GFX900-V4-NEXT:    v_mov_b32_e32 v0, 0
-; HSA-NOTRAP-GFX900-V4-NEXT:    v_mov_b32_e32 v1, 1
-; HSA-NOTRAP-GFX900-V4-NEXT:    v_mov_b32_e32 v2, 2
-; HSA-NOTRAP-GFX900-V4-NEXT:    s_waitcnt lgkmcnt(0)
-; HSA-NOTRAP-GFX900-V4-NEXT:    global_store_dword v0, v1, s[0:1]
-; HSA-NOTRAP-GFX900-V4-NEXT:    s_waitcnt vmcnt(0)
-; HSA-NOTRAP-GFX900-V4-NEXT:    global_store_dword v0, v2, s[0:1]
-; HSA-NOTRAP-GFX900-V4-NEXT:    s_waitcnt vmcnt(0)
-; HSA-NOTRAP-GFX900-V4-NEXT:    s_endpgm
+; NOHSA-TRAP-GFX900-LABEL: debugtrap:
+; NOHSA-TRAP-GFX900:       ; %bb.0:
+; NOHSA-TRAP-GFX900-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x24
+; NOHSA-TRAP-GFX900-NEXT:    v_mov_b32_e32 v0, 0
+; NOHSA-TRAP-GFX900-NEXT:    v_mov_b32_e32 v1, 1
+; NOHSA-TRAP-GFX900-NEXT:    v_mov_b32_e32 v2, 2
+; NOHSA-TRAP-GFX900-NEXT:    s_waitcnt lgkmcnt(0)
+; NOHSA-TRAP-GFX900-NEXT:    global_store_dword v0, v1, s[0:1]
+; NOHSA-TRAP-GFX900-NEXT:    s_waitcnt vmcnt(0)
+; NOHSA-TRAP-GFX900-NEXT:    global_store_dword v0, v2, s[0:1]
+; NOHSA-TRAP-GFX900-NEXT:    s_waitcnt vmcnt(0)
+; NOHSA-TRAP-GFX900-NEXT:    s_endpgm
+;
+; HSA-TRAP-GFX803-LABEL: debugtrap:
+; HSA-TRAP-GFX803:       ; %bb.0:
+; HSA-TRAP-GFX803-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; HSA-TRAP-GFX803-NEXT:    v_mov_b32_e32 v2, 1
+; HSA-TRAP-GFX803-NEXT:    v_mov_b32_e32 v3, 2
+; HSA-TRAP-GFX803-NEXT:    s_waitcnt lgkmcnt(0)
+; HSA-TRAP-GFX803-NEXT:    v_mov_b32_e32 v0, s0
+; HSA-TRAP-GFX803-NEXT:    v_mov_b32_e32 v1, s1
+; HSA-TRAP-GFX803-NEXT:    flat_store_dword v[0:1], v2
+; HSA-TRAP-GFX803-NEXT:    s_waitcnt vmcnt(0)
+; HSA-TRAP-GFX803-NEXT:    s_trap 3
+; HSA-TRAP-GFX803-NEXT:    flat_store_dword v[0:1], v3
+; HSA-TRAP-GFX803-NEXT:    s_waitcnt vmcnt(0)
+; HSA-TRAP-GFX803-NEXT:    s_endpgm
+;
+; HSA-TRAP-GFX900-LABEL: debugtrap:
+; HSA-TRAP-GFX900:       ; %bb.0:
+; HSA-TRAP-GFX900-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; HSA-TRAP-GFX900-NEXT:    v_mov_b32_e32 v0, 0
+; HSA-TRAP-GFX900-NEXT:    v_mov_b32_e32 v1, 1
+; HSA-TRAP-GFX900-NEXT:    v_mov_b32_e32 v2, 2
+; HSA-TRAP-GFX900-NEXT:    s_waitcnt lgkmcnt(0)
+; HSA-TRAP-GFX900-NEXT:    global_store_dword v0, v1, s[0:1]
+; HSA-TRAP-GFX900-NEXT:    s_waitcnt vmcnt(0)
+; HSA-TRAP-GFX900-NEXT:    s_trap 3
+; HSA-TRAP-GFX900-NEXT:    global_store_dword v0, v2, s[0:1]
+; HSA-TRAP-GFX900-NEXT:    s_waitcnt vmcnt(0)
+; HSA-TRAP-GFX900-NEXT:    s_endpgm
+;
+; HSA-NOTRAP-GFX900-LABEL: debugtrap:
+; HSA-NOTRAP-GFX900:       ; %bb.0:
+; HSA-NOTRAP-GFX900-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x0
+; HSA-NOTRAP-GFX900-NEXT:    v_mov_b32_e32 v0, 0
+; HSA-NOTRAP-GFX900-NEXT:    v_mov_b32_e32 v1, 1
+; HSA-NOTRAP-GFX900-NEXT:    v_mov_b32_e32 v2, 2
+; HSA-NOTRAP-GFX900-NEXT:    s_waitcnt lgkmcnt(0)
+; HSA-NOTRAP-GFX900-NEXT:    global_store_dword v0, v1, s[0:1]
+; HSA-NOTRAP-GFX900-NEXT:    s_waitcnt vmcnt(0)
+; HSA-NOTRAP-GFX900-NEXT:    global_store_dword v0, v2, s[0:1]
+; HSA-NOTRAP-GFX900-NEXT:    s_waitcnt vmcnt(0)
+; HSA-NOTRAP-GFX900-NEXT:    s_endpgm
   store volatile i32 1, ptr addrspace(1) %arg0
   call void @llvm.debugtrap()
   store volatile i32 2, ptr addrspace(1) %arg0
@@ -382,4 +207,4 @@ attributes #0 = { nounwind noreturn }
 attributes #1 = { nounwind }
 
 !llvm.module.flags = !{!0}
-!0 = !{i32 1, !"amdgpu_code_object_version", i32 CODE_OBJECT_VERSION}
+!0 = !{i32 1, !"amdgpu_code_object_version", i32 400}

diff  --git a/llvm/test/MC/AMDGPU/hsa-diag-v3.s b/llvm/test/MC/AMDGPU/hsa-diag-v4.s
similarity index 94%
rename from llvm/test/MC/AMDGPU/hsa-diag-v3.s
rename to llvm/test/MC/AMDGPU/hsa-diag-v4.s
index 369ac905ad2b27b..f7a554aedb746b0 100644
--- a/llvm/test/MC/AMDGPU/hsa-diag-v3.s
+++ b/llvm/test/MC/AMDGPU/hsa-diag-v4.s
@@ -1,18 +1,18 @@
-// RUN: not llvm-mc --amdhsa-code-object-version=3 -triple amdgcn-amd-amdhsa -mcpu=gfx810 -mattr=+xnack -show-encoding %s 2>&1 >/dev/null | FileCheck %s --check-prefixes=GCN,GFX8,PREGFX10,AMDHSA
-// RUN: not llvm-mc --amdhsa-code-object-version=3 -triple amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+xnack -show-encoding %s 2>&1 >/dev/null | FileCheck %s --check-prefixes=GCN,GFX10PLUS,GFX10,AMDHSA
-// RUN: not llvm-mc --amdhsa-code-object-version=3 -triple amdgcn-amd-amdhsa -mcpu=gfx1100 -show-encoding %s 2>&1 >/dev/null | FileCheck %s --check-prefixes=GCN,GFX10PLUS,GFX11,AMDHSA
-// RUN: not llvm-mc --amdhsa-code-object-version=3 -triple amdgcn-amd- -mcpu=gfx810 -mattr=+xnack -show-encoding %s 2>&1 >/dev/null | FileCheck %s --check-prefixes=GCN,NONAMDHSA
-// RUN: not llvm-mc --amdhsa-code-object-version=3 -triple amdgcn-amd-amdhsa -mcpu=gfx90a -mattr=+xnack -show-encoding %s 2>&1 >/dev/null | FileCheck %s --check-prefixes=GFX90A,PREGFX10,AMDHSA,ALL
+// RUN: not llvm-mc --amdhsa-code-object-version=4 -triple amdgcn-amd-amdhsa -mcpu=gfx810 -mattr=+xnack -show-encoding %s 2>&1 >/dev/null | FileCheck %s --check-prefixes=GCN,GFX8,PREGFX10,AMDHSA
+// RUN: not llvm-mc --amdhsa-code-object-version=4 -triple amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+xnack -show-encoding %s 2>&1 >/dev/null | FileCheck %s --check-prefixes=GCN,GFX10PLUS,GFX10,AMDHSA
+// RUN: not llvm-mc --amdhsa-code-object-version=4 -triple amdgcn-amd-amdhsa -mcpu=gfx1100 -show-encoding %s 2>&1 >/dev/null | FileCheck %s --check-prefixes=GCN,GFX10PLUS,GFX11,AMDHSA
+// RUN: not llvm-mc --amdhsa-code-object-version=4 -triple amdgcn-amd- -mcpu=gfx810 -mattr=+xnack -show-encoding %s 2>&1 >/dev/null | FileCheck %s --check-prefixes=GCN,NONAMDHSA
+// RUN: not llvm-mc --amdhsa-code-object-version=4 -triple amdgcn-amd-amdhsa -mcpu=gfx90a -mattr=+xnack -show-encoding %s 2>&1 >/dev/null | FileCheck %s --check-prefixes=GFX90A,PREGFX10,AMDHSA,ALL
 
 .text
 
 // GCN-LABEL: warning: test_target
 // GFX8-NOT: error:
-// GFX10: error: .amdgcn_target directive's target id amdgcn-amd-amdhsa--gfx810+xnack does not match the specified target id amdgcn-amd-amdhsa--gfx1010+xnack
-// GFX11: error: .amdgcn_target directive's target id amdgcn-amd-amdhsa--gfx810+xnack does not match the specified target id amdgcn-amd-amdhsa--gfx1100
-// NONAMDHSA: error: .amdgcn_target directive's target id amdgcn-amd-amdhsa--gfx810+xnack does not match the specified target id amdgcn-amd-unknown--gfx810
+// GFX10: error: .amdgcn_target directive's target id amdgcn-amd-amdhsa--gfx810:xnack+ does not match the specified target id amdgcn-amd-amdhsa--gfx1010:xnack+
+// GFX11: error: .amdgcn_target directive's target id amdgcn-amd-amdhsa--gfx810:xnack+ does not match the specified target id amdgcn-amd-amdhsa--gfx1100
+// NONAMDHSA: error: .amdgcn_target directive's target id amdgcn-amd-amdhsa--gfx810:xnack+ does not match the specified target id amdgcn-amd-unknown--gfx810
 .warning "test_target"
-.amdgcn_target "amdgcn-amd-amdhsa--gfx810+xnack"
+.amdgcn_target "amdgcn-amd-amdhsa--gfx810:xnack+"
 
 // GCN-LABEL: warning: test_amdhsa_kernel_no_name
 // GCN: error: unknown directive

diff  --git a/llvm/test/MC/AMDGPU/hsa-gfx10-v3.s b/llvm/test/MC/AMDGPU/hsa-gfx10-v3.s
deleted file mode 100644
index ba60000837cdc06..000000000000000
--- a/llvm/test/MC/AMDGPU/hsa-gfx10-v3.s
+++ /dev/null
@@ -1,226 +0,0 @@
-// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx1010 --amdhsa-code-object-version=3 -mattr=+xnack < %s | FileCheck --check-prefix=ASM %s
-// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx1010 --amdhsa-code-object-version=3 -mattr=+xnack -filetype=obj < %s > %t
-// RUN: llvm-readelf -S -r -s %t | FileCheck --check-prefix=READOBJ %s
-// RUN: llvm-objdump -s -j .rodata %t | FileCheck --check-prefix=OBJDUMP %s
-
-// READOBJ: Section Headers
-// READOBJ: .text   PROGBITS {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9]+}} AX {{[0-9]+}} {{[0-9]+}} 256
-// READOBJ: .rodata PROGBITS {{[0-9a-f]+}} {{[0-9a-f]+}}        0000c0 {{[0-9]+}}  A {{[0-9]+}} {{[0-9]+}} 64
-
-// READOBJ: Relocation section '.rela.rodata' at offset
-// READOBJ: 0000000000000010 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 10
-// READOBJ: 0000000000000050 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 110
-// READOBJ: 0000000000000090 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 210
-
-// READOBJ: Symbol table '.symtab' contains {{[0-9]+}} entries:
-// READOBJ:      0000000000000000  0 FUNC    LOCAL  PROTECTED 2 minimal
-// READOBJ-NEXT: 0000000000000100  0 FUNC    LOCAL  PROTECTED 2 complete
-// READOBJ-NEXT: 0000000000000200  0 FUNC    LOCAL  PROTECTED 2 special_sgpr
-// READOBJ-NEXT: 0000000000000000 64 OBJECT  LOCAL  DEFAULT   3 minimal.kd
-// READOBJ-NEXT: 0000000000000040 64 OBJECT  LOCAL  DEFAULT   3 complete.kd
-// READOBJ-NEXT: 0000000000000080 64 OBJECT  LOCAL  DEFAULT   3 special_sgpr.kd
-
-// OBJDUMP: Contents of section .rodata
-// Note, relocation for KERNEL_CODE_ENTRY_BYTE_OFFSET is not resolved here.
-// minimal
-// OBJDUMP-NEXT: 0000 00000000 00000000 00000000 00000000
-// OBJDUMP-NEXT: 0010 00000000 00000000 00000000 00000000
-// OBJDUMP-NEXT: 0020 00000000 00000000 00000000 00000000
-// OBJDUMP-NEXT: 0030 0000ac60 80000000 00000000 00000000
-// complete
-// OBJDUMP-NEXT: 0040 01000000 01000000 08000000 00000000
-// OBJDUMP-NEXT: 0050 00000000 00000000 00000000 00000000
-// OBJDUMP-NEXT: 0060 00000000 00000000 00000000 00000000
-// OBJDUMP-NEXT: 0070 015001e4 1f0f007f 7f040000 00000000
-// special_sgpr
-// OBJDUMP-NEXT: 0080 00000000 00000000 00000000 00000000
-// OBJDUMP-NEXT: 0090 00000000 00000000 00000000 00000000
-// OBJDUMP-NEXT: 00a0 00000000 00000000 00000000 00000000
-// OBJDUMP-NEXT: 00b0 00000060 80000000 00000000 00000000
-
-.text
-// ASM: .text
-
-.amdgcn_target "amdgcn-amd-amdhsa--gfx1010+xnack"
-// ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx1010+xnack"
-
-.p2align 8
-.type minimal, at function
-minimal:
-  s_endpgm
-
-.p2align 8
-.type complete, at function
-complete:
-  s_endpgm
-
-.p2align 8
-.type special_sgpr, at function
-special_sgpr:
-  s_endpgm
-
-.rodata
-// ASM: .rodata
-
-// Test that only specifying required directives is allowed, and that defaulted
-// values are omitted.
-.p2align 6
-.amdhsa_kernel minimal
-  .amdhsa_next_free_vgpr 0
-  .amdhsa_next_free_sgpr 0
-  .amdhsa_shared_vgpr_count 0
-.end_amdhsa_kernel
-
-// ASM: .amdhsa_kernel minimal
-// ASM: .amdhsa_next_free_vgpr 0
-// ASM-NEXT: .amdhsa_next_free_sgpr 0
-// ASM: .amdhsa_shared_vgpr_count 0
-// ASM: .end_amdhsa_kernel
-
-// Test that we can specify all available directives with non-default values.
-.p2align 6
-.amdhsa_kernel complete
-  .amdhsa_group_segment_fixed_size 1
-  .amdhsa_private_segment_fixed_size 1
-  .amdhsa_kernarg_size 8
-  .amdhsa_user_sgpr_private_segment_buffer 1
-  .amdhsa_user_sgpr_dispatch_ptr 1
-  .amdhsa_user_sgpr_queue_ptr 1
-  .amdhsa_user_sgpr_kernarg_segment_ptr 1
-  .amdhsa_user_sgpr_dispatch_id 1
-  .amdhsa_user_sgpr_flat_scratch_init 1
-  .amdhsa_user_sgpr_private_segment_size 1
-  .amdhsa_wavefront_size32 1
-  .amdhsa_system_sgpr_private_segment_wavefront_offset 1
-  .amdhsa_system_sgpr_workgroup_id_x 0
-  .amdhsa_system_sgpr_workgroup_id_y 1
-  .amdhsa_system_sgpr_workgroup_id_z 1
-  .amdhsa_system_sgpr_workgroup_info 1
-  .amdhsa_system_vgpr_workitem_id 1
-  .amdhsa_next_free_vgpr 9
-  .amdhsa_next_free_sgpr 27
-  .amdhsa_reserve_vcc 0
-  .amdhsa_reserve_flat_scratch 0
-  .amdhsa_reserve_xnack_mask 1
-  .amdhsa_float_round_mode_32 1
-  .amdhsa_float_round_mode_16_64 1
-  .amdhsa_float_denorm_mode_32 1
-  .amdhsa_float_denorm_mode_16_64 0
-  .amdhsa_dx10_clamp 0
-  .amdhsa_ieee_mode 0
-  .amdhsa_fp16_overflow 1
-  .amdhsa_workgroup_processor_mode 1
-  .amdhsa_memory_ordered 1
-  .amdhsa_forward_progress 1
-  .amdhsa_exception_fp_ieee_invalid_op 1
-  .amdhsa_exception_fp_denorm_src 1
-  .amdhsa_exception_fp_ieee_div_zero 1
-  .amdhsa_exception_fp_ieee_overflow 1
-  .amdhsa_exception_fp_ieee_underflow 1
-  .amdhsa_exception_fp_ieee_inexact 1
-  .amdhsa_exception_int_div_zero 1
-.end_amdhsa_kernel
-
-// ASM: .amdhsa_kernel complete
-// ASM-NEXT: .amdhsa_group_segment_fixed_size 1
-// ASM-NEXT: .amdhsa_private_segment_fixed_size 1
-// ASM-NEXT: .amdhsa_kernarg_size 8
-// ASM-NEXT: .amdhsa_user_sgpr_count 15
-// ASM-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1
-// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr 1
-// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr 1
-// ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 1
-// ASM-NEXT: .amdhsa_user_sgpr_dispatch_id 1
-// ASM-NEXT: .amdhsa_user_sgpr_flat_scratch_init 1
-// ASM-NEXT: .amdhsa_user_sgpr_private_segment_size 1
-// ASM-NEXT: .amdhsa_wavefront_size32 1
-// ASM-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 1
-// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x 0
-// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y 1
-// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_z 1
-// ASM-NEXT: .amdhsa_system_sgpr_workgroup_info 1
-// ASM-NEXT: .amdhsa_system_vgpr_workitem_id 1
-// ASM-NEXT: .amdhsa_next_free_vgpr 9
-// ASM-NEXT: .amdhsa_next_free_sgpr 27
-// ASM-NEXT: .amdhsa_reserve_vcc 0
-// ASM-NEXT: .amdhsa_reserve_flat_scratch 0
-// ASM-NEXT: .amdhsa_reserve_xnack_mask 1
-// ASM-NEXT: .amdhsa_float_round_mode_32 1
-// ASM-NEXT: .amdhsa_float_round_mode_16_64 1
-// ASM-NEXT: .amdhsa_float_denorm_mode_32 1
-// ASM-NEXT: .amdhsa_float_denorm_mode_16_64 0
-// ASM-NEXT: .amdhsa_dx10_clamp 0
-// ASM-NEXT: .amdhsa_ieee_mode 0
-// ASM-NEXT: .amdhsa_fp16_overflow 1
-// ASM-NEXT: .amdhsa_workgroup_processor_mode 1
-// ASM-NEXT: .amdhsa_memory_ordered 1
-// ASM-NEXT: .amdhsa_forward_progress 1
-// ASM-NEXT: .amdhsa_shared_vgpr_count 0
-// ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op 1
-// ASM-NEXT: .amdhsa_exception_fp_denorm_src 1
-// ASM-NEXT: .amdhsa_exception_fp_ieee_div_zero 1
-// ASM-NEXT: .amdhsa_exception_fp_ieee_overflow 1
-// ASM-NEXT: .amdhsa_exception_fp_ieee_underflow 1
-// ASM-NEXT: .amdhsa_exception_fp_ieee_inexact 1
-// ASM-NEXT: .amdhsa_exception_int_div_zero 1
-// ASM-NEXT: .end_amdhsa_kernel
-
-// Test that we are including special SGPR usage in the granulated count.
-.p2align 6
-.amdhsa_kernel special_sgpr
-  // Same next_free_sgpr as "complete", but...
-  .amdhsa_next_free_sgpr 27
-  // ...on GFX10+ this should require an additional 6 SGPRs, pushing us from
-  // 3 granules to 4
-  .amdhsa_reserve_flat_scratch 1
-
-  .amdhsa_reserve_vcc 0
-  .amdhsa_reserve_xnack_mask 1
-
-  .amdhsa_float_denorm_mode_16_64 0
-  .amdhsa_dx10_clamp 0
-  .amdhsa_ieee_mode 0
-  .amdhsa_next_free_vgpr 0
-.end_amdhsa_kernel
-
-// ASM: .amdhsa_kernel special_sgpr
-// ASM: .amdhsa_next_free_vgpr 0
-// ASM-NEXT: .amdhsa_next_free_sgpr 27
-// ASM-NEXT: .amdhsa_reserve_vcc 0
-// ASM-NEXT: .amdhsa_reserve_xnack_mask 1
-// ASM: .amdhsa_float_denorm_mode_16_64 0
-// ASM-NEXT: .amdhsa_dx10_clamp 0
-// ASM-NEXT: .amdhsa_ieee_mode 0
-// ASM: .end_amdhsa_kernel
-
-.section .foo
-
-.byte .amdgcn.gfx_generation_number
-// ASM: .byte 10
-
-.byte .amdgcn.next_free_vgpr
-// ASM: .byte 0
-.byte .amdgcn.next_free_sgpr
-// ASM: .byte 0
-
-v_mov_b32_e32 v7, s10
-
-.byte .amdgcn.next_free_vgpr
-// ASM: .byte 8
-.byte .amdgcn.next_free_sgpr
-// ASM: .byte 11
-
-.set .amdgcn.next_free_vgpr, 0
-.set .amdgcn.next_free_sgpr, 0
-
-.byte .amdgcn.next_free_vgpr
-// ASM: .byte 0
-.byte .amdgcn.next_free_sgpr
-// ASM: .byte 0
-
-v_mov_b32_e32 v16, s3
-
-.byte .amdgcn.next_free_vgpr
-// ASM: .byte 17
-.byte .amdgcn.next_free_sgpr
-// ASM: .byte 4

diff  --git a/llvm/test/MC/AMDGPU/hsa-gfx11-v3.s b/llvm/test/MC/AMDGPU/hsa-gfx11-v3.s
deleted file mode 100644
index 7f885b457aa63a3..000000000000000
--- a/llvm/test/MC/AMDGPU/hsa-gfx11-v3.s
+++ /dev/null
@@ -1,213 +0,0 @@
-// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx1100 --amdhsa-code-object-version=3 < %s | FileCheck --check-prefix=ASM %s
-// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx1100 --amdhsa-code-object-version=3 -filetype=obj < %s > %t
-// RUN: llvm-readelf -S -r -s %t | FileCheck --check-prefix=READOBJ %s
-// RUN: llvm-objdump -s -j .rodata %t | FileCheck --check-prefix=OBJDUMP %s
-
-// READOBJ: Section Headers
-// READOBJ: .text   PROGBITS {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9]+}} AX {{[0-9]+}} {{[0-9]+}} 256
-// READOBJ: .rodata PROGBITS {{[0-9a-f]+}} {{[0-9a-f]+}}        0000c0 {{[0-9]+}}  A {{[0-9]+}} {{[0-9]+}} 64
-
-// READOBJ: Relocation section '.rela.rodata' at offset
-// READOBJ: 0000000000000010 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 10
-// READOBJ: 0000000000000050 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 110
-// READOBJ: 0000000000000090 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 210
-
-// READOBJ: Symbol table '.symtab' contains {{[0-9]+}} entries:
-// READOBJ:      0000000000000000  0 FUNC    LOCAL  PROTECTED 2 minimal
-// READOBJ-NEXT: 0000000000000100  0 FUNC    LOCAL  PROTECTED 2 complete
-// READOBJ-NEXT: 0000000000000200  0 FUNC    LOCAL  PROTECTED 2 special_sgpr
-// READOBJ-NEXT: 0000000000000000 64 OBJECT  LOCAL  DEFAULT   3 minimal.kd
-// READOBJ-NEXT: 0000000000000040 64 OBJECT  LOCAL  DEFAULT   3 complete.kd
-// READOBJ-NEXT: 0000000000000080 64 OBJECT  LOCAL  DEFAULT   3 special_sgpr.kd
-
-// OBJDUMP: Contents of section .rodata
-// Note, relocation for KERNEL_CODE_ENTRY_BYTE_OFFSET is not resolved here.
-// minimal
-// OBJDUMP-NEXT: 0000 00000000 00000000 00000000 00000000
-// OBJDUMP-NEXT: 0010 00000000 00000000 00000000 00000000
-// OBJDUMP-NEXT: 0020 00000000 00000000 00000000 00000000
-// OBJDUMP-NEXT: 0030 0000ac60 80000000 00000000 00000000
-// complete
-// OBJDUMP-NEXT: 0040 01000000 01000000 08000000 00000000
-// OBJDUMP-NEXT: 0050 00000000 00000000 00000000 00000000
-// OBJDUMP-NEXT: 0060 00000000 00000000 00000000 00000000
-// OBJDUMP-NEXT: 0070 015001e4 130f007f 5e040000 00000000
-// special_sgpr
-// OBJDUMP-NEXT: 0080 00000000 00000000 00000000 00000000
-// OBJDUMP-NEXT: 0090 00000000 00000000 00000000 00000000
-// OBJDUMP-NEXT: 00a0 00000000 00000000 00000000 00000000
-// OBJDUMP-NEXT: 00b0 00000060 80000000 00000000 00000000
-
-.text
-// ASM: .text
-
-.amdgcn_target "amdgcn-amd-amdhsa--gfx1100"
-// ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx1100"
-
-.p2align 8
-.type minimal, at function
-minimal:
-  s_endpgm
-
-.p2align 8
-.type complete, at function
-complete:
-  s_endpgm
-
-.p2align 8
-.type special_sgpr, at function
-special_sgpr:
-  s_endpgm
-
-.rodata
-// ASM: .rodata
-
-// Test that only specifying required directives is allowed, and that defaulted
-// values are omitted.
-.p2align 6
-.amdhsa_kernel minimal
-  .amdhsa_next_free_vgpr 0
-  .amdhsa_next_free_sgpr 0
-.end_amdhsa_kernel
-
-// ASM: .amdhsa_kernel minimal
-// ASM: .amdhsa_next_free_vgpr 0
-// ASM-NEXT: .amdhsa_next_free_sgpr 0
-// ASM: .end_amdhsa_kernel
-
-// Test that we can specify all available directives with non-default values.
-.p2align 6
-.amdhsa_kernel complete
-  .amdhsa_group_segment_fixed_size 1
-  .amdhsa_private_segment_fixed_size 1
-  .amdhsa_kernarg_size 8
-  .amdhsa_user_sgpr_dispatch_ptr 1
-  .amdhsa_user_sgpr_queue_ptr 1
-  .amdhsa_user_sgpr_kernarg_segment_ptr 1
-  .amdhsa_user_sgpr_dispatch_id 1
-  .amdhsa_user_sgpr_private_segment_size 1
-  .amdhsa_wavefront_size32 1
-  .amdhsa_enable_private_segment 1
-  .amdhsa_system_sgpr_workgroup_id_x 0
-  .amdhsa_system_sgpr_workgroup_id_y 1
-  .amdhsa_system_sgpr_workgroup_id_z 1
-  .amdhsa_system_sgpr_workgroup_info 1
-  .amdhsa_system_vgpr_workitem_id 1
-  .amdhsa_next_free_vgpr 9
-  .amdhsa_next_free_sgpr 27
-  .amdhsa_reserve_vcc 0
-  .amdhsa_float_round_mode_32 1
-  .amdhsa_float_round_mode_16_64 1
-  .amdhsa_float_denorm_mode_32 1
-  .amdhsa_float_denorm_mode_16_64 0
-  .amdhsa_dx10_clamp 0
-  .amdhsa_ieee_mode 0
-  .amdhsa_fp16_overflow 1
-  .amdhsa_workgroup_processor_mode 1
-  .amdhsa_memory_ordered 1
-  .amdhsa_forward_progress 1
-  .amdhsa_exception_fp_ieee_invalid_op 1
-  .amdhsa_exception_fp_denorm_src 1
-  .amdhsa_exception_fp_ieee_div_zero 1
-  .amdhsa_exception_fp_ieee_overflow 1
-  .amdhsa_exception_fp_ieee_underflow 1
-  .amdhsa_exception_fp_ieee_inexact 1
-  .amdhsa_exception_int_div_zero 1
-.end_amdhsa_kernel
-
-// ASM: .amdhsa_kernel complete
-// ASM-NEXT: .amdhsa_group_segment_fixed_size 1
-// ASM-NEXT: .amdhsa_private_segment_fixed_size 1
-// ASM-NEXT: .amdhsa_kernarg_size 8
-// ASM-NEXT: .amdhsa_user_sgpr_count 9
-// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr 1
-// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr 1
-// ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 1
-// ASM-NEXT: .amdhsa_user_sgpr_dispatch_id 1
-// ASM-NEXT: .amdhsa_user_sgpr_private_segment_size 1
-// ASM-NEXT: .amdhsa_wavefront_size32 1
-// ASM-NEXT: .amdhsa_enable_private_segment 1
-// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x 0
-// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y 1
-// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_z 1
-// ASM-NEXT: .amdhsa_system_sgpr_workgroup_info 1
-// ASM-NEXT: .amdhsa_system_vgpr_workitem_id 1
-// ASM-NEXT: .amdhsa_next_free_vgpr 9
-// ASM-NEXT: .amdhsa_next_free_sgpr 27
-// ASM-NEXT: .amdhsa_reserve_vcc 0
-// ASM-NEXT: .amdhsa_float_round_mode_32 1
-// ASM-NEXT: .amdhsa_float_round_mode_16_64 1
-// ASM-NEXT: .amdhsa_float_denorm_mode_32 1
-// ASM-NEXT: .amdhsa_float_denorm_mode_16_64 0
-// ASM-NEXT: .amdhsa_dx10_clamp 0
-// ASM-NEXT: .amdhsa_ieee_mode 0
-// ASM-NEXT: .amdhsa_fp16_overflow 1
-// ASM-NEXT: .amdhsa_workgroup_processor_mode 1
-// ASM-NEXT: .amdhsa_memory_ordered 1
-// ASM-NEXT: .amdhsa_forward_progress 1
-// ASM-NEXT: .amdhsa_shared_vgpr_count 0
-// ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op 1
-// ASM-NEXT: .amdhsa_exception_fp_denorm_src 1
-// ASM-NEXT: .amdhsa_exception_fp_ieee_div_zero 1
-// ASM-NEXT: .amdhsa_exception_fp_ieee_overflow 1
-// ASM-NEXT: .amdhsa_exception_fp_ieee_underflow 1
-// ASM-NEXT: .amdhsa_exception_fp_ieee_inexact 1
-// ASM-NEXT: .amdhsa_exception_int_div_zero 1
-// ASM-NEXT: .end_amdhsa_kernel
-
-// Test that we are including special SGPR usage in the granulated count.
-.p2align 6
-.amdhsa_kernel special_sgpr
-  // Same next_free_sgpr as "complete", but...
-  .amdhsa_next_free_sgpr 27
-  // ...on GFX10+ this should require an additional 6 SGPRs, pushing us from
-  // 3 granules to 4
-
-  .amdhsa_reserve_vcc 0
-
-  .amdhsa_float_denorm_mode_16_64 0
-  .amdhsa_dx10_clamp 0
-  .amdhsa_ieee_mode 0
-  .amdhsa_next_free_vgpr 0
-.end_amdhsa_kernel
-
-// ASM: .amdhsa_kernel special_sgpr
-// ASM: .amdhsa_next_free_vgpr 0
-// ASM-NEXT: .amdhsa_next_free_sgpr 27
-// ASM-NEXT: .amdhsa_reserve_vcc 0
-// ASM: .amdhsa_float_denorm_mode_16_64 0
-// ASM-NEXT: .amdhsa_dx10_clamp 0
-// ASM-NEXT: .amdhsa_ieee_mode 0
-// ASM: .end_amdhsa_kernel
-
-.section .foo
-
-.byte .amdgcn.gfx_generation_number
-// ASM: .byte 11
-
-.byte .amdgcn.next_free_vgpr
-// ASM: .byte 0
-.byte .amdgcn.next_free_sgpr
-// ASM: .byte 0
-
-v_mov_b32_e32 v7, s10
-
-.byte .amdgcn.next_free_vgpr
-// ASM: .byte 8
-.byte .amdgcn.next_free_sgpr
-// ASM: .byte 11
-
-.set .amdgcn.next_free_vgpr, 0
-.set .amdgcn.next_free_sgpr, 0
-
-.byte .amdgcn.next_free_vgpr
-// ASM: .byte 0
-.byte .amdgcn.next_free_sgpr
-// ASM: .byte 0
-
-v_mov_b32_e32 v16, s3
-
-.byte .amdgcn.next_free_vgpr
-// ASM: .byte 17
-.byte .amdgcn.next_free_sgpr
-// ASM: .byte 4

diff  --git a/llvm/test/MC/AMDGPU/hsa-gfx90a-v3.s b/llvm/test/MC/AMDGPU/hsa-gfx90a-v3.s
deleted file mode 100644
index fd84fab8af81685..000000000000000
--- a/llvm/test/MC/AMDGPU/hsa-gfx90a-v3.s
+++ /dev/null
@@ -1,184 +0,0 @@
-// RUN: llvm-mc --amdhsa-code-object-version=3 -triple amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck --check-prefix=ASM %s
-// RUN: llvm-mc --amdhsa-code-object-version=3 -triple amdgcn-amd-amdhsa -mcpu=gfx90a -filetype=obj < %s > %t
-// RUN: llvm-readobj --elf-output-style=GNU --sections --symbols --relocations %t | FileCheck --check-prefix=READOBJ %s
-// RUN: llvm-objdump -s -j .rodata %t | FileCheck --check-prefix=OBJDUMP %s
-
-// READOBJ: Section Headers
-// READOBJ: .text   PROGBITS {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9]+}} AX {{[0-9]+}} {{[0-9]+}} 256
-// READOBJ: .rodata PROGBITS {{[0-9a-f]+}} {{[0-9a-f]+}}        000080 {{[0-9]+}}  A {{[0-9]+}} {{[0-9]+}} 64
-
-// READOBJ: Relocation section '.rela.rodata' at offset
-// READOBJ: 0000000000000010 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 10
-// READOBJ: 0000000000000050 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 110
-
-// READOBJ: Symbol table '.symtab' contains {{[0-9]+}} entries:
-// READOBJ-DAG: {{[0-9]+}}: 0000000000000100  0 FUNC    LOCAL  PROTECTED 2 complete
-// READOBJ-DAG: {{[0-9]+}}: 0000000000000040 64 OBJECT  LOCAL  DEFAULT   3 complete.kd
-// READOBJ-DAG: {{[0-9]+}}: 0000000000000000  0 FUNC    LOCAL  PROTECTED 2 minimal
-// READOBJ-DAG: {{[0-9]+}}: 0000000000000000 64 OBJECT  LOCAL  DEFAULT   3 minimal.kd
-
-// OBJDUMP: Contents of section .rodata
-// Note, relocation for KERNEL_CODE_ENTRY_BYTE_OFFSET is not resolved here.
-// minimal
-// OBJDUMP-NEXT: 0000 00000000 00000000 00000000 00000000
-// OBJDUMP-NEXT: 0010 00000000 00000000 00000000 00000000
-// OBJDUMP-NEXT: 0020 00000000 00000000 00000000 00000000
-// OBJDUMP-NEXT: 0030 0000ac00 80000000 00000000 00000000
-// complete
-// OBJDUMP-NEXT: 0040 01000000 01000000 08000000 00000000
-// OBJDUMP-NEXT: 0050 00000000 00000000 00000000 00000000
-// OBJDUMP-NEXT: 0060 00000000 00000000 00000000 00000100
-// OBJDUMP-NEXT: 0070 c1500104 210f007f 7f008100 00000000
-
-.text
-// ASM: .text
-
-.amdgcn_target "amdgcn-amd-amdhsa--gfx90a+xnack+sram-ecc"
-// ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx90a+xnack+sram-ecc"
-
-.p2align 8
-.type minimal, at function
-minimal:
-  s_endpgm
-
-.p2align 8
-.type complete, at function
-complete:
-  s_endpgm
-
-.rodata
-// ASM: .rodata
-
-// Test that only specifying required directives is allowed, and that defaulted
-// values are omitted.
-.p2align 6
-.amdhsa_kernel minimal
-  .amdhsa_next_free_vgpr 0
-  .amdhsa_next_free_sgpr 0
-  .amdhsa_accum_offset 4
-.end_amdhsa_kernel
-
-// ASM: .amdhsa_kernel minimal
-// ASM: .amdhsa_next_free_vgpr 0
-// ASM-NEXT: .amdhsa_next_free_sgpr 0
-// ASM-NEXT: .amdhsa_accum_offset 4
-// ASM: .amdhsa_tg_split 0
-// ASM: .end_amdhsa_kernel
-
-// Test that we can specify all available directives with non-default values.
-.p2align 6
-.amdhsa_kernel complete
-  .amdhsa_group_segment_fixed_size 1
-  .amdhsa_private_segment_fixed_size 1
-  .amdhsa_user_sgpr_private_segment_buffer 1
-  .amdhsa_user_sgpr_dispatch_ptr 1
-  .amdhsa_user_sgpr_queue_ptr 1
-  .amdhsa_user_sgpr_kernarg_segment_ptr 1
-  .amdhsa_user_sgpr_dispatch_id 1
-  .amdhsa_user_sgpr_flat_scratch_init 1
-  .amdhsa_kernarg_size 8
-  .amdhsa_user_sgpr_kernarg_preload_length  1
-  .amdhsa_user_sgpr_kernarg_preload_offset  1
-  .amdhsa_user_sgpr_private_segment_size 1
-  .amdhsa_system_sgpr_private_segment_wavefront_offset 1
-  .amdhsa_system_sgpr_workgroup_id_x 0
-  .amdhsa_system_sgpr_workgroup_id_y 1
-  .amdhsa_system_sgpr_workgroup_id_z 1
-  .amdhsa_system_sgpr_workgroup_info 1
-  .amdhsa_system_vgpr_workitem_id 1
-  .amdhsa_next_free_vgpr 9
-  .amdhsa_next_free_sgpr 27
-  .amdhsa_accum_offset 4
-  .amdhsa_reserve_vcc 0
-  .amdhsa_reserve_flat_scratch 0
-  .amdhsa_float_round_mode_32 1
-  .amdhsa_float_round_mode_16_64 1
-  .amdhsa_float_denorm_mode_32 1
-  .amdhsa_float_denorm_mode_16_64 0
-  .amdhsa_dx10_clamp 0
-  .amdhsa_ieee_mode 0
-  .amdhsa_fp16_overflow 1
-  .amdhsa_tg_split 1
-  .amdhsa_exception_fp_ieee_invalid_op 1
-  .amdhsa_exception_fp_denorm_src 1
-  .amdhsa_exception_fp_ieee_div_zero 1
-  .amdhsa_exception_fp_ieee_overflow 1
-  .amdhsa_exception_fp_ieee_underflow 1
-  .amdhsa_exception_fp_ieee_inexact 1
-  .amdhsa_exception_int_div_zero 1
-.end_amdhsa_kernel
-
-// ASM: .amdhsa_kernel complete
-// ASM-NEXT: .amdhsa_group_segment_fixed_size 1
-// ASM-NEXT: .amdhsa_private_segment_fixed_size 1
-// ASM-NEXT: .amdhsa_kernarg_size 8
-// ASM-NEXT: .amdhsa_user_sgpr_count 16
-// ASM-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1
-// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr 1
-// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr 1
-// ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 1
-// ASM-NEXT: .amdhsa_user_sgpr_dispatch_id 1
-// ASM-NEXT: .amdhsa_user_sgpr_flat_scratch_init 1
-// ASM-NEXT: .amdhsa_user_sgpr_kernarg_preload_length  1
-// ASM-NEXT: .amdhsa_user_sgpr_kernarg_preload_offset  1
-// ASM-NEXT: .amdhsa_user_sgpr_private_segment_size 1
-// ASM-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 1
-// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x 0
-// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y 1
-// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_z 1
-// ASM-NEXT: .amdhsa_system_sgpr_workgroup_info 1
-// ASM-NEXT: .amdhsa_system_vgpr_workitem_id 1
-// ASM-NEXT: .amdhsa_next_free_vgpr 9
-// ASM-NEXT: .amdhsa_next_free_sgpr 27
-// ASM-NEXT: .amdhsa_accum_offset 4
-// ASM-NEXT: .amdhsa_reserve_vcc 0
-// ASM-NEXT: .amdhsa_reserve_flat_scratch 0
-// ASM-NEXT: .amdhsa_reserve_xnack_mask 1
-// ASM-NEXT: .amdhsa_float_round_mode_32 1
-// ASM-NEXT: .amdhsa_float_round_mode_16_64 1
-// ASM-NEXT: .amdhsa_float_denorm_mode_32 1
-// ASM-NEXT: .amdhsa_float_denorm_mode_16_64 0
-// ASM-NEXT: .amdhsa_dx10_clamp 0
-// ASM-NEXT: .amdhsa_ieee_mode 0
-// ASM-NEXT: .amdhsa_fp16_overflow 1
-// ASM-NEXT: .amdhsa_tg_split 1
-// ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op 1
-// ASM-NEXT: .amdhsa_exception_fp_denorm_src 1
-// ASM-NEXT: .amdhsa_exception_fp_ieee_div_zero 1
-// ASM-NEXT: .amdhsa_exception_fp_ieee_overflow 1
-// ASM-NEXT: .amdhsa_exception_fp_ieee_underflow 1
-// ASM-NEXT: .amdhsa_exception_fp_ieee_inexact 1
-// ASM-NEXT: .amdhsa_exception_int_div_zero 1
-// ASM-NEXT: .end_amdhsa_kernel
-
-.section .foo
-
-.byte .amdgcn.gfx_generation_number
-// ASM: .byte 9
-
-.byte .amdgcn.next_free_vgpr
-// ASM: .byte 0
-.byte .amdgcn.next_free_sgpr
-// ASM: .byte 0
-
-v_mov_b32_e32 v7, s10
-
-.byte .amdgcn.next_free_vgpr
-// ASM: .byte 8
-.byte .amdgcn.next_free_sgpr
-// ASM: .byte 11
-
-.set .amdgcn.next_free_vgpr, 0
-.set .amdgcn.next_free_sgpr, 0
-
-.byte .amdgcn.next_free_vgpr
-// ASM: .byte 0
-.byte .amdgcn.next_free_sgpr
-// ASM: .byte 0
-
-v_mov_b32_e32 v16, s3
-
-.byte .amdgcn.next_free_vgpr
-// ASM: .byte 17
-.byte .amdgcn.next_free_sgpr
-// ASM: .byte 4

diff  --git a/llvm/test/MC/AMDGPU/hsa-gfx940-v3.s b/llvm/test/MC/AMDGPU/hsa-gfx940-v3.s
deleted file mode 100644
index 9624515ecd6fb90..000000000000000
--- a/llvm/test/MC/AMDGPU/hsa-gfx940-v3.s
+++ /dev/null
@@ -1,178 +0,0 @@
-// RUN: llvm-mc --amdhsa-code-object-version=3 -triple amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck --check-prefix=ASM %s
-// RUN: llvm-mc --amdhsa-code-object-version=3 -triple amdgcn-amd-amdhsa -mcpu=gfx940 -filetype=obj < %s > %t
-// RUN: llvm-readelf -S -r -s %t | FileCheck --check-prefix=READOBJ %s
-// RUN: llvm-objdump -s -j .rodata %t | FileCheck --check-prefix=OBJDUMP %s
-
-// READOBJ: Section Headers
-// READOBJ: .text   PROGBITS {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9]+}} AX {{[0-9]+}} {{[0-9]+}} 256
-// READOBJ: .rodata PROGBITS {{[0-9a-f]+}} {{[0-9a-f]+}}        000080 {{[0-9]+}}  A {{[0-9]+}} {{[0-9]+}} 64
-
-// READOBJ: Relocation section '.rela.rodata' at offset
-// READOBJ: 0000000000000010 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 10
-// READOBJ: 0000000000000050 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 110
-
-// READOBJ: Symbol table '.symtab' contains {{[0-9]+}} entries:
-// READOBJ-DAG: {{[0-9]+}}: 0000000000000100  0 FUNC    LOCAL  PROTECTED 2 complete
-// READOBJ-DAG: {{[0-9]+}}: 0000000000000040 64 OBJECT  LOCAL  DEFAULT   3 complete.kd
-// READOBJ-DAG: {{[0-9]+}}: 0000000000000000  0 FUNC    LOCAL  PROTECTED 2 minimal
-// READOBJ-DAG: {{[0-9]+}}: 0000000000000000 64 OBJECT  LOCAL  DEFAULT   3 minimal.kd
-
-// OBJDUMP: Contents of section .rodata
-// Note, relocation for KERNEL_CODE_ENTRY_BYTE_OFFSET is not resolved here.
-// minimal
-// OBJDUMP-NEXT: 0000 00000000 00000000 00000000 00000000
-// OBJDUMP-NEXT: 0010 00000000 00000000 00000000 00000000
-// OBJDUMP-NEXT: 0020 00000000 00000000 00000000 00000000
-// OBJDUMP-NEXT: 0030 0000ac00 80000000 00000000 00000000
-// complete
-// OBJDUMP-NEXT: 0040 01000000 01000000 08000000 00000000
-// OBJDUMP-NEXT: 0050 00000000 00000000 00000000 00000000
-// OBJDUMP-NEXT: 0060 00000000 00000000 00000000 00000100
-// OBJDUMP-NEXT: 0070 01510104 150f007f 5e008100 00000000
-
-.text
-// ASM: .text
-
-.amdgcn_target "amdgcn-amd-amdhsa--gfx940+xnack+sram-ecc"
-// ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx940+xnack+sram-ecc"
-
-.p2align 8
-.type minimal, at function
-minimal:
-  s_endpgm
-
-.p2align 8
-.type complete, at function
-complete:
-  s_endpgm
-
-.rodata
-// ASM: .rodata
-
-// Test that only specifying required directives is allowed, and that defaulted
-// values are omitted.
-.p2align 6
-.amdhsa_kernel minimal
-  .amdhsa_next_free_vgpr 0
-  .amdhsa_next_free_sgpr 0
-  .amdhsa_accum_offset 4
-.end_amdhsa_kernel
-
-// ASM: .amdhsa_kernel minimal
-// ASM: .amdhsa_next_free_vgpr 0
-// ASM-NEXT: .amdhsa_next_free_sgpr 0
-// ASM-NEXT: .amdhsa_accum_offset 4
-// ASM: .amdhsa_tg_split 0
-// ASM: .end_amdhsa_kernel
-
-// Test that we can specify all available directives with non-default values.
-.p2align 6
-.amdhsa_kernel complete
-  .amdhsa_group_segment_fixed_size 1
-  .amdhsa_private_segment_fixed_size 1
-  .amdhsa_user_sgpr_dispatch_ptr 1
-  .amdhsa_user_sgpr_queue_ptr 1
-  .amdhsa_user_sgpr_kernarg_segment_ptr 1
-  .amdhsa_user_sgpr_dispatch_id 1
-  .amdhsa_kernarg_size 8
-  .amdhsa_user_sgpr_kernarg_preload_length  1
-  .amdhsa_user_sgpr_kernarg_preload_offset  1
-  .amdhsa_user_sgpr_private_segment_size 1
-  .amdhsa_enable_private_segment 1
-  .amdhsa_system_sgpr_workgroup_id_x 0
-  .amdhsa_system_sgpr_workgroup_id_y 1
-  .amdhsa_system_sgpr_workgroup_id_z 1
-  .amdhsa_system_sgpr_workgroup_info 1
-  .amdhsa_system_vgpr_workitem_id 1
-  .amdhsa_next_free_vgpr 9
-  .amdhsa_next_free_sgpr 27
-  .amdhsa_accum_offset 4
-  .amdhsa_reserve_vcc 0
-  .amdhsa_float_round_mode_32 1
-  .amdhsa_float_round_mode_16_64 1
-  .amdhsa_float_denorm_mode_32 1
-  .amdhsa_float_denorm_mode_16_64 0
-  .amdhsa_dx10_clamp 0
-  .amdhsa_ieee_mode 0
-  .amdhsa_fp16_overflow 1
-  .amdhsa_tg_split 1
-  .amdhsa_exception_fp_ieee_invalid_op 1
-  .amdhsa_exception_fp_denorm_src 1
-  .amdhsa_exception_fp_ieee_div_zero 1
-  .amdhsa_exception_fp_ieee_overflow 1
-  .amdhsa_exception_fp_ieee_underflow 1
-  .amdhsa_exception_fp_ieee_inexact 1
-  .amdhsa_exception_int_div_zero 1
-.end_amdhsa_kernel
-
-// ASM: .amdhsa_kernel complete
-// ASM-NEXT: .amdhsa_group_segment_fixed_size 1
-// ASM-NEXT: .amdhsa_private_segment_fixed_size 1
-// ASM-NEXT: .amdhsa_kernarg_size 8
-// ASM-NEXT: .amdhsa_user_sgpr_count 10
-// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr 1
-// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr 1
-// ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 1
-// ASM-NEXT: .amdhsa_user_sgpr_dispatch_id 1
-// ASM-NEXT: .amdhsa_user_sgpr_kernarg_preload_length  1
-// ASM-NEXT: .amdhsa_user_sgpr_kernarg_preload_offset  1
-// ASM-NEXT: .amdhsa_user_sgpr_private_segment_size 1
-// ASM-NEXT: .amdhsa_enable_private_segment 1
-// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x 0
-// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y 1
-// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_z 1
-// ASM-NEXT: .amdhsa_system_sgpr_workgroup_info 1
-// ASM-NEXT: .amdhsa_system_vgpr_workitem_id 1
-// ASM-NEXT: .amdhsa_next_free_vgpr 9
-// ASM-NEXT: .amdhsa_next_free_sgpr 27
-// ASM-NEXT: .amdhsa_accum_offset 4
-// ASM-NEXT: .amdhsa_reserve_vcc 0
-// ASM-NEXT: .amdhsa_reserve_xnack_mask 1
-// ASM-NEXT: .amdhsa_float_round_mode_32 1
-// ASM-NEXT: .amdhsa_float_round_mode_16_64 1
-// ASM-NEXT: .amdhsa_float_denorm_mode_32 1
-// ASM-NEXT: .amdhsa_float_denorm_mode_16_64 0
-// ASM-NEXT: .amdhsa_dx10_clamp 0
-// ASM-NEXT: .amdhsa_ieee_mode 0
-// ASM-NEXT: .amdhsa_fp16_overflow 1
-// ASM-NEXT: .amdhsa_tg_split 1
-// ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op 1
-// ASM-NEXT: .amdhsa_exception_fp_denorm_src 1
-// ASM-NEXT: .amdhsa_exception_fp_ieee_div_zero 1
-// ASM-NEXT: .amdhsa_exception_fp_ieee_overflow 1
-// ASM-NEXT: .amdhsa_exception_fp_ieee_underflow 1
-// ASM-NEXT: .amdhsa_exception_fp_ieee_inexact 1
-// ASM-NEXT: .amdhsa_exception_int_div_zero 1
-// ASM-NEXT: .end_amdhsa_kernel
-
-.section .foo
-
-.byte .amdgcn.gfx_generation_number
-// ASM: .byte 9
-
-.byte .amdgcn.next_free_vgpr
-// ASM: .byte 0
-.byte .amdgcn.next_free_sgpr
-// ASM: .byte 0
-
-v_mov_b32_e32 v7, s10
-
-.byte .amdgcn.next_free_vgpr
-// ASM: .byte 8
-.byte .amdgcn.next_free_sgpr
-// ASM: .byte 11
-
-.set .amdgcn.next_free_vgpr, 0
-.set .amdgcn.next_free_sgpr, 0
-
-.byte .amdgcn.next_free_vgpr
-// ASM: .byte 0
-.byte .amdgcn.next_free_sgpr
-// ASM: .byte 0
-
-v_mov_b32_e32 v16, s3
-
-.byte .amdgcn.next_free_vgpr
-// ASM: .byte 17
-.byte .amdgcn.next_free_sgpr
-// ASM: .byte 4

diff  --git a/llvm/test/MC/AMDGPU/hsa-v3.s b/llvm/test/MC/AMDGPU/hsa-v3.s
deleted file mode 100644
index 9f854986d7bc447..000000000000000
--- a/llvm/test/MC/AMDGPU/hsa-v3.s
+++ /dev/null
@@ -1,304 +0,0 @@
-// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx904 --amdhsa-code-object-version=3 -mattr=+xnack < %s | FileCheck --check-prefix=ASM %s
-// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx904 --amdhsa-code-object-version=3 -mattr=+xnack -filetype=obj < %s > %t
-// RUN: llvm-readelf -S -r -s %t | FileCheck --check-prefix=READOBJ %s
-// RUN: llvm-objdump -s -j .rodata %t | FileCheck --check-prefix=OBJDUMP %s
-
-// READOBJ: Section Headers
-// READOBJ: .text   PROGBITS {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9]+}} AX {{[0-9]+}} {{[0-9]+}} 256
-// READOBJ: .rodata PROGBITS {{[0-9a-f]+}} {{[0-9a-f]+}}        000100 {{[0-9]+}}  A {{[0-9]+}} {{[0-9]+}} 64
-
-// READOBJ: Relocation section '.rela.rodata' at offset
-// READOBJ: 0000000000000010 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 10
-// READOBJ: 0000000000000050 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 110
-// READOBJ: 0000000000000090 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 210
-// READOBJ: 00000000000000d0 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 310
-
-// READOBJ: Symbol table '.symtab' contains {{[0-9]+}} entries:
-// READOBJ:      0000000000000000  0 FUNC    LOCAL  PROTECTED 2 minimal
-// READOBJ-NEXT: 0000000000000100  0 FUNC    LOCAL  PROTECTED 2 complete
-// READOBJ-NEXT: 0000000000000200  0 FUNC    LOCAL  PROTECTED 2 special_sgpr
-// READOBJ-NEXT: 0000000000000300  0 FUNC    LOCAL  PROTECTED 2 disabled_user_sgpr
-// READOBJ-NEXT: 0000000000000000 64 OBJECT  LOCAL  DEFAULT   3 minimal.kd
-// READOBJ-NEXT: 0000000000000040 64 OBJECT  LOCAL  DEFAULT   3 complete.kd
-// READOBJ-NEXT: 0000000000000080 64 OBJECT  LOCAL  DEFAULT   3 special_sgpr.kd
-// READOBJ-NEXT: 00000000000000c0 64 OBJECT  LOCAL  DEFAULT   3 disabled_user_sgpr.kd
-
-// OBJDUMP: Contents of section .rodata
-// Note, relocation for KERNEL_CODE_ENTRY_BYTE_OFFSET is not resolved here.
-// minimal
-// OBJDUMP-NEXT: 0000 00000000 00000000 00000000 00000000
-// OBJDUMP-NEXT: 0010 00000000 00000000 00000000 00000000
-// OBJDUMP-NEXT: 0020 00000000 00000000 00000000 00000000
-// OBJDUMP-NEXT: 0030 0000ac00 80000000 00000000 00000000
-// complete
-// OBJDUMP-NEXT: 0040 01000000 01000000 08000000 00000000
-// OBJDUMP-NEXT: 0050 00000000 00000000 00000000 00000000
-// OBJDUMP-NEXT: 0060 00000000 00000000 00000000 00000000
-// OBJDUMP-NEXT: 0070 c2500104 1f0f007f 7f000000 00000000
-// special_sgpr
-// OBJDUMP-NEXT: 0080 00000000 00000000 00000000 00000000
-// OBJDUMP-NEXT: 0090 00000000 00000000 00000000 00000000
-// OBJDUMP-NEXT: 00a0 00000000 00000000 00000000 00000000
-// OBJDUMP-NEXT: 00b0 00010000 80000000 00000000 00000000
-// disabled_user_sgpr
-// OBJDUMP-NEXT: 00c0 00000000 00000000 00000000 00000000
-// OBJDUMP-NEXT: 00d0 00000000 00000000 00000000 00000000
-// OBJDUMP-NEXT: 00e0 00000000 00000000 00000000 00000000
-// OBJDUMP-NEXT: 00f0 0000ac00 80000000 00000000 00000000
-
-.text
-// ASM: .text
-
-.amdgcn_target "amdgcn-amd-amdhsa--gfx904+xnack"
-// ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx904+xnack"
-
-.p2align 8
-.type minimal, at function
-minimal:
-  s_endpgm
-
-.p2align 8
-.type complete, at function
-complete:
-  s_endpgm
-
-.p2align 8
-.type special_sgpr, at function
-special_sgpr:
-  s_endpgm
-
-.p2align 8
-.type disabled_user_sgpr, at function
-disabled_user_sgpr:
-  s_endpgm
-
-.rodata
-// ASM: .rodata
-
-// Test that only specifying required directives is allowed, and that defaulted
-// values are omitted.
-.p2align 6
-.amdhsa_kernel minimal
-  .amdhsa_next_free_vgpr 0
-  .amdhsa_next_free_sgpr 0
-.end_amdhsa_kernel
-
-// ASM: .amdhsa_kernel minimal
-// ASM: .amdhsa_next_free_vgpr 0
-// ASM-NEXT: .amdhsa_next_free_sgpr 0
-// ASM: .end_amdhsa_kernel
-
-// Test that we can specify all available directives with non-default values.
-.p2align 6
-.amdhsa_kernel complete
-  .amdhsa_group_segment_fixed_size 1
-  .amdhsa_private_segment_fixed_size 1
-  .amdhsa_kernarg_size 8
-  .amdhsa_user_sgpr_private_segment_buffer 1
-  .amdhsa_user_sgpr_dispatch_ptr 1
-  .amdhsa_user_sgpr_queue_ptr 1
-  .amdhsa_user_sgpr_kernarg_segment_ptr 1
-  .amdhsa_user_sgpr_dispatch_id 1
-  .amdhsa_user_sgpr_flat_scratch_init 1
-  .amdhsa_user_sgpr_private_segment_size 1
-  .amdhsa_system_sgpr_private_segment_wavefront_offset 1
-  .amdhsa_system_sgpr_workgroup_id_x 0
-  .amdhsa_system_sgpr_workgroup_id_y 1
-  .amdhsa_system_sgpr_workgroup_id_z 1
-  .amdhsa_system_sgpr_workgroup_info 1
-  .amdhsa_system_vgpr_workitem_id 1
-  .amdhsa_next_free_vgpr 9
-  .amdhsa_next_free_sgpr 27
-  .amdhsa_reserve_vcc 0
-  .amdhsa_reserve_flat_scratch 0
-  .amdhsa_reserve_xnack_mask 1
-  .amdhsa_float_round_mode_32 1
-  .amdhsa_float_round_mode_16_64 1
-  .amdhsa_float_denorm_mode_32 1
-  .amdhsa_float_denorm_mode_16_64 0
-  .amdhsa_dx10_clamp 0
-  .amdhsa_ieee_mode 0
-  .amdhsa_fp16_overflow 1
-  .amdhsa_exception_fp_ieee_invalid_op 1
-  .amdhsa_exception_fp_denorm_src 1
-  .amdhsa_exception_fp_ieee_div_zero 1
-  .amdhsa_exception_fp_ieee_overflow 1
-  .amdhsa_exception_fp_ieee_underflow 1
-  .amdhsa_exception_fp_ieee_inexact 1
-  .amdhsa_exception_int_div_zero 1
-.end_amdhsa_kernel
-
-// ASM: .amdhsa_kernel complete
-// ASM-NEXT: .amdhsa_group_segment_fixed_size 1
-// ASM-NEXT: .amdhsa_private_segment_fixed_size 1
-// ASM-NEXT: .amdhsa_kernarg_size 8
-// ASM-NEXT: .amdhsa_user_sgpr_count 15
-// ASM-NEXT: .amdhsa_user_sgpr_private_segment_buffer 1
-// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr 1
-// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr 1
-// ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 1
-// ASM-NEXT: .amdhsa_user_sgpr_dispatch_id 1
-// ASM-NEXT: .amdhsa_user_sgpr_flat_scratch_init 1
-// ASM-NEXT: .amdhsa_user_sgpr_private_segment_size 1
-// ASM-NEXT: .amdhsa_system_sgpr_private_segment_wavefront_offset 1
-// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x 0
-// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y 1
-// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_z 1
-// ASM-NEXT: .amdhsa_system_sgpr_workgroup_info 1
-// ASM-NEXT: .amdhsa_system_vgpr_workitem_id 1
-// ASM-NEXT: .amdhsa_next_free_vgpr 9
-// ASM-NEXT: .amdhsa_next_free_sgpr 27
-// ASM-NEXT: .amdhsa_reserve_vcc 0
-// ASM-NEXT: .amdhsa_reserve_flat_scratch 0
-// ASM-NEXT: .amdhsa_reserve_xnack_mask 1
-// ASM-NEXT: .amdhsa_float_round_mode_32 1
-// ASM-NEXT: .amdhsa_float_round_mode_16_64 1
-// ASM-NEXT: .amdhsa_float_denorm_mode_32 1
-// ASM-NEXT: .amdhsa_float_denorm_mode_16_64 0
-// ASM-NEXT: .amdhsa_dx10_clamp 0
-// ASM-NEXT: .amdhsa_ieee_mode 0
-// ASM-NEXT: .amdhsa_fp16_overflow 1
-// ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op 1
-// ASM-NEXT: .amdhsa_exception_fp_denorm_src 1
-// ASM-NEXT: .amdhsa_exception_fp_ieee_div_zero 1
-// ASM-NEXT: .amdhsa_exception_fp_ieee_overflow 1
-// ASM-NEXT: .amdhsa_exception_fp_ieee_underflow 1
-// ASM-NEXT: .amdhsa_exception_fp_ieee_inexact 1
-// ASM-NEXT: .amdhsa_exception_int_div_zero 1
-// ASM-NEXT: .end_amdhsa_kernel
-
-// Test that we are including special SGPR usage in the granulated count.
-.p2align 6
-.amdhsa_kernel special_sgpr
-  // Same next_free_sgpr as "complete", but...
-  .amdhsa_next_free_sgpr 27
-  // ...on GFX9 this should require an additional 6 SGPRs, pushing us from
-  // 3 granules to 4
-  .amdhsa_reserve_flat_scratch 1
-
-  .amdhsa_reserve_vcc 0
-  .amdhsa_reserve_xnack_mask 1
-
-  .amdhsa_float_denorm_mode_16_64 0
-  .amdhsa_dx10_clamp 0
-  .amdhsa_ieee_mode 0
-  .amdhsa_next_free_vgpr 0
-.end_amdhsa_kernel
-
-// ASM: .amdhsa_kernel special_sgpr
-// ASM: .amdhsa_next_free_vgpr 0
-// ASM-NEXT: .amdhsa_next_free_sgpr 27
-// ASM-NEXT: .amdhsa_reserve_vcc 0
-// ASM-NEXT: .amdhsa_reserve_xnack_mask 1
-// ASM: .amdhsa_float_denorm_mode_16_64 0
-// ASM-NEXT: .amdhsa_dx10_clamp 0
-// ASM-NEXT: .amdhsa_ieee_mode 0
-// ASM: .end_amdhsa_kernel
-
-// Test that explicitly disabling user_sgpr's does not affect the user_sgpr
-// count, i.e. this should produce the same descriptor as minimal.
-.p2align 6
-.amdhsa_kernel disabled_user_sgpr
-  .amdhsa_user_sgpr_private_segment_buffer 0
-  .amdhsa_next_free_vgpr 0
-  .amdhsa_next_free_sgpr 0
-.end_amdhsa_kernel
-
-// ASM: .amdhsa_kernel disabled_user_sgpr
-// ASM: .amdhsa_next_free_vgpr 0
-// ASM-NEXT: .amdhsa_next_free_sgpr 0
-// ASM: .end_amdhsa_kernel
-
-.section .foo
-
-.byte .amdgcn.gfx_generation_number
-// ASM: .byte 9
-
-.byte .amdgcn.gfx_generation_minor
-// ASM: .byte 0
-
-.byte .amdgcn.gfx_generation_stepping
-// ASM: .byte 4
-
-.byte .amdgcn.next_free_vgpr
-// ASM: .byte 0
-.byte .amdgcn.next_free_sgpr
-// ASM: .byte 0
-
-v_mov_b32_e32 v7, s10
-
-.byte .amdgcn.next_free_vgpr
-// ASM: .byte 8
-.byte .amdgcn.next_free_sgpr
-// ASM: .byte 11
-
-.set .amdgcn.next_free_vgpr, 0
-.set .amdgcn.next_free_sgpr, 0
-
-.byte .amdgcn.next_free_vgpr
-// ASM: .byte 0
-.byte .amdgcn.next_free_sgpr
-// ASM: .byte 0
-
-v_mov_b32_e32 v16, s3
-
-.byte .amdgcn.next_free_vgpr
-// ASM: .byte 17
-.byte .amdgcn.next_free_sgpr
-// ASM: .byte 4
-
-// Metadata
-
-.amdgpu_metadata
-  amdhsa.version:
-    - 3
-    - 0
-  amdhsa.kernels:
-    - .name:       amd_kernel_code_t_test_all
-      .symbol: amd_kernel_code_t_test_all at kd
-      .kernarg_segment_size: 8
-      .group_segment_fixed_size: 16
-      .private_segment_fixed_size: 32
-      .kernarg_segment_align: 64
-      .wavefront_size: 128
-      .sgpr_count: 14
-      .vgpr_count: 40
-      .max_flat_workgroup_size: 256
-    - .name:       amd_kernel_code_t_minimal
-      .symbol: amd_kernel_code_t_minimal at kd
-      .kernarg_segment_size: 8
-      .group_segment_fixed_size: 16
-      .private_segment_fixed_size: 32
-      .kernarg_segment_align: 64
-      .wavefront_size: 128
-      .sgpr_count: 14
-      .vgpr_count: 40
-      .max_flat_workgroup_size: 256
-.end_amdgpu_metadata
-
-// ASM:      	.amdgpu_metadata
-// ASM:      amdhsa.kernels:
-// ASM:        - .group_segment_fixed_size: 16
-// ASM:          .kernarg_segment_align: 64
-// ASM:          .kernarg_segment_size: 8
-// ASM:          .max_flat_workgroup_size: 256
-// ASM:          .name:           amd_kernel_code_t_test_all
-// ASM:          .private_segment_fixed_size: 32
-// ASM:          .sgpr_count:     14
-// ASM:          .symbol:         'amd_kernel_code_t_test_all at kd'
-// ASM:          .vgpr_count:     40
-// ASM:          .wavefront_size: 128
-// ASM:        - .group_segment_fixed_size: 16
-// ASM:          .kernarg_segment_align: 64
-// ASM:          .kernarg_segment_size: 8
-// ASM:          .max_flat_workgroup_size: 256
-// ASM:          .name:           amd_kernel_code_t_minimal
-// ASM:          .private_segment_fixed_size: 32
-// ASM:          .sgpr_count:     14
-// ASM:          .symbol:         'amd_kernel_code_t_minimal at kd'
-// ASM:          .vgpr_count:     40
-// ASM:          .wavefront_size: 128
-// ASM:      amdhsa.version:
-// ASM-NEXT:   - 3
-// ASM-NEXT:   - 0
-// ASM:      	.end_amdgpu_metadata

diff  --git a/llvm/test/MC/AMDGPU/user-sgpr-count-diag.s b/llvm/test/MC/AMDGPU/user-sgpr-count-diag.s
index 63e532e0ffa3768..7e3ae8424cc7bdc 100644
--- a/llvm/test/MC/AMDGPU/user-sgpr-count-diag.s
+++ b/llvm/test/MC/AMDGPU/user-sgpr-count-diag.s
@@ -1,4 +1,4 @@
-// RUN: not llvm-mc --amdhsa-code-object-version=3 -triple amdgcn-amd-amdhsa -mcpu=gfx90a %s 2>&1 >/dev/null | FileCheck -check-prefix=ERR %s
+// RUN: not llvm-mc --amdhsa-code-object-version=4 -triple amdgcn-amd-amdhsa -mcpu=gfx90a %s 2>&1 >/dev/null | FileCheck -check-prefix=ERR %s
 
 .amdhsa_kernel implied_count_too_low_0
   .amdhsa_user_sgpr_count 0

diff  --git a/llvm/test/MC/AMDGPU/user-sgpr-count.s b/llvm/test/MC/AMDGPU/user-sgpr-count.s
index aa8970185eb04bc..950c514f786b254 100644
--- a/llvm/test/MC/AMDGPU/user-sgpr-count.s
+++ b/llvm/test/MC/AMDGPU/user-sgpr-count.s
@@ -1,10 +1,10 @@
-// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx90a --amdhsa-code-object-version=3 -mattr=+xnack < %s | FileCheck --check-prefix=ASM %s
+// RUN: llvm-mc -triple amdgcn-amd-amdhsa -mcpu=gfx90a --amdhsa-code-object-version=4 -mattr=+xnack < %s | FileCheck --check-prefix=ASM %s
 
 .text
 // ASM: .text
 
-.amdgcn_target "amdgcn-amd-amdhsa--gfx90a+xnack+sram-ecc"
-// ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx90a+xnack+sram-ecc"
+.amdgcn_target "amdgcn-amd-amdhsa--gfx90a:xnack+"
+// ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx90a:xnack+"
 
 
 // ASM-LABEL: .amdhsa_kernel user_sgprs_implied_count


        


More information about the cfe-commits mailing list