[clang] [llvm] [AMDGPU] Introduce GFX9/10.1/10.3/11 Generic Targets (PR #76955)
Pierre van Houtryve via llvm-commits
llvm-commits at lists.llvm.org
Fri Feb 9 01:18:24 PST 2024
https://github.com/Pierre-vh updated https://github.com/llvm/llvm-project/pull/76955
>From 73a30064b9229b107fee8ea8938f641186778bd0 Mon Sep 17 00:00:00 2001
From: pvanhout <pierre.vanhoutryve at amd.com>
Date: Thu, 4 Jan 2024 14:48:05 +0100
Subject: [PATCH 1/7] [AMDGPU] Introduce GFX9/10.1/10.3/11 Generic Targets
These generic targets include multiple GPUs and will, in the future, provide a way to build once and run on multiple GPU, at the cost of less optimization opportunities.
Note that this is just doing the compiler side of things, device libs an runtimes/loader/etc. don't know about these targets yet, so none of them actually work in practice right now. This is just the initial commit to make LLVM aware of them.
No docs in this patch either as I plan to do it all in a follow-up patch.
---
clang/lib/Basic/Targets/AMDGPU.cpp | 20 +-
clang/test/Driver/amdgpu-macros.cl | 5 +
clang/test/Driver/amdgpu-mcpu.cl | 10 +
llvm/docs/AMDGPUUsage.rst | 325 +++++++++++++-----
llvm/include/llvm/BinaryFormat/ELF.h | 6 +-
llvm/include/llvm/TargetParser/TargetParser.h | 10 +
llvm/lib/Object/ELFObjectFile.cpp | 10 +
llvm/lib/ObjectYAML/ELFYAML.cpp | 4 +
llvm/lib/Target/AMDGPU/AMDGPU.td | 87 +++--
llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp | 6 +
.../AMDGPURemoveIncompatibleFunctions.cpp | 6 +-
llvm/lib/Target/AMDGPU/GCNProcessors.td | 22 ++
llvm/lib/Target/AMDGPU/GCNSubtarget.h | 4 +
.../MCTargetDesc/AMDGPUTargetStreamer.cpp | 26 ++
llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h | 11 +
llvm/lib/TargetParser/TargetParser.cpp | 46 +++
.../GlobalISel/llvm.amdgcn.workitem.id.ll | 1 +
.../CodeGen/AMDGPU/directive-amdgcn-target.ll | 14 +
.../CodeGen/AMDGPU/elf-header-flags-mach.ll | 10 +
llvm/test/CodeGen/AMDGPU/gds-allocation.ll | 1 +
llvm/test/CodeGen/AMDGPU/gds-atomic.ll | 1 +
.../AMDGPU/generic-targets-require-v6.ll | 18 +
.../AMDGPU/hsa-generic-target-features.ll | 31 ++
.../llvm.amdgcn.image.gather4.d16.dim.ll | 3 +
.../AMDGPU/llvm.amdgcn.image.sample.dim.ll | 3 +
.../AMDGPU/unsupported-image-sample.ll | 12 +-
.../Object/AMDGPU/elf-header-flags-mach.yaml | 29 ++
.../llvm-objdump/ELF/AMDGPU/subtarget.ll | 20 ++
.../llvm-readobj/ELF/AMDGPU/elf-headers.test | 12 +
llvm/tools/llvm-readobj/ELFDumper.cpp | 128 +++----
30 files changed, 689 insertions(+), 192 deletions(-)
create mode 100644 llvm/test/CodeGen/AMDGPU/generic-targets-require-v6.ll
create mode 100644 llvm/test/CodeGen/AMDGPU/hsa-generic-target-features.ll
diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp b/clang/lib/Basic/Targets/AMDGPU.cpp
index 141501e8a4d9a1..799634ccec7ba5 100644
--- a/clang/lib/Basic/Targets/AMDGPU.cpp
+++ b/clang/lib/Basic/Targets/AMDGPU.cpp
@@ -279,13 +279,25 @@ void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts,
if (GPUKind == llvm::AMDGPU::GK_NONE && !IsHIPHost)
return;
- StringRef CanonName = isAMDGCN(getTriple()) ? getArchNameAMDGCN(GPUKind)
- : getArchNameR600(GPUKind);
+ std::string CanonName = (isAMDGCN(getTriple()) ? getArchNameAMDGCN(GPUKind)
+ : getArchNameR600(GPUKind))
+ .str();
+
+ // Sanitize the name of generic targets.
+ // e.g. gfx10.1-generic -> gfx10_1_generic
+ if (GPUKind >= llvm::AMDGPU::GK_AMDGCN_GENERIC_FIRST &&
+ GPUKind <= llvm::AMDGPU::GK_AMDGCN_GENERIC_LAST) {
+ std::replace(CanonName.begin(), CanonName.end(), '.', '_');
+ std::replace(CanonName.begin(), CanonName.end(), '-', '_');
+ }
+
Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__"));
// Emit macros for gfx family e.g. gfx906 -> __GFX9__, gfx1030 -> __GFX10___
if (isAMDGCN(getTriple()) && !IsHIPHost) {
- assert(CanonName.starts_with("gfx") && "Invalid amdgcn canonical name");
- Builder.defineMacro(Twine("__") + Twine(CanonName.drop_back(2).upper()) +
+ assert(StringRef(CanonName).starts_with("gfx") &&
+ "Invalid amdgcn canonical name");
+ StringRef CanonFamilyName = getArchFamilyNameAMDGCN(GPUKind);
+ Builder.defineMacro(Twine("__") + Twine(CanonFamilyName.upper()) +
Twine("__"));
Builder.defineMacro("__amdgcn_processor__",
Twine("\"") + Twine(CanonName) + Twine("\""));
diff --git a/clang/test/Driver/amdgpu-macros.cl b/clang/test/Driver/amdgpu-macros.cl
index 81c22af460d12d..3b10444ef71d36 100644
--- a/clang/test/Driver/amdgpu-macros.cl
+++ b/clang/test/Driver/amdgpu-macros.cl
@@ -131,6 +131,11 @@
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx1200 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx1200 -DFAMILY=GFX12
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx1201 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx1201 -DFAMILY=GFX12
+// RUN: %clang -E -dM -target amdgcn -mcpu=gfx9-generic %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=64 -DCPU=gfx9_generic -DFAMILY=GFX9
+// RUN: %clang -E -dM -target amdgcn -mcpu=gfx10.1-generic %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx10_1_generic -DFAMILY=GFX10
+// RUN: %clang -E -dM -target amdgcn -mcpu=gfx10.3-generic %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx10_3_generic -DFAMILY=GFX10
+// RUN: %clang -E -dM -target amdgcn -mcpu=gfx11-generic %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx11_generic -DFAMILY=GFX11
+
// ARCH-GCN-DAG: #define FP_FAST_FMA 1
// FAST_FMAF-DAG: #define FP_FAST_FMAF 1
diff --git a/clang/test/Driver/amdgpu-mcpu.cl b/clang/test/Driver/amdgpu-mcpu.cl
index eeb16ae98ebad7..6f18ea0615cb69 100644
--- a/clang/test/Driver/amdgpu-mcpu.cl
+++ b/clang/test/Driver/amdgpu-mcpu.cl
@@ -115,6 +115,11 @@
// RUN: %clang -### -target amdgcn -mcpu=gfx1200 %s 2>&1 | FileCheck --check-prefix=GFX1200 %s
// RUN: %clang -### -target amdgcn -mcpu=gfx1201 %s 2>&1 | FileCheck --check-prefix=GFX1201 %s
+// RUN: %clang -### -target amdgcn -mcpu=gfx9-generic %s 2>&1 | FileCheck --check-prefix=GFX9_GENERIC %s
+// RUN: %clang -### -target amdgcn -mcpu=gfx10.1-generic %s 2>&1 | FileCheck --check-prefix=GFX10_1_GENERIC %s
+// RUN: %clang -### -target amdgcn -mcpu=gfx10.3-generic %s 2>&1 | FileCheck --check-prefix=GFX10_3_GENERIC %s
+// RUN: %clang -### -target amdgcn -mcpu=gfx11-generic %s 2>&1 | FileCheck --check-prefix=GFX11_GENERIC %s
+
// GCNDEFAULT-NOT: -target-cpu
// GFX600: "-target-cpu" "gfx600"
// GFX601: "-target-cpu" "gfx601"
@@ -160,3 +165,8 @@
// GFX1151: "-target-cpu" "gfx1151"
// GFX1200: "-target-cpu" "gfx1200"
// GFX1201: "-target-cpu" "gfx1201"
+
+// GFX9_GENERIC: "-target-cpu" "gfx9-generic"
+// GFX10_1_GENERIC: "-target-cpu" "gfx10.1-generic"
+// GFX10_3_GENERIC: "-target-cpu" "gfx10.3-generic"
+// GFX11_GENERIC: "-target-cpu" "gfx11-generic"
diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst
index 301996847a5841..4f4674b5b1aea7 100644
--- a/llvm/docs/AMDGPUUsage.rst
+++ b/llvm/docs/AMDGPUUsage.rst
@@ -520,6 +520,104 @@ Every processor supports every OS ABI (see :ref:`amdgpu-os`) with the following
=========== =============== ============ ===== ================= =============== =============== ======================
+Generic processors also exist.
+Generic processor code objects can be executed on any of the processors that
+are supported by the generic processor. Such code objects may not perform as well
+as those for the non-generic processors.
+
+Generic processors are only available on code object V6 and above (see :ref:`amdgpu-elf-code-object`).
+
+Generic processor code objects are versioned (see :ref:`amdgpu-elf-header-e_flags-table-v6-onwards`).
+The version number is used by runtimes to determine if a code object can be run on a specific agent.
+The version number may be increased as-needed, e.g., when a major codegen change occurs for a generic
+target.
+
+ .. table:: AMDGPU Generic Processors
+ :name: amdgpu-generic-processor-table
+
+ ==================== ============== ================= ================== ================= =================================
+ Processor Target Supported Target Features Target Properties Target Restrictions
+ Triple Processors Supported
+ Architecture
+
+ ==================== ============== ================= ================== ================= =================================
+ ``gfx9-generic`` ``amdgcn`` - ``gfx900`` - xnack - Absolute flat - ``v_mad_mix`` instructions
+ - ``gfx902`` scratch are not available on
+ - ``gfx904`` ``gfx900``, ``gfx902``,
+ - ``gfx906`` ``gfx909``, ``gfx90c``
+ - ``gfx909`` - ``v_fma_mix`` instructions
+ - ``gfx90c`` are not available on ``gfx904``
+ - sramecc is not available on
+ ``gfx906``
+ - The following instructions
+ are not available on ``gfx906``:
+
+ - ``v_fmac_f32``
+ - ``v_xnor_b32``
+ - ``v_dot4_i32_i8``
+ - ``v_dot8_i32_i4``
+ - ``v_dot2_i32_i16``
+ - ``v_dot2_u32_u16``
+ - ``v_dot4_u32_u8``
+ - ``v_dot8_u32_u4``
+ - ``v_dot2_f32_f16``
+
+
+ ``gfx10.1-generic`` ``amdgcn`` - ``gfx1010`` - xnack - Absolute flat - The following instructions are
+ - ``gfx1011`` - wavefrontsize64 scratch not available on ``gfx1011``
+ - ``gfx1012`` - cumode and ``gfx1012``
+ - ``gfx1013``
+ - ``v_dot4_i32_i8``
+ - ``v_dot8_i32_i4``
+ - ``v_dot2_i32_i16``
+ - ``v_dot2_u32_u16``
+ - ``v_dot2c_f32_f16``
+ - ``v_dot4c_i32_i8``
+ - ``v_dot4_u32_u8``
+ - ``v_dot8_u32_u4``
+ - ``v_dot2_f32_f16``
+
+ - BVH Ray Tracing instructions
+ are not available on
+ ``gfx1013``
+
+
+ ``gfx10.3-generic`` ``amdgcn`` - ``gfx1030`` - wavefrontsize64 - Absolute flat No restrictions.
+ - ``gfx1031`` - cumode scratch
+ - ``gfx1032``
+ - ``gfx1033``
+ - ``gfx1034``
+ - ``gfx1035``
+ - ``gfx1036``
+
+
+ ``gfx11-generic`` ``amdgcn`` - ``gfx1100`` - wavefrontsize64 - Architected Various codegen pessimizations
+ - ``gfx1101`` - cumode flat scratch are applied to work around some
+ - ``gfx1102`` - Packed hazards specific to some targets
+ - ``gfx1103`` work-item within this family.
+ - ``gfx1150`` IDs
+ - ``gfx1151`` Not all VGPRs can be used on:
+
+ - ``gfx1100``
+ - ``gfx1101``
+ - ``gfx1151``
+
+ SALU floating point instructions
+ and single-use VGPR hint
+ instructions are not available
+ on:
+
+ - ``gfx1150``
+ - ``gfx1151``
+
+ SGPRs are not supported for src1
+ in dpp instructions for:
+
+ - ``gfx1150``
+ - ``gfx1151``
+ ==================== ============== ================= ================== ================= =================================
+
+
.. _amdgpu-target-features:
Target Features
@@ -533,7 +631,7 @@ generating the code. A mismatch of features may result in incorrect
execution, or a reduction in performance.
The target features supported by each processor is listed in
-:ref:`amdgpu-processor-table`.
+:ref:`amdgpu-processors`.
Target features are controlled by exactly one of the following Clang
options:
@@ -1443,6 +1541,7 @@ The AMDGPU backend uses the following ELF header:
- ``ELFABIVERSION_AMDGPU_HSA_V3``
- ``ELFABIVERSION_AMDGPU_HSA_V4``
- ``ELFABIVERSION_AMDGPU_HSA_V5``
+ - ``ELFABIVERSION_AMDGPU_HSA_V6``
- ``ELFABIVERSION_AMDGPU_PAL``
- ``ELFABIVERSION_AMDGPU_MESA3D``
``e_type`` - ``ET_REL``
@@ -1451,7 +1550,8 @@ The AMDGPU backend uses the following ELF header:
``e_entry`` 0
``e_flags`` See :ref:`amdgpu-elf-header-e_flags-v2-table`,
:ref:`amdgpu-elf-header-e_flags-table-v3`,
- and :ref:`amdgpu-elf-header-e_flags-table-v4-onwards`
+ :ref:`amdgpu-elf-header-e_flags-table-v4-v5`,
+ and :ref:`amdgpu-elf-header-e_flags-table-v6-onwards`
========================== ===============================
..
@@ -1471,6 +1571,7 @@ The AMDGPU backend uses the following ELF header:
``ELFABIVERSION_AMDGPU_HSA_V3`` 1
``ELFABIVERSION_AMDGPU_HSA_V4`` 2
``ELFABIVERSION_AMDGPU_HSA_V5`` 3
+ ``ELFABIVERSION_AMDGPU_HSA_V6`` 4
``ELFABIVERSION_AMDGPU_PAL`` 0
``ELFABIVERSION_AMDGPU_MESA3D`` 0
=============================== =====
@@ -1517,6 +1618,10 @@ The AMDGPU backend uses the following ELF header:
``-mcode-object-version=5``. This is the default code object
version if not specified.
+ * ``ELFABIVERSION_AMDGPU_HSA_V6`` is used to specify the version of AMD HSA
+ runtime ABI for code object V6. Specify using the Clang option
+ ``-mcode-object-version=6``.
+
* ``ELFABIVERSION_AMDGPU_PAL`` is used to specify the version of AMD PAL
runtime ABI.
@@ -1543,8 +1648,9 @@ The AMDGPU backend uses the following ELF header:
``NT_AMD_HSA_ISA_VERSION`` note record for code object V2 (see
:ref:`amdgpu-note-records-v2`) and in the ``EF_AMDGPU_MACH`` bit field of the
``e_flags`` for code object V3 and above (see
- :ref:`amdgpu-elf-header-e_flags-table-v3` and
- :ref:`amdgpu-elf-header-e_flags-table-v4-onwards`).
+ :ref:`amdgpu-elf-header-e_flags-table-v3`,
+ :ref:`amdgpu-elf-header-e_flags-table-v4-v5` and
+ :ref:`amdgpu-elf-header-e_flags-table-v6-onwards`).
``e_entry``
The entry point is 0 as the entry points for individual kernels must be
@@ -1615,8 +1721,8 @@ The AMDGPU backend uses the following ELF header:
:ref:`amdgpu-target-features`.
================================= ===== =============================
- .. table:: AMDGPU ELF Header ``e_flags`` for Code Object V4 and After
- :name: amdgpu-elf-header-e_flags-table-v4-onwards
+ .. table:: AMDGPU ELF Header ``e_flags`` for Code Object V4 and V5
+ :name: amdgpu-elf-header-e_flags-table-v4-v5
============================================ ===== ===================================
Name Value Description
@@ -1642,80 +1748,118 @@ The AMDGPU backend uses the following ELF header:
``EF_AMDGPU_FEATURE_SRAMECC_ON_V4`` 0xc00 SRAMECC enabled.
============================================ ===== ===================================
+ .. table:: AMDGPU ELF Header ``e_flags`` for Code Object V6 and After
+ :name: amdgpu-elf-header-e_flags-table-v6-onwards
+
+ ============================================ ========== =========================================
+ Name Value Description
+ ============================================ ========== =========================================
+ ``EF_AMDGPU_MACH`` 0x0ff AMDGPU processor selection
+ mask for
+ ``EF_AMDGPU_MACH_xxx`` values
+ defined in
+ :ref:`amdgpu-ef-amdgpu-mach-table`.
+ ``EF_AMDGPU_FEATURE_XNACK_V4`` 0x300 XNACK selection mask for
+ ``EF_AMDGPU_FEATURE_XNACK_*_V4``
+ values.
+ ``EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4`` 0x000 XNACK unsupported.
+ ``EF_AMDGPU_FEATURE_XNACK_ANY_V4`` 0x100 XNACK can have any value.
+ ``EF_AMDGPU_FEATURE_XNACK_OFF_V4`` 0x200 XNACK disabled.
+ ``EF_AMDGPU_FEATURE_XNACK_ON_V4`` 0x300 XNACK enabled.
+ ``EF_AMDGPU_FEATURE_SRAMECC_V4`` 0xc00 SRAMECC selection mask for
+ ``EF_AMDGPU_FEATURE_SRAMECC_*_V4``
+ values.
+ ``EF_AMDGPU_FEATURE_SRAMECC_UNSUPPORTED_V4`` 0x000 SRAMECC unsupported.
+ ``EF_AMDGPU_FEATURE_SRAMECC_ANY_V4`` 0x400 SRAMECC can have any value.
+ ``EF_AMDGPU_FEATURE_SRAMECC_OFF_V4`` 0x800 SRAMECC disabled,
+ ``EF_AMDGPU_FEATURE_SRAMECC_ON_V4`` 0xc00 SRAMECC enabled.
+ ``EF_AMDGPU_GENERIC_VERSION_V<X>`` 0x01000000 Value between 1 and 255 for generic code
+ to object versioning, stored in the MSB.
+ 0xff000000 See :ref:`amdgpu-generic-processor-table`
+ ============================================ ========== =========================================
+
.. table:: AMDGPU ``EF_AMDGPU_MACH`` Values
:name: amdgpu-ef-amdgpu-mach-table
- ==================================== ========== =============================
- Name Value Description (see
- :ref:`amdgpu-processor-table`)
- ==================================== ========== =============================
- ``EF_AMDGPU_MACH_NONE`` 0x000 *not specified*
- ``EF_AMDGPU_MACH_R600_R600`` 0x001 ``r600``
- ``EF_AMDGPU_MACH_R600_R630`` 0x002 ``r630``
- ``EF_AMDGPU_MACH_R600_RS880`` 0x003 ``rs880``
- ``EF_AMDGPU_MACH_R600_RV670`` 0x004 ``rv670``
- ``EF_AMDGPU_MACH_R600_RV710`` 0x005 ``rv710``
- ``EF_AMDGPU_MACH_R600_RV730`` 0x006 ``rv730``
- ``EF_AMDGPU_MACH_R600_RV770`` 0x007 ``rv770``
- ``EF_AMDGPU_MACH_R600_CEDAR`` 0x008 ``cedar``
- ``EF_AMDGPU_MACH_R600_CYPRESS`` 0x009 ``cypress``
- ``EF_AMDGPU_MACH_R600_JUNIPER`` 0x00a ``juniper``
- ``EF_AMDGPU_MACH_R600_REDWOOD`` 0x00b ``redwood``
- ``EF_AMDGPU_MACH_R600_SUMO`` 0x00c ``sumo``
- ``EF_AMDGPU_MACH_R600_BARTS`` 0x00d ``barts``
- ``EF_AMDGPU_MACH_R600_CAICOS`` 0x00e ``caicos``
- ``EF_AMDGPU_MACH_R600_CAYMAN`` 0x00f ``cayman``
- ``EF_AMDGPU_MACH_R600_TURKS`` 0x010 ``turks``
- *reserved* 0x011 - Reserved for ``r600``
- 0x01f architecture processors.
- ``EF_AMDGPU_MACH_AMDGCN_GFX600`` 0x020 ``gfx600``
- ``EF_AMDGPU_MACH_AMDGCN_GFX601`` 0x021 ``gfx601``
- ``EF_AMDGPU_MACH_AMDGCN_GFX700`` 0x022 ``gfx700``
- ``EF_AMDGPU_MACH_AMDGCN_GFX701`` 0x023 ``gfx701``
- ``EF_AMDGPU_MACH_AMDGCN_GFX702`` 0x024 ``gfx702``
- ``EF_AMDGPU_MACH_AMDGCN_GFX703`` 0x025 ``gfx703``
- ``EF_AMDGPU_MACH_AMDGCN_GFX704`` 0x026 ``gfx704``
- *reserved* 0x027 Reserved.
- ``EF_AMDGPU_MACH_AMDGCN_GFX801`` 0x028 ``gfx801``
- ``EF_AMDGPU_MACH_AMDGCN_GFX802`` 0x029 ``gfx802``
- ``EF_AMDGPU_MACH_AMDGCN_GFX803`` 0x02a ``gfx803``
- ``EF_AMDGPU_MACH_AMDGCN_GFX810`` 0x02b ``gfx810``
- ``EF_AMDGPU_MACH_AMDGCN_GFX900`` 0x02c ``gfx900``
- ``EF_AMDGPU_MACH_AMDGCN_GFX902`` 0x02d ``gfx902``
- ``EF_AMDGPU_MACH_AMDGCN_GFX904`` 0x02e ``gfx904``
- ``EF_AMDGPU_MACH_AMDGCN_GFX906`` 0x02f ``gfx906``
- ``EF_AMDGPU_MACH_AMDGCN_GFX908`` 0x030 ``gfx908``
- ``EF_AMDGPU_MACH_AMDGCN_GFX909`` 0x031 ``gfx909``
- ``EF_AMDGPU_MACH_AMDGCN_GFX90C`` 0x032 ``gfx90c``
- ``EF_AMDGPU_MACH_AMDGCN_GFX1010`` 0x033 ``gfx1010``
- ``EF_AMDGPU_MACH_AMDGCN_GFX1011`` 0x034 ``gfx1011``
- ``EF_AMDGPU_MACH_AMDGCN_GFX1012`` 0x035 ``gfx1012``
- ``EF_AMDGPU_MACH_AMDGCN_GFX1030`` 0x036 ``gfx1030``
- ``EF_AMDGPU_MACH_AMDGCN_GFX1031`` 0x037 ``gfx1031``
- ``EF_AMDGPU_MACH_AMDGCN_GFX1032`` 0x038 ``gfx1032``
- ``EF_AMDGPU_MACH_AMDGCN_GFX1033`` 0x039 ``gfx1033``
- ``EF_AMDGPU_MACH_AMDGCN_GFX602`` 0x03a ``gfx602``
- ``EF_AMDGPU_MACH_AMDGCN_GFX705`` 0x03b ``gfx705``
- ``EF_AMDGPU_MACH_AMDGCN_GFX805`` 0x03c ``gfx805``
- ``EF_AMDGPU_MACH_AMDGCN_GFX1035`` 0x03d ``gfx1035``
- ``EF_AMDGPU_MACH_AMDGCN_GFX1034`` 0x03e ``gfx1034``
- ``EF_AMDGPU_MACH_AMDGCN_GFX90A`` 0x03f ``gfx90a``
- ``EF_AMDGPU_MACH_AMDGCN_GFX940`` 0x040 ``gfx940``
- ``EF_AMDGPU_MACH_AMDGCN_GFX1100`` 0x041 ``gfx1100``
- ``EF_AMDGPU_MACH_AMDGCN_GFX1013`` 0x042 ``gfx1013``
- ``EF_AMDGPU_MACH_AMDGCN_GFX1150`` 0x043 ``gfx1150``
- ``EF_AMDGPU_MACH_AMDGCN_GFX1103`` 0x044 ``gfx1103``
- ``EF_AMDGPU_MACH_AMDGCN_GFX1036`` 0x045 ``gfx1036``
- ``EF_AMDGPU_MACH_AMDGCN_GFX1101`` 0x046 ``gfx1101``
- ``EF_AMDGPU_MACH_AMDGCN_GFX1102`` 0x047 ``gfx1102``
- ``EF_AMDGPU_MACH_AMDGCN_GFX1200`` 0x048 ``gfx1200``
- *reserved* 0x049 Reserved.
- ``EF_AMDGPU_MACH_AMDGCN_GFX1151`` 0x04a ``gfx1151``
- ``EF_AMDGPU_MACH_AMDGCN_GFX941`` 0x04b ``gfx941``
- ``EF_AMDGPU_MACH_AMDGCN_GFX942`` 0x04c ``gfx942``
- *reserved* 0x04d Reserved.
- ``EF_AMDGPU_MACH_AMDGCN_GFX1201`` 0x04e ``gfx1201``
- ==================================== ========== =============================
+ ========================================== ========== =============================
+ Name Value Description (see
+ :ref:`amdgpu-processor-table`)
+ ========================================== ========== =============================
+ ``EF_AMDGPU_MACH_NONE`` 0x000 *not specified*
+ ``EF_AMDGPU_MACH_R600_R600`` 0x001 ``r600``
+ ``EF_AMDGPU_MACH_R600_R630`` 0x002 ``r630``
+ ``EF_AMDGPU_MACH_R600_RS880`` 0x003 ``rs880``
+ ``EF_AMDGPU_MACH_R600_RV670`` 0x004 ``rv670``
+ ``EF_AMDGPU_MACH_R600_RV710`` 0x005 ``rv710``
+ ``EF_AMDGPU_MACH_R600_RV730`` 0x006 ``rv730``
+ ``EF_AMDGPU_MACH_R600_RV770`` 0x007 ``rv770``
+ ``EF_AMDGPU_MACH_R600_CEDAR`` 0x008 ``cedar``
+ ``EF_AMDGPU_MACH_R600_CYPRESS`` 0x009 ``cypress``
+ ``EF_AMDGPU_MACH_R600_JUNIPER`` 0x00a ``juniper``
+ ``EF_AMDGPU_MACH_R600_REDWOOD`` 0x00b ``redwood``
+ ``EF_AMDGPU_MACH_R600_SUMO`` 0x00c ``sumo``
+ ``EF_AMDGPU_MACH_R600_BARTS`` 0x00d ``barts``
+ ``EF_AMDGPU_MACH_R600_CAICOS`` 0x00e ``caicos``
+ ``EF_AMDGPU_MACH_R600_CAYMAN`` 0x00f ``cayman``
+ ``EF_AMDGPU_MACH_R600_TURKS`` 0x010 ``turks``
+ *reserved* 0x011 - Reserved for ``r600``
+ 0x01f architecture processors.
+ ``EF_AMDGPU_MACH_AMDGCN_GFX600`` 0x020 ``gfx600``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX601`` 0x021 ``gfx601``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX700`` 0x022 ``gfx700``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX701`` 0x023 ``gfx701``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX702`` 0x024 ``gfx702``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX703`` 0x025 ``gfx703``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX704`` 0x026 ``gfx704``
+ *reserved* 0x027 Reserved.
+ ``EF_AMDGPU_MACH_AMDGCN_GFX801`` 0x028 ``gfx801``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX802`` 0x029 ``gfx802``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX803`` 0x02a ``gfx803``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX810`` 0x02b ``gfx810``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX900`` 0x02c ``gfx900``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX902`` 0x02d ``gfx902``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX904`` 0x02e ``gfx904``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX906`` 0x02f ``gfx906``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX908`` 0x030 ``gfx908``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX909`` 0x031 ``gfx909``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX90C`` 0x032 ``gfx90c``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX1010`` 0x033 ``gfx1010``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX1011`` 0x034 ``gfx1011``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX1012`` 0x035 ``gfx1012``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX1030`` 0x036 ``gfx1030``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX1031`` 0x037 ``gfx1031``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX1032`` 0x038 ``gfx1032``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX1033`` 0x039 ``gfx1033``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX602`` 0x03a ``gfx602``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX705`` 0x03b ``gfx705``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX805`` 0x03c ``gfx805``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX1035`` 0x03d ``gfx1035``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX1034`` 0x03e ``gfx1034``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX90A`` 0x03f ``gfx90a``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX940`` 0x040 ``gfx940``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX1100`` 0x041 ``gfx1100``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX1013`` 0x042 ``gfx1013``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX1150`` 0x043 ``gfx1150``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX1103`` 0x044 ``gfx1103``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX1036`` 0x045 ``gfx1036``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX1101`` 0x046 ``gfx1101``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX1102`` 0x047 ``gfx1102``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX1200`` 0x048 ``gfx1200``
+ *reserved* 0x049 Reserved.
+ ``EF_AMDGPU_MACH_AMDGCN_GFX1151`` 0x04a ``gfx1151``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX941`` 0x04b ``gfx941``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX942`` 0x04c ``gfx942``
+ *reserved* 0x04d Reserved.
+ ``EF_AMDGPU_MACH_AMDGCN_GFX1201`` 0x04e ``gfx1201``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX1201`` 0x04e ``gfx1201``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX1201`` 0x04e ``gfx1201``
+ *reserved* 0x04f Reserved.
+ *reserved* 0x050 Reserved.
+ ``EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC`` 0x051 ``gfx9-generic``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC`` 0x052 ``gfx10.1-generic``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC`` 0x053 ``gfx10.3-generic``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC`` 0x054 ``gfx11-generic``
+ ========================================== ========== =============================
Sections
--------
@@ -4143,6 +4287,33 @@ Code object V5 metadata is the same as
====================== ============== ========= ================================
+.. _amdgpu-amdhsa-code-object-metadata-v6:
+
+Code Object V6 Metadata
++++++++++++++++++++++++
+
+.. warning::
+ Code object V6 is not the default code object version emitted by this version
+ of LLVM.
+
+
+Code object V6 metadata is the same as
+:ref:`amdgpu-amdhsa-code-object-metadata-v5` with the changes defined in table
+:ref:`amdgpu-amdhsa-code-object-metadata-map-table-v6`.
+
+ .. table:: AMDHSA Code Object V6 Metadata Map Changes
+ :name: amdgpu-amdhsa-code-object-metadata-map-table-v6
+
+ ================= ============== ========= =======================================
+ String Key Value Type Required? Description
+ ================= ============== ========= =======================================
+ "amdhsa.version" sequence of Required - The first integer is the major
+ 2 integers version. Currently 1.
+ - The second integer is the minor
+ version. Currently 3.
+ ================= ============== ========= =======================================
+
+
..
Kernel Dispatch
diff --git a/llvm/include/llvm/BinaryFormat/ELF.h b/llvm/include/llvm/BinaryFormat/ELF.h
index efd41f9812baa0..3eddaee4f7d1ad 100644
--- a/llvm/include/llvm/BinaryFormat/ELF.h
+++ b/llvm/include/llvm/BinaryFormat/ELF.h
@@ -790,11 +790,15 @@ enum : unsigned {
EF_AMDGPU_MACH_AMDGCN_GFX1201 = 0x04e,
EF_AMDGPU_MACH_AMDGCN_RESERVED_0X4F = 0x04f,
EF_AMDGPU_MACH_AMDGCN_RESERVED_0X50 = 0x050,
+ EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC = 0x051,
+ EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC = 0x052,
+ EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC = 0x053,
+ EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC = 0x054,
// clang-format on
// First/last AMDGCN-based processors.
EF_AMDGPU_MACH_AMDGCN_FIRST = EF_AMDGPU_MACH_AMDGCN_GFX600,
- EF_AMDGPU_MACH_AMDGCN_LAST = EF_AMDGPU_MACH_AMDGCN_GFX1201,
+ EF_AMDGPU_MACH_AMDGCN_LAST = EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC,
// Indicates if the "xnack" target feature is enabled for all code contained
// in the object.
diff --git a/llvm/include/llvm/TargetParser/TargetParser.h b/llvm/include/llvm/TargetParser/TargetParser.h
index 6464285980f00a..7da11701fefb8b 100644
--- a/llvm/include/llvm/TargetParser/TargetParser.h
+++ b/llvm/include/llvm/TargetParser/TargetParser.h
@@ -111,6 +111,14 @@ enum GPUKind : uint32_t {
GK_AMDGCN_FIRST = GK_GFX600,
GK_AMDGCN_LAST = GK_GFX1201,
+
+ GK_GFX9_GENERIC = 192,
+ GK_GFX10_1_GENERIC = 193,
+ GK_GFX10_3_GENERIC = 194,
+ GK_GFX11_GENERIC = 195,
+
+ GK_AMDGCN_GENERIC_FIRST = GK_GFX9_GENERIC,
+ GK_AMDGCN_GENERIC_LAST = GK_GFX11_GENERIC,
};
/// Instruction set architecture version.
@@ -147,6 +155,8 @@ enum ArchFeatureKind : uint32_t {
FEATURE_WGP = 1 << 9,
};
+StringRef getArchFamilyNameAMDGCN(GPUKind AK);
+
StringRef getArchNameAMDGCN(GPUKind AK);
StringRef getArchNameR600(GPUKind AK);
StringRef getCanonicalArchName(const Triple &T, StringRef Arch);
diff --git a/llvm/lib/Object/ELFObjectFile.cpp b/llvm/lib/Object/ELFObjectFile.cpp
index 38a9e0e99b6b73..01949c6dad81fc 100644
--- a/llvm/lib/Object/ELFObjectFile.cpp
+++ b/llvm/lib/Object/ELFObjectFile.cpp
@@ -514,6 +514,16 @@ StringRef ELFObjectFileBase::getAMDGPUCPUName() const {
return "gfx1200";
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1201:
return "gfx1201";
+
+ // Generic AMDGCN targets
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC:
+ return "gfx9-generic";
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC:
+ return "gfx10.1-generic";
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC:
+ return "gfx10.3-generic";
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC:
+ return "gfx11-generic";
default:
llvm_unreachable("Unknown EF_AMDGPU_MACH value");
}
diff --git a/llvm/lib/ObjectYAML/ELFYAML.cpp b/llvm/lib/ObjectYAML/ELFYAML.cpp
index 1436e920c01124..de1ef2458152c8 100644
--- a/llvm/lib/ObjectYAML/ELFYAML.cpp
+++ b/llvm/lib/ObjectYAML/ELFYAML.cpp
@@ -612,6 +612,10 @@ void ScalarBitSetTraits<ELFYAML::ELF_EF>::bitset(IO &IO,
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1151, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1200, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1201, EF_AMDGPU_MACH);
+ BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC, EF_AMDGPU_MACH);
+ BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC, EF_AMDGPU_MACH);
+ BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC, EF_AMDGPU_MACH);
+ BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC, EF_AMDGPU_MACH);
switch (Object->Header.ABIVersion) {
default:
// ELFOSABI_AMDGPU_PAL, ELFOSABI_AMDGPU_MESA3D support *_V3 flags.
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 55dbc1a803e13c..4ab2b124ef5304 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -1002,6 +1002,12 @@ def FeatureGWS : SubtargetFeature<"gws",
"Has Global Wave Sync"
>;
+def FeatureRequiresCOV6 : SubtargetFeature<"requires-cov6",
+ "RequiresCOV6",
+ "true",
+ "Target Requires Code Object V6"
+>;
+
// Dummy feature used to disable assembler instructions.
def FeatureDisable : SubtargetFeature<"",
"FeatureDisable","true",
@@ -1212,6 +1218,17 @@ def FeatureISAVersion9_0_Common : FeatureSet<
FeatureImageInsts,
FeatureMadMacF32Insts]>;
+def FeatureISAVersion9_0_Consumer_Common : FeatureSet<
+ !listconcat(FeatureISAVersion9_0_Common.Features,
+ [FeatureImageGather4D16Bug,
+ FeatureDsSrc2Insts,
+ FeatureExtendedImageInsts,
+ FeatureGDS])>;
+
+def FeatureISAVersion9_Generic : FeatureSet<
+ !listconcat(FeatureISAVersion9_0_Consumer_Common.Features,
+ [FeatureRequiresCOV6])>;
+
def FeatureISAVersion9_0_MI_Common : FeatureSet<
!listconcat(FeatureISAVersion9_0_Common.Features,
[FeatureFmaMixInsts,
@@ -1230,43 +1247,27 @@ def FeatureISAVersion9_0_MI_Common : FeatureSet<
FeatureSupportsSRAMECC])>;
def FeatureISAVersion9_0_0 : FeatureSet<
- !listconcat(FeatureISAVersion9_0_Common.Features,
- [FeatureGDS,
- FeatureMadMixInsts,
- FeatureDsSrc2Insts,
- FeatureExtendedImageInsts,
- FeatureImageGather4D16Bug])>;
+ !listconcat(FeatureISAVersion9_0_Consumer_Common.Features,
+ [FeatureMadMixInsts])>;
def FeatureISAVersion9_0_2 : FeatureSet<
- !listconcat(FeatureISAVersion9_0_Common.Features,
- [FeatureGDS,
- FeatureMadMixInsts,
- FeatureDsSrc2Insts,
- FeatureExtendedImageInsts,
- FeatureImageGather4D16Bug])>;
+ !listconcat(FeatureISAVersion9_0_Consumer_Common.Features,
+ [FeatureMadMixInsts])>;
def FeatureISAVersion9_0_4 : FeatureSet<
- !listconcat(FeatureISAVersion9_0_Common.Features,
- [FeatureGDS,
- FeatureDsSrc2Insts,
- FeatureExtendedImageInsts,
- FeatureFmaMixInsts,
- FeatureImageGather4D16Bug])>;
+ !listconcat(FeatureISAVersion9_0_Consumer_Common.Features,
+ [FeatureFmaMixInsts])>;
def FeatureISAVersion9_0_6 : FeatureSet<
- !listconcat(FeatureISAVersion9_0_Common.Features,
- [FeatureGDS,
- HalfRate64Ops,
+ !listconcat(FeatureISAVersion9_0_Consumer_Common.Features,
+ [HalfRate64Ops,
FeatureFmaMixInsts,
- FeatureDsSrc2Insts,
- FeatureExtendedImageInsts,
FeatureDLInsts,
FeatureDot1Insts,
FeatureDot2Insts,
FeatureDot7Insts,
FeatureDot10Insts,
- FeatureSupportsSRAMECC,
- FeatureImageGather4D16Bug])>;
+ FeatureSupportsSRAMECC])>;
def FeatureISAVersion9_0_8 : FeatureSet<
!listconcat(FeatureISAVersion9_0_MI_Common.Features,
@@ -1279,13 +1280,9 @@ def FeatureISAVersion9_0_8 : FeatureSet<
FeatureImageGather4D16Bug])>;
def FeatureISAVersion9_0_9 : FeatureSet<
- !listconcat(FeatureISAVersion9_0_Common.Features,
- [FeatureGDS,
- FeatureMadMixInsts,
- FeatureDsSrc2Insts,
- FeatureExtendedImageInsts,
- FeatureImageInsts,
- FeatureImageGather4D16Bug])>;
+ !listconcat(FeatureISAVersion9_0_Consumer_Common.Features,
+ [FeatureMadMixInsts,
+ FeatureImageInsts])>;
def FeatureISAVersion9_0_A : FeatureSet<
!listconcat(FeatureISAVersion9_0_MI_Common.Features,
@@ -1301,12 +1298,8 @@ def FeatureISAVersion9_0_A : FeatureSet<
FeatureKernargPreload])>;
def FeatureISAVersion9_0_C : FeatureSet<
- !listconcat(FeatureISAVersion9_0_Common.Features,
- [FeatureGDS,
- FeatureMadMixInsts,
- FeatureDsSrc2Insts,
- FeatureExtendedImageInsts,
- FeatureImageGather4D16Bug])>;
+ !listconcat(FeatureISAVersion9_0_Consumer_Common.Features,
+ [FeatureMadMixInsts])>;
def FeatureISAVersion9_4_Common : FeatureSet<
[FeatureGFX9,
@@ -1387,6 +1380,10 @@ def FeatureISAVersion10_1_Common : FeatureSet<
FeatureFlatSegmentOffsetBug,
FeatureNegativeUnalignedScratchOffsetBug])>;
+def FeatureISAVersion10_1_Generic : FeatureSet<
+ !listconcat(FeatureISAVersion10_1_Common.Features,
+ [FeatureRequiresCOV6])>;
+
def FeatureISAVersion10_1_0 : FeatureSet<
!listconcat(FeatureISAVersion10_1_Common.Features,
[])>;
@@ -1426,6 +1423,10 @@ def FeatureISAVersion10_3_0 : FeatureSet<
FeatureDot10Insts,
FeatureShaderCyclesRegister])>;
+def FeatureISAVersion10_3_Generic: FeatureSet<
+ !listconcat(FeatureISAVersion10_3_0.Features,
+ [FeatureRequiresCOV6])>;
+
def FeatureISAVersion11_Common : FeatureSet<
[FeatureGFX11,
FeatureLDSBankCount32,
@@ -1447,6 +1448,16 @@ def FeatureISAVersion11_Common : FeatureSet<
FeaturePackedTID,
FeatureVcmpxPermlaneHazard]>;
+// There are few workarounds that need to be
+// added to all targets. This pessimizes codegen
+// a bit on the generic GFX11 target.
+def FeatureISAVersion11_Generic: FeatureSet<
+ !listconcat(FeatureISAVersion11_Common.Features,
+ [FeatureMSAALoadDstSelBug,
+ FeatureVALUTransUseHazard,
+ FeatureUserSGPRInit16Bug,
+ FeatureRequiresCOV6])>;
+
def FeatureISAVersion11_0_Common : FeatureSet<
!listconcat(FeatureISAVersion11_Common.Features,
[FeatureMSAALoadDstSelBug,
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index db81e1ee9e3899..e5bc0233f534bd 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -156,6 +156,12 @@ void AMDGPUAsmPrinter::emitFunctionBodyStart() {
const GCNSubtarget &STM = MF->getSubtarget<GCNSubtarget>();
const Function &F = MF->getFunction();
+ // TODO: We're checking this late, would be nice to check it earlier.
+ if (STM.requiresCodeObjectV6() && CodeObjectVersion < AMDGPU::AMDHSA_COV6)
+ report_fatal_error(
+ STM.getCPU() + " is only available on code object version 6 or better",
+ /*gen_crash_diag*/ false);
+
// TODO: Which one is called first, emitStartOfAsmFile or
// emitFunctionBodyStart?
if (!getTargetStreamer()->getTargetID())
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURemoveIncompatibleFunctions.cpp b/llvm/lib/Target/AMDGPU/AMDGPURemoveIncompatibleFunctions.cpp
index 6f1236fd3b7dae..9d44b65d1698ca 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURemoveIncompatibleFunctions.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURemoveIncompatibleFunctions.cpp
@@ -139,10 +139,10 @@ bool AMDGPURemoveIncompatibleFunctions::checkFunction(Function &F) {
const GCNSubtarget *ST =
static_cast<const GCNSubtarget *>(TM->getSubtargetImpl(F));
- // Check the GPU isn't generic. Generic is used for testing only
- // and we don't want this pass to interfere with it.
+ // Check the GPU isn't generic or generic-hsa. Generic is used for testing
+ // only and we don't want this pass to interfere with it.
StringRef GPUName = ST->getCPU();
- if (GPUName.empty() || GPUName.contains("generic"))
+ if (GPUName.empty() || GPUName.starts_with("generic"))
return false;
// Try to fetch the GPU's info. If we can't, it's likely an unknown processor
diff --git a/llvm/lib/Target/AMDGPU/GCNProcessors.td b/llvm/lib/Target/AMDGPU/GCNProcessors.td
index 96af1a6aab3da7..4671e03d43b3ab 100644
--- a/llvm/lib/Target/AMDGPU/GCNProcessors.td
+++ b/llvm/lib/Target/AMDGPU/GCNProcessors.td
@@ -204,6 +204,11 @@ def : ProcessorModel<"gfx942", SIDPGFX940FullSpeedModel,
FeatureISAVersion9_4_2.Features
>;
+// [gfx900, gfx902, gfx904, gfx906, gfx909, gfx90c]
+def : ProcessorModel<"gfx9-generic", SIQuarterSpeedModel,
+ FeatureISAVersion9_Generic.Features
+>;
+
//===----------------------------------------------------------------------===//
// GCN GFX10.
//===----------------------------------------------------------------------===//
@@ -252,6 +257,16 @@ def : ProcessorModel<"gfx1036", GFX10SpeedModel,
FeatureISAVersion10_3_0.Features
>;
+// [gfx1010, gfx1011, gfx1012, gfx1013]
+def : ProcessorModel<"gfx10.1-generic", GFX10SpeedModel,
+ FeatureISAVersion10_1_Generic.Features
+>;
+
+// [gfx1030, gfx1031, gfx1032, gfx1033, gfx1034, gfx1035, gfx1036]
+def : ProcessorModel<"gfx10.3-generic", GFX10SpeedModel,
+ FeatureISAVersion10_3_Generic.Features
+>;
+
//===----------------------------------------------------------------------===//
// GCN GFX11.
//===----------------------------------------------------------------------===//
@@ -280,10 +295,17 @@ def : ProcessorModel<"gfx1151", GFX11SpeedModel,
FeatureISAVersion11_5_1.Features
>;
+// [gfx1100, gfx1101, gfx1102, gfx1103, gfx1150, gfx1151]
+def : ProcessorModel<"gfx11-generic", GFX11SpeedModel,
+ FeatureISAVersion11_Generic.Features
+>;
+
//===----------------------------------------------------------------------===//
// GCN GFX12.
//===----------------------------------------------------------------------===//
+// TODO: gfx12-generic ?
+
def : ProcessorModel<"gfx1200", GFX12SpeedModel,
FeatureISAVersion12.Features
>;
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index 4f8eeaaf500b4d..b13b4f7d8f9a2b 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -224,6 +224,8 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool HasVALUTransUseHazard = false;
bool HasForceStoreSC0SC1 = false;
+ bool RequiresCOV6 = false;
+
// Dummy feature to use for assembler in tablegen.
bool FeatureDisable = false;
@@ -1165,6 +1167,8 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool hasForceStoreSC0SC1() const { return HasForceStoreSC0SC1; }
+ bool requiresCodeObjectV6() const { return RequiresCOV6; }
+
bool hasVALUMaskWriteHazard() const { return getGeneration() == GFX11; }
/// Return if operations acting on VGPR tuples require even alignment.
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
index 5e9b1674d87dcb..a25622c06bfa9e 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
@@ -115,6 +115,10 @@ StringRef AMDGPUTargetStreamer::getArchNameFromElfMach(unsigned ElfMach) {
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1151: AK = GK_GFX1151; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1200: AK = GK_GFX1200; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1201: AK = GK_GFX1201; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC: AK = GK_GFX9_GENERIC; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC: AK = GK_GFX10_1_GENERIC; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC: AK = GK_GFX10_3_GENERIC; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC: AK = GK_GFX11_GENERIC; break;
case ELF::EF_AMDGPU_MACH_NONE: AK = GK_NONE; break;
default: AK = GK_NONE; break;
}
@@ -193,6 +197,10 @@ unsigned AMDGPUTargetStreamer::getElfMach(StringRef GPU) {
case GK_GFX1151: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1151;
case GK_GFX1200: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1200;
case GK_GFX1201: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1201;
+ case GK_GFX9_GENERIC: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC;
+ case GK_GFX10_1_GENERIC: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC;
+ case GK_GFX10_3_GENERIC: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC;
+ case GK_GFX11_GENERIC: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC;
case GK_NONE: return ELF::EF_AMDGPU_MACH_NONE;
}
// clang-format on
@@ -659,6 +667,24 @@ unsigned AMDGPUTargetELFStreamer::getEFlagsV6() {
unsigned Flags = getEFlagsV4();
unsigned Version = ForceGenericVersion;
+ if (!Version) {
+ switch (parseArchAMDGCN(STI.getCPU())) {
+ case AMDGPU::GK_GFX9_GENERIC:
+ Version = GenericVersion::GFX9;
+ break;
+ case AMDGPU::GK_GFX10_1_GENERIC:
+ Version = GenericVersion::GFX10_1;
+ break;
+ case AMDGPU::GK_GFX10_3_GENERIC:
+ Version = GenericVersion::GFX10_3;
+ break;
+ case AMDGPU::GK_GFX11_GENERIC:
+ Version = GenericVersion::GFX11;
+ break;
+ default:
+ break;
+ }
+ }
// Versions start at 1.
if (Version) {
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index f24b9f0e3615de..ded252c81af3ed 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -42,6 +42,17 @@ namespace AMDGPU {
struct IsaVersion;
+/// Generic target versions emitted by this version of LLVM.
+///
+/// These numbers are incremented every time a codegen breaking change occurs
+/// within a generic family.
+namespace GenericVersion {
+static constexpr unsigned GFX9 = 1;
+static constexpr unsigned GFX10_1 = 1;
+static constexpr unsigned GFX10_3 = 1;
+static constexpr unsigned GFX11 = 1;
+} // namespace GenericVersion
+
enum { AMDHSA_COV4 = 4, AMDHSA_COV5 = 5, AMDHSA_COV6 = 6 };
/// \returns True if \p STI is AMDHSA.
diff --git a/llvm/lib/TargetParser/TargetParser.cpp b/llvm/lib/TargetParser/TargetParser.cpp
index 20f324604aa52a..684d698521e595 100644
--- a/llvm/lib/TargetParser/TargetParser.cpp
+++ b/llvm/lib/TargetParser/TargetParser.cpp
@@ -126,6 +126,11 @@ constexpr GPUInfo AMDGCNGPUs[] = {
{{"gfx1151"}, {"gfx1151"}, GK_GFX1151, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
{{"gfx1200"}, {"gfx1200"}, GK_GFX1200, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
{{"gfx1201"}, {"gfx1201"}, GK_GFX1201, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
+
+ {{"gfx9-generic"}, {"gfx9-generic"}, GK_GFX9_GENERIC, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK},
+ {{"gfx10.1-generic"}, {"gfx10.1-generic"}, GK_GFX10_1_GENERIC, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK|FEATURE_WGP},
+ {{"gfx10.3-generic"}, {"gfx10.3-generic"}, GK_GFX10_3_GENERIC, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
+ {{"gfx11-generic"}, {"gfx11-generic"}, GK_GFX11_GENERIC, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_WGP},
// clang-format on
};
@@ -144,6 +149,22 @@ const GPUInfo *getArchEntry(AMDGPU::GPUKind AK, ArrayRef<GPUInfo> Table) {
} // namespace
+StringRef llvm::AMDGPU::getArchFamilyNameAMDGCN(GPUKind AK) {
+ switch (AK) {
+ case AMDGPU::GK_GFX9_GENERIC:
+ return "gfx9";
+ case AMDGPU::GK_GFX10_1_GENERIC:
+ case AMDGPU::GK_GFX10_3_GENERIC:
+ return "gfx10";
+ case AMDGPU::GK_GFX11_GENERIC:
+ return "gfx11";
+ default: {
+ StringRef ArchName = getArchNameAMDGCN(AK);
+ return ArchName.empty() ? "" : ArchName.drop_back(2);
+ }
+ }
+}
+
StringRef llvm::AMDGPU::getArchNameAMDGCN(GPUKind AK) {
if (const auto *Entry = getArchEntry(AK, AMDGCNGPUs))
return Entry->CanonicalName;
@@ -253,6 +274,24 @@ AMDGPU::IsaVersion AMDGPU::getIsaVersion(StringRef GPU) {
case GK_GFX1151: return {11, 5, 1};
case GK_GFX1200: return {12, 0, 0};
case GK_GFX1201: return {12, 0, 1};
+
+ // Generic targets return the lowest common denominator
+ // within their family. That is, the ISA that is the most
+ // restricted in terms of features.
+ //
+ // gfx9-generic is tricky because there is no lowest
+ // common denominator, so we return gfx900 which has mad-mix
+ // but this family doesn't have it.
+ //
+ // This API should never be used to check for a particular
+ // feature anyway.
+ //
+ // TODO: Split up this API depending on its caller so
+ // generic target handling is more obvious and less risky.
+ case GK_GFX9_GENERIC: return {9, 0, 0};
+ case GK_GFX10_1_GENERIC: return {10, 1, 0};
+ case GK_GFX10_3_GENERIC: return {10, 3, 0};
+ case GK_GFX11_GENERIC: return {11, 0, 3};
default: return {0, 0, 0};
}
// clang-format on
@@ -302,6 +341,7 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
case GK_GFX1102:
case GK_GFX1101:
case GK_GFX1100:
+ case GK_GFX11_GENERIC:
Features["ci-insts"] = true;
Features["dot5-insts"] = true;
Features["dot7-insts"] = true;
@@ -327,6 +367,7 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
case GK_GFX1032:
case GK_GFX1031:
case GK_GFX1030:
+ case GK_GFX10_3_GENERIC:
Features["ci-insts"] = true;
Features["dot1-insts"] = true;
Features["dot2-insts"] = true;
@@ -357,6 +398,7 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
[[fallthrough]];
case GK_GFX1013:
case GK_GFX1010:
+ case GK_GFX10_1_GENERIC:
Features["dl-insts"] = true;
Features["ci-insts"] = true;
Features["16-bit-insts"] = true;
@@ -424,6 +466,7 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
case GK_GFX904:
case GK_GFX902:
case GK_GFX900:
+ case GK_GFX9_GENERIC:
Features["gfx9-insts"] = true;
[[fallthrough]];
case GK_GFX810:
@@ -510,6 +553,9 @@ static bool isWave32Capable(StringRef GPU, const Triple &T) {
case GK_GFX1011:
case GK_GFX1013:
case GK_GFX1010:
+ case GK_GFX11_GENERIC:
+ case GK_GFX10_3_GENERIC:
+ case GK_GFX10_1_GENERIC:
IsWave32Capable = true;
break;
default:
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workitem.id.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workitem.id.ll
index 155f4c92f97d37..4e921f25620544 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workitem.id.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workitem.id.ll
@@ -6,6 +6,7 @@
; RUN: llc -global-isel -mtriple=amdgcn-unknown-mesa3d -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=ALL,MESA3D,UNPACKED %s
; RUN: llc -global-isel -mtriple=amdgcn -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -check-prefixes=ALL,PACKED-TID %s
; RUN: llc -global-isel -mtriple=amdgcn -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1100 -verify-machineinstrs -amdgpu-enable-vopd=0 < %s | FileCheck -check-prefixes=ALL,PACKED-TID %s
+; RUN: llc -global-isel -mtriple=amdgcn -mtriple=amdgcn-unknown-amdhsa --amdhsa-code-object-version=6 -mcpu=gfx11-generic -verify-machineinstrs -amdgpu-enable-vopd=0 < %s | FileCheck -check-prefixes=ALL,PACKED-TID %s
declare i32 @llvm.amdgcn.workitem.id.x() #0
declare i32 @llvm.amdgcn.workitem.id.y() #0
diff --git a/llvm/test/CodeGen/AMDGPU/directive-amdgcn-target.ll b/llvm/test/CodeGen/AMDGPU/directive-amdgcn-target.ll
index 357fcf8ef15617..038219fc374047 100644
--- a/llvm/test/CodeGen/AMDGPU/directive-amdgcn-target.ll
+++ b/llvm/test/CodeGen/AMDGPU/directive-amdgcn-target.ll
@@ -108,6 +108,13 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1200 < %s | FileCheck --check-prefixes=GFX1200 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1201 < %s | FileCheck --check-prefixes=GFX1201 %s
+; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx9-generic -mattr=-xnack < %s | FileCheck --check-prefixes=GFX9_GENERIC_NOXNACK %s
+; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx9-generic -mattr=+xnack < %s | FileCheck --check-prefixes=GFX9_GENERIC_XNACK %s
+; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx10.1-generic -mattr=-xnack < %s | FileCheck --check-prefixes=GFX10_1_GENERIC_NOXNACK %s
+; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx10.1-generic -mattr=+xnack < %s | FileCheck --check-prefixes=GFX10_1_GENERIC_XNACK %s
+; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx10.3-generic < %s | FileCheck --check-prefixes=GFX10_3_GENERIC %s
+; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx11-generic < %s | FileCheck --check-prefixes=GFX11_GENERIC %s
+
; GFX600: .amdgcn_target "amdgcn-amd-amdhsa--gfx600"
; GFX601: .amdgcn_target "amdgcn-amd-amdhsa--gfx601"
; GFX602: .amdgcn_target "amdgcn-amd-amdhsa--gfx602"
@@ -196,6 +203,13 @@
; GFX1200: .amdgcn_target "amdgcn-amd-amdhsa--gfx1200"
; GFX1201: .amdgcn_target "amdgcn-amd-amdhsa--gfx1201"
+; GFX9_GENERIC_NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx9-generic:xnack-"
+; GFX9_GENERIC_XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx9-generic:xnack+"
+; GFX10_1_GENERIC_NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx10.1-generic:xnack-"
+; GFX10_1_GENERIC_XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx10.1-generic:xnack+"
+; GFX10_3_GENERIC: .amdgcn_target "amdgcn-amd-amdhsa--gfx10.3-generic"
+; GFX11_GENERIC: .amdgcn_target "amdgcn-amd-amdhsa--gfx11-generic"
+
define amdgpu_kernel void @directive_amdgcn_target() {
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/elf-header-flags-mach.ll b/llvm/test/CodeGen/AMDGPU/elf-header-flags-mach.ll
index 380439d8cd9c65..9ba8176947174c 100644
--- a/llvm/test/CodeGen/AMDGPU/elf-header-flags-mach.ll
+++ b/llvm/test/CodeGen/AMDGPU/elf-header-flags-mach.ll
@@ -77,6 +77,11 @@
; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx1200 < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX1200 %s
; RUN: llc -filetype=obj -mtriple=amdgcn -mcpu=gfx1201 < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX1201 %s
+; RUN: llc -filetype=obj --amdhsa-code-object-version=6 -mtriple=amdgcn -mcpu=gfx9-generic < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX9_GENERIC %s
+; RUN: llc -filetype=obj --amdhsa-code-object-version=6 -mtriple=amdgcn -mcpu=gfx10.1-generic < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX10_1_GENERIC %s
+; RUN: llc -filetype=obj --amdhsa-code-object-version=6 -mtriple=amdgcn -mcpu=gfx10.3-generic < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX10_3_GENERIC %s
+; RUN: llc -filetype=obj --amdhsa-code-object-version=6 -mtriple=amdgcn -mcpu=gfx11-generic < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX11_GENERIC %s
+
; FIXME: With the default attributes the eflags are not accurate for
; xnack and sramecc. Subsequent Target-ID patches will address this.
@@ -149,6 +154,11 @@
; GFX1151: EF_AMDGPU_MACH_AMDGCN_GFX1151 (0x4A)
; GFX1200: EF_AMDGPU_MACH_AMDGCN_GFX1200 (0x48)
; GFX1201: EF_AMDGPU_MACH_AMDGCN_GFX1201 (0x4E)
+
+; GFX9_GENERIC: EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC (0x51)
+; GFX10_1_GENERIC: EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC (0x52)
+; GFX10_3_GENERIC: EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC (0x53)
+; GFX11_GENERIC: EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC (0x54)
; ALL: ]
define amdgpu_kernel void @elf_header() {
diff --git a/llvm/test/CodeGen/AMDGPU/gds-allocation.ll b/llvm/test/CodeGen/AMDGPU/gds-allocation.ll
index dc6fea4ba1a379..1a9334706cb927 100644
--- a/llvm/test/CodeGen/AMDGPU/gds-allocation.ll
+++ b/llvm/test/CodeGen/AMDGPU/gds-allocation.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -amdgpu-atomic-optimizer-strategy=None -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx9-generic --amdhsa-code-object-version=6 -amdgpu-atomic-optimizer-strategy=None -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
@gds0 = internal addrspace(2) global [4 x i32] undef, align 4
@lds0 = internal addrspace(3) global [4 x i32] undef, align 128
diff --git a/llvm/test/CodeGen/AMDGPU/gds-atomic.ll b/llvm/test/CodeGen/AMDGPU/gds-atomic.ll
index 3e4e6938d72eb1..8d44330b1b9733 100644
--- a/llvm/test/CodeGen/AMDGPU/gds-atomic.ll
+++ b/llvm/test/CodeGen/AMDGPU/gds-atomic.ll
@@ -2,6 +2,7 @@
; RUN: llc -mtriple=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,FUNC %s
; RUN: llc -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,FUNC %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,FUNC %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx9-generic --amdhsa-code-object-version=6 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,FUNC %s
; FUNC-LABEL: {{^}}atomic_add_ret_gds:
; GCN-DAG: v_mov_b32_e32 v[[OFF:[0-9]+]], s
diff --git a/llvm/test/CodeGen/AMDGPU/generic-targets-require-v6.ll b/llvm/test/CodeGen/AMDGPU/generic-targets-require-v6.ll
new file mode 100644
index 00000000000000..e3f4b14bac0c16
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/generic-targets-require-v6.ll
@@ -0,0 +1,18 @@
+; RUN: not llc -march=amdgcn -mcpu=gfx9-generic --amdhsa-code-object-version=5 -o - %s 2>&1 | FileCheck --check-prefix=GFX9-V5 %s
+; RUN: not llc -march=amdgcn -mcpu=gfx10.1-generic --amdhsa-code-object-version=5 -o - %s 2>&1 | FileCheck --check-prefix=GFX101-V5 %s
+; RUN: not llc -march=amdgcn -mcpu=gfx10.3-generic --amdhsa-code-object-version=5 -o - %s 2>&1 | FileCheck --check-prefix=GFX103-V5 %s
+; RUN: not llc -march=amdgcn -mcpu=gfx11-generic --amdhsa-code-object-version=5 -o - %s 2>&1 | FileCheck --check-prefix=GFX11-V5 %s
+
+; RUN: llc -march=amdgcn -mcpu=gfx9-generic --amdhsa-code-object-version=6 -o - %s
+; RUN: llc -march=amdgcn -mcpu=gfx10.1-generic --amdhsa-code-object-version=6 -o - %s
+; RUN: llc -march=amdgcn -mcpu=gfx10.3-generic --amdhsa-code-object-version=6 -o - %s
+; RUN: llc -march=amdgcn -mcpu=gfx11-generic --amdhsa-code-object-version=6 -o - %s
+
+; GFX9-V5: gfx9-generic is only available on code object version 6 or better
+; GFX101-V5: gfx10.1-generic is only available on code object version 6 or better
+; GFX103-V5: gfx10.3-generic is only available on code object version 6 or better
+; GFX11-V5: gfx11-generic is only available on code object version 6 or better
+
+define void @foo() {
+ ret void
+}
diff --git a/llvm/test/CodeGen/AMDGPU/hsa-generic-target-features.ll b/llvm/test/CodeGen/AMDGPU/hsa-generic-target-features.ll
new file mode 100644
index 00000000000000..4fee563d1cc93b
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/hsa-generic-target-features.ll
@@ -0,0 +1,31 @@
+; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx10.1-generic -mattr=+cumode < %s | FileCheck -check-prefix=NOCU %s
+; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx10.1-generic < %s | FileCheck -check-prefix=CU %s
+; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx10.3-generic -mattr=+cumode < %s | FileCheck -check-prefix=NOCU %s
+; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx10.3-generic < %s | FileCheck -check-prefix=CU %s
+; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx11-generic -mattr=+cumode < %s | FileCheck -check-prefix=NOCU %s
+; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx11-generic < %s | FileCheck -check-prefix=CU %s
+
+; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx10.1-generic -mattr=+wavefrontsize32,-wavefrontsize64 < %s | FileCheck -check-prefix=W32 %s
+; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx10.1-generic -mattr=-wavefrontsize32,+wavefrontsize64 < %s | FileCheck -check-prefix=W64 %s
+; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx10.3-generic -mattr=+wavefrontsize32,-wavefrontsize64 < %s | FileCheck -check-prefix=W32 %s
+; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx10.3-generic -mattr=-wavefrontsize32,+wavefrontsize64 < %s | FileCheck -check-prefix=W64 %s
+; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx11-generic -mattr=+wavefrontsize32,-wavefrontsize64 < %s | FileCheck -check-prefix=W32 %s
+; RUN: llc --amdhsa-code-object-version=6 -mtriple=amdgcn-amd-amdhsa -mcpu=gfx11-generic -mattr=-wavefrontsize32,+wavefrontsize64 < %s | FileCheck -check-prefix=W64 %s
+
+; Checks 10.1, 10.3 and 11 generic targets allow cumode/wave64.
+
+; NOCU: .amdhsa_workgroup_processor_mode 0
+; NOCU: .workgroup_processor_mode: 0
+; CU: .amdhsa_workgroup_processor_mode 1
+; CU: .workgroup_processor_mode: 1
+
+; W64: .amdhsa_wavefront_size32 0
+; W32: .amdhsa_wavefront_size32 1
+
+define amdgpu_kernel void @wavefrontsize() {
+entry:
+ ret void
+}
+
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"amdgpu_code_object_version", i32 600}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.d16.dim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.d16.dim.ll
index 91284d3838675c..cf324d62e1de1e 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.d16.dim.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.gather4.d16.dim.ll
@@ -1,8 +1,11 @@
; RUN: llc < %s -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck -check-prefixes=GCN,UNPACKED %s
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx810 -verify-machineinstrs | FileCheck --check-prefix=GCN %s
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs | FileCheck -check-prefixes=GCN,GFX9 %s
+; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx9-generic --amdhsa-code-object-version=6 -verify-machineinstrs | FileCheck -check-prefixes=GCN,GFX9 %s
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck -check-prefixes=GCN,GFX10 %s
+; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx10.1-generic --amdhsa-code-object-version=6 -verify-machineinstrs | FileCheck -check-prefixes=GCN,GFX10 %s
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs | FileCheck -check-prefixes=GCN,GFX10 %s
+; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx11-generic --amdhsa-code-object-version=6 -verify-machineinstrs | FileCheck -check-prefixes=GCN,GFX10 %s
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs | FileCheck -check-prefixes=GCN,GFX12 %s
; GCN-LABEL: {{^}}image_gather4_b_2d_v4f16:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll
index 7dc139ef969740..10e1ae3ecfcbd4 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.image.sample.dim.ll
@@ -1,8 +1,11 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefixes=VERDE %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX6789 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx9-generic --amdhsa-code-object-version=6 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX6789 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx10.1-generic --amdhsa-code-object-version=6 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx11-generic --amdhsa-code-object-version=6 -amdgpu-enable-delay-alu=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -amdgpu-enable-delay-alu=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12 %s
define amdgpu_ps <4 x float> @sample_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) {
diff --git a/llvm/test/CodeGen/AMDGPU/unsupported-image-sample.ll b/llvm/test/CodeGen/AMDGPU/unsupported-image-sample.ll
index 0878fc689cd4bd..b08586efe2f217 100644
--- a/llvm/test/CodeGen/AMDGPU/unsupported-image-sample.ll
+++ b/llvm/test/CodeGen/AMDGPU/unsupported-image-sample.ll
@@ -1,15 +1,13 @@
-; RUN: llc -O0 -mtriple=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX906 %s
-; RUN: llc -O0 -mtriple=amdgcn -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX908 %s
+; RUN: llc -O0 -mtriple=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9 %s
+; RUN: llc -O0 -mtriple=amdgcn -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9 %s
+; RUN: llc -O0 -mtriple=amdgcn -mcpu=gfx9-generic --amdhsa-code-object-version=6 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9 %s
; RUN: not --crash llc -O0 -mtriple=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefixes=GFX90A %s
; RUN: not --crash llc -O0 -mtriple=amdgcn -mcpu=gfx940 -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefixes=GFX940 %s
; RUN: llc -O0 -mtriple=amdgcn -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX1030 %s
; RUN: llc -O0 -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX1100 %s
-; GFX906-LABEL: image_sample_test:
-; GFX906: image_sample_lz
-
-; GFX908-LABEL: image_sample_test:
-; GFX908: image_sample_lz
+; GFX9-LABEL: image_sample_test:
+; GFX9: image_sample_lz
; GFX90A: LLVM ERROR: requested image instruction is not supported on this GPU
diff --git a/llvm/test/Object/AMDGPU/elf-header-flags-mach.yaml b/llvm/test/Object/AMDGPU/elf-header-flags-mach.yaml
index 7fb33ca662b190..4c2b4479a4aa35 100644
--- a/llvm/test/Object/AMDGPU/elf-header-flags-mach.yaml
+++ b/llvm/test/Object/AMDGPU/elf-header-flags-mach.yaml
@@ -238,6 +238,23 @@
# RUN: llvm-readobj -S --file-headers %t.o.AMDGCN_GFX1201 | FileCheck --check-prefixes=ELF-AMDGCN-ALL,ELF-AMDGCN-GFX1201 %s
# RUN: obj2yaml %t.o.AMDGCN_GFX1201 | FileCheck --check-prefixes=YAML-AMDGCN-ALL,YAML-AMDGCN-GFX1201 %s
+# RUN: sed -e 's/<BITS>/64/' -e 's/<MACH>/AMDGCN_GFX9_GENERIC/' %s | yaml2obj -o %t.o.AMDGCN_GFX9_GENERIC
+# RUN: llvm-readobj -S --file-headers %t.o.AMDGCN_GFX9_GENERIC | FileCheck --check-prefixes=ELF-AMDGCN-ALL,ELF-AMDGCN-GFX9_GENERIC %s
+# RUN: obj2yaml %t.o.AMDGCN_GFX9_GENERIC | FileCheck --check-prefixes=YAML-AMDGCN-ALL,YAML-AMDGCN-GFX9_GENERIC %s
+
+# RUN: sed -e 's/<BITS>/64/' -e 's/<MACH>/AMDGCN_GFX10_1_GENERIC/' %s | yaml2obj -o %t.o.AMDGCN_GFX10_1_GENERIC
+# RUN: llvm-readobj -S --file-headers %t.o.AMDGCN_GFX10_1_GENERIC | FileCheck --check-prefixes=ELF-AMDGCN-ALL,ELF-AMDGCN-GFX10_1_GENERIC %s
+# RUN: obj2yaml %t.o.AMDGCN_GFX10_1_GENERIC | FileCheck --check-prefixes=YAML-AMDGCN-ALL,YAML-AMDGCN-GFX10_1_GENERIC %s
+
+# RUN: sed -e 's/<BITS>/64/' -e 's/<MACH>/AMDGCN_GFX10_3_GENERIC/' %s | yaml2obj -o %t.o.AMDGCN_GFX10_3_GENERIC
+# RUN: llvm-readobj -S --file-headers %t.o.AMDGCN_GFX10_3_GENERIC | FileCheck --check-prefixes=ELF-AMDGCN-ALL,ELF-AMDGCN-GFX10_3_GENERIC %s
+# RUN: obj2yaml %t.o.AMDGCN_GFX10_3_GENERIC | FileCheck --check-prefixes=YAML-AMDGCN-ALL,YAML-AMDGCN-GFX10_3_GENERIC %s
+
+# RUN: sed -e 's/<BITS>/64/' -e 's/<MACH>/AMDGCN_GFX11_GENERIC/' %s | yaml2obj -o %t.o.AMDGCN_GFX11_GENERIC
+# RUN: llvm-readobj -S --file-headers %t.o.AMDGCN_GFX11_GENERIC | FileCheck --check-prefixes=ELF-AMDGCN-ALL,ELF-AMDGCN-GFX11_GENERIC %s
+# RUN: obj2yaml %t.o.AMDGCN_GFX11_GENERIC | FileCheck --check-prefixes=YAML-AMDGCN-ALL,YAML-AMDGCN-GFX11_GENERIC %s
+
+
# ELF-R600-ALL: Format: elf32-amdgpu
# ELF-R600-ALL: Arch: r600
# ELF-R600-ALL: AddressSize: 32bit
@@ -435,6 +452,18 @@
# ELF-AMDGCN-GFX1201: EF_AMDGPU_MACH_AMDGCN_GFX1201 (0x4E)
# YAML-AMDGCN-GFX1201: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX1201 ]
+# ELF-AMDGCN-GFX9_GENERIC: EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC (0x51)
+# YAML-AMDGCN-GFX9_GENERIC: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC ]
+
+# ELF-AMDGCN-GFX10_1_GENERIC: EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC (0x52)
+# YAML-AMDGCN-GFX10_1_GENERIC: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC ]
+
+# ELF-AMDGCN-GFX10_3_GENERIC: EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC (0x53)
+# YAML-AMDGCN-GFX10_3_GENERIC: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC ]
+
+# ELF-AMDGCN-GFX11_GENERIC: EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC (0x54)
+# YAML-AMDGCN-GFX11_GENERIC: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC ]
+
# ELF-AMDGCN-ALL: ]
diff --git a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/subtarget.ll b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/subtarget.ll
index e296d7fb1fc8f2..ca136a6a0d5ba2 100644
--- a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/subtarget.ll
+++ b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/subtarget.ll
@@ -18,6 +18,11 @@ define amdgpu_kernel void @test_kernel() {
; ----------------------------------GFX11--------------------------------------
;
+; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=6 -mcpu=gfx11-generic -filetype=obj -O0 -o %t.o %s
+; RUN: llvm-objdump -D --arch-name=amdgcn -mllvm --amdhsa-code-object-version=6 --mcpu=gfx11-generic %t.o > %t-specify.txt
+; RUN: llvm-objdump -D -mllvm --amdhsa-code-object-version=6 %t.o > %t-detect.txt
+; RUN: diff %t-specify.txt %t-detect.txt
+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1151 -filetype=obj -O0 -o %t.o %s
; RUN: llvm-objdump -D --arch-name=amdgcn --mcpu=gfx1151 %t.o > %t-specify.txt
; RUN: llvm-objdump -D %t.o > %t-detect.txt
@@ -49,6 +54,11 @@ define amdgpu_kernel void @test_kernel() {
; RUN: diff %t-specify.txt %t-detect.txt
; ----------------------------------GFX10--------------------------------------
+; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=6 -mcpu=gfx10.3-generic -filetype=obj -O0 -o %t.o %s
+; RUN: llvm-objdump -D --arch-name=amdgcn -mllvm --amdhsa-code-object-version=6 --mcpu=gfx10.3-generic %t.o > %t-specify.txt
+; RUN: llvm-objdump -D -mllvm --amdhsa-code-object-version=6 %t.o > %t-detect.txt
+; RUN: diff %t-specify.txt %t-detect.txt
+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1036 -filetype=obj -O0 -o %t.o %s
; RUN: llvm-objdump -D --arch-name=amdgcn --mcpu=gfx1036 %t.o > %t-specify.txt
; RUN: llvm-objdump -D %t.o > %t-detect.txt
@@ -84,6 +94,11 @@ define amdgpu_kernel void @test_kernel() {
; RUN: llvm-objdump -D %t.o > %t-detect.txt
; RUN: diff %t-specify.txt %t-detect.txt
+; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=6 -mcpu=gfx10.1-generic -filetype=obj -O0 -o %t.o %s
+; RUN: llvm-objdump -D --arch-name=amdgcn -mllvm --amdhsa-code-object-version=6 --mcpu=gfx10.1-generic %t.o > %t-specify.txt
+; RUN: llvm-objdump -D -mllvm --amdhsa-code-object-version=6 %t.o > %t-detect.txt
+; RUN: diff %t-specify.txt %t-detect.txt
+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1013 -filetype=obj -O0 -o %t.o %s
; RUN: llvm-objdump -D --arch-name=amdgcn --mcpu=gfx1013 %t.o > %t-specify.txt
; RUN: llvm-objdump -D %t.o > %t-detect.txt
@@ -107,6 +122,11 @@ define amdgpu_kernel void @test_kernel() {
; ----------------------------------GFX9---------------------------------------
;
+; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=6 -mcpu=gfx9-generic -filetype=obj -O0 -o %t.o %s
+; RUN: llvm-objdump -D --arch-name=amdgcn -mllvm --amdhsa-code-object-version=6 --mcpu=gfx9-generic %t.o > %t-specify.txt
+; RUN: llvm-objdump -D -mllvm --amdhsa-code-object-version=6 %t.o > %t-detect.txt
+; RUN: diff %t-specify.txt %t-detect.txt
+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -filetype=obj -O0 -o %t.o %s
; RUN: llvm-objdump -D --arch-name=amdgcn --mcpu=gfx942 %t.o > %t-specify.txt
; RUN: llvm-objdump -D %t.o > %t-detect.txt
diff --git a/llvm/test/tools/llvm-readobj/ELF/AMDGPU/elf-headers.test b/llvm/test/tools/llvm-readobj/ELF/AMDGPU/elf-headers.test
index e2266d81d1a597..7fbf4aae6c2460 100644
--- a/llvm/test/tools/llvm-readobj/ELF/AMDGPU/elf-headers.test
+++ b/llvm/test/tools/llvm-readobj/ELF/AMDGPU/elf-headers.test
@@ -253,6 +253,9 @@
# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1013
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1013 -DFLAG_VALUE=0x42
+# RUN: yaml2obj %s -o %t -DABI_VERSION=4 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=4 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC -DFLAG_VALUE=0x52
+
# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1013
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1013 -DFLAG_VALUE=0x42
@@ -322,6 +325,9 @@
# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1036
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1036 -DFLAG_VALUE=0x45
+# RUN: yaml2obj %s -o %t -DABI_VERSION=4 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=4 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC -DFLAG_VALUE=0x53
+
# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME="EF_AMDGPU_MACH_AMDGCN_GFX90A, EF_AMDGPU_FEATURE_XNACK_V3"
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,DOUBLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_0="EF_AMDGPU_FEATURE_XNACK_V3 (0x100)" -DFLAG_1="EF_AMDGPU_MACH_AMDGCN_GFX90A (0x3F)" -DFLAG_VALUE=0x13F
@@ -355,6 +361,9 @@
# RUN: yaml2obj %s -o %t -DABI_VERSION=16 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX90A
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,UNKNOWN-ABI-VERSION --match-full-lines -DABI_VERSION=16 -DFILE=%t -DFLAG_VALUE=0x3F
+# RUN: yaml2obj %s -o %t -DABI_VERSION=4 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=4 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC -DFLAG_VALUE=0x51
+
# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1100
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1100 -DFLAG_VALUE=0x41
@@ -391,6 +400,9 @@
# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1103
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1103 -DFLAG_VALUE=0x44
+# RUN: yaml2obj %s -o %t -DABI_VERSION=4 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=4 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC -DFLAG_VALUE=0x54
+
# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1150
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1150 -DFLAG_VALUE=0x43
diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp
index 82bb12f95d3a31..8e68f08c3fa9ab 100644
--- a/llvm/tools/llvm-readobj/ELFDumper.cpp
+++ b/llvm/tools/llvm-readobj/ELFDumper.cpp
@@ -1559,68 +1559,72 @@ const EnumEntry<unsigned> ElfHeaderMipsFlags[] = {
};
// clang-format off
-#define AMDGPU_MACH_ENUM_ENTS \
- ENUM_ENT(EF_AMDGPU_MACH_NONE, "none"), \
- ENUM_ENT(EF_AMDGPU_MACH_R600_R600, "r600"), \
- ENUM_ENT(EF_AMDGPU_MACH_R600_R630, "r630"), \
- ENUM_ENT(EF_AMDGPU_MACH_R600_RS880, "rs880"), \
- ENUM_ENT(EF_AMDGPU_MACH_R600_RV670, "rv670"), \
- ENUM_ENT(EF_AMDGPU_MACH_R600_RV710, "rv710"), \
- ENUM_ENT(EF_AMDGPU_MACH_R600_RV730, "rv730"), \
- ENUM_ENT(EF_AMDGPU_MACH_R600_RV770, "rv770"), \
- ENUM_ENT(EF_AMDGPU_MACH_R600_CEDAR, "cedar"), \
- ENUM_ENT(EF_AMDGPU_MACH_R600_CYPRESS, "cypress"), \
- ENUM_ENT(EF_AMDGPU_MACH_R600_JUNIPER, "juniper"), \
- ENUM_ENT(EF_AMDGPU_MACH_R600_REDWOOD, "redwood"), \
- ENUM_ENT(EF_AMDGPU_MACH_R600_SUMO, "sumo"), \
- ENUM_ENT(EF_AMDGPU_MACH_R600_BARTS, "barts"), \
- ENUM_ENT(EF_AMDGPU_MACH_R600_CAICOS, "caicos"), \
- ENUM_ENT(EF_AMDGPU_MACH_R600_CAYMAN, "cayman"), \
- ENUM_ENT(EF_AMDGPU_MACH_R600_TURKS, "turks"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX600, "gfx600"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX601, "gfx601"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX602, "gfx602"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX700, "gfx700"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX701, "gfx701"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX702, "gfx702"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX703, "gfx703"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX704, "gfx704"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX705, "gfx705"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX801, "gfx801"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX802, "gfx802"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX803, "gfx803"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX805, "gfx805"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX810, "gfx810"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX900, "gfx900"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX902, "gfx902"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX904, "gfx904"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX906, "gfx906"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX908, "gfx908"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX909, "gfx909"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX90A, "gfx90a"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX90C, "gfx90c"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX940, "gfx940"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX941, "gfx941"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX942, "gfx942"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1010, "gfx1010"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1011, "gfx1011"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1012, "gfx1012"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1013, "gfx1013"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1030, "gfx1030"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1031, "gfx1031"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1032, "gfx1032"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1033, "gfx1033"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1034, "gfx1034"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1035, "gfx1035"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1036, "gfx1036"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1100, "gfx1100"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1101, "gfx1101"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1102, "gfx1102"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1103, "gfx1103"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1150, "gfx1150"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1151, "gfx1151"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1200, "gfx1200"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1201, "gfx1201")
+#define AMDGPU_MACH_ENUM_ENTS \
+ ENUM_ENT(EF_AMDGPU_MACH_NONE, "none"), \
+ ENUM_ENT(EF_AMDGPU_MACH_R600_R600, "r600"), \
+ ENUM_ENT(EF_AMDGPU_MACH_R600_R630, "r630"), \
+ ENUM_ENT(EF_AMDGPU_MACH_R600_RS880, "rs880"), \
+ ENUM_ENT(EF_AMDGPU_MACH_R600_RV670, "rv670"), \
+ ENUM_ENT(EF_AMDGPU_MACH_R600_RV710, "rv710"), \
+ ENUM_ENT(EF_AMDGPU_MACH_R600_RV730, "rv730"), \
+ ENUM_ENT(EF_AMDGPU_MACH_R600_RV770, "rv770"), \
+ ENUM_ENT(EF_AMDGPU_MACH_R600_CEDAR, "cedar"), \
+ ENUM_ENT(EF_AMDGPU_MACH_R600_CYPRESS, "cypress"), \
+ ENUM_ENT(EF_AMDGPU_MACH_R600_JUNIPER, "juniper"), \
+ ENUM_ENT(EF_AMDGPU_MACH_R600_REDWOOD, "redwood"), \
+ ENUM_ENT(EF_AMDGPU_MACH_R600_SUMO, "sumo"), \
+ ENUM_ENT(EF_AMDGPU_MACH_R600_BARTS, "barts"), \
+ ENUM_ENT(EF_AMDGPU_MACH_R600_CAICOS, "caicos"), \
+ ENUM_ENT(EF_AMDGPU_MACH_R600_CAYMAN, "cayman"), \
+ ENUM_ENT(EF_AMDGPU_MACH_R600_TURKS, "turks"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX600, "gfx600"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX601, "gfx601"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX602, "gfx602"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX700, "gfx700"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX701, "gfx701"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX702, "gfx702"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX703, "gfx703"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX704, "gfx704"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX705, "gfx705"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX801, "gfx801"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX802, "gfx802"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX803, "gfx803"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX805, "gfx805"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX810, "gfx810"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX900, "gfx900"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX902, "gfx902"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX904, "gfx904"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX906, "gfx906"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX908, "gfx908"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX909, "gfx909"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX90A, "gfx90a"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX90C, "gfx90c"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX940, "gfx940"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX941, "gfx941"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX942, "gfx942"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1010, "gfx1010"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1011, "gfx1011"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1012, "gfx1012"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1013, "gfx1013"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1030, "gfx1030"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1031, "gfx1031"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1032, "gfx1032"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1033, "gfx1033"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1034, "gfx1034"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1035, "gfx1035"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1036, "gfx1036"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1100, "gfx1100"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1101, "gfx1101"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1102, "gfx1102"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1103, "gfx1103"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1150, "gfx1150"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1151, "gfx1151"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1200, "gfx1200"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1201, "gfx1201"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX9_GENERIC, "gfx9-generic"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX10_1_GENERIC, "gfx10.1-generic"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX10_3_GENERIC, "gfx10.3-generic"), \
+ ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX11_GENERIC, "gfx11-generic")
// clang-format on
const EnumEntry<unsigned> ElfHeaderAMDGPUFlagsABIVersion3[] = {
>From ef109b2b86d3a39fba77e60937e70fd65e9adb8b Mon Sep 17 00:00:00 2001
From: pvanhout <pierre.vanhoutryve at amd.com>
Date: Tue, 6 Feb 2024 10:54:37 +0100
Subject: [PATCH 2/7] fix test
---
clang/test/Misc/target-invalid-cpu-note.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/clang/test/Misc/target-invalid-cpu-note.c b/clang/test/Misc/target-invalid-cpu-note.c
index 2f10bfb1fd82fe..4cc748b218079a 100644
--- a/clang/test/Misc/target-invalid-cpu-note.c
+++ b/clang/test/Misc/target-invalid-cpu-note.c
@@ -37,7 +37,7 @@
// RUN: not %clang_cc1 -triple amdgcn--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix AMDGCN
// AMDGCN: error: unknown target CPU 'not-a-cpu'
-// AMDGCN-NEXT: note: valid target CPU values are: gfx600, tahiti, gfx601, pitcairn, verde, gfx602, hainan, oland, gfx700, kaveri, gfx701, hawaii, gfx702, gfx703, kabini, mullins, gfx704, bonaire, gfx705, gfx801, carrizo, gfx802, iceland, tonga, gfx803, fiji, polaris10, polaris11, gfx805, tongapro, gfx810, stoney, gfx900, gfx902, gfx904, gfx906, gfx908, gfx909, gfx90a, gfx90c, gfx940, gfx941, gfx942, gfx1010, gfx1011, gfx1012, gfx1013, gfx1030, gfx1031, gfx1032, gfx1033, gfx1034, gfx1035, gfx1036, gfx1100, gfx1101, gfx1102, gfx1103, gfx1150, gfx1151, gfx1200, gfx1201{{$}}
+// AMDGCN-NEXT: note: valid target CPU values are: gfx600, tahiti, gfx601, pitcairn, verde, gfx602, hainan, oland, gfx700, kaveri, gfx701, hawaii, gfx702, gfx703, kabini, mullins, gfx704, bonaire, gfx705, gfx801, carrizo, gfx802, iceland, tonga, gfx803, fiji, polaris10, polaris11, gfx805, tongapro, gfx810, stoney, gfx900, gfx902, gfx904, gfx906, gfx908, gfx909, gfx90a, gfx90c, gfx940, gfx941, gfx942, gfx1010, gfx1011, gfx1012, gfx1013, gfx1030, gfx1031, gfx1032, gfx1033, gfx1034, gfx1035, gfx1036, gfx1100, gfx1101, gfx1102, gfx1103, gfx1150, gfx1151, gfx1200, gfx1201, gfx9-generic, gfx10.1-generic, gfx10.3-generic, gfx11-generic{{$}}
// RUN: not %clang_cc1 -triple wasm64--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix WEBASM
// WEBASM: error: unknown target CPU 'not-a-cpu'
>From 8d762472523c3dcd1f217a521163ed10cf75146f Mon Sep 17 00:00:00 2001
From: pvanhout <pierre.vanhoutryve at amd.com>
Date: Tue, 6 Feb 2024 14:44:33 +0100
Subject: [PATCH 3/7] nits
---
clang/lib/Basic/Targets/AMDGPU.cpp | 6 +++---
llvm/docs/AMDGPUUsage.rst | 6 ++----
llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp | 3 ++-
3 files changed, 7 insertions(+), 8 deletions(-)
diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp b/clang/lib/Basic/Targets/AMDGPU.cpp
index 799634ccec7ba5..7cfae129a6ae48 100644
--- a/clang/lib/Basic/Targets/AMDGPU.cpp
+++ b/clang/lib/Basic/Targets/AMDGPU.cpp
@@ -17,6 +17,7 @@
#include "clang/Basic/LangOptions.h"
#include "clang/Basic/MacroBuilder.h"
#include "clang/Basic/TargetBuiltins.h"
+#include "llvm/ADT/SmallString.h"
using namespace clang;
using namespace clang::targets;
@@ -279,9 +280,8 @@ void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts,
if (GPUKind == llvm::AMDGPU::GK_NONE && !IsHIPHost)
return;
- std::string CanonName = (isAMDGCN(getTriple()) ? getArchNameAMDGCN(GPUKind)
- : getArchNameR600(GPUKind))
- .str();
+ llvm::SmallString<16> CanonName = (isAMDGCN(getTriple()) ? getArchNameAMDGCN(GPUKind)
+ : getArchNameR600(GPUKind));
// Sanitize the name of generic targets.
// e.g. gfx10.1-generic -> gfx10_1_generic
diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst
index 4f4674b5b1aea7..2604c5dc8e3257 100644
--- a/llvm/docs/AMDGPUUsage.rst
+++ b/llvm/docs/AMDGPUUsage.rst
@@ -520,10 +520,8 @@ Every processor supports every OS ABI (see :ref:`amdgpu-os`) with the following
=========== =============== ============ ===== ================= =============== =============== ======================
-Generic processors also exist.
-Generic processor code objects can be executed on any of the processors that
-are supported by the generic processor. Such code objects may not perform as well
-as those for the non-generic processors.
+Generic processors allow execution of a single code objects on any of the processors that
+it supports. Such code objects may not perform as well as those for the non-generic processors.
Generic processors are only available on code object V6 and above (see :ref:`amdgpu-elf-code-object`).
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index e5bc0233f534bd..5777a7cabb397f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -157,10 +157,11 @@ void AMDGPUAsmPrinter::emitFunctionBodyStart() {
const Function &F = MF->getFunction();
// TODO: We're checking this late, would be nice to check it earlier.
- if (STM.requiresCodeObjectV6() && CodeObjectVersion < AMDGPU::AMDHSA_COV6)
+ if (STM.requiresCodeObjectV6() && CodeObjectVersion < AMDGPU::AMDHSA_COV6) {
report_fatal_error(
STM.getCPU() + " is only available on code object version 6 or better",
/*gen_crash_diag*/ false);
+ }
// TODO: Which one is called first, emitStartOfAsmFile or
// emitFunctionBodyStart?
>From d546dae033cd3f36d09542dbeaf7dd22055ebf83 Mon Sep 17 00:00:00 2001
From: pvanhout <pierre.vanhoutryve at amd.com>
Date: Tue, 6 Feb 2024 14:50:48 +0100
Subject: [PATCH 4/7] clang-format
---
clang/lib/Basic/Targets/AMDGPU.cpp | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp b/clang/lib/Basic/Targets/AMDGPU.cpp
index 7cfae129a6ae48..10cba6b7eac5cc 100644
--- a/clang/lib/Basic/Targets/AMDGPU.cpp
+++ b/clang/lib/Basic/Targets/AMDGPU.cpp
@@ -280,8 +280,9 @@ void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts,
if (GPUKind == llvm::AMDGPU::GK_NONE && !IsHIPHost)
return;
- llvm::SmallString<16> CanonName = (isAMDGCN(getTriple()) ? getArchNameAMDGCN(GPUKind)
- : getArchNameR600(GPUKind));
+ llvm::SmallString<16> CanonName =
+ (isAMDGCN(getTriple()) ? getArchNameAMDGCN(GPUKind)
+ : getArchNameR600(GPUKind));
// Sanitize the name of generic targets.
// e.g. gfx10.1-generic -> gfx10_1_generic
>From d0969e5a2c3772a80e073a266dcad8918f1fa6b7 Mon Sep 17 00:00:00 2001
From: pvanhout <pierre.vanhoutryve at amd.com>
Date: Wed, 7 Feb 2024 09:15:10 +0100
Subject: [PATCH 5/7] doc
---
llvm/docs/AMDGPUUsage.rst | 24 ++++++++++++++++--------
1 file changed, 16 insertions(+), 8 deletions(-)
diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst
index 2604c5dc8e3257..c78164f31610c0 100644
--- a/llvm/docs/AMDGPUUsage.rst
+++ b/llvm/docs/AMDGPUUsage.rst
@@ -520,15 +520,21 @@ Every processor supports every OS ABI (see :ref:`amdgpu-os`) with the following
=========== =============== ============ ===== ================= =============== =============== ======================
-Generic processors allow execution of a single code objects on any of the processors that
+Generic processors allow execution of a single code object on any of the processors that
it supports. Such code objects may not perform as well as those for the non-generic processors.
Generic processors are only available on code object V6 and above (see :ref:`amdgpu-elf-code-object`).
-Generic processor code objects are versioned (see :ref:`amdgpu-elf-header-e_flags-table-v6-onwards`).
-The version number is used by runtimes to determine if a code object can be run on a specific agent.
-The version number may be increased as-needed, e.g., when a major codegen change occurs for a generic
-target.
+Generic processor code objects are versioned (see :ref:`amdgpu-elf-header-e_flags-table-v6-onwards`) between 1 and 255.
+The version of non-generic code objects is always set to 0.
+
+For a generic code object, adding a new supported processor may require the code generated for the generic target to be changed
+so it can continue to execute on the previously supported processors as well as on the new one.
+When this happens, the generic code object version number is incremented at the same time as the generic target is updated.
+
+Each supported processor of a generic target is mapped to the version it was introduced in.
+A generic code object can execute on a supported processor if the version of the code object being loaded is
+greater than or equal to the version in which the processor was added to the generic target.
.. table:: AMDGPU Generic Processors
:name: amdgpu-generic-processor-table
@@ -1771,9 +1777,11 @@ The AMDGPU backend uses the following ELF header:
``EF_AMDGPU_FEATURE_SRAMECC_ANY_V4`` 0x400 SRAMECC can have any value.
``EF_AMDGPU_FEATURE_SRAMECC_OFF_V4`` 0x800 SRAMECC disabled,
``EF_AMDGPU_FEATURE_SRAMECC_ON_V4`` 0xc00 SRAMECC enabled.
- ``EF_AMDGPU_GENERIC_VERSION_V<X>`` 0x01000000 Value between 1 and 255 for generic code
- to object versioning, stored in the MSB.
- 0xff000000 See :ref:`amdgpu-generic-processor-table`
+ ``EF_AMDGPU_GENERIC_VERSION_V`` 0xff000000 Generic code object version selection
+ mask. This is a value between 1 and 255,
+ stored in the most significant byte
+ of EFLAGS.
+ See :ref:`amdgpu-generic-processor-table`
============================================ ========== =========================================
.. table:: AMDGPU ``EF_AMDGPU_MACH`` Values
>From f2ebf6922c4e3d1fe7068b5ca3db15f738717e46 Mon Sep 17 00:00:00 2001
From: pvanhout <pierre.vanhoutryve at amd.com>
Date: Wed, 7 Feb 2024 15:30:35 +0100
Subject: [PATCH 6/7] remove md change
---
llvm/docs/AMDGPUUsage.rst | 27 ---------------------------
1 file changed, 27 deletions(-)
diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst
index c78164f31610c0..1df2288b3003b2 100644
--- a/llvm/docs/AMDGPUUsage.rst
+++ b/llvm/docs/AMDGPUUsage.rst
@@ -4293,33 +4293,6 @@ Code object V5 metadata is the same as
====================== ============== ========= ================================
-.. _amdgpu-amdhsa-code-object-metadata-v6:
-
-Code Object V6 Metadata
-+++++++++++++++++++++++
-
-.. warning::
- Code object V6 is not the default code object version emitted by this version
- of LLVM.
-
-
-Code object V6 metadata is the same as
-:ref:`amdgpu-amdhsa-code-object-metadata-v5` with the changes defined in table
-:ref:`amdgpu-amdhsa-code-object-metadata-map-table-v6`.
-
- .. table:: AMDHSA Code Object V6 Metadata Map Changes
- :name: amdgpu-amdhsa-code-object-metadata-map-table-v6
-
- ================= ============== ========= =======================================
- String Key Value Type Required? Description
- ================= ============== ========= =======================================
- "amdhsa.version" sequence of Required - The first integer is the major
- 2 integers version. Currently 1.
- - The second integer is the minor
- version. Currently 3.
- ================= ============== ========= =======================================
-
-
..
Kernel Dispatch
>From e9253a7cba4e6e1b0dab09c3de547e587aaf673f Mon Sep 17 00:00:00 2001
From: pvanhout <pierre.vanhoutryve at amd.com>
Date: Fri, 9 Feb 2024 10:18:04 +0100
Subject: [PATCH 7/7] add test for mad-mix + fix other test
---
.../GlobalISel/llvm.amdgcn.workitem.id.ll | 20 +-
llvm/test/CodeGen/AMDGPU/mad-mix.ll | 421 ++++++++++++++++++
2 files changed, 431 insertions(+), 10 deletions(-)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workitem.id.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workitem.id.ll
index 4e921f25620544..9698a3894db68c 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workitem.id.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.workitem.id.ll
@@ -1,12 +1,12 @@
-; RUN: llc -global-isel -mtriple=amdgcn-unknown-amdhsa -verify-machineinstrs < %s | FileCheck --check-prefixes=ALL,HSA,UNPACKED %s
-; RUN: llc -global-isel -mtriple=amdgcn-unknown-amdhsa -verify-machineinstrs < %s | FileCheck --check-prefixes=ALL,HSA,UNPACKED %s
-; RUN: llc -global-isel -mtriple=amdgcn-- -mcpu=hawaii -mattr=+flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefixes=ALL,MESA,UNPACKED %s
-; RUN: llc -global-isel -mtriple=amdgcn-- -mcpu=tonga -mattr=+flat-for-global -verify-machineinstrs < %s | FileCheck --check-prefixes=ALL,MESA,UNPACKED %s
-; RUN: llc -global-isel -mtriple=amdgcn-unknown-mesa3d -mattr=+flat-for-global -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=ALL,MESA3D,UNPACKED %s
-; RUN: llc -global-isel -mtriple=amdgcn-unknown-mesa3d -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=ALL,MESA3D,UNPACKED %s
-; RUN: llc -global-isel -mtriple=amdgcn -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck -check-prefixes=ALL,PACKED-TID %s
-; RUN: llc -global-isel -mtriple=amdgcn -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1100 -verify-machineinstrs -amdgpu-enable-vopd=0 < %s | FileCheck -check-prefixes=ALL,PACKED-TID %s
-; RUN: llc -global-isel -mtriple=amdgcn -mtriple=amdgcn-unknown-amdhsa --amdhsa-code-object-version=6 -mcpu=gfx11-generic -verify-machineinstrs -amdgpu-enable-vopd=0 < %s | FileCheck -check-prefixes=ALL,PACKED-TID %s
+; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel -mtriple=amdgcn-unknown-amdhsa -verify-machineinstrs | FileCheck --check-prefixes=ALL,HSA,UNPACKED %s
+; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel -mtriple=amdgcn-unknown-amdhsa -verify-machineinstrs| FileCheck --check-prefixes=ALL,HSA,UNPACKED %s
+; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel -mtriple=amdgcn-- -mcpu=hawaii -mattr=+flat-for-global -verify-machineinstrs | FileCheck --check-prefixes=ALL,MESA,UNPACKED %s
+; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel -mtriple=amdgcn-- -mcpu=tonga -mattr=+flat-for-global -verify-machineinstrs | FileCheck --check-prefixes=ALL,MESA,UNPACKED %s
+; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel -mtriple=amdgcn-unknown-mesa3d -mattr=+flat-for-global -mcpu=hawaii -verify-machineinstrs | FileCheck -check-prefixes=ALL,MESA3D,UNPACKED %s
+; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel -mtriple=amdgcn-unknown-mesa3d -mcpu=tonga -verify-machineinstrs | FileCheck -check-prefixes=ALL,MESA3D,UNPACKED %s
+; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel -mtriple=amdgcn -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a -verify-machineinstrs | FileCheck -check-prefixes=ALL,PACKED-TID %s
+; RUN: sed 's/CODE_OBJECT_VERSION/400/g' %s | llc -global-isel -mtriple=amdgcn -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1100 -verify-machineinstrs -amdgpu-enable-vopd=0 | FileCheck -check-prefixes=ALL,PACKED-TID %s
+; RUN: sed 's/CODE_OBJECT_VERSION/600/g' %s | llc -global-isel -mtriple=amdgcn -mtriple=amdgcn-unknown-amdhsa --amdhsa-code-object-version=6 -mcpu=gfx11-generic -verify-machineinstrs -amdgpu-enable-vopd=0 | FileCheck -check-prefixes=ALL,PACKED-TID %s
declare i32 @llvm.amdgcn.workitem.id.x() #0
declare i32 @llvm.amdgcn.workitem.id.y() #0
@@ -201,4 +201,4 @@ attributes #1 = { nounwind }
!2 = !{i32 1, i32 1, i32 64}
!llvm.module.flags = !{!99}
-!99 = !{i32 1, !"amdgpu_code_object_version", i32 400}
+!99 = !{i32 1, !"amdgpu_code_object_version", i32 CODE_OBJECT_VERSION}
diff --git a/llvm/test/CodeGen/AMDGPU/mad-mix.ll b/llvm/test/CodeGen/AMDGPU/mad-mix.ll
index e8b9526774d89a..b520dd1060ec8c 100644
--- a/llvm/test/CodeGen/AMDGPU/mad-mix.ll
+++ b/llvm/test/CodeGen/AMDGPU/mad-mix.ll
@@ -2,12 +2,14 @@
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX1100,SDAG-GFX1100 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX900,SDAG-GFX900 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX906,SDAG-GFX906 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx9-generic -verify-machineinstrs --amdhsa-code-object-version=6 < %s | FileCheck -check-prefixes=GFX9GEN,SDAG-GFX9GEN %s
; RUN: llc -mtriple=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=VI,SDAG-VI %s
; RUN: llc -mtriple=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=CI,SDAG-CI %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX1100,GISEL-GFX1100 %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX900,GISEL-GFX900 %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx906 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX906,GISEL-GFX906 %s
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx9-generic --amdhsa-code-object-version=6 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9GEN,GISEL-GFX9GEN %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=VI,GISEL-VI %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefixes=CI,GISEL-CI %s
@@ -30,6 +32,15 @@ define float @v_mad_mix_f32_f16lo_f16lo_f16lo(half %src0, half %src1, half %src2
; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1]
; GFX906-NEXT: s_setpc_b64 s[30:31]
;
+; GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo:
+; GFX9GEN: ; %bb.0:
+; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v3, v0
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v2
+; GFX9GEN-NEXT: v_mac_f32_e32 v0, v3, v1
+; GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -79,6 +90,15 @@ define float @v_mad_mix_f32_f16hi_f16hi_f16hi_int(i32 %src0, i32 %src1, i32 %src
; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
; GFX906-NEXT: s_setpc_b64 s[30:31]
;
+; GFX9GEN-LABEL: v_mad_mix_f32_f16hi_f16hi_f16hi_int:
+; GFX9GEN: ; %bb.0:
+; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GFX9GEN-NEXT: v_mac_f32_e32 v0, v3, v1
+; GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; VI-LABEL: v_mad_mix_f32_f16hi_f16hi_f16hi_int:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -134,6 +154,15 @@ define float @v_mad_mix_f32_f16hi_f16hi_f16hi_elt(<2 x half> %src0, <2 x half> %
; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1]
; GFX906-NEXT: s_setpc_b64 s[30:31]
;
+; GFX9GEN-LABEL: v_mad_mix_f32_f16hi_f16hi_f16hi_elt:
+; GFX9GEN: ; %bb.0:
+; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GFX9GEN-NEXT: v_mac_f32_e32 v0, v3, v1
+; GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; VI-LABEL: v_mad_mix_f32_f16hi_f16hi_f16hi_elt:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -193,6 +222,19 @@ define <2 x float> @v_mad_mix_v2f32(<2 x half> %src0, <2 x half> %src1, <2 x hal
; SDAG-GFX906-NEXT: v_mov_b32_e32 v1, v3
; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31]
;
+; SDAG-GFX9GEN-LABEL: v_mad_mix_v2f32:
+; SDAG-GFX9GEN: ; %bb.0:
+; SDAG-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v4, v0
+; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v6, v1
+; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v2
+; SDAG-GFX9GEN-NEXT: v_mac_f32_e32 v1, v3, v5
+; SDAG-GFX9GEN-NEXT: v_mac_f32_e32 v0, v4, v6
+; SDAG-GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; SDAG-VI-LABEL: v_mad_mix_v2f32:
; SDAG-VI: ; %bb.0:
; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -242,6 +284,19 @@ define <2 x float> @v_mad_mix_v2f32(<2 x half> %src0, <2 x half> %src1, <2 x hal
; GISEL-GFX906-NEXT: v_mov_b32_e32 v0, v3
; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31]
;
+; GISEL-GFX9GEN-LABEL: v_mad_mix_v2f32:
+; GISEL-GFX9GEN: ; %bb.0:
+; GISEL-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v3, v0
+; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v5, v1
+; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v6, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v2
+; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GISEL-GFX9GEN-NEXT: v_mac_f32_e32 v0, v3, v5
+; GISEL-GFX9GEN-NEXT: v_mac_f32_e32 v1, v4, v6
+; GISEL-GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; GISEL-VI-LABEL: v_mad_mix_v2f32:
; GISEL-VI: ; %bb.0:
; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -300,6 +355,19 @@ define <2 x float> @v_mad_mix_v2f32_shuffle(<2 x half> %src0, <2 x half> %src1,
; GFX906-NEXT: v_mov_b32_e32 v0, v3
; GFX906-NEXT: s_setpc_b64 s[30:31]
;
+; GFX9GEN-LABEL: v_mad_mix_v2f32_shuffle:
+; GFX9GEN: ; %bb.0:
+; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v4, v0
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v1
+; GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GFX9GEN-NEXT: v_mad_f32 v0, v3, v0, v2
+; GFX9GEN-NEXT: v_mac_f32_e32 v2, v4, v1
+; GFX9GEN-NEXT: v_mov_b32_e32 v1, v2
+; GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; VI-LABEL: v_mad_mix_v2f32_shuffle:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -378,6 +446,15 @@ define float @v_mad_mix_f32_negf16lo_f16lo_f16lo(half %src0, half %src1, half %s
; GFX906-NEXT: v_fma_mix_f32 v0, -v0, v1, v2 op_sel_hi:[1,1,1]
; GFX906-NEXT: s_setpc_b64 s[30:31]
;
+; SDAG-GFX9GEN-LABEL: v_mad_mix_f32_negf16lo_f16lo_f16lo:
+; SDAG-GFX9GEN: ; %bb.0:
+; SDAG-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
+; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v2, v2
+; SDAG-GFX9GEN-NEXT: v_mad_f32 v0, -v0, v1, v2
+; SDAG-GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; SDAG-VI-LABEL: v_mad_mix_f32_negf16lo_f16lo_f16lo:
; SDAG-VI: ; %bb.0:
; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -393,6 +470,15 @@ define float @v_mad_mix_f32_negf16lo_f16lo_f16lo(half %src0, half %src1, half %s
; SDAG-CI-NEXT: v_mad_f32 v0, -v0, v1, v2
; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
;
+; GISEL-GFX9GEN-LABEL: v_mad_mix_f32_negf16lo_f16lo_f16lo:
+; GISEL-GFX9GEN: ; %bb.0:
+; GISEL-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e64 v3, -v0
+; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v2
+; GISEL-GFX9GEN-NEXT: v_mac_f32_e32 v0, v3, v1
+; GISEL-GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; GISEL-VI-LABEL: v_mad_mix_f32_negf16lo_f16lo_f16lo:
; GISEL-VI: ; %bb.0:
; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -437,6 +523,15 @@ define float @v_mad_mix_f32_absf16lo_f16lo_f16lo(half %src0, half %src1, half %s
; GFX906-NEXT: v_fma_mix_f32 v0, |v0|, v1, v2 op_sel_hi:[1,1,1]
; GFX906-NEXT: s_setpc_b64 s[30:31]
;
+; GFX9GEN-LABEL: v_mad_mix_f32_absf16lo_f16lo_f16lo:
+; GFX9GEN: ; %bb.0:
+; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX9GEN-NEXT: v_mad_f32 v0, |v0|, v1, v2
+; GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; VI-LABEL: v_mad_mix_f32_absf16lo_f16lo_f16lo:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -487,6 +582,15 @@ define float @v_mad_mix_f32_negabsf16lo_f16lo_f16lo(half %src0, half %src1, half
; GFX906-NEXT: v_fma_mix_f32 v0, -|v0|, v1, v2 op_sel_hi:[1,1,1]
; GFX906-NEXT: s_setpc_b64 s[30:31]
;
+; GFX9GEN-LABEL: v_mad_mix_f32_negabsf16lo_f16lo_f16lo:
+; GFX9GEN: ; %bb.0:
+; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX9GEN-NEXT: v_mad_f32 v0, -|v0|, v1, v2
+; GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; VI-LABEL: v_mad_mix_f32_negabsf16lo_f16lo_f16lo:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -538,6 +642,14 @@ define float @v_mad_mix_f32_f16lo_f16lo_f32(half %src0, half %src1, float %src2)
; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
; GFX906-NEXT: s_setpc_b64 s[30:31]
;
+; GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_f32:
+; GFX9GEN: ; %bb.0:
+; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX9GEN-NEXT: v_mad_f32 v0, v0, v1, v2
+; GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -584,6 +696,14 @@ define float @v_mad_mix_f32_f16lo_f16lo_negf32(half %src0, half %src1, float %sr
; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, -v2 op_sel_hi:[1,1,0]
; GFX906-NEXT: s_setpc_b64 s[30:31]
;
+; GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_negf32:
+; GFX9GEN: ; %bb.0:
+; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX9GEN-NEXT: v_mad_f32 v0, v0, v1, -v2
+; GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_negf32:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -631,6 +751,14 @@ define float @v_mad_mix_f32_f16lo_f16lo_absf32(half %src0, half %src1, float %sr
; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, |v2| op_sel_hi:[1,1,0]
; GFX906-NEXT: s_setpc_b64 s[30:31]
;
+; GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_absf32:
+; GFX9GEN: ; %bb.0:
+; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX9GEN-NEXT: v_mad_f32 v0, v0, v1, |v2|
+; GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_absf32:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -678,6 +806,14 @@ define float @v_mad_mix_f32_f16lo_f16lo_negabsf32(half %src0, half %src1, float
; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, -|v2| op_sel_hi:[1,1,0]
; GFX906-NEXT: s_setpc_b64 s[30:31]
;
+; GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_negabsf32:
+; GFX9GEN: ; %bb.0:
+; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX9GEN-NEXT: v_mad_f32 v0, v0, v1, -|v2|
+; GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_negabsf32:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -734,6 +870,14 @@ define float @v_mad_mix_f32_f16lo_f16lo_f32imm1(half %src0, half %src1) #0 {
; SDAG-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0]
; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31]
;
+; GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imm1:
+; GFX9GEN: ; %bb.0:
+; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX9GEN-NEXT: v_mad_f32 v0, v0, v1, 1.0
+; GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imm1:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -806,6 +950,14 @@ define float @v_mad_mix_f32_f16lo_f16lo_f32imminv2pi(half %src0, half %src1) #0
; SDAG-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0]
; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31]
;
+; GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imminv2pi:
+; GFX9GEN: ; %bb.0:
+; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX9GEN-NEXT: v_mad_f32 v0, v0, v1, 0.15915494
+; GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32imminv2pi:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -885,6 +1037,14 @@ define float @v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi(half %src0, half %src1)
; SDAG-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0]
; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31]
;
+; SDAG-GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi:
+; SDAG-GFX9GEN: ; %bb.0:
+; SDAG-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
+; SDAG-GFX9GEN-NEXT: v_madak_f32 v0, v0, v1, 0x3e230000
+; SDAG-GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; SDAG-VI-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi:
; SDAG-VI: ; %bb.0:
; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -921,6 +1081,15 @@ define float @v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi(half %src0, half %src1)
; GISEL-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31]
;
+; GISEL-GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi:
+; GISEL-GFX9GEN: ; %bb.0:
+; GISEL-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v2, v0
+; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GISEL-GFX9GEN-NEXT: v_mov_b32_e32 v0, 0x3e230000
+; GISEL-GFX9GEN-NEXT: v_mac_f32_e32 v0, v2, v1
+; GISEL-GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; GISEL-VI-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imminv2pi:
; GISEL-VI: ; %bb.0:
; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -969,6 +1138,14 @@ define float @v_mad_mix_f32_f16lo_f16lo_cvtf16imm63(half %src0, half %src1) #0 {
; SDAG-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, s4 op_sel_hi:[1,1,0]
; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31]
;
+; SDAG-GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63:
+; SDAG-GFX9GEN: ; %bb.0:
+; SDAG-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
+; SDAG-GFX9GEN-NEXT: v_madak_f32 v0, v0, v1, 0x367c0000
+; SDAG-GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; SDAG-VI-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63:
; SDAG-VI: ; %bb.0:
; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1005,6 +1182,15 @@ define float @v_mad_mix_f32_f16lo_f16lo_cvtf16imm63(half %src0, half %src1) #0 {
; GISEL-GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31]
;
+; GISEL-GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63:
+; GISEL-GFX9GEN: ; %bb.0:
+; GISEL-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v2, v0
+; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GISEL-GFX9GEN-NEXT: v_mov_b32_e32 v0, 0x367c0000
+; GISEL-GFX9GEN-NEXT: v_mac_f32_e32 v0, v2, v1
+; GISEL-GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; GISEL-VI-LABEL: v_mad_mix_f32_f16lo_f16lo_cvtf16imm63:
; GISEL-VI: ; %bb.0:
; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1058,6 +1244,17 @@ define <2 x float> @v_mad_mix_v2f32_f32imm1(<2 x half> %src0, <2 x half> %src1)
; SDAG-GFX906-NEXT: v_mov_b32_e32 v1, v2
; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31]
;
+; SDAG-GFX9GEN-LABEL: v_mad_mix_v2f32_f32imm1:
+; SDAG-GFX9GEN: ; %bb.0:
+; SDAG-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v3, v1
+; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; SDAG-GFX9GEN-NEXT: v_mad_f32 v0, v0, v3, 1.0
+; SDAG-GFX9GEN-NEXT: v_mad_f32 v1, v2, v1, 1.0
+; SDAG-GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; SDAG-VI-LABEL: v_mad_mix_v2f32_f32imm1:
; SDAG-VI: ; %bb.0:
; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1112,6 +1309,17 @@ define <2 x float> @v_mad_mix_v2f32_f32imm1(<2 x half> %src0, <2 x half> %src1)
; GISEL-GFX906-NEXT: v_mov_b32_e32 v0, v2
; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31]
;
+; GISEL-GFX9GEN-LABEL: v_mad_mix_v2f32_f32imm1:
+; GISEL-GFX9GEN: ; %bb.0:
+; GISEL-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v2, v0
+; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v1
+; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GISEL-GFX9GEN-NEXT: v_mad_f32 v0, v2, v0, 1.0
+; GISEL-GFX9GEN-NEXT: v_mad_f32 v1, v3, v1, 1.0
+; GISEL-GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; GISEL-VI-LABEL: v_mad_mix_v2f32_f32imm1:
; GISEL-VI: ; %bb.0:
; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1168,6 +1376,18 @@ define <2 x float> @v_mad_mix_v2f32_cvtf16imminv2pi(<2 x half> %src0, <2 x half>
; SDAG-GFX906-NEXT: v_mov_b32_e32 v1, v2
; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31]
;
+; SDAG-GFX9GEN-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi:
+; SDAG-GFX9GEN: ; %bb.0:
+; SDAG-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v3, v1
+; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; SDAG-GFX9GEN-NEXT: v_mov_b32_e32 v1, 0x3e230000
+; SDAG-GFX9GEN-NEXT: v_madak_f32 v0, v0, v3, 0x3e230000
+; SDAG-GFX9GEN-NEXT: v_mac_f32_e32 v1, v2, v4
+; SDAG-GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; SDAG-VI-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi:
; SDAG-VI: ; %bb.0:
; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1224,6 +1444,18 @@ define <2 x float> @v_mad_mix_v2f32_cvtf16imminv2pi(<2 x half> %src0, <2 x half>
; GISEL-GFX906-NEXT: v_mov_b32_e32 v0, v2
; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31]
;
+; GISEL-GFX9GEN-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi:
+; GISEL-GFX9GEN: ; %bb.0:
+; GISEL-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v2, v0
+; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v1
+; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GISEL-GFX9GEN-NEXT: v_mov_b32_e32 v1, 0x3e230000
+; GISEL-GFX9GEN-NEXT: v_madak_f32 v0, v2, v0, 0x3e230000
+; GISEL-GFX9GEN-NEXT: v_mac_f32_e32 v1, v3, v4
+; GISEL-GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; GISEL-VI-LABEL: v_mad_mix_v2f32_cvtf16imminv2pi:
; GISEL-VI: ; %bb.0:
; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1283,6 +1515,17 @@ define <2 x float> @v_mad_mix_v2f32_f32imminv2pi(<2 x half> %src0, <2 x half> %s
; SDAG-GFX906-NEXT: v_mov_b32_e32 v1, v2
; SDAG-GFX906-NEXT: s_setpc_b64 s[30:31]
;
+; SDAG-GFX9GEN-LABEL: v_mad_mix_v2f32_f32imminv2pi:
+; SDAG-GFX9GEN: ; %bb.0:
+; SDAG-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v3, v1
+; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; SDAG-GFX9GEN-NEXT: v_mad_f32 v0, v0, v3, 0.15915494
+; SDAG-GFX9GEN-NEXT: v_mad_f32 v1, v2, v1, 0.15915494
+; SDAG-GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; SDAG-VI-LABEL: v_mad_mix_v2f32_f32imminv2pi:
; SDAG-VI: ; %bb.0:
; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1338,6 +1581,17 @@ define <2 x float> @v_mad_mix_v2f32_f32imminv2pi(<2 x half> %src0, <2 x half> %s
; GISEL-GFX906-NEXT: v_mov_b32_e32 v0, v2
; GISEL-GFX906-NEXT: s_setpc_b64 s[30:31]
;
+; GISEL-GFX9GEN-LABEL: v_mad_mix_v2f32_f32imminv2pi:
+; GISEL-GFX9GEN: ; %bb.0:
+; GISEL-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v2, v0
+; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v1
+; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GISEL-GFX9GEN-NEXT: v_mad_f32 v0, v2, v0, 0.15915494
+; GISEL-GFX9GEN-NEXT: v_mad_f32 v1, v3, v1, 0.15915494
+; GISEL-GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; GISEL-VI-LABEL: v_mad_mix_v2f32_f32imminv2pi:
; GISEL-VI: ; %bb.0:
; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1386,6 +1640,15 @@ define float @v_mad_mix_clamp_f32_f16hi_f16hi_f16hi_elt(<2 x half> %src0, <2 x h
; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel:[1,1,1] op_sel_hi:[1,1,1] clamp
; GFX906-NEXT: s_setpc_b64 s[30:31]
;
+; GFX9GEN-LABEL: v_mad_mix_clamp_f32_f16hi_f16hi_f16hi_elt:
+; GFX9GEN: ; %bb.0:
+; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v1, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v2, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GFX9GEN-NEXT: v_mad_f32 v0, v0, v1, v2 clamp
+; GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; VI-LABEL: v_mad_mix_clamp_f32_f16hi_f16hi_f16hi_elt:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1440,6 +1703,12 @@ define float @no_mix_simple(float %src0, float %src1, float %src2) #0 {
; GFX906-NEXT: v_fma_f32 v0, v0, v1, v2
; GFX906-NEXT: s_setpc_b64 s[30:31]
;
+; GFX9GEN-LABEL: no_mix_simple:
+; GFX9GEN: ; %bb.0:
+; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9GEN-NEXT: v_mad_f32 v0, v0, v1, v2
+; GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; VI-LABEL: no_mix_simple:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1474,6 +1743,12 @@ define float @no_mix_simple_fabs(float %src0, float %src1, float %src2) #0 {
; GFX906-NEXT: v_fma_f32 v0, |v0|, v1, v2
; GFX906-NEXT: s_setpc_b64 s[30:31]
;
+; GFX9GEN-LABEL: no_mix_simple_fabs:
+; GFX9GEN: ; %bb.0:
+; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9GEN-NEXT: v_mad_f32 v0, |v0|, v1, v2
+; GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; VI-LABEL: no_mix_simple_fabs:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1516,6 +1791,15 @@ define float @v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals(half %src0, half %sr
; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1]
; GFX906-NEXT: s_setpc_b64 s[30:31]
;
+; GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals:
+; GFX9GEN: ; %bb.0:
+; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX9GEN-NEXT: v_fma_f32 v0, v0, v1, v2
+; GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1568,6 +1852,14 @@ define float @v_mad_mix_f32_f16lo_f16lo_f32_denormals(half %src0, half %src1, fl
; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
; GFX906-NEXT: s_setpc_b64 s[30:31]
;
+; GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_denormals:
+; GFX9GEN: ; %bb.0:
+; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX9GEN-NEXT: v_fma_f32 v0, v0, v1, v2
+; GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_denormals:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1628,6 +1920,16 @@ define float @v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals_fmulfadd(half %src0,
; GFX906-NEXT: v_add_f32_e32 v0, v0, v2
; GFX906-NEXT: s_setpc_b64 s[30:31]
;
+; GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals_fmulfadd:
+; GFX9GEN: ; %bb.0:
+; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX9GEN-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX9GEN-NEXT: v_add_f32_e32 v0, v0, v2
+; GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_denormals_fmulfadd:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1691,6 +1993,15 @@ define float @v_mad_mix_f32_f16lo_f16lo_f32_denormals_fmulfadd(half %src0, half
; GFX906-NEXT: v_add_f32_e32 v0, v0, v2
; GFX906-NEXT: s_setpc_b64 s[30:31]
;
+; GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_denormals_fmulfadd:
+; GFX9GEN: ; %bb.0:
+; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX9GEN-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX9GEN-NEXT: v_add_f32_e32 v0, v0, v2
+; GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_denormals_fmulfadd:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1741,6 +2052,15 @@ define float @v_mad_mix_f32_f16lo_f16lo_f16lo_f32_flush_fmulfadd(half %src0, hal
; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,1]
; GFX906-NEXT: s_setpc_b64 s[30:31]
;
+; GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_flush_fmulfadd:
+; GFX9GEN: ; %bb.0:
+; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v3, v0
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v2
+; GFX9GEN-NEXT: v_mac_f32_e32 v0, v3, v1
+; GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_f16lo_f32_flush_fmulfadd:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1791,6 +2111,14 @@ define float @v_mad_mix_f32_f16lo_f16lo_f32_flush_fmulfadd(half %src0, half %src
; GFX906-NEXT: v_fma_mix_f32 v0, v0, v1, v2 op_sel_hi:[1,1,0]
; GFX906-NEXT: s_setpc_b64 s[30:31]
;
+; GFX9GEN-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_flush_fmulfadd:
+; GFX9GEN: ; %bb.0:
+; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX9GEN-NEXT: v_mad_f32 v0, v0, v1, v2
+; GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; VI-LABEL: v_mad_mix_f32_f16lo_f16lo_f32_flush_fmulfadd:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1838,6 +2166,15 @@ define float @v_mad_mix_f32_negprecvtf16lo_f16lo_f16lo(i32 %src0.arg, half %src1
; GFX906-NEXT: v_fma_mix_f32 v0, -v0, v1, v2 op_sel_hi:[1,1,1]
; GFX906-NEXT: s_setpc_b64 s[30:31]
;
+; SDAG-GFX9GEN-LABEL: v_mad_mix_f32_negprecvtf16lo_f16lo_f16lo:
+; SDAG-GFX9GEN: ; %bb.0:
+; SDAG-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v0
+; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
+; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v2, v2
+; SDAG-GFX9GEN-NEXT: v_mad_f32 v0, -v0, v1, v2
+; SDAG-GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; SDAG-VI-LABEL: v_mad_mix_f32_negprecvtf16lo_f16lo_f16lo:
; SDAG-VI: ; %bb.0:
; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1854,6 +2191,15 @@ define float @v_mad_mix_f32_negprecvtf16lo_f16lo_f16lo(i32 %src0.arg, half %src1
; SDAG-CI-NEXT: v_mad_f32 v0, -v0, v1, v2
; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
;
+; GISEL-GFX9GEN-LABEL: v_mad_mix_f32_negprecvtf16lo_f16lo_f16lo:
+; GISEL-GFX9GEN: ; %bb.0:
+; GISEL-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e64 v3, -v0
+; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v2
+; GISEL-GFX9GEN-NEXT: v_mac_f32_e32 v0, v3, v1
+; GISEL-GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; GISEL-VI-LABEL: v_mad_mix_f32_negprecvtf16lo_f16lo_f16lo:
; GISEL-VI: ; %bb.0:
; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1910,6 +2256,15 @@ define float @v_mad_mix_f32_precvtnegf16hi_abs_f16lo_f16lo(i32 %src0.arg, half %
; GFX906-NEXT: v_fma_mix_f32 v0, |v0|, v1, v2 op_sel_hi:[1,1,1]
; GFX906-NEXT: s_setpc_b64 s[30:31]
;
+; GFX9GEN-LABEL: v_mad_mix_f32_precvtnegf16hi_abs_f16lo_f16lo:
+; GFX9GEN: ; %bb.0:
+; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v0, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v2, v2
+; GFX9GEN-NEXT: v_mad_f32 v0, |v0|, v1, v2
+; GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; VI-LABEL: v_mad_mix_f32_precvtnegf16hi_abs_f16lo_f16lo:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -1966,6 +2321,15 @@ define float @v_mad_mix_f32_precvtabsf16hi_f16lo_f16lo(i32 %src0.arg, half %src1
; GFX906-NEXT: v_fma_mix_f32 v0, |v0|, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1]
; GFX906-NEXT: s_setpc_b64 s[30:31]
;
+; GFX9GEN-LABEL: v_mad_mix_f32_precvtabsf16hi_f16lo_f16lo:
+; GFX9GEN: ; %bb.0:
+; GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v3, |v0| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v2
+; GFX9GEN-NEXT: v_mac_f32_e32 v0, v3, v1
+; GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; VI-LABEL: v_mad_mix_f32_precvtabsf16hi_f16lo_f16lo:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2021,6 +2385,15 @@ define float @v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo(i32 %src0.arg, half
; GFX906-NEXT: v_fma_mix_f32 v0, -v0, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1]
; GFX906-NEXT: s_setpc_b64 s[30:31]
;
+; SDAG-GFX9GEN-LABEL: v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo:
+; SDAG-GFX9GEN: ; %bb.0:
+; SDAG-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
+; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v2, v2
+; SDAG-GFX9GEN-NEXT: v_mad_f32 v0, -v0, v1, v2
+; SDAG-GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; SDAG-VI-LABEL: v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo:
; SDAG-VI: ; %bb.0:
; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2038,6 +2411,16 @@ define float @v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo(i32 %src0.arg, half
; SDAG-CI-NEXT: v_mad_f32 v0, v0, v1, v2
; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
;
+; GISEL-GFX9GEN-LABEL: v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo:
+; GISEL-GFX9GEN: ; %bb.0:
+; GISEL-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX9GEN-NEXT: v_xor_b32_e32 v0, 0x80008000, v0
+; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v2
+; GISEL-GFX9GEN-NEXT: v_mac_f32_e32 v0, v3, v1
+; GISEL-GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; GISEL-VI-LABEL: v_mad_mix_f32_preextractfneg_f16hi_f16lo_f16lo:
; GISEL-VI: ; %bb.0:
; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2087,6 +2470,15 @@ define float @v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo(i32 %src0.arg, half
; GFX906-NEXT: v_fma_mix_f32 v0, |v0|, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1]
; GFX906-NEXT: s_setpc_b64 s[30:31]
;
+; SDAG-GFX9GEN-LABEL: v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo:
+; SDAG-GFX9GEN: ; %bb.0:
+; SDAG-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v3, |v0| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
+; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v2
+; SDAG-GFX9GEN-NEXT: v_mac_f32_e32 v0, v3, v1
+; SDAG-GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; SDAG-VI-LABEL: v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo:
; SDAG-VI: ; %bb.0:
; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2104,6 +2496,16 @@ define float @v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo(i32 %src0.arg, half
; SDAG-CI-NEXT: v_mad_f32 v0, v0, v1, v2
; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
;
+; GISEL-GFX9GEN-LABEL: v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo:
+; GISEL-GFX9GEN: ; %bb.0:
+; GISEL-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX9GEN-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0
+; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v2
+; GISEL-GFX9GEN-NEXT: v_mac_f32_e32 v0, v3, v1
+; GISEL-GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; GISEL-VI-LABEL: v_mad_mix_f32_preextractfabs_f16hi_f16lo_f16lo:
; GISEL-VI: ; %bb.0:
; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2153,6 +2555,15 @@ define float @v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo(i32 %src0.arg,
; GFX906-NEXT: v_fma_mix_f32 v0, -|v0|, v1, v2 op_sel:[1,0,0] op_sel_hi:[1,1,1]
; GFX906-NEXT: s_setpc_b64 s[30:31]
;
+; SDAG-GFX9GEN-LABEL: v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo:
+; SDAG-GFX9GEN: ; %bb.0:
+; SDAG-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v0, |v0| dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
+; SDAG-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v2, v2
+; SDAG-GFX9GEN-NEXT: v_mad_f32 v0, -v0, v1, v2
+; SDAG-GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; SDAG-VI-LABEL: v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo:
; SDAG-VI: ; %bb.0:
; SDAG-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -2170,6 +2581,16 @@ define float @v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo(i32 %src0.arg,
; SDAG-CI-NEXT: v_mad_f32 v0, v0, v1, v2
; SDAG-CI-NEXT: s_setpc_b64 s[30:31]
;
+; GISEL-GFX9GEN-LABEL: v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo:
+; GISEL-GFX9GEN: ; %bb.0:
+; GISEL-GFX9GEN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-GFX9GEN-NEXT: v_or_b32_e32 v0, 0x80008000, v0
+; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
+; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v1, v1
+; GISEL-GFX9GEN-NEXT: v_cvt_f32_f16_e32 v0, v2
+; GISEL-GFX9GEN-NEXT: v_mac_f32_e32 v0, v3, v1
+; GISEL-GFX9GEN-NEXT: s_setpc_b64 s[30:31]
+;
; GISEL-VI-LABEL: v_mad_mix_f32_preextractfabsfneg_f16hi_f16lo_f16lo:
; GISEL-VI: ; %bb.0:
; GISEL-VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
More information about the llvm-commits
mailing list