[Openmp-commits] [openmp] 2e2e64d - [AMDGPU] Add gfx940 target
Stanislav Mekhanoshin via Openmp-commits
openmp-commits at lists.llvm.org
Wed Mar 2 13:54:59 PST 2022
Author: Stanislav Mekhanoshin
Date: 2022-03-02T13:54:48-08:00
New Revision: 2e2e64df4a4f6580499fa09c3c16d5c39d9dd4cb
URL: https://github.com/llvm/llvm-project/commit/2e2e64df4a4f6580499fa09c3c16d5c39d9dd4cb
DIFF: https://github.com/llvm/llvm-project/commit/2e2e64df4a4f6580499fa09c3c16d5c39d9dd4cb.diff
LOG: [AMDGPU] Add gfx940 target
This is target definition only.
Differential Revision: https://reviews.llvm.org/D120688
Added:
llvm/test/MC/AMDGPU/hsa-gfx940-v3.s
Modified:
clang/include/clang/Basic/Cuda.h
clang/lib/Basic/Cuda.cpp
clang/lib/Basic/Targets/AMDGPU.cpp
clang/lib/Basic/Targets/NVPTX.cpp
clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
clang/test/CodeGenOpenCL/amdgpu-features.cl
clang/test/Driver/amdgpu-macros.cl
clang/test/Driver/amdgpu-mcpu.cl
clang/test/Driver/cuda-bad-arch.cu
clang/test/Misc/target-invalid-cpu-note.c
llvm/docs/AMDGPUUsage.rst
llvm/include/llvm/BinaryFormat/ELF.h
llvm/include/llvm/Support/TargetParser.h
llvm/lib/Object/ELFObjectFile.cpp
llvm/lib/ObjectYAML/ELFYAML.cpp
llvm/lib/Support/TargetParser.cpp
llvm/lib/Target/AMDGPU/AMDGPU.td
llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
llvm/lib/Target/AMDGPU/GCNProcessors.td
llvm/lib/Target/AMDGPU/GCNSubtarget.h
llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
llvm/test/CodeGen/AMDGPU/directive-amdgcn-target.ll
llvm/test/CodeGen/AMDGPU/elf-header-flags-mach.ll
llvm/test/CodeGen/AMDGPU/elf-header-flags-sramecc.ll
llvm/test/CodeGen/AMDGPU/tid-code-object-v2-backwards-compatibility.ll
llvm/test/Object/AMDGPU/elf-header-flags-mach.yaml
llvm/test/tools/llvm-objdump/ELF/AMDGPU/subtarget.ll
llvm/test/tools/llvm-readobj/ELF/amdgpu-elf-headers.test
llvm/tools/llvm-readobj/ELFDumper.cpp
openmp/libomptarget/DeviceRTL/CMakeLists.txt
Removed:
################################################################################
diff --git a/clang/include/clang/Basic/Cuda.h b/clang/include/clang/Basic/Cuda.h
index 04967925e9a40..2596ec7b4e83f 100644
--- a/clang/include/clang/Basic/Cuda.h
+++ b/clang/include/clang/Basic/Cuda.h
@@ -85,6 +85,7 @@ enum class CudaArch {
GFX909,
GFX90a,
GFX90c,
+ GFX940,
GFX1010,
GFX1011,
GFX1012,
diff --git a/clang/lib/Basic/Cuda.cpp b/clang/lib/Basic/Cuda.cpp
index 2d75578b3de02..04ee577ec9aa2 100644
--- a/clang/lib/Basic/Cuda.cpp
+++ b/clang/lib/Basic/Cuda.cpp
@@ -113,6 +113,7 @@ static const CudaArchToStringMap arch_names[] = {
GFX(909), // gfx909
GFX(90a), // gfx90a
GFX(90c), // gfx90c
+ GFX(940), // gfx940
GFX(1010), // gfx1010
GFX(1011), // gfx1011
GFX(1012), // gfx1012
diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp b/clang/lib/Basic/Targets/AMDGPU.cpp
index ba7ffa34c73e3..a20c79dfae200 100644
--- a/clang/lib/Basic/Targets/AMDGPU.cpp
+++ b/clang/lib/Basic/Targets/AMDGPU.cpp
@@ -227,6 +227,9 @@ bool AMDGPUTargetInfo::initFeatureMap(
Features["s-memrealtime"] = true;
Features["s-memtime-inst"] = true;
break;
+ case GK_GFX940:
+ Features["gfx940-insts"] = true;
+ LLVM_FALLTHROUGH;
case GK_GFX90A:
Features["gfx90a-insts"] = true;
LLVM_FALLTHROUGH;
diff --git a/clang/lib/Basic/Targets/NVPTX.cpp b/clang/lib/Basic/Targets/NVPTX.cpp
index 75e82d8199009..f56d2d015333e 100644
--- a/clang/lib/Basic/Targets/NVPTX.cpp
+++ b/clang/lib/Basic/Targets/NVPTX.cpp
@@ -205,6 +205,7 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts,
case CudaArch::GFX909:
case CudaArch::GFX90a:
case CudaArch::GFX90c:
+ case CudaArch::GFX940:
case CudaArch::GFX1010:
case CudaArch::GFX1011:
case CudaArch::GFX1012:
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
index baf509d54fb38..aa8afbb47b95c 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
@@ -3916,6 +3916,7 @@ void CGOpenMPRuntimeGPU::processRequiresDirective(
case CudaArch::GFX909:
case CudaArch::GFX90a:
case CudaArch::GFX90c:
+ case CudaArch::GFX940:
case CudaArch::GFX1010:
case CudaArch::GFX1011:
case CudaArch::GFX1012:
diff --git a/clang/test/CodeGenOpenCL/amdgpu-features.cl b/clang/test/CodeGenOpenCL/amdgpu-features.cl
index a532da344df1a..55f569484560e 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-features.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-features.cl
@@ -25,6 +25,7 @@
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx909 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX909 %s
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx90a -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX90A %s
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx90c -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX90C %s
+// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx940 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX940 %s
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1010 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX1010 %s
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1011 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX1011 %s
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1012 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX1012 %s
@@ -58,6 +59,7 @@
// GFX909: "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
// GFX90A: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+mai-insts,+s-memrealtime,+s-memtime-inst"
// GFX90C: "target-features"="+16-bit-insts,+ci-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
+// GFX940: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst"
// GFX1010: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dpp,+flat-address-space,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
// GFX1011: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
// GFX1012: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst"
diff --git a/clang/test/Driver/amdgpu-macros.cl b/clang/test/Driver/amdgpu-macros.cl
index 965fdda67945f..e92ee68063103 100644
--- a/clang/test/Driver/amdgpu-macros.cl
+++ b/clang/test/Driver/amdgpu-macros.cl
@@ -107,6 +107,7 @@
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx909 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=64 -DCPU=gfx909
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx90a %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=64 -DCPU=gfx90a
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx90c %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=64 -DCPU=gfx90c
+// RUN: %clang -E -dM -target amdgcn -mcpu=gfx940 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=64 -DCPU=gfx940
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx1010 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx1010
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx1011 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx1011
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx1012 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx1012
diff --git a/clang/test/Driver/amdgpu-mcpu.cl b/clang/test/Driver/amdgpu-mcpu.cl
index 432533b10d9dc..edb5ec14d504f 100644
--- a/clang/test/Driver/amdgpu-mcpu.cl
+++ b/clang/test/Driver/amdgpu-mcpu.cl
@@ -92,6 +92,7 @@
// RUN: %clang -### -target amdgcn -mcpu=gfx909 %s 2>&1 | FileCheck --check-prefix=GFX909 %s
// RUN: %clang -### -target amdgcn -mcpu=gfx90a %s 2>&1 | FileCheck --check-prefix=GFX90A %s
// RUN: %clang -### -target amdgcn -mcpu=gfx90c %s 2>&1 | FileCheck --check-prefix=GFX90C %s
+// RUN: %clang -### -target amdgcn -mcpu=gfx940 %s 2>&1 | FileCheck --check-prefix=GFX940 %s
// RUN: %clang -### -target amdgcn -mcpu=gfx1010 %s 2>&1 | FileCheck --check-prefix=GFX1010 %s
// RUN: %clang -### -target amdgcn -mcpu=gfx1011 %s 2>&1 | FileCheck --check-prefix=GFX1011 %s
// RUN: %clang -### -target amdgcn -mcpu=gfx1012 %s 2>&1 | FileCheck --check-prefix=GFX1012 %s
@@ -126,6 +127,7 @@
// GFX909: "-target-cpu" "gfx909"
// GFX90A: "-target-cpu" "gfx90a"
// GFX90C: "-target-cpu" "gfx90c"
+// GFX940: "-target-cpu" "gfx940"
// GFX1010: "-target-cpu" "gfx1010"
// GFX1011: "-target-cpu" "gfx1011"
// GFX1012: "-target-cpu" "gfx1012"
diff --git a/clang/test/Driver/cuda-bad-arch.cu b/clang/test/Driver/cuda-bad-arch.cu
index 1857f141649bc..d376df8ab7ae0 100644
--- a/clang/test/Driver/cuda-bad-arch.cu
+++ b/clang/test/Driver/cuda-bad-arch.cu
@@ -29,6 +29,8 @@
// RUN: | FileCheck -check-prefix OK %s
// RUN: %clang -### -x hip -target x86_64-linux-gnu --cuda-gpu-arch=gfx90a -c %s 2>&1 \
// RUN: | FileCheck -check-prefix OK %s
+// RUN: %clang -### -target x86_64-linux-gnu --cuda-gpu-arch=gfx940 -c %s 2>&1 \
+// RUN: | FileCheck -check-prefix OK %s
// RUN: %clang -### -target x86_64-linux-gnu -c %s 2>&1 \
// RUN: | FileCheck -check-prefix OK %s
diff --git a/clang/test/Misc/target-invalid-cpu-note.c b/clang/test/Misc/target-invalid-cpu-note.c
index 9b3cb45f8e23c..d03b4f6918cdd 100644
--- a/clang/test/Misc/target-invalid-cpu-note.c
+++ b/clang/test/Misc/target-invalid-cpu-note.c
@@ -29,7 +29,7 @@
// RUN: not %clang_cc1 -triple nvptx--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix NVPTX
// NVPTX: error: unknown target CPU 'not-a-cpu'
-// NVPTX-NEXT: note: valid target CPU values are: sm_20, sm_21, sm_30, sm_32, sm_35, sm_37, sm_50, sm_52, sm_53, sm_60, sm_61, sm_62, sm_70, sm_72, sm_75, sm_80, sm_86, gfx600, gfx601, gfx602, gfx700, gfx701, gfx702, gfx703, gfx704, gfx705, gfx801, gfx802, gfx803, gfx805, gfx810, gfx900, gfx902, gfx904, gfx906, gfx908, gfx909, gfx90a, gfx90c, gfx1010, gfx1011, gfx1012, gfx1013, gfx1030, gfx1031, gfx1032, gfx1033, gfx1034, gfx1035{{$}}
+// NVPTX-NEXT: note: valid target CPU values are: sm_20, sm_21, sm_30, sm_32, sm_35, sm_37, sm_50, sm_52, sm_53, sm_60, sm_61, sm_62, sm_70, sm_72, sm_75, sm_80, sm_86, gfx600, gfx601, gfx602, gfx700, gfx701, gfx702, gfx703, gfx704, gfx705, gfx801, gfx802, gfx803, gfx805, gfx810, gfx900, gfx902, gfx904, gfx906, gfx908, gfx909, gfx90a, gfx90c, gfx940, gfx1010, gfx1011, gfx1012, gfx1013, gfx1030, gfx1031, gfx1032, gfx1033, gfx1034, gfx1035{{$}}
// RUN: not %clang_cc1 -triple r600--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix R600
// R600: error: unknown target CPU 'not-a-cpu'
@@ -37,7 +37,7 @@
// RUN: not %clang_cc1 -triple amdgcn--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix AMDGCN
// AMDGCN: error: unknown target CPU 'not-a-cpu'
-// AMDGCN-NEXT: note: valid target CPU values are: gfx600, tahiti, gfx601, pitcairn, verde, gfx602, hainan, oland, gfx700, kaveri, gfx701, hawaii, gfx702, gfx703, kabini, mullins, gfx704, bonaire, gfx705, gfx801, carrizo, gfx802, iceland, tonga, gfx803, fiji, polaris10, polaris11, gfx805, tongapro, gfx810, stoney, gfx900, gfx902, gfx904, gfx906, gfx908, gfx909, gfx90a, gfx90c, gfx1010, gfx1011, gfx1012, gfx1013, gfx1030, gfx1031, gfx1032, gfx1033, gfx1034, gfx1035{{$}}
+// AMDGCN-NEXT: note: valid target CPU values are: gfx600, tahiti, gfx601, pitcairn, verde, gfx602, hainan, oland, gfx700, kaveri, gfx701, hawaii, gfx702, gfx703, kabini, mullins, gfx704, bonaire, gfx705, gfx801, carrizo, gfx802, iceland, tonga, gfx803, fiji, polaris10, polaris11, gfx805, tongapro, gfx810, stoney, gfx900, gfx902, gfx904, gfx906, gfx908, gfx909, gfx90a, gfx90c, gfx940, gfx1010, gfx1011, gfx1012, gfx1013, gfx1030, gfx1031, gfx1032, gfx1033, gfx1034, gfx1035{{$}}
// RUN: not %clang_cc1 -triple wasm64--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix WEBASM
// WEBASM: error: unknown target CPU 'not-a-cpu'
diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst
index eccd7a14e8ea4..5fc90020b6f05 100644
--- a/llvm/docs/AMDGPUUsage.rst
+++ b/llvm/docs/AMDGPUUsage.rst
@@ -374,6 +374,13 @@ Every processor supports every OS ABI (see :ref:`amdgpu-os`) with the following
- Ryzen 3 Pro 4350G
- Ryzen 3 Pro 4350GE
+ ``gfx940`` ``amdgcn`` dGPU - sramecc - Architected *TBA*
+ - tgsplit flat
+ - xnack scratch .. TODO::
+ - Packed
+ work-item Add product
+ IDs names.
+
**GCN GFX10.1 (RDNA 1)** [AMD-GCN-GFX10-RDNA1]_
-----------------------------------------------------------------------------------------------------------------------
``gfx1010`` ``amdgcn`` dGPU - cumode - Absolute - *rocm-amdhsa* - Radeon RX 5700
@@ -1224,7 +1231,7 @@ The AMDGPU backend uses the following ELF header:
``EF_AMDGPU_MACH_AMDGCN_GFX1035`` 0x03d ``gfx1035``
``EF_AMDGPU_MACH_AMDGCN_GFX1034`` 0x03e ``gfx1034``
``EF_AMDGPU_MACH_AMDGCN_GFX90A`` 0x03f ``gfx90a``
- *reserved* 0x040 Reserved.
+ ``EF_AMDGPU_MACH_AMDGCN_GFX940`` 0x040 ``gfx940``
*reserved* 0x041 Reserved.
``EF_AMDGPU_MACH_AMDGCN_GFX1013`` 0x042 ``gfx1013``
*reserved* 0x043 Reserved.
@@ -3866,7 +3873,7 @@ The fields used by CP for code objects before V3 also match those specified in
bytes
383:352 4 bytes COMPUTE_PGM_RSRC3 GFX6-GFX9
Reserved, must be 0.
- GFX90A
+ GFX90A, GFX940
Compute Shader (CS)
program settings used by
CP to set up
@@ -3960,7 +3967,7 @@ The fields used by CP for code objects before V3 also match those specified in
GFX6-GFX9
- vgprs_used 0..256
- max(0, ceil(vgprs_used / 4) - 1)
- GFX90A
+ GFX90A, GFX940
- vgprs_used 0..512
- vgprs_used = align(arch_vgprs, 4)
+ acc_vgprs
@@ -4408,7 +4415,7 @@ The fields used by CP for code objects before V3 also match those specified in
..
- .. table:: compute_pgm_rsrc3 for GFX90A
+ .. table:: compute_pgm_rsrc3 for GFX90A, GFX940
:name: amdgpu-amdhsa-compute_pgm_rsrc3-gfx90a-table
======= ======= =============================== ===========================================================================
@@ -12268,7 +12275,8 @@ terminated by an ``.end_amdhsa_kernel`` directive.
``.amdhsa_user_sgpr_count`` 0 GFX6-GFX10 Controls USER_SGPR_COUNT in COMPUTE_PGM_RSRC2
:ref:`amdgpu-amdhsa-compute_pgm_rsrc2-gfx6-gfx10-table`
``.amdhsa_user_sgpr_private_segment_buffer`` 0 GFX6-GFX10 Controls ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER in
- :ref:`amdgpu-amdhsa-kernel-descriptor-v3-table`.
+ (except :ref:`amdgpu-amdhsa-kernel-descriptor-v3-table`.
+ GFX940)
``.amdhsa_user_sgpr_dispatch_ptr`` 0 GFX6-GFX10 Controls ENABLE_SGPR_DISPATCH_PTR in
:ref:`amdgpu-amdhsa-kernel-descriptor-v3-table`.
``.amdhsa_user_sgpr_queue_ptr`` 0 GFX6-GFX10 Controls ENABLE_SGPR_QUEUE_PTR in
@@ -12278,7 +12286,8 @@ terminated by an ``.end_amdhsa_kernel`` directive.
``.amdhsa_user_sgpr_dispatch_id`` 0 GFX6-GFX10 Controls ENABLE_SGPR_DISPATCH_ID in
:ref:`amdgpu-amdhsa-kernel-descriptor-v3-table`.
``.amdhsa_user_sgpr_flat_scratch_init`` 0 GFX6-GFX10 Controls ENABLE_SGPR_FLAT_SCRATCH_INIT in
- :ref:`amdgpu-amdhsa-kernel-descriptor-v3-table`.
+ (except :ref:`amdgpu-amdhsa-kernel-descriptor-v3-table`.
+ GFX940)
``.amdhsa_user_sgpr_private_segment_size`` 0 GFX6-GFX10 Controls ENABLE_SGPR_PRIVATE_SEGMENT_SIZE in
:ref:`amdgpu-amdhsa-kernel-descriptor-v3-table`.
``.amdhsa_wavefront_size32`` Target GFX10 Controls ENABLE_WAVEFRONT_SIZE32 in
@@ -12286,6 +12295,9 @@ terminated by an ``.end_amdhsa_kernel`` directive.
Specific
(wavefrontsize64)
``.amdhsa_system_sgpr_private_segment_wavefront_offset`` 0 GFX6-GFX10 Controls ENABLE_PRIVATE_SEGMENT in
+ (except :ref:`amdgpu-amdhsa-compute_pgm_rsrc2-gfx6-gfx10-table`.
+ GFX940)
+ ``.amdhsa_enable_private_segment`` 0 GFX940 Controls ENABLE_PRIVATE_SEGMENT in
:ref:`amdgpu-amdhsa-compute_pgm_rsrc2-gfx6-gfx10-table`.
``.amdhsa_system_sgpr_workgroup_id_x`` 1 GFX6-GFX10 Controls ENABLE_SGPR_WORKGROUP_ID_X in
:ref:`amdgpu-amdhsa-compute_pgm_rsrc2-gfx6-gfx10-table`.
@@ -12305,15 +12317,15 @@ terminated by an ``.end_amdhsa_kernel`` directive.
``.amdhsa_next_free_sgpr`` Required GFX6-GFX10 Maximum SGPR number explicitly referenced, plus one.
Used to calculate GRANULATED_WAVEFRONT_SGPR_COUNT in
:ref:`amdgpu-amdhsa-compute_pgm_rsrc1-gfx6-gfx10-table`.
- ``.amdhsa_accum_offset`` Required GFX90A Offset of a first AccVGPR in the unified register file.
- Used to calculate ACCUM_OFFSET in
+ ``.amdhsa_accum_offset`` Required GFX90A, Offset of a first AccVGPR in the unified register file.
+ GFX940 Used to calculate ACCUM_OFFSET in
:ref:`amdgpu-amdhsa-compute_pgm_rsrc3-gfx90a-table`.
``.amdhsa_reserve_vcc`` 1 GFX6-GFX10 Whether the kernel may use the special VCC SGPR.
Used to calculate GRANULATED_WAVEFRONT_SGPR_COUNT in
:ref:`amdgpu-amdhsa-compute_pgm_rsrc1-gfx6-gfx10-table`.
``.amdhsa_reserve_flat_scratch`` 1 GFX7-GFX10 Whether the kernel may use flat instructions to access
- scratch memory. Used to calculate
- GRANULATED_WAVEFRONT_SGPR_COUNT in
+ (except scratch memory. Used to calculate
+ GFX940) GRANULATED_WAVEFRONT_SGPR_COUNT in
:ref:`amdgpu-amdhsa-compute_pgm_rsrc1-gfx6-gfx10-table`.
``.amdhsa_reserve_xnack_mask`` Target GFX8-GFX10 Whether the kernel may trigger XNACK replay.
Feature Used to calculate GRANULATED_WAVEFRONT_SGPR_COUNT in
@@ -12341,8 +12353,8 @@ terminated by an ``.end_amdhsa_kernel`` directive.
:ref:`amdgpu-amdhsa-compute_pgm_rsrc1-gfx6-gfx10-table`.
``.amdhsa_fp16_overflow`` 0 GFX9-GFX10 Controls FP16_OVFL in
:ref:`amdgpu-amdhsa-compute_pgm_rsrc1-gfx6-gfx10-table`.
- ``.amdhsa_tg_split`` Target GFX90A Controls TG_SPLIT in
- Feature :ref:`amdgpu-amdhsa-compute_pgm_rsrc3-gfx90a-table`.
+ ``.amdhsa_tg_split`` Target GFX90A, Controls TG_SPLIT in
+ Feature GFX940 :ref:`amdgpu-amdhsa-compute_pgm_rsrc3-gfx90a-table`.
Specific
(tgsplit)
``.amdhsa_workgroup_processor_mode`` Target GFX10 Controls ENABLE_WGP_MODE in
diff --git a/llvm/include/llvm/BinaryFormat/ELF.h b/llvm/include/llvm/BinaryFormat/ELF.h
index bc34f94cf1d1b..d6e76be8d18ac 100644
--- a/llvm/include/llvm/BinaryFormat/ELF.h
+++ b/llvm/include/llvm/BinaryFormat/ELF.h
@@ -754,7 +754,7 @@ enum : unsigned {
EF_AMDGPU_MACH_AMDGCN_GFX1035 = 0x03d,
EF_AMDGPU_MACH_AMDGCN_GFX1034 = 0x03e,
EF_AMDGPU_MACH_AMDGCN_GFX90A = 0x03f,
- EF_AMDGPU_MACH_AMDGCN_RESERVED_0X40 = 0x040,
+ EF_AMDGPU_MACH_AMDGCN_GFX940 = 0x040,
EF_AMDGPU_MACH_AMDGCN_RESERVED_0X41 = 0x041,
EF_AMDGPU_MACH_AMDGCN_GFX1013 = 0x042,
EF_AMDGPU_MACH_AMDGCN_RESERVED_0X43 = 0x043,
diff --git a/llvm/include/llvm/Support/TargetParser.h b/llvm/include/llvm/Support/TargetParser.h
index d4880d638cf72..23d953e0d026a 100644
--- a/llvm/include/llvm/Support/TargetParser.h
+++ b/llvm/include/llvm/Support/TargetParser.h
@@ -86,6 +86,7 @@ enum GPUKind : uint32_t {
GK_GFX909 = 65,
GK_GFX90A = 66,
GK_GFX90C = 67,
+ GK_GFX940 = 68,
GK_GFX1010 = 71,
GK_GFX1011 = 72,
diff --git a/llvm/lib/Object/ELFObjectFile.cpp b/llvm/lib/Object/ELFObjectFile.cpp
index 0d2ee6210dd05..239ca4160f0cf 100644
--- a/llvm/lib/Object/ELFObjectFile.cpp
+++ b/llvm/lib/Object/ELFObjectFile.cpp
@@ -459,6 +459,8 @@ StringRef ELFObjectFileBase::getAMDGPUCPUName() const {
return "gfx90a";
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX90C:
return "gfx90c";
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX940:
+ return "gfx940";
// AMDGCN GFX10.
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010:
diff --git a/llvm/lib/ObjectYAML/ELFYAML.cpp b/llvm/lib/ObjectYAML/ELFYAML.cpp
index 96c13ea1544fc..c3cc84b3952ee 100644
--- a/llvm/lib/ObjectYAML/ELFYAML.cpp
+++ b/llvm/lib/ObjectYAML/ELFYAML.cpp
@@ -563,6 +563,7 @@ void ScalarBitSetTraits<ELFYAML::ELF_EF>::bitset(IO &IO,
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX909, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX90A, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX90C, EF_AMDGPU_MACH);
+ BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX940, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1010, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1011, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1012, EF_AMDGPU_MACH);
diff --git a/llvm/lib/Support/TargetParser.cpp b/llvm/lib/Support/TargetParser.cpp
index b3eb43e2038bf..24d14580945c3 100644
--- a/llvm/lib/Support/TargetParser.cpp
+++ b/llvm/lib/Support/TargetParser.cpp
@@ -104,6 +104,7 @@ constexpr GPUInfo AMDGCNGPUs[] = {
{{"gfx909"}, {"gfx909"}, GK_GFX909, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK},
{{"gfx90a"}, {"gfx90a"}, GK_GFX90A, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC},
{{"gfx90c"}, {"gfx90c"}, GK_GFX90C, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK},
+ {{"gfx940"}, {"gfx940"}, GK_GFX940, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC},
{{"gfx1010"}, {"gfx1010"}, GK_GFX1010, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK},
{{"gfx1011"}, {"gfx1011"}, GK_GFX1011, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK},
{{"gfx1012"}, {"gfx1012"}, GK_GFX1012, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK},
@@ -217,6 +218,7 @@ AMDGPU::IsaVersion AMDGPU::getIsaVersion(StringRef GPU) {
case GK_GFX909: return {9, 0, 9};
case GK_GFX90A: return {9, 0, 10};
case GK_GFX90C: return {9, 0, 12};
+ case GK_GFX940: return {9, 4, 0};
case GK_GFX1010: return {10, 1, 0};
case GK_GFX1011: return {10, 1, 1};
case GK_GFX1012: return {10, 1, 2};
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 1fb51a54826e3..c34552da5727f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -313,6 +313,12 @@ def FeatureGFX90AInsts : SubtargetFeature<"gfx90a-insts",
"Additional instructions for GFX90A+"
>;
+def FeatureGFX940Insts : SubtargetFeature<"gfx940-insts",
+ "GFX940Insts",
+ "true",
+ "Additional instructions for GFX940+"
+>;
+
def FeatureGFX10Insts : SubtargetFeature<"gfx10-insts",
"GFX10Insts",
"true",
@@ -1016,6 +1022,30 @@ def FeatureISAVersion9_0_C : FeatureSet<
FeatureMadMacF32Insts,
FeatureImageGather4D16Bug]>;
+def FeatureISAVersion9_4_0 : FeatureSet<
+ [FeatureGFX9,
+ FeatureGFX90AInsts,
+ FeatureGFX940Insts,
+ FeatureFmaMixInsts,
+ FeatureLDSBankCount32,
+ FeatureDLInsts,
+ FeatureDot1Insts,
+ FeatureDot2Insts,
+ FeatureDot3Insts,
+ FeatureDot4Insts,
+ FeatureDot5Insts,
+ FeatureDot6Insts,
+ FeatureDot7Insts,
+ Feature64BitDPP,
+ FeaturePackedFP32Ops,
+ FeatureMAIInsts,
+ FeaturePkFmacF16Inst,
+ FeatureAtomicFaddInsts,
+ FeatureSupportsSRAMECC,
+ FeaturePackedTID,
+ FeatureArchitectedFlatScratch,
+ FullRate64Ops]>;
+
// TODO: Organize more features into groups.
def FeatureGroup {
// Bugs present on gfx10.1.
@@ -1293,12 +1323,22 @@ def isGFX8GFX9NotGFX90A :
AssemblerPredicate<(all_of FeatureGFX8Insts, FeatureGCN3Encoding, (not FeatureGFX90AInsts))>;
def isGFX90AOnly :
- Predicate<"Subtarget->hasGFX90AInsts()">,
- AssemblerPredicate<(all_of FeatureGFX90AInsts)>;
+ Predicate<"Subtarget->hasGFX90AInsts() && !Subtarget->hasGFX940Insts()">,
+ AssemblerPredicate<(all_of FeatureGFX90AInsts, (not FeatureGFX940Insts))>;
def isGFX908orGFX90A :
- Predicate<"Subtarget->hasMAIInsts()">,
- AssemblerPredicate<(all_of FeatureMAIInsts)>;
+ Predicate<"Subtarget->hasMAIInsts() && !Subtarget->hasGFX940Insts()">,
+ AssemblerPredicate<(all_of FeatureMAIInsts, (not FeatureGFX940Insts))>;
+
+def isGFX940Plus :
+ Predicate<"Subtarget->hasGFX940Insts()">,
+ AssemblerPredicate<(all_of FeatureGFX940Insts)>;
+
+def isGFX8GFX9NotGFX940 :
+ Predicate<"!Subtarget->hasGFX940Insts() &&"
+ "(Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS ||"
+ " Subtarget->getGeneration() == AMDGPUSubtarget::GFX9)">,
+ AssemblerPredicate<(all_of FeatureGFX8Insts, FeatureGCN3Encoding, (not FeatureGFX940Insts))>;
def isGFX8GFX9 :
Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS ||"
@@ -1327,7 +1367,7 @@ def HasD16LoadStore : Predicate<"Subtarget->hasD16LoadStore()">,
AssemblerPredicate<(all_of FeatureGFX9Insts)>;
def HasFlatScratchSTMode : Predicate<"Subtarget->hasFlatScratchSTMode()">,
- AssemblerPredicate<(any_of FeatureGFX10_3Insts)>;
+ AssemblerPredicate<(any_of FeatureGFX10_3Insts, FeatureGFX940Insts)>;
def HasGFX10_AEncoding : Predicate<"Subtarget->hasGFX10_AEncoding()">,
AssemblerPredicate<(all_of FeatureGFX10_AEncoding)>;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index f01143c5d7d3c..521e5880c6e29 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -216,6 +216,7 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
GFX8Insts(false),
GFX9Insts(false),
GFX90AInsts(false),
+ GFX940Insts(false),
GFX10Insts(false),
GFX10_3Insts(false),
GFX7GFX8GFX9Insts(false),
diff --git a/llvm/lib/Target/AMDGPU/GCNProcessors.td b/llvm/lib/Target/AMDGPU/GCNProcessors.td
index 3a68ed1934e15..aac1229c1ef4b 100644
--- a/llvm/lib/Target/AMDGPU/GCNProcessors.td
+++ b/llvm/lib/Target/AMDGPU/GCNProcessors.td
@@ -192,6 +192,10 @@ def : ProcessorModel<"gfx90c", SIQuarterSpeedModel,
FeatureISAVersion9_0_C.Features
>;
+def : ProcessorModel<"gfx940", SIDPFullSpeedModel,
+ FeatureISAVersion9_4_0.Features
+>;
+
//===----------------------------------------------------------------------===//
// GCN GFX10.
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index d60960f9d98cd..5e2b01238c7ed 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -101,6 +101,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool GFX8Insts;
bool GFX9Insts;
bool GFX90AInsts;
+ bool GFX940Insts;
bool GFX10Insts;
bool GFX10_3Insts;
bool GFX7GFX8GFX9Insts;
@@ -559,7 +560,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
// The ST addressing mode means no registers are used, either VGPR or SGPR,
// but only immediate offset is swizzled and added to the FLAT scratch base.
bool hasFlatScratchSTMode() const {
- return hasFlatScratchInsts() && hasGFX10_3Insts();
+ return hasFlatScratchInsts() && (hasGFX10_3Insts() || hasGFX940Insts());
}
bool hasScalarFlatScratchInsts() const {
@@ -962,6 +963,10 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool hasPackedTID() const { return HasPackedTID; }
+ // GFX940 is a derivation to GFX90A. hasGFX940Insts() being true implies that
+ // hasGFX90AInsts is also true.
+ bool hasGFX940Insts() const { return GFX940Insts; }
+
/// Return the maximum number of waves per SIMD for kernels using \p SGPRs
/// SGPRs
unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const;
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
index ef73ae237d615..3412f23671a4c 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
@@ -106,6 +106,7 @@ StringRef AMDGPUTargetStreamer::getArchNameFromElfMach(unsigned ElfMach) {
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX909: AK = GK_GFX909; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX90A: AK = GK_GFX90A; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX90C: AK = GK_GFX90C; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX940: AK = GK_GFX940; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010: AK = GK_GFX1010; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1011: AK = GK_GFX1011; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1012: AK = GK_GFX1012; break;
@@ -169,6 +170,7 @@ unsigned AMDGPUTargetStreamer::getElfMach(StringRef GPU) {
case GK_GFX909: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX909;
case GK_GFX90A: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX90A;
case GK_GFX90C: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX90C;
+ case GK_GFX940: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX940;
case GK_GFX1010: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010;
case GK_GFX1011: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1011;
case GK_GFX1012: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1012;
diff --git a/llvm/test/CodeGen/AMDGPU/directive-amdgcn-target.ll b/llvm/test/CodeGen/AMDGPU/directive-amdgcn-target.ll
index a922c3fe8cb65..fb5fd3896c9c9 100644
--- a/llvm/test/CodeGen/AMDGPU/directive-amdgcn-target.ll
+++ b/llvm/test/CodeGen/AMDGPU/directive-amdgcn-target.ll
@@ -71,6 +71,9 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx90c < %s | FileCheck --check-prefixes=V3-GFX90C-XNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx90c -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX90C-NOXNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx90c -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX90C-XNACK %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx940 < %s | FileCheck --check-prefixes=V3-GFX940-XNACK %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx940 -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX940-NOXNACK %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx940 -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX940-XNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx1010 < %s | FileCheck --check-prefixes=V3-GFX1010-XNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx1010 -mattr=-xnack < %s | FileCheck --check-prefixes=V3-GFX1010-NOXNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa --amdhsa-code-object-version=3 -mcpu=gfx1010 -mattr=+xnack < %s | FileCheck --check-prefixes=V3-GFX1010-XNACK %s
@@ -163,6 +166,9 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90c < %s | FileCheck --check-prefixes=GFX90C %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90c -mattr=-xnack < %s | FileCheck --check-prefixes=GFX90C-NOXNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90c -mattr=+xnack < %s | FileCheck --check-prefixes=GFX90C-XNACK %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck --check-prefixes=GFX940 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -mattr=-xnack < %s | FileCheck --check-prefixes=GFX940-NOXNACK %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -mattr=+xnack < %s | FileCheck --check-prefixes=GFX940-XNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 < %s | FileCheck --check-prefixes=GFX1010 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=-xnack < %s | FileCheck --check-prefixes=GFX1010-NOXNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+xnack < %s | FileCheck --check-prefixes=GFX1010-XNACK %s
@@ -216,6 +222,8 @@
; V3-GFX909-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx909+xnack"
; V3-GFX90C-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx90c"
; V3-GFX90C-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx90c+xnack"
+; V3-GFX940-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx940+sram-ecc"
+; V3-GFX940-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx940+xnack+sram-ecc"
; V3-GFX1010-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1010"
; V3-GFX1010-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1010+xnack"
; V3-GFX1011-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1011"
@@ -282,6 +290,9 @@
; GFX90C: .amdgcn_target "amdgcn-amd-amdhsa--gfx90c"
; GFX90C-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx90c:xnack-"
; GFX90C-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx90c:xnack+"
+; GFX940: .amdgcn_target "amdgcn-amd-amdhsa--gfx940"
+; GFX940-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx940:xnack-"
+; GFX940-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx940:xnack+"
; GFX1010: .amdgcn_target "amdgcn-amd-amdhsa--gfx1010"
; GFX1010-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1010:xnack-"
; GFX1010-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1010:xnack+"
diff --git a/llvm/test/CodeGen/AMDGPU/elf-header-flags-mach.ll b/llvm/test/CodeGen/AMDGPU/elf-header-flags-mach.ll
index 329dbc6d024e7..68b7bd630d5cf 100644
--- a/llvm/test/CodeGen/AMDGPU/elf-header-flags-mach.ll
+++ b/llvm/test/CodeGen/AMDGPU/elf-header-flags-mach.ll
@@ -54,6 +54,7 @@
; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx909 < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX909 %s
; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx90a < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX90A %s
; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx90c < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX90C %s
+; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx940 < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX940 %s
; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx1010 < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX1010 %s
; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx1011 < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX1011 %s
; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx1012 < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX1012 %s
@@ -115,6 +116,7 @@
; GFX909: EF_AMDGPU_MACH_AMDGCN_GFX909 (0x31)
; GFX90A: EF_AMDGPU_MACH_AMDGCN_GFX90A (0x3F)
; GFX90C: EF_AMDGPU_MACH_AMDGCN_GFX90C (0x32)
+; GFX940: EF_AMDGPU_MACH_AMDGCN_GFX940 (0x40)
; GFX1010: EF_AMDGPU_MACH_AMDGCN_GFX1010 (0x33)
; GFX1011: EF_AMDGPU_MACH_AMDGCN_GFX1011 (0x34)
; GFX1012: EF_AMDGPU_MACH_AMDGCN_GFX1012 (0x35)
diff --git a/llvm/test/CodeGen/AMDGPU/elf-header-flags-sramecc.ll b/llvm/test/CodeGen/AMDGPU/elf-header-flags-sramecc.ll
index 144061dfdf829..62a08cf7ad19b 100644
--- a/llvm/test/CodeGen/AMDGPU/elf-header-flags-sramecc.ll
+++ b/llvm/test/CodeGen/AMDGPU/elf-header-flags-sramecc.ll
@@ -9,6 +9,9 @@
; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx90a < %s | llvm-readobj --file-header - | FileCheck --check-prefix=SRAM-ECC-GFX90A %s
; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx90a < %s | llvm-readobj --file-header - | FileCheck --check-prefix=SRAM-ECC-GFX90A %s
+; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx940 < %s | llvm-readobj --file-header - | FileCheck --check-prefix=SRAM-ECC-GFX940 %s
+; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx940 -mattr=+sramecc < %s | llvm-readobj --file-header - | FileCheck --check-prefix=SRAM-ECC-GFX940 %s
+
; NO-SRAM-ECC-GFX906: Flags [
; NO-SRAM-ECC-GFX906-NEXT: EF_AMDGPU_FEATURE_XNACK_V3 (0x100)
; NO-SRAM-ECC-GFX906-NEXT: EF_AMDGPU_MACH_AMDGCN_GFX906 (0x2F)
@@ -36,6 +39,11 @@
; SRAM-ECC-GFX90A: EF_AMDGPU_MACH_AMDGCN_GFX90A (0x3F)
; SRAM-ECC-GFX90A: ]
+; SRAM-ECC-GFX940: Flags [
+; SRAM-ECC-GFX940: EF_AMDGPU_FEATURE_SRAMECC_V3 (0x200)
+; SRAM-ECC-GFX940: EF_AMDGPU_MACH_AMDGCN_GFX940 (0x40)
+; SRAM-ECC-GFX940: ]
+
define amdgpu_kernel void @elf_header() {
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/tid-code-object-v2-backwards-compatibility.ll b/llvm/test/CodeGen/AMDGPU/tid-code-object-v2-backwards-compatibility.ll
index e8654f2efb6d1..9d060759a84be 100644
--- a/llvm/test/CodeGen/AMDGPU/tid-code-object-v2-backwards-compatibility.ll
+++ b/llvm/test/CodeGen/AMDGPU/tid-code-object-v2-backwards-compatibility.ll
@@ -1,6 +1,9 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90c -mattr=-xnack --amdhsa-code-object-version=2 < %s 2>&1 | FileCheck --check-prefix=GFX90C-VALID %s
; RUN: not --crash llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90c --amdhsa-code-object-version=2 < %s 2>&1 | FileCheck --check-prefix=GFX90C-ERROR %s
+; RUN: not --crash llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 --amdhsa-code-object-version=2 < %s 2>&1 | FileCheck --check-prefix=GFX940-ERROR %s
; GFX90C-VALID: .hsa_code_object_isa 9,0,12,"AMD","AMDGPU"
; GFX90C-VALID: .amd_amdgpu_isa "amdgcn-amd-amdhsa--gfx90c"
; GFX90C-ERROR: LLVM ERROR: AMD GPU code object V2 does not support processor gfx90c with XNACK being ON or ANY
+
+; GFX940-ERROR: LLVM ERROR: AMD GPU code object V2 does not support processor gfx940
diff --git a/llvm/test/MC/AMDGPU/hsa-gfx940-v3.s b/llvm/test/MC/AMDGPU/hsa-gfx940-v3.s
new file mode 100644
index 0000000000000..d446513a65d88
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/hsa-gfx940-v3.s
@@ -0,0 +1,176 @@
+// RUN: llvm-mc --amdhsa-code-object-version=3 -triple amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck --check-prefix=ASM %s
+// RUN: llvm-mc --amdhsa-code-object-version=3 -triple amdgcn-amd-amdhsa -mcpu=gfx940 -filetype=obj < %s > %t
+// RUN: llvm-readelf -S -r -s %t | FileCheck --check-prefix=READOBJ %s
+// RUN: llvm-objdump -s -j .rodata %t | FileCheck --check-prefix=OBJDUMP %s
+
+// big endian not supported
+// XFAIL: powerpc-, powerpc64-, s390x, mips-, mips64-, sparc
+
+// READOBJ: Section Headers
+// READOBJ: .text PROGBITS {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9a-f]+}} {{[0-9]+}} AX {{[0-9]+}} {{[0-9]+}} 256
+// READOBJ: .rodata PROGBITS {{[0-9a-f]+}} {{[0-9a-f]+}} 000080 {{[0-9]+}} A {{[0-9]+}} {{[0-9]+}} 64
+
+// READOBJ: Relocation section '.rela.rodata' at offset
+// READOBJ: 0000000000000010 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 10
+// READOBJ: 0000000000000050 {{[0-9a-f]+}}00000005 R_AMDGPU_REL64 0000000000000000 .text + 110
+
+// READOBJ: Symbol table '.symtab' contains {{[0-9]+}} entries:
+// READOBJ-DAG: {{[0-9]+}}: 0000000000000100 0 FUNC LOCAL PROTECTED 2 complete
+// READOBJ-DAG: {{[0-9]+}}: 0000000000000040 64 OBJECT LOCAL DEFAULT 3 complete.kd
+// READOBJ-DAG: {{[0-9]+}}: 0000000000000000 0 FUNC LOCAL PROTECTED 2 minimal
+// READOBJ-DAG: {{[0-9]+}}: 0000000000000000 64 OBJECT LOCAL DEFAULT 3 minimal.kd
+
+// OBJDUMP: Contents of section .rodata
+// Note, relocation for KERNEL_CODE_ENTRY_BYTE_OFFSET is not resolved here.
+// minimal
+// OBJDUMP-NEXT: 0000 00000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 0010 00000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 0020 00000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 0030 0000ac00 80000000 00000000 00000000
+// complete
+// OBJDUMP-NEXT: 0040 01000000 01000000 00000000 00000000
+// OBJDUMP-NEXT: 0050 00000000 00000000 00000000 00000000
+// OBJDUMP-NEXT: 0060 00000000 00000000 00000000 00000100
+// OBJDUMP-NEXT: 0070 01510104 130f007f 5e000000 00000000
+
+.text
+// ASM: .text
+
+.amdgcn_target "amdgcn-amd-amdhsa--gfx940+xnack+sram-ecc"
+// ASM: .amdgcn_target "amdgcn-amd-amdhsa--gfx940+xnack+sram-ecc"
+
+.p2align 8
+.type minimal, at function
+minimal:
+ s_endpgm
+
+.p2align 8
+.type complete, at function
+complete:
+ s_endpgm
+
+.rodata
+// ASM: .rodata
+
+// Test that only specifying required directives is allowed, and that defaulted
+// values are omitted.
+.p2align 6
+.amdhsa_kernel minimal
+ .amdhsa_next_free_vgpr 0
+ .amdhsa_next_free_sgpr 0
+ .amdhsa_accum_offset 4
+.end_amdhsa_kernel
+
+// ASM: .amdhsa_kernel minimal
+// ASM: .amdhsa_next_free_vgpr 0
+// ASM-NEXT: .amdhsa_next_free_sgpr 0
+// ASM-NEXT: .amdhsa_accum_offset 4
+// ASM: .amdhsa_tg_split 0
+// ASM: .end_amdhsa_kernel
+
+// Test that we can specify all available directives with non-default values.
+.p2align 6
+.amdhsa_kernel complete
+ .amdhsa_group_segment_fixed_size 1
+ .amdhsa_private_segment_fixed_size 1
+ .amdhsa_user_sgpr_dispatch_ptr 1
+ .amdhsa_user_sgpr_queue_ptr 1
+ .amdhsa_user_sgpr_kernarg_segment_ptr 1
+ .amdhsa_user_sgpr_dispatch_id 1
+ .amdhsa_user_sgpr_private_segment_size 1
+ .amdhsa_enable_private_segment 1
+ .amdhsa_system_sgpr_workgroup_id_x 0
+ .amdhsa_system_sgpr_workgroup_id_y 1
+ .amdhsa_system_sgpr_workgroup_id_z 1
+ .amdhsa_system_sgpr_workgroup_info 1
+ .amdhsa_system_vgpr_workitem_id 1
+ .amdhsa_next_free_vgpr 9
+ .amdhsa_next_free_sgpr 27
+ .amdhsa_accum_offset 4
+ .amdhsa_reserve_vcc 0
+ .amdhsa_float_round_mode_32 1
+ .amdhsa_float_round_mode_16_64 1
+ .amdhsa_float_denorm_mode_32 1
+ .amdhsa_float_denorm_mode_16_64 0
+ .amdhsa_dx10_clamp 0
+ .amdhsa_ieee_mode 0
+ .amdhsa_fp16_overflow 1
+ .amdhsa_tg_split 1
+ .amdhsa_exception_fp_ieee_invalid_op 1
+ .amdhsa_exception_fp_denorm_src 1
+ .amdhsa_exception_fp_ieee_div_zero 1
+ .amdhsa_exception_fp_ieee_overflow 1
+ .amdhsa_exception_fp_ieee_underflow 1
+ .amdhsa_exception_fp_ieee_inexact 1
+ .amdhsa_exception_int_div_zero 1
+.end_amdhsa_kernel
+
+// ASM: .amdhsa_kernel complete
+// ASM-NEXT: .amdhsa_group_segment_fixed_size 1
+// ASM-NEXT: .amdhsa_private_segment_fixed_size 1
+// ASM-NEXT: .amdhsa_kernarg_size 0
+// ASM-NEXT: .amdhsa_user_sgpr_count 9
+// ASM-NEXT: .amdhsa_user_sgpr_dispatch_ptr 1
+// ASM-NEXT: .amdhsa_user_sgpr_queue_ptr 1
+// ASM-NEXT: .amdhsa_user_sgpr_kernarg_segment_ptr 1
+// ASM-NEXT: .amdhsa_user_sgpr_dispatch_id 1
+// ASM-NEXT: .amdhsa_user_sgpr_private_segment_size 1
+// ASM-NEXT: .amdhsa_enable_private_segment 1
+// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_x 0
+// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_y 1
+// ASM-NEXT: .amdhsa_system_sgpr_workgroup_id_z 1
+// ASM-NEXT: .amdhsa_system_sgpr_workgroup_info 1
+// ASM-NEXT: .amdhsa_system_vgpr_workitem_id 1
+// ASM-NEXT: .amdhsa_next_free_vgpr 9
+// ASM-NEXT: .amdhsa_next_free_sgpr 27
+// ASM-NEXT: .amdhsa_accum_offset 4
+// ASM-NEXT: .amdhsa_reserve_vcc 0
+// ASM-NEXT: .amdhsa_reserve_xnack_mask 1
+// ASM-NEXT: .amdhsa_float_round_mode_32 1
+// ASM-NEXT: .amdhsa_float_round_mode_16_64 1
+// ASM-NEXT: .amdhsa_float_denorm_mode_32 1
+// ASM-NEXT: .amdhsa_float_denorm_mode_16_64 0
+// ASM-NEXT: .amdhsa_dx10_clamp 0
+// ASM-NEXT: .amdhsa_ieee_mode 0
+// ASM-NEXT: .amdhsa_fp16_overflow 1
+// ASM-NEXT: .amdhsa_tg_split 1
+// ASM-NEXT: .amdhsa_exception_fp_ieee_invalid_op 1
+// ASM-NEXT: .amdhsa_exception_fp_denorm_src 1
+// ASM-NEXT: .amdhsa_exception_fp_ieee_div_zero 1
+// ASM-NEXT: .amdhsa_exception_fp_ieee_overflow 1
+// ASM-NEXT: .amdhsa_exception_fp_ieee_underflow 1
+// ASM-NEXT: .amdhsa_exception_fp_ieee_inexact 1
+// ASM-NEXT: .amdhsa_exception_int_div_zero 1
+// ASM-NEXT: .end_amdhsa_kernel
+
+.section .foo
+
+.byte .amdgcn.gfx_generation_number
+// ASM: .byte 9
+
+.byte .amdgcn.next_free_vgpr
+// ASM: .byte 0
+.byte .amdgcn.next_free_sgpr
+// ASM: .byte 0
+
+v_mov_b32_e32 v7, s10
+
+.byte .amdgcn.next_free_vgpr
+// ASM: .byte 8
+.byte .amdgcn.next_free_sgpr
+// ASM: .byte 11
+
+.set .amdgcn.next_free_vgpr, 0
+.set .amdgcn.next_free_sgpr, 0
+
+.byte .amdgcn.next_free_vgpr
+// ASM: .byte 0
+.byte .amdgcn.next_free_sgpr
+// ASM: .byte 0
+
+v_mov_b32_e32 v16, s3
+
+.byte .amdgcn.next_free_vgpr
+// ASM: .byte 17
+.byte .amdgcn.next_free_sgpr
+// ASM: .byte 4
diff --git a/llvm/test/Object/AMDGPU/elf-header-flags-mach.yaml b/llvm/test/Object/AMDGPU/elf-header-flags-mach.yaml
index 62c2b83e8232e..11fbac825501f 100644
--- a/llvm/test/Object/AMDGPU/elf-header-flags-mach.yaml
+++ b/llvm/test/Object/AMDGPU/elf-header-flags-mach.yaml
@@ -150,6 +150,10 @@
# RUN: llvm-readobj -S --file-headers %t.o.AMDGCN_GFX90C | FileCheck --check-prefixes=ELF-AMDGCN-ALL,ELF-AMDGCN-GFX90C %s
# RUN: obj2yaml %t.o.AMDGCN_GFX90C | FileCheck --check-prefixes=YAML-AMDGCN-ALL,YAML-AMDGCN-GFX90C %s
+# RUN: sed -e 's/<BITS>/64/' -e 's/<MACH>/AMDGCN_GFX940/' %s | yaml2obj -o %t.o.AMDGCN_GFX940
+# RUN: llvm-readobj -S --file-headers %t.o.AMDGCN_GFX940 | FileCheck --check-prefixes=ELF-AMDGCN-ALL,ELF-AMDGCN-GFX940 %s
+# RUN: obj2yaml %t.o.AMDGCN_GFX940 | FileCheck --check-prefixes=YAML-AMDGCN-ALL,YAML-AMDGCN-GFX940 %s
+
# RUN: sed -e 's/<BITS>/64/' -e 's/<MACH>/AMDGCN_GFX1010/' %s | yaml2obj -o %t.o.AMDGCN_GFX1010
# RUN: llvm-readobj -S --file-headers %t.o.AMDGCN_GFX1010 | FileCheck --check-prefixes=ELF-AMDGCN-ALL,ELF-AMDGCN-GFX1010 %s
# RUN: obj2yaml %t.o.AMDGCN_GFX1010 | FileCheck --check-prefixes=YAML-AMDGCN-ALL,YAML-AMDGCN-GFX1010 %s
@@ -321,6 +325,9 @@
# ELF-AMDGCN-GFX90C: EF_AMDGPU_MACH_AMDGCN_GFX90C (0x32)
# YAML-AMDGCN-GFX90C: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX90C ]
+# ELF-AMDGCN-GFX940: EF_AMDGPU_MACH_AMDGCN_GFX940 (0x40)
+# YAML-AMDGCN-GFX940: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX940 ]
+
# ELF-AMDGCN-GFX1010: EF_AMDGPU_MACH_AMDGCN_GFX1010 (0x33)
# YAML-AMDGCN-GFX1010: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX1010 ]
diff --git a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/subtarget.ll b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/subtarget.ll
index 425d631616da3..2f214408ff681 100644
--- a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/subtarget.ll
+++ b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/subtarget.ll
@@ -58,6 +58,11 @@ define amdgpu_kernel void @test_kernel() {
; ----------------------------------GFX9---------------------------------------
;
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -filetype=obj -O0 -o %t.o %s
+; RUN: llvm-objdump -D --arch-name=amdgcn --mcpu=gfx940 %t.o > %t-specify.txt
+; RUN: llvm-objdump -D %t.o > %t-detect.txt
+; RUN:
diff %t-specify.txt %t-detect.txt
+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90c -filetype=obj -O0 -o %t.o %s
; RUN: llvm-objdump -D --arch-name=amdgcn --mcpu=gfx90c %t.o > %t-specify.txt
; RUN: llvm-objdump -D %t.o > %t-detect.txt
diff --git a/llvm/test/tools/llvm-readobj/ELF/amdgpu-elf-headers.test b/llvm/test/tools/llvm-readobj/ELF/amdgpu-elf-headers.test
index c8f44cadde9a4..c6610a11b5be5 100644
--- a/llvm/test/tools/llvm-readobj/ELF/amdgpu-elf-headers.test
+++ b/llvm/test/tools/llvm-readobj/ELF/amdgpu-elf-headers.test
@@ -196,6 +196,15 @@
# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX90C
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX90C -DFLAG_VALUE=0x32
+# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX940
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX940 -DFLAG_VALUE=0x40
+
+# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX940
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX940 -DFLAG_VALUE=0x40
+
+# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX940
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX940 -DFLAG_VALUE=0x40
+
# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1010
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1010 -DFLAG_VALUE=0x33
diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp
index 8699356135897..f02ec2af832e9 100644
--- a/llvm/tools/llvm-readobj/ELFDumper.cpp
+++ b/llvm/tools/llvm-readobj/ELFDumper.cpp
@@ -1527,6 +1527,7 @@ const EnumEntry<unsigned> ElfHeaderAMDGPUFlagsABIVersion3[] = {
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX909),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX90A),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX90C),
+ LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX940),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1010),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1011),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1012),
@@ -1581,6 +1582,7 @@ const EnumEntry<unsigned> ElfHeaderAMDGPUFlagsABIVersion4[] = {
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX909),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX90A),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX90C),
+ LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX940),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1010),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1011),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1012),
diff --git a/openmp/libomptarget/DeviceRTL/CMakeLists.txt b/openmp/libomptarget/DeviceRTL/CMakeLists.txt
index bbdd1bb028ecc..408ba9fa07277 100644
--- a/openmp/libomptarget/DeviceRTL/CMakeLists.txt
+++ b/openmp/libomptarget/DeviceRTL/CMakeLists.txt
@@ -96,7 +96,7 @@ foreach(sm ${nvptx_sm_list})
endif()
endforeach()
-set(amdgpu_mcpus gfx700 gfx701 gfx801 gfx803 gfx900 gfx902 gfx906 gfx908 gfx90a gfx90c gfx1010 gfx1030 gfx1031)
+set(amdgpu_mcpus gfx700 gfx701 gfx801 gfx803 gfx900 gfx902 gfx906 gfx908 gfx90a gfx90c gfx940 gfx1010 gfx1030 gfx1031)
if (DEFINED LIBOMPTARGET_AMDGCN_GFXLIST)
set(amdgpu_mcpus ${LIBOMPTARGET_AMDGCN_GFXLIST})
endif()
More information about the Openmp-commits
mailing list