[clang] 9d05727 - AMDGPU: Add basic gfx942 target
Konstantin Zhuravlyov via cfe-commits
cfe-commits at lists.llvm.org
Wed May 10 08:46:28 PDT 2023
Author: Konstantin Zhuravlyov
Date: 2023-05-10T11:51:06-04:00
New Revision: 9d0572797233857397f3fdc35fffcfb490354f56
URL: https://github.com/llvm/llvm-project/commit/9d0572797233857397f3fdc35fffcfb490354f56
DIFF: https://github.com/llvm/llvm-project/commit/9d0572797233857397f3fdc35fffcfb490354f56.diff
LOG: AMDGPU: Add basic gfx942 target
Differential Revision: https://reviews.llvm.org/D149983
Added:
Modified:
clang/include/clang/Basic/Cuda.h
clang/lib/Basic/Cuda.cpp
clang/lib/Basic/Targets/NVPTX.cpp
clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
clang/test/CodeGenOpenCL/amdgpu-features.cl
clang/test/Driver/amdgpu-macros.cl
clang/test/Driver/amdgpu-mcpu.cl
clang/test/Misc/target-invalid-cpu-note.c
llvm/docs/AMDGPUUsage.rst
llvm/include/llvm/BinaryFormat/ELF.h
llvm/include/llvm/TargetParser/TargetParser.h
llvm/lib/Object/ELFObjectFile.cpp
llvm/lib/ObjectYAML/ELFYAML.cpp
llvm/lib/Target/AMDGPU/AMDGPU.td
llvm/lib/Target/AMDGPU/GCNProcessors.td
llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
llvm/lib/TargetParser/TargetParser.cpp
llvm/test/CodeGen/AMDGPU/directive-amdgcn-target.ll
llvm/test/CodeGen/AMDGPU/elf-header-flags-mach.ll
llvm/test/Object/AMDGPU/elf-header-flags-mach.yaml
llvm/test/tools/llvm-objdump/ELF/AMDGPU/subtarget.ll
llvm/test/tools/llvm-readobj/ELF/amdgpu-elf-headers.test
llvm/tools/llvm-readobj/ELFDumper.cpp
Removed:
################################################################################
diff --git a/clang/include/clang/Basic/Cuda.h b/clang/include/clang/Basic/Cuda.h
index dddbd651054da..2b8fc2a0bb1c3 100644
--- a/clang/include/clang/Basic/Cuda.h
+++ b/clang/include/clang/Basic/Cuda.h
@@ -93,6 +93,7 @@ enum class CudaArch {
GFX90c,
GFX940,
GFX941,
+ GFX942,
GFX1010,
GFX1011,
GFX1012,
diff --git a/clang/lib/Basic/Cuda.cpp b/clang/lib/Basic/Cuda.cpp
index baca1106b263b..db30142ad866d 100644
--- a/clang/lib/Basic/Cuda.cpp
+++ b/clang/lib/Basic/Cuda.cpp
@@ -115,6 +115,7 @@ static const CudaArchToStringMap arch_names[] = {
GFX(90c), // gfx90c
GFX(940), // gfx940
GFX(941), // gfx941
+ GFX(942), // gfx942
GFX(1010), // gfx1010
GFX(1011), // gfx1011
GFX(1012), // gfx1012
diff --git a/clang/lib/Basic/Targets/NVPTX.cpp b/clang/lib/Basic/Targets/NVPTX.cpp
index 17e38a4f1d299..cfcf4ca36f285 100644
--- a/clang/lib/Basic/Targets/NVPTX.cpp
+++ b/clang/lib/Basic/Targets/NVPTX.cpp
@@ -196,6 +196,7 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts,
case CudaArch::GFX90c:
case CudaArch::GFX940:
case CudaArch::GFX941:
+ case CudaArch::GFX942:
case CudaArch::GFX1010:
case CudaArch::GFX1011:
case CudaArch::GFX1012:
diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
index dd0ed791588a9..74f8c19e1bc7e 100644
--- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
+++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp
@@ -3581,6 +3581,7 @@ void CGOpenMPRuntimeGPU::processRequiresDirective(
case CudaArch::GFX90c:
case CudaArch::GFX940:
case CudaArch::GFX941:
+ case CudaArch::GFX942:
case CudaArch::GFX1010:
case CudaArch::GFX1011:
case CudaArch::GFX1012:
diff --git a/clang/test/CodeGenOpenCL/amdgpu-features.cl b/clang/test/CodeGenOpenCL/amdgpu-features.cl
index 5f452ae63925f..efa5759558cc5 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-features.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-features.cl
@@ -31,6 +31,7 @@
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx90c -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX90C %s
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx940 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX940 %s
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx941 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX941 %s
+// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx942 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX942 %s
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1010 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX1010 %s
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1011 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX1011 %s
// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1012 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX1012 %s
@@ -77,6 +78,7 @@
// GFX90C: "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
// GFX940: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
// GFX941: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
+// GFX942: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
// GFX1010: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
// GFX1011: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
// GFX1012: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
diff --git a/clang/test/Driver/amdgpu-macros.cl b/clang/test/Driver/amdgpu-macros.cl
index 58e48bf68f6a2..c2f5b5c7d71d5 100644
--- a/clang/test/Driver/amdgpu-macros.cl
+++ b/clang/test/Driver/amdgpu-macros.cl
@@ -110,6 +110,7 @@
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx90c %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=64 -DCPU=gfx90c -DFAMILY=GFX9
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx940 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=64 -DCPU=gfx940 -DFAMILY=GFX9
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx941 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=64 -DCPU=gfx941 -DFAMILY=GFX9
+// RUN: %clang -E -dM -target amdgcn -mcpu=gfx942 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=64 -DCPU=gfx942 -DFAMILY=GFX9
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx1010 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx1010 -DFAMILY=GFX10
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx1011 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx1011 -DFAMILY=GFX10
// RUN: %clang -E -dM -target amdgcn -mcpu=gfx1012 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx1012 -DFAMILY=GFX10
diff --git a/clang/test/Driver/amdgpu-mcpu.cl b/clang/test/Driver/amdgpu-mcpu.cl
index 2990aa2e067b5..57e4dd4d9d042 100644
--- a/clang/test/Driver/amdgpu-mcpu.cl
+++ b/clang/test/Driver/amdgpu-mcpu.cl
@@ -94,6 +94,7 @@
// RUN: %clang -### -target amdgcn -mcpu=gfx90c %s 2>&1 | FileCheck --check-prefix=GFX90C %s
// RUN: %clang -### -target amdgcn -mcpu=gfx940 %s 2>&1 | FileCheck --check-prefix=GFX940 %s
// RUN: %clang -### -target amdgcn -mcpu=gfx941 %s 2>&1 | FileCheck --check-prefix=GFX941 %s
+// RUN: %clang -### -target amdgcn -mcpu=gfx942 %s 2>&1 | FileCheck --check-prefix=GFX942 %s
// RUN: %clang -### -target amdgcn -mcpu=gfx1010 %s 2>&1 | FileCheck --check-prefix=GFX1010 %s
// RUN: %clang -### -target amdgcn -mcpu=gfx1011 %s 2>&1 | FileCheck --check-prefix=GFX1011 %s
// RUN: %clang -### -target amdgcn -mcpu=gfx1012 %s 2>&1 | FileCheck --check-prefix=GFX1012 %s
@@ -135,6 +136,7 @@
// GFX90C: "-target-cpu" "gfx90c"
// GFX940: "-target-cpu" "gfx940"
// GFX941: "-target-cpu" "gfx941"
+// GFX942: "-target-cpu" "gfx942"
// GFX1010: "-target-cpu" "gfx1010"
// GFX1011: "-target-cpu" "gfx1011"
// GFX1012: "-target-cpu" "gfx1012"
diff --git a/clang/test/Misc/target-invalid-cpu-note.c b/clang/test/Misc/target-invalid-cpu-note.c
index 20742d887ef9e..598a12fac33fd 100644
--- a/clang/test/Misc/target-invalid-cpu-note.c
+++ b/clang/test/Misc/target-invalid-cpu-note.c
@@ -29,7 +29,7 @@
// RUN: not %clang_cc1 -triple nvptx--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix NVPTX
// NVPTX: error: unknown target CPU 'not-a-cpu'
-// NVPTX-NEXT: note: valid target CPU values are: sm_20, sm_21, sm_30, sm_32, sm_35, sm_37, sm_50, sm_52, sm_53, sm_60, sm_61, sm_62, sm_70, sm_72, sm_75, sm_80, sm_86, sm_87, sm_89, sm_90, gfx600, gfx601, gfx602, gfx700, gfx701, gfx702, gfx703, gfx704, gfx705, gfx801, gfx802, gfx803, gfx805, gfx810, gfx900, gfx902, gfx904, gfx906, gfx908, gfx909, gfx90a, gfx90c, gfx940, gfx941, gfx1010, gfx1011, gfx1012, gfx1013, gfx1030, gfx1031, gfx1032, gfx1033, gfx1034, gfx1035, gfx1036, gfx1100, gfx1101, gfx1102, gfx1103{{$}}
+// NVPTX-NEXT: note: valid target CPU values are: sm_20, sm_21, sm_30, sm_32, sm_35, sm_37, sm_50, sm_52, sm_53, sm_60, sm_61, sm_62, sm_70, sm_72, sm_75, sm_80, sm_86, sm_87, sm_89, sm_90, gfx600, gfx601, gfx602, gfx700, gfx701, gfx702, gfx703, gfx704, gfx705, gfx801, gfx802, gfx803, gfx805, gfx810, gfx900, gfx902, gfx904, gfx906, gfx908, gfx909, gfx90a, gfx90c, gfx940, gfx941, gfx942, gfx1010, gfx1011, gfx1012, gfx1013, gfx1030, gfx1031, gfx1032, gfx1033, gfx1034, gfx1035, gfx1036, gfx1100, gfx1101, gfx1102, gfx1103{{$}}
// RUN: not %clang_cc1 -triple r600--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix R600
// R600: error: unknown target CPU 'not-a-cpu'
@@ -37,7 +37,7 @@
// RUN: not %clang_cc1 -triple amdgcn--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix AMDGCN
// AMDGCN: error: unknown target CPU 'not-a-cpu'
-// AMDGCN-NEXT: note: valid target CPU values are: gfx600, tahiti, gfx601, pitcairn, verde, gfx602, hainan, oland, gfx700, kaveri, gfx701, hawaii, gfx702, gfx703, kabini, mullins, gfx704, bonaire, gfx705, gfx801, carrizo, gfx802, iceland, tonga, gfx803, fiji, polaris10, polaris11, gfx805, tongapro, gfx810, stoney, gfx900, gfx902, gfx904, gfx906, gfx908, gfx909, gfx90a, gfx90c, gfx940, gfx941, gfx1010, gfx1011, gfx1012, gfx1013, gfx1030, gfx1031, gfx1032, gfx1033, gfx1034, gfx1035, gfx1036, gfx1100, gfx1101, gfx1102, gfx1103{{$}}
+// AMDGCN-NEXT: note: valid target CPU values are: gfx600, tahiti, gfx601, pitcairn, verde, gfx602, hainan, oland, gfx700, kaveri, gfx701, hawaii, gfx702, gfx703, kabini, mullins, gfx704, bonaire, gfx705, gfx801, carrizo, gfx802, iceland, tonga, gfx803, fiji, polaris10, polaris11, gfx805, tongapro, gfx810, stoney, gfx900, gfx902, gfx904, gfx906, gfx908, gfx909, gfx90a, gfx90c, gfx940, gfx941, gfx942, gfx1010, gfx1011, gfx1012, gfx1013, gfx1030, gfx1031, gfx1032, gfx1033, gfx1034, gfx1035, gfx1036, gfx1100, gfx1101, gfx1102, gfx1103{{$}}
// RUN: not %clang_cc1 -triple wasm64--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix WEBASM
// WEBASM: error: unknown target CPU 'not-a-cpu'
diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst
index dcd73386205e7..405ffceadf75b 100644
--- a/llvm/docs/AMDGPUUsage.rst
+++ b/llvm/docs/AMDGPUUsage.rst
@@ -392,6 +392,13 @@ Every processor supports every OS ABI (see :ref:`amdgpu-os`) with the following
work-item Add product
IDs names.
+ ``gfx942`` ``amdgcn`` dGPU - sramecc - Architected *TBA*
+ - tgsplit flat
+ - xnack scratch .. TODO::
+ - Packed
+ work-item Add product
+ IDs names.
+
**GCN GFX10.1 (RDNA 1)** [AMD-GCN-GFX10-RDNA1]_
-----------------------------------------------------------------------------------------------------------------------
``gfx1010`` ``amdgcn`` dGPU - cumode - Absolute - *rocm-amdhsa* - Radeon RX 5700
@@ -1332,6 +1339,7 @@ The AMDGPU backend uses the following ELF header:
*reserved* 0x049 Reserved.
*reserved* 0x04a Reserved.
``EF_AMDGPU_MACH_AMDGCN_GFX941`` 0x04b ``gfx941``
+ ``EF_AMDGPU_MACH_AMDGCN_GFX942`` 0x04c ``gfx942``
==================================== ========== =============================
Sections
diff --git a/llvm/include/llvm/BinaryFormat/ELF.h b/llvm/include/llvm/BinaryFormat/ELF.h
index 6b9752f65189f..fbe1d1fabdab7 100644
--- a/llvm/include/llvm/BinaryFormat/ELF.h
+++ b/llvm/include/llvm/BinaryFormat/ELF.h
@@ -782,10 +782,11 @@ enum : unsigned {
EF_AMDGPU_MACH_AMDGCN_RESERVED_0X49 = 0x049,
EF_AMDGPU_MACH_AMDGCN_RESERVED_0X4A = 0x04a,
EF_AMDGPU_MACH_AMDGCN_GFX941 = 0x04b,
+ EF_AMDGPU_MACH_AMDGCN_GFX942 = 0x04c,
// First/last AMDGCN-based processors.
EF_AMDGPU_MACH_AMDGCN_FIRST = EF_AMDGPU_MACH_AMDGCN_GFX600,
- EF_AMDGPU_MACH_AMDGCN_LAST = EF_AMDGPU_MACH_AMDGCN_GFX941,
+ EF_AMDGPU_MACH_AMDGCN_LAST = EF_AMDGPU_MACH_AMDGCN_GFX942,
// Indicates if the "xnack" target feature is enabled for all code contained
// in the object.
diff --git a/llvm/include/llvm/TargetParser/TargetParser.h b/llvm/include/llvm/TargetParser/TargetParser.h
index f30b10081eed0..0ab2c4ae12d12 100644
--- a/llvm/include/llvm/TargetParser/TargetParser.h
+++ b/llvm/include/llvm/TargetParser/TargetParser.h
@@ -85,6 +85,7 @@ enum GPUKind : uint32_t {
GK_GFX90C = 67,
GK_GFX940 = 68,
GK_GFX941 = 69,
+ GK_GFX942 = 70,
GK_GFX1010 = 71,
GK_GFX1011 = 72,
diff --git a/llvm/lib/Object/ELFObjectFile.cpp b/llvm/lib/Object/ELFObjectFile.cpp
index 6b1e7486aa74a..204ba6be7221b 100644
--- a/llvm/lib/Object/ELFObjectFile.cpp
+++ b/llvm/lib/Object/ELFObjectFile.cpp
@@ -465,6 +465,8 @@ StringRef ELFObjectFileBase::getAMDGPUCPUName() const {
return "gfx940";
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX941:
return "gfx941";
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX942:
+ return "gfx942";
// AMDGCN GFX10.
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010:
diff --git a/llvm/lib/ObjectYAML/ELFYAML.cpp b/llvm/lib/ObjectYAML/ELFYAML.cpp
index fe113246e3f33..1ec480f86ad64 100644
--- a/llvm/lib/ObjectYAML/ELFYAML.cpp
+++ b/llvm/lib/ObjectYAML/ELFYAML.cpp
@@ -591,6 +591,7 @@ void ScalarBitSetTraits<ELFYAML::ELF_EF>::bitset(IO &IO,
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX90C, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX940, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX941, EF_AMDGPU_MACH);
+ BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX942, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1010, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1011, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1012, EF_AMDGPU_MACH);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index e1feda9e05224..f9608ad05df0a 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -1261,6 +1261,40 @@ def FeatureISAVersion9_4_1 : FeatureSet<
FullRate64Ops,
FeatureBackOffBarrier]>;
+def FeatureISAVersion9_4_2 : FeatureSet<
+ [FeatureGFX9,
+ FeatureGFX90AInsts,
+ FeatureGFX940Insts,
+ FeatureFmaMixInsts,
+ FeatureLDSBankCount32,
+ FeatureDLInsts,
+ FeatureFmacF64Inst,
+ FeatureDot1Insts,
+ FeatureDot2Insts,
+ FeatureDot3Insts,
+ FeatureDot4Insts,
+ FeatureDot5Insts,
+ FeatureDot6Insts,
+ FeatureDot7Insts,
+ FeatureDot10Insts,
+ FeatureAtomicDsPkAdd16Insts,
+ FeatureAtomicFlatPkAdd16Insts,
+ Feature64BitDPP,
+ FeaturePackedFP32Ops,
+ FeatureMAIInsts,
+ FeatureFP8Insts,
+ FeaturePkFmacF16Inst,
+ FeatureAtomicFaddRtnInsts,
+ FeatureAtomicFaddNoRtnInsts,
+ FeatureAtomicBufferGlobalPkAddF16Insts,
+ FeatureAtomicGlobalPkAddBF16Inst,
+ FeatureFlatAtomicFaddF32Inst,
+ FeatureSupportsSRAMECC,
+ FeaturePackedTID,
+ FeatureArchitectedFlatScratch,
+ FullRate64Ops,
+ FeatureBackOffBarrier]>;
+
// TODO: Organize more features into groups.
def FeatureGroup {
// Bugs present on gfx10.1.
diff --git a/llvm/lib/Target/AMDGPU/GCNProcessors.td b/llvm/lib/Target/AMDGPU/GCNProcessors.td
index dbc95e1a96162..186a39ca44ba9 100644
--- a/llvm/lib/Target/AMDGPU/GCNProcessors.td
+++ b/llvm/lib/Target/AMDGPU/GCNProcessors.td
@@ -200,6 +200,10 @@ def : ProcessorModel<"gfx941", SIDPGFX940FullSpeedModel,
FeatureISAVersion9_4_1.Features
>;
+def : ProcessorModel<"gfx942", SIDPGFX940FullSpeedModel,
+ FeatureISAVersion9_4_2.Features
+>;
+
//===----------------------------------------------------------------------===//
// GCN GFX10.
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
index 188ce573791b5..d08dfbc2a70af 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
@@ -108,6 +108,7 @@ StringRef AMDGPUTargetStreamer::getArchNameFromElfMach(unsigned ElfMach) {
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX90C: AK = GK_GFX90C; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX940: AK = GK_GFX940; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX941: AK = GK_GFX941; break;
+ case ELF::EF_AMDGPU_MACH_AMDGCN_GFX942: AK = GK_GFX942; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010: AK = GK_GFX1010; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1011: AK = GK_GFX1011; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1012: AK = GK_GFX1012; break;
@@ -178,6 +179,7 @@ unsigned AMDGPUTargetStreamer::getElfMach(StringRef GPU) {
case GK_GFX90C: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX90C;
case GK_GFX940: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX940;
case GK_GFX941: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX941;
+ case GK_GFX942: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX942;
case GK_GFX1010: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010;
case GK_GFX1011: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1011;
case GK_GFX1012: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1012;
diff --git a/llvm/lib/TargetParser/TargetParser.cpp b/llvm/lib/TargetParser/TargetParser.cpp
index ae5d8d26d9120..b2b96c4d4bf19 100644
--- a/llvm/lib/TargetParser/TargetParser.cpp
+++ b/llvm/lib/TargetParser/TargetParser.cpp
@@ -106,6 +106,7 @@ constexpr GPUInfo AMDGCNGPUs[] = {
{{"gfx90c"}, {"gfx90c"}, GK_GFX90C, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK},
{{"gfx940"}, {"gfx940"}, GK_GFX940, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC},
{{"gfx941"}, {"gfx941"}, GK_GFX941, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC},
+ {{"gfx942"}, {"gfx942"}, GK_GFX942, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC},
{{"gfx1010"}, {"gfx1010"}, GK_GFX1010, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK},
{{"gfx1011"}, {"gfx1011"}, GK_GFX1011, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK},
{{"gfx1012"}, {"gfx1012"}, GK_GFX1012, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK},
@@ -226,6 +227,7 @@ AMDGPU::IsaVersion AMDGPU::getIsaVersion(StringRef GPU) {
case GK_GFX90C: return {9, 0, 12};
case GK_GFX940: return {9, 4, 0};
case GK_GFX941: return {9, 4, 1};
+ case GK_GFX942: return {9, 4, 2};
case GK_GFX1010: return {10, 1, 0};
case GK_GFX1011: return {10, 1, 1};
case GK_GFX1012: return {10, 1, 2};
@@ -324,6 +326,7 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
Features["s-memrealtime"] = true;
Features["s-memtime-inst"] = true;
break;
+ case GK_GFX942:
case GK_GFX941:
case GK_GFX940:
Features["gfx940-insts"] = true;
diff --git a/llvm/test/CodeGen/AMDGPU/directive-amdgcn-target.ll b/llvm/test/CodeGen/AMDGPU/directive-amdgcn-target.ll
index bca94028bf87c..abf9d89450b8e 100644
--- a/llvm/test/CodeGen/AMDGPU/directive-amdgcn-target.ll
+++ b/llvm/test/CodeGen/AMDGPU/directive-amdgcn-target.ll
@@ -77,6 +77,9 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx941 < %s | FileCheck --check-prefixes=GFX941 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx941 -mattr=-xnack < %s | FileCheck --check-prefixes=GFX941-NOXNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx941 -mattr=+xnack < %s | FileCheck --check-prefixes=GFX941-XNACK %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 < %s | FileCheck --check-prefixes=GFX942 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -mattr=-xnack < %s | FileCheck --check-prefixes=GFX942-NOXNACK %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -mattr=+xnack < %s | FileCheck --check-prefixes=GFX942-XNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 < %s | FileCheck --check-prefixes=GFX1010 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=-xnack < %s | FileCheck --check-prefixes=GFX1010-NOXNACK %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=+xnack < %s | FileCheck --check-prefixes=GFX1010-XNACK %s
@@ -158,6 +161,9 @@
; GFX941: .amdgcn_target "amdgcn-amd-amdhsa--gfx941"
; GFX941-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx941:xnack-"
; GFX941-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx941:xnack+"
+; GFX942: .amdgcn_target "amdgcn-amd-amdhsa--gfx942"
+; GFX942-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx942:xnack-"
+; GFX942-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx942:xnack+"
; GFX1010: .amdgcn_target "amdgcn-amd-amdhsa--gfx1010"
; GFX1010-NOXNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1010:xnack-"
; GFX1010-XNACK: .amdgcn_target "amdgcn-amd-amdhsa--gfx1010:xnack+"
diff --git a/llvm/test/CodeGen/AMDGPU/elf-header-flags-mach.ll b/llvm/test/CodeGen/AMDGPU/elf-header-flags-mach.ll
index b455841680122..9e532812dcf1e 100644
--- a/llvm/test/CodeGen/AMDGPU/elf-header-flags-mach.ll
+++ b/llvm/test/CodeGen/AMDGPU/elf-header-flags-mach.ll
@@ -56,6 +56,7 @@
; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx90c < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX90C %s
; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx940 < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX940 %s
; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx941 < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX941 %s
+; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx942 < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX942 %s
; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx1010 < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX1010 %s
; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx1011 < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX1011 %s
; RUN: llc -filetype=obj -march=amdgcn -mcpu=gfx1012 < %s | llvm-readobj --file-header - | FileCheck --check-prefixes=ALL,ARCH-GCN,GFX1012 %s
@@ -124,6 +125,7 @@
; GFX90C: EF_AMDGPU_MACH_AMDGCN_GFX90C (0x32)
; GFX940: EF_AMDGPU_MACH_AMDGCN_GFX940 (0x40)
; GFX941: EF_AMDGPU_MACH_AMDGCN_GFX941 (0x4B)
+; GFX942: EF_AMDGPU_MACH_AMDGCN_GFX942 (0x4C)
; GFX1010: EF_AMDGPU_MACH_AMDGCN_GFX1010 (0x33)
; GFX1011: EF_AMDGPU_MACH_AMDGCN_GFX1011 (0x34)
; GFX1012: EF_AMDGPU_MACH_AMDGCN_GFX1012 (0x35)
diff --git a/llvm/test/Object/AMDGPU/elf-header-flags-mach.yaml b/llvm/test/Object/AMDGPU/elf-header-flags-mach.yaml
index e3f9de43fc0e0..048fc7049ddad 100644
--- a/llvm/test/Object/AMDGPU/elf-header-flags-mach.yaml
+++ b/llvm/test/Object/AMDGPU/elf-header-flags-mach.yaml
@@ -158,6 +158,10 @@
# RUN: llvm-readobj -S --file-headers %t.o.AMDGCN_GFX941 | FileCheck --check-prefixes=ELF-AMDGCN-ALL,ELF-AMDGCN-GFX941 %s
# RUN: obj2yaml %t.o.AMDGCN_GFX941 | FileCheck --check-prefixes=YAML-AMDGCN-ALL,YAML-AMDGCN-GFX941 %s
+# RUN: sed -e 's/<BITS>/64/' -e 's/<MACH>/AMDGCN_GFX942/' %s | yaml2obj -o %t.o.AMDGCN_GFX942
+# RUN: llvm-readobj -S --file-headers %t.o.AMDGCN_GFX942 | FileCheck --check-prefixes=ELF-AMDGCN-ALL,ELF-AMDGCN-GFX942 %s
+# RUN: obj2yaml %t.o.AMDGCN_GFX942 | FileCheck --check-prefixes=YAML-AMDGCN-ALL,YAML-AMDGCN-GFX942 %s
+
# RUN: sed -e 's/<BITS>/64/' -e 's/<MACH>/AMDGCN_GFX1010/' %s | yaml2obj -o %t.o.AMDGCN_GFX1010
# RUN: llvm-readobj -S --file-headers %t.o.AMDGCN_GFX1010 | FileCheck --check-prefixes=ELF-AMDGCN-ALL,ELF-AMDGCN-GFX1010 %s
# RUN: obj2yaml %t.o.AMDGCN_GFX1010 | FileCheck --check-prefixes=YAML-AMDGCN-ALL,YAML-AMDGCN-GFX1010 %s
@@ -355,6 +359,9 @@
# ELF-AMDGCN-GFX941: EF_AMDGPU_MACH_AMDGCN_GFX941 (0x4B)
# YAML-AMDGCN-GFX941: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX941 ]
+# ELF-AMDGCN-GFX942: EF_AMDGPU_MACH_AMDGCN_GFX942 (0x4C)
+# YAML-AMDGCN-GFX942: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX942 ]
+
# ELF-AMDGCN-GFX1010: EF_AMDGPU_MACH_AMDGCN_GFX1010 (0x33)
# YAML-AMDGCN-GFX1010: Flags: [ EF_AMDGPU_MACH_AMDGCN_GFX1010 ]
diff --git a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/subtarget.ll b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/subtarget.ll
index 0b7349169e0e9..2bf7cf8de4e67 100644
--- a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/subtarget.ll
+++ b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/subtarget.ll
@@ -85,6 +85,11 @@ define amdgpu_kernel void @test_kernel() {
; ----------------------------------GFX9---------------------------------------
;
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -filetype=obj -O0 -o %t.o %s
+; RUN: llvm-objdump -D --arch-name=amdgcn --mcpu=gfx942 %t.o > %t-specify.txt
+; RUN: llvm-objdump -D %t.o > %t-detect.txt
+; RUN:
diff %t-specify.txt %t-detect.txt
+
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx941 -filetype=obj -O0 -o %t.o %s
; RUN: llvm-objdump -D --arch-name=amdgcn --mcpu=gfx941 %t.o > %t-specify.txt
; RUN: llvm-objdump -D %t.o > %t-detect.txt
diff --git a/llvm/test/tools/llvm-readobj/ELF/amdgpu-elf-headers.test b/llvm/test/tools/llvm-readobj/ELF/amdgpu-elf-headers.test
index c465fc0c9e398..193ab5740af90 100644
--- a/llvm/test/tools/llvm-readobj/ELF/amdgpu-elf-headers.test
+++ b/llvm/test/tools/llvm-readobj/ELF/amdgpu-elf-headers.test
@@ -214,6 +214,15 @@
# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX941
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX941 -DFLAG_VALUE=0x4B
+# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX942
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX942 -DFLAG_VALUE=0x4C
+
+# RUN: yaml2obj %s -o %t -DABI_VERSION=1 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX942
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=1 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX942 -DFLAG_VALUE=0x4C
+
+# RUN: yaml2obj %s -o %t -DABI_VERSION=2 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX942
+# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=2 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX942 -DFLAG_VALUE=0x4C
+
# RUN: yaml2obj %s -o %t -DABI_VERSION=0 -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1010
# RUN: llvm-readobj -h %t | FileCheck %s --check-prefixes=ALL,KNOWN-ABI-VERSION,SINGLE-FLAG --match-full-lines -DABI_VERSION=0 -DFILE=%t -DFLAG_NAME=EF_AMDGPU_MACH_AMDGCN_GFX1010 -DFLAG_VALUE=0x33
diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp
index a67640a24e625..eddffa08cd7bd 100644
--- a/llvm/tools/llvm-readobj/ELFDumper.cpp
+++ b/llvm/tools/llvm-readobj/ELFDumper.cpp
@@ -1589,6 +1589,7 @@ const EnumEntry<unsigned> ElfHeaderAMDGPUFlagsABIVersion3[] = {
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX90C),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX940),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX941),
+ LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX942),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1010),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1011),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1012),
@@ -1650,6 +1651,7 @@ const EnumEntry<unsigned> ElfHeaderAMDGPUFlagsABIVersion4[] = {
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX90C),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX940),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX941),
+ LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX942),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1010),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1011),
LLVM_READOBJ_ENUM_ENT(ELF, EF_AMDGPU_MACH_AMDGCN_GFX1012),
More information about the cfe-commits
mailing list