[llvm] 8615f9a - [AMDGPU] Replace gfx940 and gfx941 with gfx942 in llvm (#126763)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Feb 19 01:20:52 PST 2025
Author: Fabian Ritter
Date: 2025-02-19T10:20:48+01:00
New Revision: 8615f9aaffd4337a33ea979f010c4d6410ba6125
URL: https://github.com/llvm/llvm-project/commit/8615f9aaffd4337a33ea979f010c4d6410ba6125
DIFF: https://github.com/llvm/llvm-project/commit/8615f9aaffd4337a33ea979f010c4d6410ba6125.diff
LOG: [AMDGPU] Replace gfx940 and gfx941 with gfx942 in llvm (#126763)
gfx940 and gfx941 are no longer supported. This is one of a series of
PRs to remove them from the code base.
This PR removes all non-documentation occurrences of gfx940/gfx941 from
the llvm directory, and the remaining occurrences in clang.
Documentation changes will follow.
For SWDEV-512631
Added:
Modified:
clang/test/Misc/target-invalid-cpu-note/amdgcn.c
llvm/docs/AMDGPUUsage.rst
llvm/include/llvm/BinaryFormat/ELF.h
llvm/include/llvm/IR/IntrinsicsAMDGPU.td
llvm/include/llvm/TargetParser/TargetParser.h
llvm/lib/Object/ELFObjectFile.cpp
llvm/lib/ObjectYAML/ELFYAML.cpp
llvm/lib/Target/AMDGPU/AMDGPU.td
llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
llvm/lib/Target/AMDGPU/DSInstructions.td
llvm/lib/Target/AMDGPU/FLATInstructions.td
llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
llvm/lib/Target/AMDGPU/GCNProcessors.td
llvm/lib/Target/AMDGPU/GCNSubtarget.h
llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
llvm/lib/Target/AMDGPU/SIDefines.h
llvm/lib/Target/AMDGPU/SIISelLowering.cpp
llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
llvm/lib/Target/AMDGPU/SISchedule.td
llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp
llvm/lib/TargetParser/TargetParser.cpp
llvm/test/CodeGen/AMDGPU/preload-implicit-kernargs-debug-info.ll
llvm/tools/llvm-readobj/ELFDumper.cpp
Removed:
################################################################################
diff --git a/clang/test/Misc/target-invalid-cpu-note/amdgcn.c b/clang/test/Misc/target-invalid-cpu-note/amdgcn.c
index 642d2df211c21..9ef44b2bb403e 100644
--- a/clang/test/Misc/target-invalid-cpu-note/amdgcn.c
+++ b/clang/test/Misc/target-invalid-cpu-note/amdgcn.c
@@ -45,8 +45,6 @@
// CHECK-SAME: {{^}}, gfx909
// CHECK-SAME: {{^}}, gfx90a
// CHECK-SAME: {{^}}, gfx90c
-// CHECK-SAME: {{^}}, gfx940
-// CHECK-SAME: {{^}}, gfx941
// CHECK-SAME: {{^}}, gfx942
// CHECK-SAME: {{^}}, gfx950
// CHECK-SAME: {{^}}, gfx1010
diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst
index 5966d1617feee..936e8e2960bf1 100644
--- a/llvm/docs/AMDGPUUsage.rst
+++ b/llvm/docs/AMDGPUUsage.rst
@@ -2232,7 +2232,7 @@ The AMDGPU backend uses the following ELF header:
``EF_AMDGPU_MACH_AMDGCN_GFX1035`` 0x03d ``gfx1035``
``EF_AMDGPU_MACH_AMDGCN_GFX1034`` 0x03e ``gfx1034``
``EF_AMDGPU_MACH_AMDGCN_GFX90A`` 0x03f ``gfx90a``
- ``EF_AMDGPU_MACH_AMDGCN_GFX940`` 0x040 ``gfx940``
+ *reserved* 0x040 Reserved.
``EF_AMDGPU_MACH_AMDGCN_GFX1100`` 0x041 ``gfx1100``
``EF_AMDGPU_MACH_AMDGCN_GFX1013`` 0x042 ``gfx1013``
``EF_AMDGPU_MACH_AMDGCN_GFX1150`` 0x043 ``gfx1150``
@@ -2243,7 +2243,7 @@ The AMDGPU backend uses the following ELF header:
``EF_AMDGPU_MACH_AMDGCN_GFX1200`` 0x048 ``gfx1200``
*reserved* 0x049 Reserved.
``EF_AMDGPU_MACH_AMDGCN_GFX1151`` 0x04a ``gfx1151``
- ``EF_AMDGPU_MACH_AMDGCN_GFX941`` 0x04b ``gfx941``
+ *reserved* 0x04b Reserved.
``EF_AMDGPU_MACH_AMDGCN_GFX942`` 0x04c ``gfx942``
*reserved* 0x04d Reserved.
``EF_AMDGPU_MACH_AMDGCN_GFX1201`` 0x04e ``gfx1201``
diff --git a/llvm/include/llvm/BinaryFormat/ELF.h b/llvm/include/llvm/BinaryFormat/ELF.h
index 64f643749d6ac..37eab89e706db 100644
--- a/llvm/include/llvm/BinaryFormat/ELF.h
+++ b/llvm/include/llvm/BinaryFormat/ELF.h
@@ -814,7 +814,7 @@ enum : unsigned {
EF_AMDGPU_MACH_AMDGCN_GFX1035 = 0x03d,
EF_AMDGPU_MACH_AMDGCN_GFX1034 = 0x03e,
EF_AMDGPU_MACH_AMDGCN_GFX90A = 0x03f,
- EF_AMDGPU_MACH_AMDGCN_GFX940 = 0x040,
+ EF_AMDGPU_MACH_AMDGCN_RESERVED_0X40 = 0x040,
EF_AMDGPU_MACH_AMDGCN_GFX1100 = 0x041,
EF_AMDGPU_MACH_AMDGCN_GFX1013 = 0x042,
EF_AMDGPU_MACH_AMDGCN_GFX1150 = 0x043,
@@ -825,7 +825,7 @@ enum : unsigned {
EF_AMDGPU_MACH_AMDGCN_GFX1200 = 0x048,
EF_AMDGPU_MACH_AMDGCN_RESERVED_0X49 = 0x049,
EF_AMDGPU_MACH_AMDGCN_GFX1151 = 0x04a,
- EF_AMDGPU_MACH_AMDGCN_GFX941 = 0x04b,
+ EF_AMDGPU_MACH_AMDGCN_RESERVED_0X4B = 0x04b,
EF_AMDGPU_MACH_AMDGCN_GFX942 = 0x04c,
EF_AMDGPU_MACH_AMDGCN_RESERVED_0X4D = 0x04d,
EF_AMDGPU_MACH_AMDGCN_GFX1201 = 0x04e,
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index 9558f2b9b74e0..1e4f25c642493 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -1074,7 +1074,7 @@ class AMDGPUImageDimIntrinsic<AMDGPUDimProfile P_,
// bit 0 = glc, bit 1 = slc,
// bit 2 = dlc (gfx10/gfx11),
// bit 4 = scc (gfx90a)
- // gfx940: bit 0 = sc0, bit 1 = nt, bit 4 = sc1
+ // gfx942: bit 0 = sc0, bit 1 = nt, bit 4 = sc1
// gfx12+: bits [0-2] = th, bits [3-4] = scope
!listconcat(props, [IntrNoCallback, IntrNoFree, IntrWillReturn],
!if(P_.IsAtomic, [], [ImmArg<ArgIndex<AMDGPUImageDimIntrinsicEval<P_>.DmaskArgIndex>>]),
@@ -1321,7 +1321,7 @@ def int_amdgcn_s_buffer_load : DefaultAttrsIntrinsic <
llvm_i32_ty], // auxiliary/cachepolicy(imm):
// bit 0 = glc, bit 1 = slc, bit 2 = dlc (gfx10/gfx11),
// bit 3 = swz, bit 4 = scc (gfx90a)
- // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
+ // gfx942: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
// gfx12+: bits [0-2] = th, bits [3-4] = scope,
// bit 6 = swz
// Note: volatile bit is **not** permitted here.
@@ -1351,7 +1351,7 @@ class AMDGPURawBufferLoad : DefaultAttrsIntrinsic <
llvm_i32_ty], // auxiliary/cachepolicy(imm):
// bit 0 = glc, bit 1 = slc, bit 2 = dlc (gfx10/gfx11),
// bit 3 = swz, bit 4 = scc (gfx90a)
- // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
+ // gfx942: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
// gfx12+: bits [0-2] = th, bits [3-4] = scope,
// bit 6 = swz
// all: volatile op (bit 31, stripped at lowering)
@@ -1381,7 +1381,7 @@ class AMDGPURawPtrBufferLoad<LLVMType data_ty = llvm_any_ty> : DefaultAttrsIntri
llvm_i32_ty], // auxiliary/cachepolicy(imm):
// bit 0 = glc, bit 1 = slc, bit 2 = dlc (gfx10/gfx11),
// bit 3 = swz, bit 4 = scc (gfx90a)
- // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
+ // gfx942: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
// gfx12+: bits [0-2] = th, bits [3-4] = scope,
// bit 6 = swz
// all: volatile op (bit 31, stripped at lowering)
@@ -1413,7 +1413,7 @@ class AMDGPUStructBufferLoad<LLVMType data_ty = llvm_any_ty> : DefaultAttrsIntri
llvm_i32_ty], // auxiliary/cachepolicy(imm):
// bit 0 = glc, bit 1 = slc, bit 2 = dlc (gfx10/gfx11),
// bit 3 = swz, bit 4 = scc (gfx90a)
- // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
+ // gfx942: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
// gfx12+: bits [0-2] = th, bits [3-4] = scope,
// bit 6 = swz
// all: volatile op (bit 31, stripped at lowering)
@@ -1431,7 +1431,7 @@ class AMDGPUStructAtomicBufferLoad<LLVMType data_ty = llvm_any_ty> : Intrinsic <
llvm_i32_ty], // auxiliary/cachepolicy(imm):
// bit 0 = glc, bit 1 = slc, bit 2 = dlc (gfx10/gfx11),
// bit 3 = swz, bit 4 = scc (gfx90a)
- // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
+ // gfx942: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
// gfx12+: bits [0-2] = th, bits [3-4] = scope,
// bit 6 = swz
// all: volatile op (bit 31, stripped at lowering)
@@ -1448,7 +1448,7 @@ class AMDGPUStructPtrBufferLoad<LLVMType data_ty = llvm_any_ty> : DefaultAttrsIn
llvm_i32_ty], // auxiliary/cachepolicy(imm):
// bit 0 = glc, bit 1 = slc, bit 2 = dlc (gfx10/gfx11),
// bit 3 = swz, bit 4 = scc (gfx90a)
- // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
+ // gfx942: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
// gfx12+: bits [0-2] = th, bits [3-4] = scope,
// bit 6 = swz
// all: volatile op (bit 31, stripped at lowering)
@@ -1467,7 +1467,7 @@ class AMDGPUStructPtrAtomicBufferLoad<LLVMType data_ty = llvm_any_ty> : Intrinsi
llvm_i32_ty], // auxiliary/cachepolicy(imm):
// bit 0 = glc, bit 1 = slc, bit 2 = dlc (gfx10/gfx11),
// bit 3 = swz, bit 4 = scc (gfx90a)
- // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
+ // gfx942: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
// gfx12+: bits [0-2] = th, bits [3-4] = scope,
// bit 6 = swz
// all: volatile op (bit 31, stripped at lowering)
@@ -1485,7 +1485,7 @@ class AMDGPURawBufferStore<LLVMType data_ty = llvm_any_ty> : DefaultAttrsIntrins
llvm_i32_ty], // auxiliary/cachepolicy(imm):
// bit 0 = glc, bit 1 = slc, bit 2 = dlc (gfx10/gfx11),
// bit 3 = swz, bit 4 = scc (gfx90a)
- // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
+ // gfx942: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
// gfx12+: bits [0-2] = th, bits [3-4] = scope,
// bit 6 = swz
// all: volatile op (bit 31, stripped at lowering)
@@ -1503,7 +1503,7 @@ class AMDGPURawPtrBufferStore<LLVMType data_ty = llvm_any_ty> : DefaultAttrsIntr
llvm_i32_ty], // auxiliary/cachepolicy(imm):
// bit 0 = glc, bit 1 = slc, bit 2 = dlc (gfx10/gfx11),
// bit 3 = swz, bit 4 = scc (gfx90a)
- // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
+ // gfx942: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
// gfx12+: bits [0-2] = th, bits [3-4] = scope,
// bit 6 = swz
// all: volatile op (bit 31, stripped at lowering)
@@ -1523,7 +1523,7 @@ class AMDGPUStructBufferStore<LLVMType data_ty = llvm_any_ty> : DefaultAttrsIntr
llvm_i32_ty], // auxiliary/cachepolicy(imm):
// bit 0 = glc, bit 1 = slc, bit 2 = dlc (gfx10/gfx11),
// bit 3 = swz, bit 4 = scc (gfx90a)
- // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
+ // gfx942: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
// gfx12+: bits [0-2] = th, bits [3-4] = scope,
// bit 6 = swz
// all: volatile op (bit 31, stripped at lowering)
@@ -1542,7 +1542,7 @@ class AMDGPUStructPtrBufferStore<LLVMType data_ty = llvm_any_ty> : DefaultAttrsI
llvm_i32_ty], // auxiliary/cachepolicy(imm):
// bit 0 = glc, bit 1 = slc, bit 2 = dlc (gfx10/gfx11),
// bit 3 = swz, bit 4 = scc (gfx90a)
- // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
+ // gfx942: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
// gfx12+: bits [0-2] = th, bits [3-4] = scope,
// bit 6 = swz
// all: volatile op (bit 31, stripped at lowering)
@@ -1628,7 +1628,7 @@ def int_amdgcn_raw_ptr_buffer_atomic_cmpswap : Intrinsic<
// gfx908 intrinsic
def int_amdgcn_raw_buffer_atomic_fadd : AMDGPURawBufferAtomic<llvm_anyfloat_ty>;
-// Supports float and <2 x half> on gfx908. Supports v2bf16 on gfx90a, gfx940, gfx950, gfx12+.
+// Supports float and <2 x half> on gfx908. Supports v2bf16 on gfx90a, gfx942, gfx950, gfx12+.
def int_amdgcn_raw_ptr_buffer_atomic_fadd : AMDGPURawPtrBufferAtomic<llvm_anyfloat_ty>;
class AMDGPUStructBufferAtomic<LLVMType data_ty = llvm_any_ty> : Intrinsic <
@@ -1727,7 +1727,7 @@ def int_amdgcn_raw_tbuffer_load : DefaultAttrsIntrinsic <
llvm_i32_ty], // auxiliary/cachepolicy(imm):
// bit 0 = glc, bit 1 = slc, bit 2 = dlc (gfx10/gfx11),
// bit 3 = swz, bit 4 = scc (gfx90a)
- // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
+ // gfx942: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
// gfx12+: bits [0-2] = th, bits [3-4] = scope,
// bit 6 = swz
[IntrReadMem,
@@ -1743,7 +1743,7 @@ def int_amdgcn_raw_ptr_tbuffer_load : DefaultAttrsIntrinsic <
llvm_i32_ty], // auxiliary/cachepolicy(imm):
// bit 0 = glc, bit 1 = slc, bit 2 = dlc (gfx10/gfx11),
// bit 3 = swz, bit 4 = scc (gfx90a)
- // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
+ // gfx942: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
// gfx12+: bits [0-2] = th, bits [3-4] = scope,
// bit 6 = swz
// all: volatile op (bit 31, stripped at lowering)
@@ -1761,7 +1761,7 @@ def int_amdgcn_raw_tbuffer_store : DefaultAttrsIntrinsic <
llvm_i32_ty], // auxiliary/cachepolicy(imm):
// bit 0 = glc, bit 1 = slc, bit 2 = dlc (gfx10/gfx11),
// bit 3 = swz, bit 4 = scc (gfx90a)
- // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
+ // gfx942: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
// gfx12+: bits [0-2] = th, bits [3-4] = scope,
// bit 6 = swz
// all: volatile op (bit 31, stripped at lowering)
@@ -1779,7 +1779,7 @@ def int_amdgcn_raw_ptr_tbuffer_store : DefaultAttrsIntrinsic <
llvm_i32_ty], // auxiliary/cachepolicy(imm):
// bit 0 = glc, bit 1 = slc, bit 2 = dlc (gfx10/gfx11),
// bit 3 = swz, bit 4 = scc (gfx90a)
- // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
+ // gfx942: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
// gfx12+: bits [0-2] = th, bits [3-4] = scope,
// bit 6 = swz
// all: volatile op (bit 31, stripped at lowering)
@@ -1797,7 +1797,7 @@ def int_amdgcn_struct_tbuffer_load : DefaultAttrsIntrinsic <
llvm_i32_ty], // auxiliary/cachepolicy(imm):
// bit 0 = glc, bit 1 = slc, bit 2 = dlc (gfx10/gfx11),
// bit 3 = swz, bit 4 = scc (gfx90a)
- // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
+ // gfx942: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
// gfx12+: bits [0-2] = th, bits [3-4] = scope,
// bit 6 = swz
// all: volatile op (bit 31, stripped at lowering)
@@ -1815,7 +1815,7 @@ def int_amdgcn_struct_ptr_tbuffer_load : DefaultAttrsIntrinsic <
llvm_i32_ty], // auxiliary/cachepolicy(imm):
// bit 0 = glc, bit 1 = slc, bit 2 = dlc (gfx10/gfx11),
// bit 3 = swz, bit 4 = scc (gfx90a)
- // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
+ // gfx942: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
// gfx12+: bits [0-2] = th, bits [3-4] = scope,
// bit 6 = swz
// all: volatile op (bit 31, stripped at lowering)
@@ -1834,7 +1834,7 @@ def int_amdgcn_struct_ptr_tbuffer_store : DefaultAttrsIntrinsic <
llvm_i32_ty], // auxiliary/cachepolicy(imm):
// bit 0 = glc, bit 1 = slc, bit 2 = dlc (gfx10/gfx11),
// bit 3 = swz, bit 4 = scc (gfx90a)
- // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
+ // gfx942: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
// gfx12+: bits [0-2] = th, bits [3-4] = scope,
// bit 6 = swz
// all: volatile op (bit 31, stripped at lowering)
@@ -1853,7 +1853,7 @@ def int_amdgcn_struct_tbuffer_store : DefaultAttrsIntrinsic <
llvm_i32_ty], // auxiliary/cachepolicy(imm):
// bit 0 = glc, bit 1 = slc, bit 2 = dlc (gfx10/gfx11),
// bit 3 = swz, bit 4 = scc (gfx90a)
- // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
+ // gfx942: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
// gfx12+: bits [0-2] = th, bits [3-4] = scope,
// bit 6 = swz
// all: volatile op (bit 31, stripped at lowering)
@@ -1872,7 +1872,7 @@ class AMDGPURawBufferLoadLDS : Intrinsic <
llvm_i32_ty], // auxiliary/cachepolicy(imm):
// bit 0 = glc, bit 1 = slc, bit 2 = dlc (gfx10/gfx11),
// bit 3 = swz, bit 4 = scc (gfx90a)
- // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
+ // gfx942: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
// gfx12+: bits [0-2] = th, bits [3-4] = scope,
// bit 6 = swz
// all: volatile op (bit 31, stripped at lowering)
@@ -1891,7 +1891,7 @@ class AMDGPURawPtrBufferLoadLDS : Intrinsic <
llvm_i32_ty], // auxiliary/cachepolicy(imm):
// bit 0 = glc, bit 1 = slc, bit 2 = dlc (gfx10/gfx11),
// bit 3 = swz, bit 4 = scc (gfx90a)
- // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
+ // gfx942: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
// gfx12+: bits [0-2] = th, bits [3-4] = scope,
// bit 6 = swz
// all: volatile op (bit 31, stripped at lowering)
@@ -1914,7 +1914,7 @@ class AMDGPUStructBufferLoadLDS : Intrinsic <
llvm_i32_ty], // auxiliary/cachepolicy(imm):
// bit 0 = glc, bit 1 = slc, bit 2 = dlc (gfx10/gfx11),
// bit 3 = swz, bit 4 = scc (gfx90a)
- // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
+ // gfx942: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
// gfx12+: bits [0-2] = th, bits [3-4] = scope,
// bit 6 = swz
// all: volatile op (bit 31, stripped at lowering)
@@ -1934,7 +1934,7 @@ class AMDGPUStructPtrBufferLoadLDS : Intrinsic <
llvm_i32_ty], // auxiliary/cachepolicy(imm):
// bit 0 = glc, bit 1 = slc, bit 2 = dlc (gfx10/gfx11),
// bit 3 = swz, bit 4 = scc (gfx90a)
- // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
+ // gfx942: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
// gfx12+: bits [0-2] = th, bits [3-4] = scope,
// bit 6 = swz
// all: volatile op (bit 31, stripped at lowering)
@@ -3007,7 +3007,7 @@ def int_amdgcn_fdot2_f32_bf16 :
// f32 %r = llvm.amdgcn.fdot2c.f32.bf16(v2bf16 %a, v2bf16 %b, f32 %c, i1 %clamp)
// %r = %a[0] * %b[0] + %a[1] * %b[1] + c
// TODO: This actually is similar to llvm.amdgcn.fdot2 intrinsics which produces
-// v_dot2c_f32_f16 on gfx940. Maybe we can consolidate these.
+// v_dot2c_f32_f16 on gfx942. Maybe we can consolidate these.
def int_amdgcn_fdot2c_f32_bf16 :
ClangBuiltin<"__builtin_amdgcn_fdot2c_f32_bf16">,
@@ -3250,7 +3250,7 @@ def int_amdgcn_mfma_f32_4x4x4bf16_1k : AMDGPUMfmaIntrinsic<llvm_v4f32_ty, ll
def int_amdgcn_mfma_f32_32x32x8bf16_1k : AMDGPUMfmaIntrinsic<llvm_v16f32_ty, llvm_v4i16_ty>;
def int_amdgcn_mfma_f32_16x16x16bf16_1k : AMDGPUMfmaIntrinsic<llvm_v4f32_ty, llvm_v4i16_ty>;
-// Note: in gfx940 BLGP argument is replaced by NEG bitfield in the DGEMM MFMA.
+// Note: in gfx942 BLGP argument is replaced by NEG bitfield in the DGEMM MFMA.
// Three bits corresponding to the neg modifier applied to the respective
// source operand.
def int_amdgcn_mfma_f64_16x16x4f64 : AMDGPUMfmaIntrinsic<llvm_v4f64_ty, llvm_double_ty>;
@@ -3258,7 +3258,7 @@ def int_amdgcn_mfma_f64_4x4x4f64 : AMDGPUMfmaIntrinsic<llvm_double_ty, ll
}
//===----------------------------------------------------------------------===//
-// gfx940 intrinsics
+// gfx942 intrinsics
// ===----------------------------------------------------------------------===//
class AMDGPUMFp8MfmaIntrinsic<LLVMType DestTy> :
diff --git a/llvm/include/llvm/TargetParser/TargetParser.h b/llvm/include/llvm/TargetParser/TargetParser.h
index 55e7b417428c4..f776b41f3d7ca 100644
--- a/llvm/include/llvm/TargetParser/TargetParser.h
+++ b/llvm/include/llvm/TargetParser/TargetParser.h
@@ -83,8 +83,6 @@ enum GPUKind : uint32_t {
GK_GFX909 = 65,
GK_GFX90A = 66,
GK_GFX90C = 67,
- GK_GFX940 = 68,
- GK_GFX941 = 69,
GK_GFX942 = 70,
GK_GFX950 = 71,
diff --git a/llvm/lib/Object/ELFObjectFile.cpp b/llvm/lib/Object/ELFObjectFile.cpp
index 2d3d70db50c39..ac25d76709726 100644
--- a/llvm/lib/Object/ELFObjectFile.cpp
+++ b/llvm/lib/Object/ELFObjectFile.cpp
@@ -545,10 +545,6 @@ StringRef ELFObjectFileBase::getAMDGPUCPUName() const {
return "gfx90a";
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX90C:
return "gfx90c";
- case ELF::EF_AMDGPU_MACH_AMDGCN_GFX940:
- return "gfx940";
- case ELF::EF_AMDGPU_MACH_AMDGCN_GFX941:
- return "gfx941";
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX942:
return "gfx942";
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX950:
diff --git a/llvm/lib/ObjectYAML/ELFYAML.cpp b/llvm/lib/ObjectYAML/ELFYAML.cpp
index 05e4d85b2ea5d..1f970739c1e7e 100644
--- a/llvm/lib/ObjectYAML/ELFYAML.cpp
+++ b/llvm/lib/ObjectYAML/ELFYAML.cpp
@@ -609,8 +609,6 @@ void ScalarBitSetTraits<ELFYAML::ELF_EF>::bitset(IO &IO,
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX909, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX90A, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX90C, EF_AMDGPU_MACH);
- BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX940, EF_AMDGPU_MACH);
- BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX941, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX942, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX950, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1010, EF_AMDGPU_MACH);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 6439149d801f6..3aabca49b249e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -1619,28 +1619,6 @@ def FeatureISAVersion9_5_Common : FeatureSet<
FeatureAtomicBufferPkAddBF16Inst
])>;
-def FeatureISAVersion9_4_0 : FeatureSet<
- !listconcat(FeatureISAVersion9_4_Common.Features,
- [
- FeatureAddressableLocalMemorySize65536,
- FeatureForceStoreSC0SC1,
- FeatureFP8Insts,
- FeatureFP8ConversionInsts,
- FeatureCvtFP8VOP1Bug,
- FeatureXF32Insts
- ])>;
-
-def FeatureISAVersion9_4_1 : FeatureSet<
- !listconcat(FeatureISAVersion9_4_Common.Features,
- [
- FeatureAddressableLocalMemorySize65536,
- FeatureForceStoreSC0SC1,
- FeatureFP8Insts,
- FeatureFP8ConversionInsts,
- FeatureCvtFP8VOP1Bug,
- FeatureXF32Insts
- ])>;
-
def FeatureISAVersion9_4_2 : FeatureSet<
!listconcat(FeatureISAVersion9_4_Common.Features,
[
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 3bbbbcf71d8ae..cf3843869808b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -4295,7 +4295,7 @@ AMDGPUInstructionSelector::selectVOP3PModsImpl(
// TODO: Handle G_FSUB 0 as fneg
// TODO: Match op_sel through g_build_vector_trunc and g_shuffle_vector.
- (void)IsDOT; // DOTs do not use OPSEL on gfx940+, check ST.hasDOTOpSelHazard()
+ (void)IsDOT; // DOTs do not use OPSEL on gfx942+, check ST.hasDOTOpSelHazard()
// Packed instructions do not have abs modifiers.
Mods |= SISrcMods::OP_SEL_1;
diff --git a/llvm/lib/Target/AMDGPU/DSInstructions.td b/llvm/lib/Target/AMDGPU/DSInstructions.td
index 9ca853befba73..d3487daee364f 100644
--- a/llvm/lib/Target/AMDGPU/DSInstructions.td
+++ b/llvm/lib/Target/AMDGPU/DSInstructions.td
@@ -1773,7 +1773,7 @@ def DS_READ_B128_vi : DS_Real_vi<0xff, DS_READ_B128>;
def DS_ADD_F64_vi : DS_Real_vi<0x5c, DS_ADD_F64>;
def DS_ADD_RTN_F64_vi : DS_Real_vi<0x7c, DS_ADD_RTN_F64>;
-// GFX940+.
+// GFX942+.
def DS_PK_ADD_F16_vi : DS_Real_vi<0x17, DS_PK_ADD_F16>;
def DS_PK_ADD_RTN_F16_vi : DS_Real_vi<0xb7, DS_PK_ADD_RTN_F16>;
def DS_PK_ADD_BF16_vi : DS_Real_vi<0x18, DS_PK_ADD_BF16>;
diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td
index ea6e703eba5d9..7988a9ac0ce55 100644
--- a/llvm/lib/Target/AMDGPU/FLATInstructions.td
+++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td
@@ -814,7 +814,7 @@ defm FLAT_ATOMIC_FMAX : FLAT_Atomic_Pseudo <"flat_atomic_fmax",
} // End SubtargetPredicate = isGFX7GFX10GFX11
-// GFX940-, GFX11-only flat instructions.
+// GFX942-, GFX11-only flat instructions.
let SubtargetPredicate = HasFlatAtomicFaddF32Inst in {
defm FLAT_ATOMIC_ADD_F32 : FLAT_Atomic_Pseudo<"flat_atomic_add_f32", VGPR_32, f32>;
} // End SubtargetPredicate = HasFlatAtomicFaddF32Inst
@@ -2076,7 +2076,7 @@ defm SCRATCH_STORE_DWORDX3 : FLAT_Real_AllAddr_SVE_vi <0x1e>;
defm SCRATCH_STORE_DWORDX4 : FLAT_Real_AllAddr_SVE_vi <0x1f>;
let SubtargetPredicate = isGFX8GFX9NotGFX940 in {
- // These instructions are encoded
diff erently on gfx90* and gfx940.
+ // These instructions are encoded
diff erently on gfx90* and gfx94*.
defm GLOBAL_ATOMIC_ADD_F32 : FLAT_Global_Real_Atomics_vi <0x04d, 0>;
defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Real_Atomics_vi <0x04e, 0>;
}
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index 827598078af53..1ff75095b220a 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -2292,7 +2292,7 @@ GFX940_SMFMA_N_PassWritesVGPROverlappedSrcABWaitStates(int NumPasses) {
static int GFX940_XDL_N_PassWritesVGPROverlappedSrcABWaitStates(int NumPasses,
bool IsGFX950) {
- // xdl def cycles | gfx940 | gfx950
+ // xdl def cycles | gfx942 | gfx950
// 2 pass | 5 5
// 4 pass | 7 8
// 8 pass | 11 12
@@ -2600,7 +2600,7 @@ static int GFX940_SMFMA_N_PassWriteVgprVALUWawWaitStates(int NumPasses) {
static int GFX940_XDL_N_PassWriteVgprVALUWawWaitStates(int NumPasses,
bool IsGFX950) {
- // xdl def cycles | gfx940 | gfx950
+ // xdl def cycles | gfx942 | gfx950
// 2 pass | 5 5
// 4 pass | 7 8
// 8 pass | 11 12
@@ -2610,7 +2610,7 @@ static int GFX940_XDL_N_PassWriteVgprVALUWawWaitStates(int NumPasses,
static int GFX940_XDL_N_PassWriteVgprVALUMemExpReadWaitStates(int NumPasses,
bool IsGFX950) {
- // xdl def cycles | gfx940 | gfx950
+ // xdl def cycles | gfx942 | gfx950
// 2 pass | 5 5
// 4 pass | 7 8
// 8 pass | 11 12
diff --git a/llvm/lib/Target/AMDGPU/GCNProcessors.td b/llvm/lib/Target/AMDGPU/GCNProcessors.td
index a86c76bb6075e..0b372e29efe67 100644
--- a/llvm/lib/Target/AMDGPU/GCNProcessors.td
+++ b/llvm/lib/Target/AMDGPU/GCNProcessors.td
@@ -192,15 +192,7 @@ def : ProcessorModel<"gfx90c", SIQuarterSpeedModel,
FeatureISAVersion9_0_C.Features
>;
-def : ProcessorModel<"gfx940", SIDPGFX940FullSpeedModel,
- FeatureISAVersion9_4_0.Features
->;
-
-def : ProcessorModel<"gfx941", SIDPGFX940FullSpeedModel,
- FeatureISAVersion9_4_1.Features
->;
-
-def : ProcessorModel<"gfx942", SIDPGFX940FullSpeedModel,
+def : ProcessorModel<"gfx942", SIDPGFX942FullSpeedModel,
FeatureISAVersion9_4_2.Features
>;
@@ -213,8 +205,8 @@ def : ProcessorModel<"gfx9-generic", SIQuarterSpeedModel,
FeatureISAVersion9_Generic.Features
>;
-// [gfx940, gfx941, gfx942]
-def : ProcessorModel<"gfx9-4-generic", SIDPGFX940FullSpeedModel,
+// [gfx942]
+def : ProcessorModel<"gfx9-4-generic", SIDPGFX942FullSpeedModel,
FeatureISAVersion9_4_Generic.Features
>;
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index 342b211199dca..f7c5c472c93a5 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -1297,11 +1297,11 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool hasPackedTID() const { return HasPackedTID; }
- // GFX940 is a derivation to GFX90A. hasGFX940Insts() being true implies that
+ // GFX94* is a derivation to GFX90A. hasGFX940Insts() being true implies that
// hasGFX90AInsts is also true.
bool hasGFX940Insts() const { return GFX940Insts; }
- // GFX950 is a derivation to GFX940. hasGFX950Insts() implies that
+ // GFX950 is a derivation to GFX94*. hasGFX950Insts() implies that
// hasGFX940Insts and hasGFX90AInsts are also true.
bool hasGFX950Insts() const { return GFX950Insts; }
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
index 059bab5838526..4a4ad712e304d 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
@@ -93,8 +93,6 @@ StringRef AMDGPUTargetStreamer::getArchNameFromElfMach(unsigned ElfMach) {
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX909: AK = GK_GFX909; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX90A: AK = GK_GFX90A; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX90C: AK = GK_GFX90C; break;
- case ELF::EF_AMDGPU_MACH_AMDGCN_GFX940: AK = GK_GFX940; break;
- case ELF::EF_AMDGPU_MACH_AMDGCN_GFX941: AK = GK_GFX941; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX942: AK = GK_GFX942; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX950: AK = GK_GFX950; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010: AK = GK_GFX1010; break;
@@ -180,8 +178,6 @@ unsigned AMDGPUTargetStreamer::getElfMach(StringRef GPU) {
case GK_GFX909: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX909;
case GK_GFX90A: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX90A;
case GK_GFX90C: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX90C;
- case GK_GFX940: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX940;
- case GK_GFX941: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX941;
case GK_GFX942: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX942;
case GK_GFX950: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX950;
case GK_GFX1010: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010;
diff --git a/llvm/lib/Target/AMDGPU/SIDefines.h b/llvm/lib/Target/AMDGPU/SIDefines.h
index f812ae652b63d..721601efcc804 100644
--- a/llvm/lib/Target/AMDGPU/SIDefines.h
+++ b/llvm/lib/Target/AMDGPU/SIDefines.h
@@ -542,7 +542,7 @@ enum Id { // HwRegCode, (6) [5:0]
ID_EXCP_FLAG_USER = 18,
ID_TRAP_CTRL = 19,
- // GFX940 specific registers
+ // GFX94* specific registers
ID_XCC_ID = 20,
ID_SQ_PERF_SNAPSHOT_DATA = 21,
ID_SQ_PERF_SNAPSHOT_DATA1 = 22,
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index e09b310d107ac..909ad07782fc6 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -16823,39 +16823,39 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
// safe. The message phrasing also should be better.
if (globalMemoryFPAtomicIsLegal(*Subtarget, RMW, HasSystemScope)) {
if (AS == AMDGPUAS::FLAT_ADDRESS) {
- // gfx940, gfx12
+ // gfx942, gfx12
if (Subtarget->hasAtomicFlatPkAdd16Insts() && isV2F16OrV2BF16(Ty))
return ReportUnsafeHWInst(AtomicExpansionKind::None);
} else if (AMDGPU::isExtendedGlobalAddrSpace(AS)) {
- // gfx90a, gfx940, gfx12
+ // gfx90a, gfx942, gfx12
if (Subtarget->hasAtomicBufferGlobalPkAddF16Insts() && isV2F16(Ty))
return ReportUnsafeHWInst(AtomicExpansionKind::None);
- // gfx940, gfx12
+ // gfx942, gfx12
if (Subtarget->hasAtomicGlobalPkAddBF16Inst() && isV2BF16(Ty))
return ReportUnsafeHWInst(AtomicExpansionKind::None);
} else if (AS == AMDGPUAS::BUFFER_FAT_POINTER) {
- // gfx90a, gfx940, gfx12
+ // gfx90a, gfx942, gfx12
if (Subtarget->hasAtomicBufferGlobalPkAddF16Insts() && isV2F16(Ty))
return ReportUnsafeHWInst(AtomicExpansionKind::None);
- // While gfx90a/gfx940 supports v2bf16 for global/flat, it does not for
+ // While gfx90a/gfx942 supports v2bf16 for global/flat, it does not for
// buffer. gfx12 does have the buffer version.
if (Subtarget->hasAtomicBufferPkAddBF16Inst() && isV2BF16(Ty))
return ReportUnsafeHWInst(AtomicExpansionKind::None);
}
- // global and flat atomic fadd f64: gfx90a, gfx940.
+ // global and flat atomic fadd f64: gfx90a, gfx942.
if (Subtarget->hasFlatBufferGlobalAtomicFaddF64Inst() && Ty->isDoubleTy())
return ReportUnsafeHWInst(AtomicExpansionKind::None);
if (AS != AMDGPUAS::FLAT_ADDRESS) {
if (Ty->isFloatTy()) {
- // global/buffer atomic fadd f32 no-rtn: gfx908, gfx90a, gfx940,
+ // global/buffer atomic fadd f32 no-rtn: gfx908, gfx90a, gfx942,
// gfx11+.
if (RMW->use_empty() && Subtarget->hasAtomicFaddNoRtnInsts())
return ReportUnsafeHWInst(AtomicExpansionKind::None);
- // global/buffer atomic fadd f32 rtn: gfx90a, gfx940, gfx11+.
+ // global/buffer atomic fadd f32 rtn: gfx90a, gfx942, gfx11+.
if (!RMW->use_empty() && Subtarget->hasAtomicFaddRtnInsts())
return ReportUnsafeHWInst(AtomicExpansionKind::None);
} else {
@@ -16867,7 +16867,7 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
}
}
- // flat atomic fadd f32: gfx940, gfx11+.
+ // flat atomic fadd f32: gfx942, gfx11+.
if (AS == AMDGPUAS::FLAT_ADDRESS && Ty->isFloatTy()) {
if (Subtarget->hasFlatAtomicFaddF32Inst())
return ReportUnsafeHWInst(AtomicExpansionKind::None);
@@ -16906,7 +16906,7 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
// float, double restored in gfx10.
// double removed again in gfx11, so only f32 for gfx11/gfx12.
//
- // For gfx9, gfx90a and gfx940 support f64 for global (same as fadd), but
+ // For gfx9, gfx90a and gfx942 support f64 for global (same as fadd), but
// no f32.
if (AS == AMDGPUAS::FLAT_ADDRESS) {
if (Subtarget->hasAtomicFMinFMaxF32FlatInsts() && Ty->isFloatTy())
diff --git a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
index be6cff873532b..79fb36acc0ea7 100644
--- a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
@@ -492,7 +492,6 @@ class SIGfx940CacheControl : public SIGfx90ACacheControl {
}
public:
-
SIGfx940CacheControl(const GCNSubtarget &ST) : SIGfx90ACacheControl(ST) {};
bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI,
diff --git a/llvm/lib/Target/AMDGPU/SISchedule.td b/llvm/lib/Target/AMDGPU/SISchedule.td
index 117add324db56..2a374b360b04a 100644
--- a/llvm/lib/Target/AMDGPU/SISchedule.td
+++ b/llvm/lib/Target/AMDGPU/SISchedule.td
@@ -94,7 +94,7 @@ class SISchedMachineModel : SchedMachineModel {
def SIFullSpeedModel : SISchedMachineModel;
def SIQuarterSpeedModel : SISchedMachineModel;
def SIDPFullSpeedModel : SISchedMachineModel;
-def SIDPGFX940FullSpeedModel : SISchedMachineModel;
+def SIDPGFX942FullSpeedModel : SISchedMachineModel;
def SIDPGFX950FullSpeedModel : SISchedMachineModel;
def GFX10SpeedModel : SISchedMachineModel;
def GFX11SpeedModel : SISchedMachineModel;
@@ -276,7 +276,7 @@ def : InstRW<[Write8PassDGEMM, MIMFMARead], (instregex "^V_MFMA_.64_16X16X")>;
} // End SchedModel = SIDPFullSpeedModel
-let SchedModel = SIDPGFX940FullSpeedModel in {
+let SchedModel = SIDPGFX942FullSpeedModel in {
defm : SICommonWriteRes;
@@ -308,7 +308,7 @@ def : InstRW<[Write8PassDGEMM, MIMFMARead], (instregex "^V_MFMA_.64_16X16X")>;
def : InstRW<[Write4PassMAI, MIMFMARead], (instregex "^V_SMFMAC_.32_16X16X")>;
def : InstRW<[Write8PassMAI, MIMFMARead], (instregex "^V_SMFMAC_.32_32X32X")>;
-} // End SchedModel = SIDPGFX940FullSpeedModel
+} // End SchedModel = SIDPGFX942FullSpeedModel
let SchedModel = SIDPGFX950FullSpeedModel in {
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp
index a8e4ce133ffbc..e433b85489e6e 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp
@@ -216,7 +216,7 @@ static constexpr CustomOperand Operands[] = {
{{"HW_REG_SCRATCH_BASE_HI"}, ID_FLAT_SCR_HI, isGFX12Plus},
{{"HW_REG_SHADER_CYCLES_LO"}, ID_SHADER_CYCLES, isGFX12Plus},
- // GFX940 specific registers
+ // GFX942 specific registers
{{"HW_REG_XCC_ID"}, ID_XCC_ID, isGFX940},
{{"HW_REG_SQ_PERF_SNAPSHOT_DATA"}, ID_SQ_PERF_SNAPSHOT_DATA, isGFX940},
{{"HW_REG_SQ_PERF_SNAPSHOT_DATA1"}, ID_SQ_PERF_SNAPSHOT_DATA1, isGFX940},
diff --git a/llvm/lib/TargetParser/TargetParser.cpp b/llvm/lib/TargetParser/TargetParser.cpp
index 0a605dfd017cb..8731a16b88a5c 100644
--- a/llvm/lib/TargetParser/TargetParser.cpp
+++ b/llvm/lib/TargetParser/TargetParser.cpp
@@ -104,8 +104,6 @@ constexpr GPUInfo AMDGCNGPUs[] = {
{{"gfx909"}, {"gfx909"}, GK_GFX909, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK},
{{"gfx90a"}, {"gfx90a"}, GK_GFX90A, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC},
{{"gfx90c"}, {"gfx90c"}, GK_GFX90C, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK},
- {{"gfx940"}, {"gfx940"}, GK_GFX940, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC},
- {{"gfx941"}, {"gfx941"}, GK_GFX941, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC},
{{"gfx942"}, {"gfx942"}, GK_GFX942, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC},
{{"gfx950"}, {"gfx950"}, GK_GFX950, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC},
{{"gfx1010"}, {"gfx1010"}, GK_GFX1010, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK|FEATURE_WGP},
@@ -260,8 +258,6 @@ AMDGPU::IsaVersion AMDGPU::getIsaVersion(StringRef GPU) {
case GK_GFX909: return {9, 0, 9};
case GK_GFX90A: return {9, 0, 10};
case GK_GFX90C: return {9, 0, 12};
- case GK_GFX940: return {9, 4, 0};
- case GK_GFX941: return {9, 4, 1};
case GK_GFX942: return {9, 4, 2};
case GK_GFX950: return {9, 5, 0};
case GK_GFX1010: return {10, 1, 0};
@@ -506,8 +502,6 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
Features["gfx950-insts"] = true;
[[fallthrough]];
case GK_GFX942:
- case GK_GFX941:
- case GK_GFX940:
Features["fp8-insts"] = true;
Features["fp8-conversion-insts"] = true;
if (Kind != GK_GFX950)
diff --git a/llvm/test/CodeGen/AMDGPU/preload-implicit-kernargs-debug-info.ll b/llvm/test/CodeGen/AMDGPU/preload-implicit-kernargs-debug-info.ll
index b008f397318e8..89c9801b5e466 100644
--- a/llvm/test/CodeGen/AMDGPU/preload-implicit-kernargs-debug-info.ll
+++ b/llvm/test/CodeGen/AMDGPU/preload-implicit-kernargs-debug-info.ll
@@ -1,4 +1,4 @@
-; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -passes='amdgpu-attributor,function(amdgpu-lower-kernel-arguments)' -amdgpu-kernarg-preload-count=16 -S < %s 2>&1 \
+; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -passes='amdgpu-attributor,function(amdgpu-lower-kernel-arguments)' -amdgpu-kernarg-preload-count=16 -S < %s 2>&1 \
; RUN: | FileCheck --match-full-lines --implicit-check-not='declare' %s
; Confirms we do not leave behind a declaration which references the same
diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp
index 2da08127f20a8..fdae09ac767e6 100644
--- a/llvm/tools/llvm-readobj/ELFDumper.cpp
+++ b/llvm/tools/llvm-readobj/ELFDumper.cpp
@@ -1624,8 +1624,6 @@ const EnumEntry<unsigned> ElfHeaderMipsFlags[] = {
ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX909, "gfx909"), \
ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX90A, "gfx90a"), \
ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX90C, "gfx90c"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX940, "gfx940"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX941, "gfx941"), \
ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX942, "gfx942"), \
ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX950, "gfx950"), \
ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1010, "gfx1010"), \
More information about the llvm-commits
mailing list