[clang] [llvm] [AMDGPU] Replace gfx940 and gfx941 with gfx942 in llvm (PR #126763)
Fabian Ritter via cfe-commits
cfe-commits at lists.llvm.org
Wed Feb 19 01:18:09 PST 2025
https://github.com/ritter-x2a updated https://github.com/llvm/llvm-project/pull/126763
From 6ecc50b535007d6d96cb9fd4e543cc7c359e2a49 Mon Sep 17 00:00:00 2001
From: Fabian Ritter <fabian.ritter at amd.com>
Date: Tue, 11 Feb 2025 11:31:20 -0500
Subject: [PATCH 1/3] [AMDGPU] Replace gfx940 and gfx941 with gfx942 in llvm
gfx940 and gfx941 are no longer supported. This is one of a series of
PRs to remove them from the code base.
This PR removes all non-documentation occurrences of gfx940/gfx941 from
the llvm directory, and the remaining occurrences in clang.
Documentation changes will follow.
For SWDEV-512631
---
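Note: a minimal OpenCL sketch (illustration only, not part of the diff below) of code affected by the feature rename; it assumes a device compile targeting gfx942, where the builtin is now gated on gfx942-insts instead of gfx940-insts. The function and variable names here are hypothetical.

  // Hypothetical example: after this change, the builtin requires the
  // gfx942-insts target feature (formerly gfx940-insts).
  void sketch(__global float *addr, float val) {
    // Atomic floating-point add through a flat-capable pointer; the
    // returned old value of *addr is ignored here.
    __builtin_amdgcn_flat_atomic_fadd_f32(addr, val);
  }
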
clang/include/clang/Basic/BuiltinsAMDGPU.def | 4 +-
.../CodeGenCXX/dynamic-cast-address-space.cpp | 4 +-
clang/test/CodeGenOpenCL/amdgpu-features.cl | 6 +-
.../builtins-amdgcn-fp-atomics-gfx90a-err.cl | 2 +-
.../Misc/target-invalid-cpu-note/amdgcn.c | 2 -
llvm/docs/AMDGPUUsage.rst | 4 +-
llvm/include/llvm/BinaryFormat/ELF.h | 4 +-
llvm/include/llvm/IR/IntrinsicsAMDGPU.td | 56 +++----
llvm/include/llvm/TargetParser/TargetParser.h | 2 -
llvm/lib/Object/ELFObjectFile.cpp | 4 -
llvm/lib/ObjectYAML/ELFYAML.cpp | 2 -
llvm/lib/Target/AMDGPU/AMDGPU.td | 68 +++-----
.../AMDGPU/AMDGPUInstructionSelector.cpp | 2 +-
.../AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 28 ++--
llvm/lib/Target/AMDGPU/BUFInstructions.td | 22 +--
llvm/lib/Target/AMDGPU/DSInstructions.td | 2 +-
.../Disassembler/AMDGPUDisassembler.cpp | 6 +-
llvm/lib/Target/AMDGPU/FLATInstructions.td | 96 +++++------
.../lib/Target/AMDGPU/GCNHazardRecognizer.cpp | 99 +++++------
llvm/lib/Target/AMDGPU/GCNProcessors.td | 14 +-
llvm/lib/Target/AMDGPU/GCNSubtarget.h | 41 ++---
.../AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp | 21 +--
.../MCTargetDesc/AMDGPUTargetStreamer.cpp | 4 -
llvm/lib/Target/AMDGPU/SIDefines.h | 4 +-
llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 20 +--
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 4 +-
llvm/lib/Target/AMDGPU/SIInstrInfo.td | 4 +-
llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp | 45 ++---
llvm/lib/Target/AMDGPU/SISchedule.td | 6 +-
.../Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp | 12 +-
.../Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 8 +-
llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h | 6 +-
llvm/lib/Target/AMDGPU/VOP1Instructions.td | 4 +-
llvm/lib/Target/AMDGPU/VOP2Instructions.td | 8 +-
llvm/lib/Target/AMDGPU/VOP3Instructions.td | 8 +-
llvm/lib/Target/AMDGPU/VOP3PInstructions.td | 154 +++++++++---------
llvm/lib/TargetParser/TargetParser.cpp | 10 +-
llvm/tools/llvm-readobj/ELFDumper.cpp | 2 -
38 files changed, 352 insertions(+), 436 deletions(-)
diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index 39e295aced96b..e7e5ed77f432b 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -248,13 +248,13 @@ TARGET_BUILTIN(__builtin_amdgcn_flat_atomic_fmax_f64, "dd*0d", "t", "gfx90a-inst
TARGET_BUILTIN(__builtin_amdgcn_ds_atomic_fadd_f64, "dd*3d", "t", "gfx90a-insts")
TARGET_BUILTIN(__builtin_amdgcn_ds_atomic_fadd_f32, "ff*3f", "t", "gfx8-insts")
-TARGET_BUILTIN(__builtin_amdgcn_flat_atomic_fadd_f32, "ff*0f", "t", "gfx940-insts")
+TARGET_BUILTIN(__builtin_amdgcn_flat_atomic_fadd_f32, "ff*0f", "t", "gfx942-insts")
TARGET_BUILTIN(__builtin_amdgcn_flat_atomic_fadd_v2f16, "V2hV2h*0V2h", "t", "atomic-flat-pk-add-16-insts")
TARGET_BUILTIN(__builtin_amdgcn_flat_atomic_fadd_v2bf16, "V2sV2s*0V2s", "t", "atomic-flat-pk-add-16-insts")
TARGET_BUILTIN(__builtin_amdgcn_global_atomic_fadd_v2bf16, "V2sV2s*1V2s", "t", "atomic-global-pk-add-bf16-inst")
TARGET_BUILTIN(__builtin_amdgcn_ds_atomic_fadd_v2bf16, "V2sV2s*3V2s", "t", "atomic-ds-pk-add-16-insts")
TARGET_BUILTIN(__builtin_amdgcn_ds_atomic_fadd_v2f16, "V2hV2h*3V2h", "t", "atomic-ds-pk-add-16-insts")
-TARGET_BUILTIN(__builtin_amdgcn_global_load_lds, "vv*1v*3IUiIiIUi", "t", "gfx940-insts")
+TARGET_BUILTIN(__builtin_amdgcn_global_load_lds, "vv*1v*3IUiIiIUi", "t", "gfx942-insts")
//===----------------------------------------------------------------------===//
// Deep learning builtins.
diff --git a/clang/test/CodeGenCXX/dynamic-cast-address-space.cpp b/clang/test/CodeGenCXX/dynamic-cast-address-space.cpp
index 0460352cf7ffc..f07dbd9a29b98 100644
--- a/clang/test/CodeGenCXX/dynamic-cast-address-space.cpp
+++ b/clang/test/CodeGenCXX/dynamic-cast-address-space.cpp
@@ -112,9 +112,9 @@ const B& f(A *a) {
// CHECK: attributes #[[ATTR3]] = { nounwind }
// CHECK: attributes #[[ATTR4]] = { noreturn }
//.
-// WITH-NONZERO-DEFAULT-AS: attributes #[[ATTR0]] = { mustprogress noinline optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+16-bit-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+bf8-cvt-scale-insts,+bitop3-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot13-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+f16bf16-to-fp6bf6-cvt-scale-insts,+f32-to-f16bf16-cvt-sr-insts,+fp4-cvt-scale-insts,+fp6bf6-cvt-scale-insts,+fp8-conversion-insts,+fp8-cvt-scale-insts,+fp8-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+gfx950-insts,+gws,+image-insts,+mai-insts,+permlane16-swap,+permlane32-swap,+prng-inst,+s-memrealtime,+s-memtime-inst,+wavefrontsize32,+wavefrontsize64" }
+// WITH-NONZERO-DEFAULT-AS: attributes #[[ATTR0]] = { mustprogress noinline optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+16-bit-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+bf8-cvt-scale-insts,+bitop3-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot13-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+f16bf16-to-fp6bf6-cvt-scale-insts,+f32-to-f16bf16-cvt-sr-insts,+fp4-cvt-scale-insts,+fp6bf6-cvt-scale-insts,+fp8-conversion-insts,+fp8-cvt-scale-insts,+fp8-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx942-insts,+gfx950-insts,+gws,+image-insts,+mai-insts,+permlane16-swap,+permlane32-swap,+prng-inst,+s-memrealtime,+s-memtime-inst,+wavefrontsize32,+wavefrontsize64" }
// WITH-NONZERO-DEFAULT-AS: attributes #[[ATTR1:[0-9]+]] = { nounwind willreturn memory(read) }
-// WITH-NONZERO-DEFAULT-AS: attributes #[[ATTR2:[0-9]+]] = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+16-bit-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+bf8-cvt-scale-insts,+bitop3-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot13-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+f16bf16-to-fp6bf6-cvt-scale-insts,+f32-to-f16bf16-cvt-sr-insts,+fp4-cvt-scale-insts,+fp6bf6-cvt-scale-insts,+fp8-conversion-insts,+fp8-cvt-scale-insts,+fp8-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+gfx950-insts,+gws,+image-insts,+mai-insts,+permlane16-swap,+permlane32-swap,+prng-inst,+s-memrealtime,+s-memtime-inst,+wavefrontsize32,+wavefrontsize64" }
+// WITH-NONZERO-DEFAULT-AS: attributes #[[ATTR2:[0-9]+]] = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+16-bit-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+bf8-cvt-scale-insts,+bitop3-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot13-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+f16bf16-to-fp6bf6-cvt-scale-insts,+f32-to-f16bf16-cvt-sr-insts,+fp4-cvt-scale-insts,+fp6bf6-cvt-scale-insts,+fp8-conversion-insts,+fp8-cvt-scale-insts,+fp8-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx942-insts,+gfx950-insts,+gws,+image-insts,+mai-insts,+permlane16-swap,+permlane32-swap,+prng-inst,+s-memrealtime,+s-memtime-inst,+wavefrontsize32,+wavefrontsize64" }
// WITH-NONZERO-DEFAULT-AS: attributes #[[ATTR3]] = { nounwind }
// WITH-NONZERO-DEFAULT-AS: attributes #[[ATTR4]] = { noreturn }
//.
diff --git a/clang/test/CodeGenOpenCL/amdgpu-features.cl b/clang/test/CodeGenOpenCL/amdgpu-features.cl
index d12dcead6fadf..2c9f3c78b1df2 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-features.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-features.cl
@@ -83,9 +83,9 @@
// GFX909: "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
// GFX90A: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
// GFX90C: "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
-// GFX942: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-conversion-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,+xf32-insts"
-// GFX9_4_Generic: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
-// GFX950: "target-features"="+16-bit-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+bf8-cvt-scale-insts,+bitop3-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot12-insts,+dot13-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+f16bf16-to-fp6bf6-cvt-scale-insts,+f32-to-f16bf16-cvt-sr-insts,+fp4-cvt-scale-insts,+fp6bf6-cvt-scale-insts,+fp8-conversion-insts,+fp8-cvt-scale-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+gfx950-insts,+mai-insts,+permlane16-swap,+permlane32-swap,+prng-inst,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
+// GFX942: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-conversion-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx942-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,+xf32-insts"
+// GFX9_4_Generic: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx942-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
+// GFX950: "target-features"="+16-bit-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+bf8-cvt-scale-insts,+bitop3-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot12-insts,+dot13-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+f16bf16-to-fp6bf6-cvt-scale-insts,+f32-to-f16bf16-cvt-sr-insts,+fp4-cvt-scale-insts,+fp6bf6-cvt-scale-insts,+fp8-conversion-insts,+fp8-cvt-scale-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx942-insts,+gfx950-insts,+mai-insts,+permlane16-swap,+permlane32-swap,+prng-inst,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
// GFX1010: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
// GFX1011: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
// GFX1012: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-fp-atomics-gfx90a-err.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-fp-atomics-gfx90a-err.cl
index f651ce349e206..86d84005133bc 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-fp-atomics-gfx90a-err.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-fp-atomics-gfx90a-err.cl
@@ -9,7 +9,7 @@ typedef short __attribute__((ext_vector_type(2))) short2;
void test_atomic_fadd(__global half2 *addrh2, __local half2 *addrh2l, half2 xh2,
__global short2 *addrs2, __local short2 *addrs2l, short2 xs2,
__global float *addrf, float xf) {
- __builtin_amdgcn_flat_atomic_fadd_f32(addrf, xf); // expected-error{{'__builtin_amdgcn_flat_atomic_fadd_f32' needs target feature gfx940-insts}}
+ __builtin_amdgcn_flat_atomic_fadd_f32(addrf, xf); // expected-error{{'__builtin_amdgcn_flat_atomic_fadd_f32' needs target feature gfx942-insts}}
__builtin_amdgcn_flat_atomic_fadd_v2f16(addrh2, xh2); // expected-error{{'__builtin_amdgcn_flat_atomic_fadd_v2f16' needs target feature atomic-flat-pk-add-16-insts}}
__builtin_amdgcn_flat_atomic_fadd_v2bf16(addrs2, xs2); // expected-error{{'__builtin_amdgcn_flat_atomic_fadd_v2bf16' needs target feature atomic-flat-pk-add-16-insts}}
__builtin_amdgcn_global_atomic_fadd_v2bf16(addrs2, xs2); // expected-error{{'__builtin_amdgcn_global_atomic_fadd_v2bf16' needs target feature atomic-global-pk-add-bf16-inst}}
diff --git a/clang/test/Misc/target-invalid-cpu-note/amdgcn.c b/clang/test/Misc/target-invalid-cpu-note/amdgcn.c
index 642d2df211c21..9ef44b2bb403e 100644
--- a/clang/test/Misc/target-invalid-cpu-note/amdgcn.c
+++ b/clang/test/Misc/target-invalid-cpu-note/amdgcn.c
@@ -45,8 +45,6 @@
// CHECK-SAME: {{^}}, gfx909
// CHECK-SAME: {{^}}, gfx90a
// CHECK-SAME: {{^}}, gfx90c
-// CHECK-SAME: {{^}}, gfx940
-// CHECK-SAME: {{^}}, gfx941
// CHECK-SAME: {{^}}, gfx942
// CHECK-SAME: {{^}}, gfx950
// CHECK-SAME: {{^}}, gfx1010
diff --git a/llvm/docs/AMDGPUUsage.rst b/llvm/docs/AMDGPUUsage.rst
index 5966d1617feee..936e8e2960bf1 100644
--- a/llvm/docs/AMDGPUUsage.rst
+++ b/llvm/docs/AMDGPUUsage.rst
@@ -2232,7 +2232,7 @@ The AMDGPU backend uses the following ELF header:
``EF_AMDGPU_MACH_AMDGCN_GFX1035`` 0x03d ``gfx1035``
``EF_AMDGPU_MACH_AMDGCN_GFX1034`` 0x03e ``gfx1034``
``EF_AMDGPU_MACH_AMDGCN_GFX90A`` 0x03f ``gfx90a``
- ``EF_AMDGPU_MACH_AMDGCN_GFX940`` 0x040 ``gfx940``
+ *reserved* 0x040 Reserved.
``EF_AMDGPU_MACH_AMDGCN_GFX1100`` 0x041 ``gfx1100``
``EF_AMDGPU_MACH_AMDGCN_GFX1013`` 0x042 ``gfx1013``
``EF_AMDGPU_MACH_AMDGCN_GFX1150`` 0x043 ``gfx1150``
@@ -2243,7 +2243,7 @@ The AMDGPU backend uses the following ELF header:
``EF_AMDGPU_MACH_AMDGCN_GFX1200`` 0x048 ``gfx1200``
*reserved* 0x049 Reserved.
``EF_AMDGPU_MACH_AMDGCN_GFX1151`` 0x04a ``gfx1151``
- ``EF_AMDGPU_MACH_AMDGCN_GFX941`` 0x04b ``gfx941``
+ *reserved* 0x04b Reserved.
``EF_AMDGPU_MACH_AMDGCN_GFX942`` 0x04c ``gfx942``
*reserved* 0x04d Reserved.
``EF_AMDGPU_MACH_AMDGCN_GFX1201`` 0x04e ``gfx1201``
diff --git a/llvm/include/llvm/BinaryFormat/ELF.h b/llvm/include/llvm/BinaryFormat/ELF.h
index 64f643749d6ac..37eab89e706db 100644
--- a/llvm/include/llvm/BinaryFormat/ELF.h
+++ b/llvm/include/llvm/BinaryFormat/ELF.h
@@ -814,7 +814,7 @@ enum : unsigned {
EF_AMDGPU_MACH_AMDGCN_GFX1035 = 0x03d,
EF_AMDGPU_MACH_AMDGCN_GFX1034 = 0x03e,
EF_AMDGPU_MACH_AMDGCN_GFX90A = 0x03f,
- EF_AMDGPU_MACH_AMDGCN_GFX940 = 0x040,
+ EF_AMDGPU_MACH_AMDGCN_RESERVED_0X40 = 0x040,
EF_AMDGPU_MACH_AMDGCN_GFX1100 = 0x041,
EF_AMDGPU_MACH_AMDGCN_GFX1013 = 0x042,
EF_AMDGPU_MACH_AMDGCN_GFX1150 = 0x043,
@@ -825,7 +825,7 @@ enum : unsigned {
EF_AMDGPU_MACH_AMDGCN_GFX1200 = 0x048,
EF_AMDGPU_MACH_AMDGCN_RESERVED_0X49 = 0x049,
EF_AMDGPU_MACH_AMDGCN_GFX1151 = 0x04a,
- EF_AMDGPU_MACH_AMDGCN_GFX941 = 0x04b,
+ EF_AMDGPU_MACH_AMDGCN_RESERVED_0X4B = 0x04b,
EF_AMDGPU_MACH_AMDGCN_GFX942 = 0x04c,
EF_AMDGPU_MACH_AMDGCN_RESERVED_0X4D = 0x04d,
EF_AMDGPU_MACH_AMDGCN_GFX1201 = 0x04e,
diff --git a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
index 9558f2b9b74e0..1e4f25c642493 100644
--- a/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
+++ b/llvm/include/llvm/IR/IntrinsicsAMDGPU.td
@@ -1074,7 +1074,7 @@ class AMDGPUImageDimIntrinsic<AMDGPUDimProfile P_,
// bit 0 = glc, bit 1 = slc,
// bit 2 = dlc (gfx10/gfx11),
// bit 4 = scc (gfx90a)
- // gfx940: bit 0 = sc0, bit 1 = nt, bit 4 = sc1
+ // gfx942: bit 0 = sc0, bit 1 = nt, bit 4 = sc1
// gfx12+: bits [0-2] = th, bits [3-4] = scope
!listconcat(props, [IntrNoCallback, IntrNoFree, IntrWillReturn],
!if(P_.IsAtomic, [], [ImmArg<ArgIndex<AMDGPUImageDimIntrinsicEval<P_>.DmaskArgIndex>>]),
@@ -1321,7 +1321,7 @@ def int_amdgcn_s_buffer_load : DefaultAttrsIntrinsic <
llvm_i32_ty], // auxiliary/cachepolicy(imm):
// bit 0 = glc, bit 1 = slc, bit 2 = dlc (gfx10/gfx11),
// bit 3 = swz, bit 4 = scc (gfx90a)
- // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
+ // gfx942: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
// gfx12+: bits [0-2] = th, bits [3-4] = scope,
// bit 6 = swz
// Note: volatile bit is **not** permitted here.
@@ -1351,7 +1351,7 @@ class AMDGPURawBufferLoad : DefaultAttrsIntrinsic <
llvm_i32_ty], // auxiliary/cachepolicy(imm):
// bit 0 = glc, bit 1 = slc, bit 2 = dlc (gfx10/gfx11),
// bit 3 = swz, bit 4 = scc (gfx90a)
- // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
+ // gfx942: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
// gfx12+: bits [0-2] = th, bits [3-4] = scope,
// bit 6 = swz
// all: volatile op (bit 31, stripped at lowering)
@@ -1381,7 +1381,7 @@ class AMDGPURawPtrBufferLoad<LLVMType data_ty = llvm_any_ty> : DefaultAttrsIntri
llvm_i32_ty], // auxiliary/cachepolicy(imm):
// bit 0 = glc, bit 1 = slc, bit 2 = dlc (gfx10/gfx11),
// bit 3 = swz, bit 4 = scc (gfx90a)
- // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
+ // gfx942: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
// gfx12+: bits [0-2] = th, bits [3-4] = scope,
// bit 6 = swz
// all: volatile op (bit 31, stripped at lowering)
@@ -1413,7 +1413,7 @@ class AMDGPUStructBufferLoad<LLVMType data_ty = llvm_any_ty> : DefaultAttrsIntri
llvm_i32_ty], // auxiliary/cachepolicy(imm):
// bit 0 = glc, bit 1 = slc, bit 2 = dlc (gfx10/gfx11),
// bit 3 = swz, bit 4 = scc (gfx90a)
- // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
+ // gfx942: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
// gfx12+: bits [0-2] = th, bits [3-4] = scope,
// bit 6 = swz
// all: volatile op (bit 31, stripped at lowering)
@@ -1431,7 +1431,7 @@ class AMDGPUStructAtomicBufferLoad<LLVMType data_ty = llvm_any_ty> : Intrinsic <
llvm_i32_ty], // auxiliary/cachepolicy(imm):
// bit 0 = glc, bit 1 = slc, bit 2 = dlc (gfx10/gfx11),
// bit 3 = swz, bit 4 = scc (gfx90a)
- // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
+ // gfx942: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
// gfx12+: bits [0-2] = th, bits [3-4] = scope,
// bit 6 = swz
// all: volatile op (bit 31, stripped at lowering)
@@ -1448,7 +1448,7 @@ class AMDGPUStructPtrBufferLoad<LLVMType data_ty = llvm_any_ty> : DefaultAttrsIn
llvm_i32_ty], // auxiliary/cachepolicy(imm):
// bit 0 = glc, bit 1 = slc, bit 2 = dlc (gfx10/gfx11),
// bit 3 = swz, bit 4 = scc (gfx90a)
- // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
+ // gfx942: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
// gfx12+: bits [0-2] = th, bits [3-4] = scope,
// bit 6 = swz
// all: volatile op (bit 31, stripped at lowering)
@@ -1467,7 +1467,7 @@ class AMDGPUStructPtrAtomicBufferLoad<LLVMType data_ty = llvm_any_ty> : Intrinsi
llvm_i32_ty], // auxiliary/cachepolicy(imm):
// bit 0 = glc, bit 1 = slc, bit 2 = dlc (gfx10/gfx11),
// bit 3 = swz, bit 4 = scc (gfx90a)
- // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
+ // gfx942: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
// gfx12+: bits [0-2] = th, bits [3-4] = scope,
// bit 6 = swz
// all: volatile op (bit 31, stripped at lowering)
@@ -1485,7 +1485,7 @@ class AMDGPURawBufferStore<LLVMType data_ty = llvm_any_ty> : DefaultAttrsIntrins
llvm_i32_ty], // auxiliary/cachepolicy(imm):
// bit 0 = glc, bit 1 = slc, bit 2 = dlc (gfx10/gfx11),
// bit 3 = swz, bit 4 = scc (gfx90a)
- // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
+ // gfx942: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
// gfx12+: bits [0-2] = th, bits [3-4] = scope,
// bit 6 = swz
// all: volatile op (bit 31, stripped at lowering)
@@ -1503,7 +1503,7 @@ class AMDGPURawPtrBufferStore<LLVMType data_ty = llvm_any_ty> : DefaultAttrsIntr
llvm_i32_ty], // auxiliary/cachepolicy(imm):
// bit 0 = glc, bit 1 = slc, bit 2 = dlc (gfx10/gfx11),
// bit 3 = swz, bit 4 = scc (gfx90a)
- // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
+ // gfx942: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
// gfx12+: bits [0-2] = th, bits [3-4] = scope,
// bit 6 = swz
// all: volatile op (bit 31, stripped at lowering)
@@ -1523,7 +1523,7 @@ class AMDGPUStructBufferStore<LLVMType data_ty = llvm_any_ty> : DefaultAttrsIntr
llvm_i32_ty], // auxiliary/cachepolicy(imm):
// bit 0 = glc, bit 1 = slc, bit 2 = dlc (gfx10/gfx11),
// bit 3 = swz, bit 4 = scc (gfx90a)
- // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
+ // gfx942: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
// gfx12+: bits [0-2] = th, bits [3-4] = scope,
// bit 6 = swz
// all: volatile op (bit 31, stripped at lowering)
@@ -1542,7 +1542,7 @@ class AMDGPUStructPtrBufferStore<LLVMType data_ty = llvm_any_ty> : DefaultAttrsI
llvm_i32_ty], // auxiliary/cachepolicy(imm):
// bit 0 = glc, bit 1 = slc, bit 2 = dlc (gfx10/gfx11),
// bit 3 = swz, bit 4 = scc (gfx90a)
- // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
+ // gfx942: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
// gfx12+: bits [0-2] = th, bits [3-4] = scope,
// bit 6 = swz
// all: volatile op (bit 31, stripped at lowering)
@@ -1628,7 +1628,7 @@ def int_amdgcn_raw_ptr_buffer_atomic_cmpswap : Intrinsic<
// gfx908 intrinsic
def int_amdgcn_raw_buffer_atomic_fadd : AMDGPURawBufferAtomic<llvm_anyfloat_ty>;
-// Supports float and <2 x half> on gfx908. Supports v2bf16 on gfx90a, gfx940, gfx950, gfx12+.
+// Supports float and <2 x half> on gfx908. Supports v2bf16 on gfx90a, gfx942, gfx950, gfx12+.
def int_amdgcn_raw_ptr_buffer_atomic_fadd : AMDGPURawPtrBufferAtomic<llvm_anyfloat_ty>;
class AMDGPUStructBufferAtomic<LLVMType data_ty = llvm_any_ty> : Intrinsic <
@@ -1727,7 +1727,7 @@ def int_amdgcn_raw_tbuffer_load : DefaultAttrsIntrinsic <
llvm_i32_ty], // auxiliary/cachepolicy(imm):
// bit 0 = glc, bit 1 = slc, bit 2 = dlc (gfx10/gfx11),
// bit 3 = swz, bit 4 = scc (gfx90a)
- // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
+ // gfx942: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
// gfx12+: bits [0-2] = th, bits [3-4] = scope,
// bit 6 = swz
[IntrReadMem,
@@ -1743,7 +1743,7 @@ def int_amdgcn_raw_ptr_tbuffer_load : DefaultAttrsIntrinsic <
llvm_i32_ty], // auxiliary/cachepolicy(imm):
// bit 0 = glc, bit 1 = slc, bit 2 = dlc (gfx10/gfx11),
// bit 3 = swz, bit 4 = scc (gfx90a)
- // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
+ // gfx942: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
// gfx12+: bits [0-2] = th, bits [3-4] = scope,
// bit 6 = swz
// all: volatile op (bit 31, stripped at lowering)
@@ -1761,7 +1761,7 @@ def int_amdgcn_raw_tbuffer_store : DefaultAttrsIntrinsic <
llvm_i32_ty], // auxiliary/cachepolicy(imm):
// bit 0 = glc, bit 1 = slc, bit 2 = dlc (gfx10/gfx11),
// bit 3 = swz, bit 4 = scc (gfx90a)
- // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
+ // gfx942: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
// gfx12+: bits [0-2] = th, bits [3-4] = scope,
// bit 6 = swz
// all: volatile op (bit 31, stripped at lowering)
@@ -1779,7 +1779,7 @@ def int_amdgcn_raw_ptr_tbuffer_store : DefaultAttrsIntrinsic <
llvm_i32_ty], // auxiliary/cachepolicy(imm):
// bit 0 = glc, bit 1 = slc, bit 2 = dlc (gfx10/gfx11),
// bit 3 = swz, bit 4 = scc (gfx90a)
- // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
+ // gfx942: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
// gfx12+: bits [0-2] = th, bits [3-4] = scope,
// bit 6 = swz
// all: volatile op (bit 31, stripped at lowering)
@@ -1797,7 +1797,7 @@ def int_amdgcn_struct_tbuffer_load : DefaultAttrsIntrinsic <
llvm_i32_ty], // auxiliary/cachepolicy(imm):
// bit 0 = glc, bit 1 = slc, bit 2 = dlc (gfx10/gfx11),
// bit 3 = swz, bit 4 = scc (gfx90a)
- // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
+ // gfx942: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
// gfx12+: bits [0-2] = th, bits [3-4] = scope,
// bit 6 = swz
// all: volatile op (bit 31, stripped at lowering)
@@ -1815,7 +1815,7 @@ def int_amdgcn_struct_ptr_tbuffer_load : DefaultAttrsIntrinsic <
llvm_i32_ty], // auxiliary/cachepolicy(imm):
// bit 0 = glc, bit 1 = slc, bit 2 = dlc (gfx10/gfx11),
// bit 3 = swz, bit 4 = scc (gfx90a)
- // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
+ // gfx942: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
// gfx12+: bits [0-2] = th, bits [3-4] = scope,
// bit 6 = swz
// all: volatile op (bit 31, stripped at lowering)
@@ -1834,7 +1834,7 @@ def int_amdgcn_struct_ptr_tbuffer_store : DefaultAttrsIntrinsic <
llvm_i32_ty], // auxiliary/cachepolicy(imm):
// bit 0 = glc, bit 1 = slc, bit 2 = dlc (gfx10/gfx11),
// bit 3 = swz, bit 4 = scc (gfx90a)
- // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
+ // gfx942: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
// gfx12+: bits [0-2] = th, bits [3-4] = scope,
// bit 6 = swz
// all: volatile op (bit 31, stripped at lowering)
@@ -1853,7 +1853,7 @@ def int_amdgcn_struct_tbuffer_store : DefaultAttrsIntrinsic <
llvm_i32_ty], // auxiliary/cachepolicy(imm):
// bit 0 = glc, bit 1 = slc, bit 2 = dlc (gfx10/gfx11),
// bit 3 = swz, bit 4 = scc (gfx90a)
- // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
+ // gfx942: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
// gfx12+: bits [0-2] = th, bits [3-4] = scope,
// bit 6 = swz
// all: volatile op (bit 31, stripped at lowering)
@@ -1872,7 +1872,7 @@ class AMDGPURawBufferLoadLDS : Intrinsic <
llvm_i32_ty], // auxiliary/cachepolicy(imm):
// bit 0 = glc, bit 1 = slc, bit 2 = dlc (gfx10/gfx11),
// bit 3 = swz, bit 4 = scc (gfx90a)
- // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
+ // gfx942: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
// gfx12+: bits [0-2] = th, bits [3-4] = scope,
// bit 6 = swz
// all: volatile op (bit 31, stripped at lowering)
@@ -1891,7 +1891,7 @@ class AMDGPURawPtrBufferLoadLDS : Intrinsic <
llvm_i32_ty], // auxiliary/cachepolicy(imm):
// bit 0 = glc, bit 1 = slc, bit 2 = dlc (gfx10/gfx11),
// bit 3 = swz, bit 4 = scc (gfx90a)
- // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
+ // gfx942: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
// gfx12+: bits [0-2] = th, bits [3-4] = scope,
// bit 6 = swz
// all: volatile op (bit 31, stripped at lowering)
@@ -1914,7 +1914,7 @@ class AMDGPUStructBufferLoadLDS : Intrinsic <
llvm_i32_ty], // auxiliary/cachepolicy(imm):
// bit 0 = glc, bit 1 = slc, bit 2 = dlc (gfx10/gfx11),
// bit 3 = swz, bit 4 = scc (gfx90a)
- // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
+ // gfx942: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
// gfx12+: bits [0-2] = th, bits [3-4] = scope,
// bit 6 = swz
// all: volatile op (bit 31, stripped at lowering)
@@ -1934,7 +1934,7 @@ class AMDGPUStructPtrBufferLoadLDS : Intrinsic <
llvm_i32_ty], // auxiliary/cachepolicy(imm):
// bit 0 = glc, bit 1 = slc, bit 2 = dlc (gfx10/gfx11),
// bit 3 = swz, bit 4 = scc (gfx90a)
- // gfx940: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
+ // gfx942: bit 0 = sc0, bit 1 = nt, bit 3 = swz, bit 4 = sc1
// gfx12+: bits [0-2] = th, bits [3-4] = scope,
// bit 6 = swz
// all: volatile op (bit 31, stripped at lowering)
@@ -3007,7 +3007,7 @@ def int_amdgcn_fdot2_f32_bf16 :
// f32 %r = llvm.amdgcn.fdot2c.f32.bf16(v2bf16 %a, v2bf16 %b, f32 %c, i1 %clamp)
// %r = %a[0] * %b[0] + %a[1] * %b[1] + c
// TODO: This actually is similar to llvm.amdgcn.fdot2 intrinsics which produces
-// v_dot2c_f32_f16 on gfx940. Maybe we can consolidate these.
+// v_dot2c_f32_f16 on gfx942. Maybe we can consolidate these.
def int_amdgcn_fdot2c_f32_bf16 :
ClangBuiltin<"__builtin_amdgcn_fdot2c_f32_bf16">,
@@ -3250,7 +3250,7 @@ def int_amdgcn_mfma_f32_4x4x4bf16_1k : AMDGPUMfmaIntrinsic<llvm_v4f32_ty, ll
def int_amdgcn_mfma_f32_32x32x8bf16_1k : AMDGPUMfmaIntrinsic<llvm_v16f32_ty, llvm_v4i16_ty>;
def int_amdgcn_mfma_f32_16x16x16bf16_1k : AMDGPUMfmaIntrinsic<llvm_v4f32_ty, llvm_v4i16_ty>;
-// Note: in gfx940 BLGP argument is replaced by NEG bitfield in the DGEMM MFMA.
+// Note: in gfx942 BLGP argument is replaced by NEG bitfield in the DGEMM MFMA.
// Three bits corresponding to the neg modifier applied to the respective
// source operand.
def int_amdgcn_mfma_f64_16x16x4f64 : AMDGPUMfmaIntrinsic<llvm_v4f64_ty, llvm_double_ty>;
@@ -3258,7 +3258,7 @@ def int_amdgcn_mfma_f64_4x4x4f64 : AMDGPUMfmaIntrinsic<llvm_double_ty, ll
}
//===----------------------------------------------------------------------===//
-// gfx940 intrinsics
+// gfx942 intrinsics
// ===----------------------------------------------------------------------===//
class AMDGPUMFp8MfmaIntrinsic<LLVMType DestTy> :
diff --git a/llvm/include/llvm/TargetParser/TargetParser.h b/llvm/include/llvm/TargetParser/TargetParser.h
index 55e7b417428c4..f776b41f3d7ca 100644
--- a/llvm/include/llvm/TargetParser/TargetParser.h
+++ b/llvm/include/llvm/TargetParser/TargetParser.h
@@ -83,8 +83,6 @@ enum GPUKind : uint32_t {
GK_GFX909 = 65,
GK_GFX90A = 66,
GK_GFX90C = 67,
- GK_GFX940 = 68,
- GK_GFX941 = 69,
GK_GFX942 = 70,
GK_GFX950 = 71,
diff --git a/llvm/lib/Object/ELFObjectFile.cpp b/llvm/lib/Object/ELFObjectFile.cpp
index 2d3d70db50c39..ac25d76709726 100644
--- a/llvm/lib/Object/ELFObjectFile.cpp
+++ b/llvm/lib/Object/ELFObjectFile.cpp
@@ -545,10 +545,6 @@ StringRef ELFObjectFileBase::getAMDGPUCPUName() const {
return "gfx90a";
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX90C:
return "gfx90c";
- case ELF::EF_AMDGPU_MACH_AMDGCN_GFX940:
- return "gfx940";
- case ELF::EF_AMDGPU_MACH_AMDGCN_GFX941:
- return "gfx941";
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX942:
return "gfx942";
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX950:
diff --git a/llvm/lib/ObjectYAML/ELFYAML.cpp b/llvm/lib/ObjectYAML/ELFYAML.cpp
index 05e4d85b2ea5d..1f970739c1e7e 100644
--- a/llvm/lib/ObjectYAML/ELFYAML.cpp
+++ b/llvm/lib/ObjectYAML/ELFYAML.cpp
@@ -609,8 +609,6 @@ void ScalarBitSetTraits<ELFYAML::ELF_EF>::bitset(IO &IO,
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX909, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX90A, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX90C, EF_AMDGPU_MACH);
- BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX940, EF_AMDGPU_MACH);
- BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX941, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX942, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX950, EF_AMDGPU_MACH);
BCaseMask(EF_AMDGPU_MACH_AMDGCN_GFX1010, EF_AMDGPU_MACH);
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 6439149d801f6..e0da312c51a82 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -372,10 +372,10 @@ def FeatureGFX90AInsts : SubtargetFeature<"gfx90a-insts",
// [HasAtomicFMinFMaxF64GlobalInsts, HasAtomicFMinFMaxF64FlatInsts] // TODO
>;
-def FeatureGFX940Insts : SubtargetFeature<"gfx940-insts",
- "GFX940Insts",
+def FeatureGFX942Insts : SubtargetFeature<"gfx942-insts",
+ "GFX942Insts",
"true",
- "Additional instructions for GFX940+"
+ "Additional instructions for GFX942+"
>;
def FeaturePermlane16Swap : SubtargetFeature<"permlane16-swap",
@@ -1040,12 +1040,6 @@ def FeatureVALUTransUseHazard : SubtargetFeature<"valu-trans-use-hazard",
"Hazard when TRANS instructions are closely followed by a use of the result"
>;
-def FeatureForceStoreSC0SC1 : SubtargetFeature<"force-store-sc0-sc1",
- "HasForceStoreSC0SC1",
- "true",
- "Has SC0 and SC1 on stores"
->;
-
def FeatureSALUFloatInsts : SubtargetFeature<"salu-float",
"HasSALUFloatInsts",
"true",
@@ -1564,7 +1558,7 @@ def FeatureISAVersion9_0_C : FeatureSet<
def FeatureISAVersion9_4_Common : FeatureSet<
[FeatureGFX9,
FeatureGFX90AInsts,
- FeatureGFX940Insts,
+ FeatureGFX942Insts,
FeatureFmaMixInsts,
FeatureLDSBankCount32,
FeatureDLInsts,
@@ -1619,28 +1613,6 @@ def FeatureISAVersion9_5_Common : FeatureSet<
FeatureAtomicBufferPkAddBF16Inst
])>;
-def FeatureISAVersion9_4_0 : FeatureSet<
- !listconcat(FeatureISAVersion9_4_Common.Features,
- [
- FeatureAddressableLocalMemorySize65536,
- FeatureForceStoreSC0SC1,
- FeatureFP8Insts,
- FeatureFP8ConversionInsts,
- FeatureCvtFP8VOP1Bug,
- FeatureXF32Insts
- ])>;
-
-def FeatureISAVersion9_4_1 : FeatureSet<
- !listconcat(FeatureISAVersion9_4_Common.Features,
- [
- FeatureAddressableLocalMemorySize65536,
- FeatureForceStoreSC0SC1,
- FeatureFP8Insts,
- FeatureFP8ConversionInsts,
- FeatureCvtFP8VOP1Bug,
- FeatureXF32Insts
- ])>;
-
def FeatureISAVersion9_4_2 : FeatureSet<
!listconcat(FeatureISAVersion9_4_Common.Features,
[
@@ -2075,20 +2047,20 @@ def isGFX8GFX9NotGFX90A :
AssemblerPredicate<(all_of FeatureGFX8Insts, FeatureGCN3Encoding, (not FeatureGFX90AInsts))>;
def isGFX90AOnly :
- Predicate<"Subtarget->hasGFX90AInsts() && !Subtarget->hasGFX940Insts()">,
- AssemblerPredicate<(all_of FeatureGFX90AInsts, (not FeatureGFX940Insts))>;
+ Predicate<"Subtarget->hasGFX90AInsts() && !Subtarget->hasGFX942Insts()">,
+ AssemblerPredicate<(all_of FeatureGFX90AInsts, (not FeatureGFX942Insts))>;
def isGFX908orGFX90A :
- Predicate<"Subtarget->hasMAIInsts() && !Subtarget->hasGFX940Insts()">,
- AssemblerPredicate<(all_of FeatureMAIInsts, (not FeatureGFX940Insts))>;
+ Predicate<"Subtarget->hasMAIInsts() && !Subtarget->hasGFX942Insts()">,
+ AssemblerPredicate<(all_of FeatureMAIInsts, (not FeatureGFX942Insts))>;
-def isGFX940Plus :
- Predicate<"Subtarget->hasGFX940Insts()">,
- AssemblerPredicate<(all_of FeatureGFX940Insts)>;
+def isGFX942Plus :
+ Predicate<"Subtarget->hasGFX942Insts()">,
+ AssemblerPredicate<(all_of FeatureGFX942Insts)>;
-def isNotGFX940Plus :
- Predicate<"!Subtarget->hasGFX940Insts()">,
- AssemblerPredicate<(all_of (not FeatureGFX940Insts))>;
+def isNotGFX942Plus :
+ Predicate<"!Subtarget->hasGFX942Insts()">,
+ AssemblerPredicate<(all_of (not FeatureGFX942Insts))>;
def HasGFX950Insts :
Predicate<"Subtarget->hasGFX950Insts()">,
@@ -2102,11 +2074,11 @@ def HasPermlane32Swap :
Predicate<"Subtarget->hasPermlane32Swap()">,
AssemblerPredicate<(all_of FeaturePermlane32Swap)>;
-def isGFX8GFX9NotGFX940 :
- Predicate<"!Subtarget->hasGFX940Insts() &&"
+def isGFX8GFX9NotGFX942 :
+ Predicate<"!Subtarget->hasGFX942Insts() &&"
"(Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS ||"
" Subtarget->getGeneration() == AMDGPUSubtarget::GFX9)">,
- AssemblerPredicate<(all_of FeatureGFX8Insts, FeatureGCN3Encoding, (not FeatureGFX940Insts))>;
+ AssemblerPredicate<(all_of FeatureGFX8Insts, FeatureGCN3Encoding, (not FeatureGFX942Insts))>;
def isGFX8GFX9 :
Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS ||"
@@ -2213,9 +2185,9 @@ def HasD16LoadStore : Predicate<"Subtarget->hasD16LoadStore()">,
AssemblerPredicate<(all_of FeatureGFX9Insts)>;
def HasFlatScratchSTMode : Predicate<"Subtarget->hasFlatScratchSTMode()">,
- AssemblerPredicate<(any_of FeatureGFX10_3Insts, FeatureGFX940Insts)>;
+ AssemblerPredicate<(any_of FeatureGFX10_3Insts, FeatureGFX942Insts)>;
def HasFlatScratchSVSMode : Predicate<"Subtarget->hasFlatScratchSVSMode()">,
- AssemblerPredicate<(any_of FeatureGFX940Insts, FeatureGFX11Insts)>;
+ AssemblerPredicate<(any_of FeatureGFX942Insts, FeatureGFX11Insts)>;
def HasGFX10_AEncoding : Predicate<"Subtarget->hasGFX10_AEncoding()">,
AssemblerPredicate<(all_of FeatureGFX10_AEncoding)>;
@@ -2323,7 +2295,7 @@ def HasPkMovB32 : Predicate<"Subtarget->hasPkMovB32()">,
def HasFmaakFmamkF32Insts :
Predicate<"Subtarget->hasFmaakFmamkF32Insts()">,
- AssemblerPredicate<(any_of FeatureGFX10Insts, FeatureGFX940Insts)>;
+ AssemblerPredicate<(any_of FeatureGFX10Insts, FeatureGFX942Insts)>;
def HasImageInsts : Predicate<"Subtarget->hasImageInsts()">,
AssemblerPredicate<(all_of FeatureImageInsts)>;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 3bbbbcf71d8ae..cf3843869808b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -4295,7 +4295,7 @@ AMDGPUInstructionSelector::selectVOP3PModsImpl(
// TODO: Handle G_FSUB 0 as fneg
// TODO: Match op_sel through g_build_vector_trunc and g_shuffle_vector.
- (void)IsDOT; // DOTs do not use OPSEL on gfx940+, check ST.hasDOTOpSelHazard()
+ (void)IsDOT; // DOTs do not use OPSEL on gfx942+, check ST.hasDOTOpSelHazard()
// Packed instructions do not have abs modifiers.
Mods |= SISrcMods::OP_SEL_1;
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 54ed3789326cb..13ace855caee4 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -1492,14 +1492,12 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
return AMDGPU::isGFX9(getSTI());
}
- // TODO: isGFX90A is also true for GFX940. We need to clean it.
+ // TODO: isGFX90A is also true for GFX942. We need to clean it.
bool isGFX90A() const {
return AMDGPU::isGFX90A(getSTI());
}
- bool isGFX940() const {
- return AMDGPU::isGFX940(getSTI());
- }
+ bool isGFX942() const { return AMDGPU::isGFX942(getSTI()); }
bool isGFX9Plus() const {
return AMDGPU::isGFX9Plus(getSTI());
@@ -4633,7 +4631,7 @@ bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
uint64_t TSFlags = MII.get(Opc).TSFlags;
- if (isGFX940() && (TSFlags & SIInstrFlags::IsDOT)) {
+ if (isGFX942() && (TSFlags & SIInstrFlags::IsDOT)) {
int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
if (OpSelIdx != -1) {
if (Inst.getOperand(OpSelIdx).getImm() != 0)
@@ -4942,12 +4940,12 @@ bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst,
bool IsNeg = StringRef(BLGPLoc.getPointer()).starts_with("neg:");
auto FB = getFeatureBits();
bool UsesNeg = false;
- if (FB[AMDGPU::FeatureGFX940Insts]) {
+ if (FB[AMDGPU::FeatureGFX942Insts]) {
switch (Opc) {
- case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
- case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
- case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
- case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
+ case AMDGPU::V_MFMA_F64_16X16X4F64_gfx942_acd:
+ case AMDGPU::V_MFMA_F64_16X16X4F64_gfx942_vcd:
+ case AMDGPU::V_MFMA_F64_4X4X4F64_gfx942_acd:
+ case AMDGPU::V_MFMA_F64_4X4X4F64_gfx942_vcd:
UsesNeg = true;
}
}
@@ -5062,7 +5060,7 @@ bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
}
}
- if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) {
+ if (isGFX90A() && !isGFX942() && (CPol & CPol::SCC)) {
const uint64_t AllowSCCModifier = SIInstrFlags::MUBUF |
SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
SIInstrFlags::FLAT;
@@ -5081,7 +5079,7 @@ bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
if (TSFlags & SIInstrFlags::IsAtomicRet) {
if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
- Error(IDLoc, isGFX940() ? "instruction must use sc0"
+ Error(IDLoc, isGFX942() ? "instruction must use sc0"
: "instruction must use glc");
return false;
}
@@ -5090,8 +5088,8 @@ bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
StringRef CStr(S.getPointer());
S = SMLoc::getFromPointer(
- &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]);
- Error(S, isGFX940() ? "instruction must not use sc0"
+ &CStr.data()[CStr.find(isGFX942() ? "sc0" : "glc")]);
+ Error(S, isGFX942() ? "instruction must not use sc0"
: "instruction must not use glc");
return false;
}
@@ -6657,7 +6655,7 @@ unsigned AMDGPUAsmParser::getCPolKind(StringRef Id, StringRef Mnemo,
bool &Disabling) const {
Disabling = Id.consume_front("no");
- if (isGFX940() && !Mnemo.starts_with("s_")) {
+ if (isGFX942() && !Mnemo.starts_with("s_")) {
return StringSwitch<unsigned>(Id)
.Case("nt", AMDGPU::CPol::NT)
.Case("sc0", AMDGPU::CPol::SC0)
diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td
index f2686bdf56b41..a48115fbfb272 100644
--- a/llvm/lib/Target/AMDGPU/BUFInstructions.td
+++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td
@@ -1146,7 +1146,7 @@ let OtherPredicates = [HasGFX10_BEncoding] in {
>;
}
-let SubtargetPredicate = isGFX8GFX9NotGFX940 in {
+let SubtargetPredicate = isGFX8GFX9NotGFX942 in {
def BUFFER_STORE_LDS_DWORD : MUBUF_Pseudo_Store_Lds <"buffer_store_lds_dword">;
}
@@ -1228,7 +1228,7 @@ defm BUFFER_STORE_FORMAT_D16_HI_X : MUBUF_Pseudo_Stores <
} // End HasD16LoadStore
-let SubtargetPredicate = isNotGFX940Plus in
+let SubtargetPredicate = isNotGFX942Plus in
def BUFFER_WBINVL1 : MUBUF_Invalidate <
"buffer_wbinvl1", int_amdgcn_buffer_wbinvl1
>;
@@ -1311,7 +1311,7 @@ let SubtargetPredicate = isGFX7Plus in {
// Instruction definitions for CI and newer.
//===----------------------------------------------------------------------===//
-let SubtargetPredicate = isNotGFX940Plus in
+let SubtargetPredicate = isNotGFX942Plus in
def BUFFER_WBINVL1_VOL : MUBUF_Invalidate <"buffer_wbinvl1_vol",
int_amdgcn_buffer_wbinvl1_vol>;
@@ -1341,7 +1341,7 @@ let SubtargetPredicate = HasAtomicFMinFMaxF64GlobalInsts in {
}
def BUFFER_INV : MUBUF_Invalidate<"buffer_inv"> {
- let SubtargetPredicate = isGFX940Plus;
+ let SubtargetPredicate = isGFX942Plus;
let has_glc = 1;
let has_sccb = 1;
let InOperandList = (ins CPol_0:$cpol);
@@ -3095,9 +3095,9 @@ multiclass MUBUF_Real_gfx90a <bits<7> op,
}
}
-class MUBUF_Real_gfx940 <bits<7> op, MUBUF_Pseudo ps> :
- MUBUF_Real_Base_vi<op, ps, SIEncodingFamily.GFX940> {
- let AssemblerPredicate = isGFX940Plus;
+class MUBUF_Real_gfx942 <bits<7> op, MUBUF_Pseudo ps> :
+ MUBUF_Real_Base_vi<op, ps, SIEncodingFamily.GFX942> {
+ let AssemblerPredicate = isGFX942Plus;
let DecoderNamespace = "GFX9";
let AsmString = ps.Mnemonic # ps.AsmOperands;
@@ -3116,7 +3116,7 @@ multiclass MUBUF_Real_vi_gfx90a<bits<7> op, bit isTFE = 0> : MUBUF_Real_vi<op> {
let AssemblerPredicate = isGFX90AOnly in
defm NAME : MUBUF_Real_gfx90a<op, 0>;
- def _gfx940 : MUBUF_Real_gfx940<op, ps>;
+ def _gfx942 : MUBUF_Real_gfx942<op, ps>;
}
}
@@ -3314,9 +3314,9 @@ let AsmString = BUFFER_WBL2.Mnemonic, // drop flags
defm BUFFER_WBL2 : MUBUF_Real_gfx90a<0x28>;
defm BUFFER_INVL2 : MUBUF_Real_gfx90a<0x29>;
-let SubtargetPredicate = isGFX940Plus in {
-def BUFFER_WBL2_gfx940 : MUBUF_Real_gfx940<0x28, BUFFER_WBL2>;
-def BUFFER_INV_gfx940 : MUBUF_Real_gfx940<0x29, BUFFER_INV>;
+let SubtargetPredicate = isGFX942Plus in {
+def BUFFER_WBL2_gfx942 : MUBUF_Real_gfx942<0x28, BUFFER_WBL2>;
+def BUFFER_INV_gfx942 : MUBUF_Real_gfx942<0x29, BUFFER_INV>;
}
class MTBUF_Real_Base_vi <bits<4> op, MTBUF_Pseudo ps, int Enc> :
diff --git a/llvm/lib/Target/AMDGPU/DSInstructions.td b/llvm/lib/Target/AMDGPU/DSInstructions.td
index 9ca853befba73..d3487daee364f 100644
--- a/llvm/lib/Target/AMDGPU/DSInstructions.td
+++ b/llvm/lib/Target/AMDGPU/DSInstructions.td
@@ -1773,7 +1773,7 @@ def DS_READ_B128_vi : DS_Real_vi<0xff, DS_READ_B128>;
def DS_ADD_F64_vi : DS_Real_vi<0x5c, DS_ADD_F64>;
def DS_ADD_RTN_F64_vi : DS_Real_vi<0x7c, DS_ADD_RTN_F64>;
-// GFX940+.
+// GFX942+.
def DS_PK_ADD_F16_vi : DS_Real_vi<0x17, DS_PK_ADD_F16>;
def DS_PK_ADD_RTN_F16_vi : DS_Real_vi<0xb7, DS_PK_ADD_RTN_F16>;
def DS_PK_ADD_BF16_vi : DS_Real_vi<0x18, DS_PK_ADD_BF16>;
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index 308ab8e3b82c4..6413dd0d6288a 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -551,7 +551,7 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
} else if (Bytes.size() >= 16 &&
STI.hasFeature(AMDGPU::FeatureGFX950Insts)) {
DecoderUInt128 DecW = eat16Bytes(Bytes);
- if (tryDecodeInst(DecoderTableGFX940128, MI, DecW, Address, CS))
+ if (tryDecodeInst(DecoderTableGFX942128, MI, DecW, Address, CS))
break;
// Reinitialize Bytes
@@ -580,8 +580,8 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
tryDecodeInst(DecoderTableGFX9_DL64, MI, QW, Address, CS))
break;
- if (STI.hasFeature(AMDGPU::FeatureGFX940Insts) &&
- tryDecodeInst(DecoderTableGFX94064, MI, QW, Address, CS))
+ if (STI.hasFeature(AMDGPU::FeatureGFX942Insts) &&
+ tryDecodeInst(DecoderTableGFX94264, MI, QW, Address, CS))
break;
if (STI.hasFeature(AMDGPU::FeatureGFX90AInsts) &&
diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td
index ea6e703eba5d9..7bc668fce8fb7 100644
--- a/llvm/lib/Target/AMDGPU/FLATInstructions.td
+++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td
@@ -814,7 +814,7 @@ defm FLAT_ATOMIC_FMAX : FLAT_Atomic_Pseudo <"flat_atomic_fmax",
} // End SubtargetPredicate = isGFX7GFX10GFX11
-// GFX940-, GFX11-only flat instructions.
+// GFX942-, GFX11-only flat instructions.
let SubtargetPredicate = HasFlatAtomicFaddF32Inst in {
defm FLAT_ATOMIC_ADD_F32 : FLAT_Atomic_Pseudo<"flat_atomic_add_f32", VGPR_32, f32>;
} // End SubtargetPredicate = HasFlatAtomicFaddF32Inst
@@ -1866,10 +1866,10 @@ multiclass FLAT_Real_AllAddr_vi<bits<7> op,
def _SADDR_vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME#"_SADDR"), has_sccb>;
}
-class FLAT_Real_gfx940 <bits<7> op, FLAT_Pseudo ps> :
+class FLAT_Real_gfx942 <bits<7> op, FLAT_Pseudo ps> :
FLAT_Real <op, ps>,
- SIMCInstr <ps.PseudoInstr, SIEncodingFamily.GFX940> {
- let AssemblerPredicate = isGFX940Plus;
+ SIMCInstr <ps.PseudoInstr, SIEncodingFamily.GFX942> {
+ let AssemblerPredicate = isGFX942Plus;
let DecoderNamespace = "GFX9";
let Inst{13} = ps.sve;
let Inst{25} = !if(ps.has_sccb, cpol{CPolBit.SCC}, ps.sccbValue);
@@ -1877,43 +1877,43 @@ class FLAT_Real_gfx940 <bits<7> op, FLAT_Pseudo ps> :
multiclass FLAT_Real_AllAddr_SVE_vi<bits<7> op> {
def _vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME)> {
- let AssemblerPredicate = isGFX8GFX9NotGFX940;
- let OtherPredicates = [isGFX8GFX9NotGFX940];
+ let AssemblerPredicate = isGFX8GFX9NotGFX942;
+ let OtherPredicates = [isGFX8GFX9NotGFX942];
}
def _SADDR_vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME#"_SADDR")> {
let DecoderNamespace = "GFX9";
}
- let AssemblerPredicate = isGFX940Plus, SubtargetPredicate = isGFX940Plus in {
- def _VE_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME)>;
- def _SVS_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME#"_SVS")>;
- def _ST_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME#"_ST")>;
+ let AssemblerPredicate = isGFX942Plus, SubtargetPredicate = isGFX942Plus in {
+ def _VE_gfx942 : FLAT_Real_gfx942<op, !cast<FLAT_Pseudo>(NAME)>;
+ def _SVS_gfx942 : FLAT_Real_gfx942<op, !cast<FLAT_Pseudo>(NAME#"_SVS")>;
+ def _ST_gfx942 : FLAT_Real_gfx942<op, !cast<FLAT_Pseudo>(NAME#"_ST")>;
}
}
-multiclass FLAT_Real_AllAddr_LDS<bits<7> op, bits<7> pre_gfx940_op,
- string pre_gfx940_name = !subst("_lds", "", !cast<FLAT_Pseudo>(NAME).Mnemonic),
+multiclass FLAT_Real_AllAddr_LDS<bits<7> op, bits<7> pre_gfx942_op,
+ string pre_gfx942_name = !subst("_lds", "", !cast<FLAT_Pseudo>(NAME).Mnemonic),
bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> {
- let OtherPredicates = [isGFX8GFX9NotGFX940] in {
- def _vi : FLAT_Real_vi<pre_gfx940_op, !cast<FLAT_Pseudo>(NAME), has_sccb> {
- let AsmString = pre_gfx940_name # !cast<FLAT_Pseudo>(NAME).AsmOperands # " lds";
+ let OtherPredicates = [isGFX8GFX9NotGFX942] in {
+ def _vi : FLAT_Real_vi<pre_gfx942_op, !cast<FLAT_Pseudo>(NAME), has_sccb> {
+ let AsmString = pre_gfx942_name # !cast<FLAT_Pseudo>(NAME).AsmOperands # " lds";
}
- def _SADDR_vi : FLAT_Real_vi<pre_gfx940_op, !cast<FLAT_Pseudo>(NAME#"_SADDR"), has_sccb> {
- let AsmString = pre_gfx940_name # !cast<FLAT_Pseudo>(NAME#"_SADDR").AsmOperands # " lds";
+ def _SADDR_vi : FLAT_Real_vi<pre_gfx942_op, !cast<FLAT_Pseudo>(NAME#"_SADDR"), has_sccb> {
+ let AsmString = pre_gfx942_name # !cast<FLAT_Pseudo>(NAME#"_SADDR").AsmOperands # " lds";
}
}
- let SubtargetPredicate = isGFX940Plus in {
- def _gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME)>;
- def _SADDR_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME#"_SADDR")>;
+ let SubtargetPredicate = isGFX942Plus in {
+ def _gfx942 : FLAT_Real_gfx942<op, !cast<FLAT_Pseudo>(NAME)>;
+ def _SADDR_gfx942 : FLAT_Real_gfx942<op, !cast<FLAT_Pseudo>(NAME#"_SADDR")>;
}
}
-multiclass FLAT_Real_AllAddr_SVE_LDS<bits<7> op, bits<7> pre_gfx940_op> {
- defm "" : FLAT_Real_AllAddr_LDS<op, pre_gfx940_op>;
- let SubtargetPredicate = isGFX940Plus in {
- def _SVS_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME#"_SVS")>;
- def _ST_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME#"_ST")>;
+multiclass FLAT_Real_AllAddr_SVE_LDS<bits<7> op, bits<7> pre_gfx942_op> {
+ defm "" : FLAT_Real_AllAddr_LDS<op, pre_gfx942_op>;
+ let SubtargetPredicate = isGFX942Plus in {
+ def _SVS_gfx942 : FLAT_Real_gfx942<op, !cast<FLAT_Pseudo>(NAME#"_SVS")>;
+ def _ST_gfx942 : FLAT_Real_gfx942<op, !cast<FLAT_Pseudo>(NAME#"_ST")>;
}
}
@@ -2075,8 +2075,8 @@ defm SCRATCH_STORE_DWORDX2 : FLAT_Real_AllAddr_SVE_vi <0x1d>;
defm SCRATCH_STORE_DWORDX3 : FLAT_Real_AllAddr_SVE_vi <0x1e>;
defm SCRATCH_STORE_DWORDX4 : FLAT_Real_AllAddr_SVE_vi <0x1f>;
-let SubtargetPredicate = isGFX8GFX9NotGFX940 in {
- // These instructions are encoded differently on gfx90* and gfx940.
+let SubtargetPredicate = isGFX8GFX9NotGFX942 in {
+ // These instructions are encoded differently on gfx90* and gfx942.
defm GLOBAL_ATOMIC_ADD_F32 : FLAT_Global_Real_Atomics_vi <0x04d, 0>;
defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Real_Atomics_vi <0x04e, 0>;
}
@@ -2090,39 +2090,39 @@ let SubtargetPredicate = isGFX90AOnly in {
defm GLOBAL_ATOMIC_MAX_F64 : FLAT_Global_Real_Atomics_vi<0x51, 0>;
} // End SubtargetPredicate = isGFX90AOnly
-multiclass FLAT_Real_AllAddr_gfx940<bits<7> op> {
- def _gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME)>;
- def _SADDR_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME#"_SADDR")>;
+multiclass FLAT_Real_AllAddr_gfx942<bits<7> op> {
+ def _gfx942 : FLAT_Real_gfx942<op, !cast<FLAT_Pseudo>(NAME)>;
+ def _SADDR_gfx942 : FLAT_Real_gfx942<op, !cast<FLAT_Pseudo>(NAME#"_SADDR")>;
}
-multiclass FLAT_Real_Atomics_gfx940 <bits<7> op> {
+multiclass FLAT_Real_Atomics_gfx942 <bits<7> op> {
defvar ps = !cast<FLAT_Pseudo>(NAME);
- def _gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(ps.PseudoInstr)>;
- def _RTN_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN")>;
+ def _gfx942 : FLAT_Real_gfx942<op, !cast<FLAT_Pseudo>(ps.PseudoInstr)>;
+ def _RTN_gfx942 : FLAT_Real_gfx942<op, !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN")>;
}
-multiclass FLAT_Global_Real_Atomics_gfx940<bits<7> op> :
- FLAT_Real_AllAddr_gfx940<op> {
- def _RTN_gfx940 : FLAT_Real_gfx940 <op, !cast<FLAT_Pseudo>(NAME#"_RTN")>;
- def _SADDR_RTN_gfx940 : FLAT_Real_gfx940 <op, !cast<FLAT_Pseudo>(NAME#"_SADDR_RTN")>;
+multiclass FLAT_Global_Real_Atomics_gfx942<bits<7> op> :
+ FLAT_Real_AllAddr_gfx942<op> {
+ def _RTN_gfx942 : FLAT_Real_gfx942 <op, !cast<FLAT_Pseudo>(NAME#"_RTN")>;
+ def _SADDR_RTN_gfx942 : FLAT_Real_gfx942 <op, !cast<FLAT_Pseudo>(NAME#"_SADDR_RTN")>;
}
-let SubtargetPredicate = isGFX940Plus in {
- // These instructions are encoded differently on gfx90* and gfx940.
- defm GLOBAL_ATOMIC_ADD_F32 : FLAT_Global_Real_Atomics_gfx940 <0x04d>;
- defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Real_Atomics_gfx940 <0x04e>;
+let SubtargetPredicate = isGFX942Plus in {
+ // These instructions are encoded differently on gfx90* and gfx942.
+ defm GLOBAL_ATOMIC_ADD_F32 : FLAT_Global_Real_Atomics_gfx942 <0x04d>;
+ defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Real_Atomics_gfx942 <0x04e>;
- defm FLAT_ATOMIC_ADD_F64 : FLAT_Real_Atomics_gfx940<0x4f>;
- defm FLAT_ATOMIC_MIN_F64 : FLAT_Real_Atomics_gfx940<0x50>;
- defm FLAT_ATOMIC_MAX_F64 : FLAT_Real_Atomics_gfx940<0x51>;
- defm GLOBAL_ATOMIC_ADD_F64 : FLAT_Global_Real_Atomics_gfx940<0x4f>;
- defm GLOBAL_ATOMIC_MIN_F64 : FLAT_Global_Real_Atomics_gfx940<0x50>;
- defm GLOBAL_ATOMIC_MAX_F64 : FLAT_Global_Real_Atomics_gfx940<0x51>;
+ defm FLAT_ATOMIC_ADD_F64 : FLAT_Real_Atomics_gfx942<0x4f>;
+ defm FLAT_ATOMIC_MIN_F64 : FLAT_Real_Atomics_gfx942<0x50>;
+ defm FLAT_ATOMIC_MAX_F64 : FLAT_Real_Atomics_gfx942<0x51>;
+ defm GLOBAL_ATOMIC_ADD_F64 : FLAT_Global_Real_Atomics_gfx942<0x4f>;
+ defm GLOBAL_ATOMIC_MIN_F64 : FLAT_Global_Real_Atomics_gfx942<0x50>;
+ defm GLOBAL_ATOMIC_MAX_F64 : FLAT_Global_Real_Atomics_gfx942<0x51>;
defm FLAT_ATOMIC_ADD_F32 : FLAT_Real_Atomics_vi<0x4d>;
defm FLAT_ATOMIC_PK_ADD_F16 : FLAT_Real_Atomics_vi<0x4e>;
defm FLAT_ATOMIC_PK_ADD_BF16 : FLAT_Real_Atomics_vi<0x52>;
defm GLOBAL_ATOMIC_PK_ADD_BF16 : FLAT_Global_Real_Atomics_vi<0x52>;
-} // End SubtargetPredicate = isGFX940Plus
+} // End SubtargetPredicate = isGFX942Plus
//===----------------------------------------------------------------------===//
// GFX10.
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index 827598078af53..0f76b0ac5331d 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -124,10 +124,10 @@ static bool isXDL(const GCNSubtarget &ST, const MachineInstr &MI) {
Opcode == AMDGPU::V_ACCVGPR_READ_B32_e64)
return false;
- if (!ST.hasGFX940Insts())
+ if (!ST.hasGFX942Insts())
return true;
- return AMDGPU::getMAIIsGFX940XDL(Opcode);
+ return AMDGPU::getMAIIsGFX942XDL(Opcode);
}
static bool isSendMsgTraceDataOrGDS(const SIInstrInfo &TII,
@@ -870,7 +870,7 @@ GCNHazardRecognizer::checkVALUHazardsHelper(const MachineOperand &Def,
// 8 bytes can have there store data over written by the next instruction.
const SIRegisterInfo *TRI = ST.getRegisterInfo();
- const int VALUWaitStates = ST.hasGFX940Insts() ? 2 : 1;
+ const int VALUWaitStates = ST.hasGFX942Insts() ? 2 : 1;
int WaitStatesNeeded = 0;
if (!TRI->isVectorRegister(MRI, Def.getReg()))
@@ -2251,9 +2251,9 @@ int GCNHazardRecognizer::checkMAIHazards908(MachineInstr *MI) {
}
static int
-GFX940_XDL_N_PassWritesVGPROverlappedXDLOrSMFMASrcCWaitStates(int NumPasses,
+GFX942_XDL_N_PassWritesVGPROverlappedXDLOrSMFMASrcCWaitStates(int NumPasses,
bool IsGFX950) {
- // xdl def cycles | gfx940 | gfx950
+ // xdl def cycles | gfx942 | gfx950
// 2 pass | 3 4
// 4 pass | 5 6
// 8 pass | 9 10
@@ -2262,9 +2262,9 @@ GFX940_XDL_N_PassWritesVGPROverlappedXDLOrSMFMASrcCWaitStates(int NumPasses,
}
static int
-GFX940_XDL_N_PassWritesVGPROverlappedSGEMMDGEMMSrcCWaitStates(int NumPasses,
+GFX942_XDL_N_PassWritesVGPROverlappedSGEMMDGEMMSrcCWaitStates(int NumPasses,
bool IsGFX950) {
- // xdl def cycles | gfx940 | gfx950
+ // xdl def cycles | gfx942 | gfx950
// 2 pass | 3 3
// 4 pass | 5 6
// 8 pass | 9 10
@@ -2273,7 +2273,7 @@ GFX940_XDL_N_PassWritesVGPROverlappedSGEMMDGEMMSrcCWaitStates(int NumPasses,
}
static int
-GFX940_SMFMA_N_PassWritesVGPROverlappedSMFMASrcCWaitStates(int NumPasses) {
+GFX942_SMFMA_N_PassWritesVGPROverlappedSMFMASrcCWaitStates(int NumPasses) {
// 2 pass -> 2
// 4 pass -> 4
// 8 pass -> 8
@@ -2282,7 +2282,7 @@ GFX940_SMFMA_N_PassWritesVGPROverlappedSMFMASrcCWaitStates(int NumPasses) {
}
static int
-GFX940_SMFMA_N_PassWritesVGPROverlappedSrcABWaitStates(int NumPasses) {
+GFX942_SMFMA_N_PassWritesVGPROverlappedSrcABWaitStates(int NumPasses) {
// 2 pass -> 4
// 4 pass -> 6
// 8 pass -> 10
@@ -2290,9 +2290,9 @@ GFX940_SMFMA_N_PassWritesVGPROverlappedSrcABWaitStates(int NumPasses) {
return NumPasses + 2;
}
-static int GFX940_XDL_N_PassWritesVGPROverlappedSrcABWaitStates(int NumPasses,
+static int GFX942_XDL_N_PassWritesVGPROverlappedSrcABWaitStates(int NumPasses,
bool IsGFX950) {
- // xdl def cycles | gfx940 | gfx950
+ // xdl def cycles | gfx942 | gfx950
// 2 pass | 5 5
// 4 pass | 7 8
// 8 pass | 11 12
@@ -2343,7 +2343,7 @@ int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) {
const int DMFMA16x16WritesVGPROverlappedMFMASrcABWaitStates = 11;
const int GFX950_DMFMA16x16WritesVGPROverlappedMFMASrcABWaitStates = 19;
const int DMFMA4x4WritesVGPRFullSrcCWaitStates = 4;
- const int GFX940_SMFMA4x4WritesVGPRFullSrcCWaitStates = 2;
+ const int GFX942_SMFMA4x4WritesVGPRFullSrcCWaitStates = 2;
const int MaxWaitStates = 19;
if (!Use.isReg())
@@ -2375,7 +2375,7 @@ int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) {
unsigned Opc1 = MI1->getOpcode();
int NeedWaitStates = 0;
if (OpNo == SrcCIdx) {
- if (!isDGEMM(Opc) && (!ST.hasGFX940Insts() && isDGEMM(Opc1))) {
+ if (!isDGEMM(Opc) && (!ST.hasGFX942Insts() && isDGEMM(Opc1))) {
NeedWaitStates = 0;
} else if (FullReg) {
if ((Opc == AMDGPU::V_MFMA_F64_4X4X4F64_e64 ||
@@ -2383,9 +2383,9 @@ int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) {
(Opc1 == AMDGPU::V_MFMA_F64_4X4X4F64_e64 ||
Opc1 == AMDGPU::V_MFMA_F64_4X4X4F64_vgprcd_e64))
NeedWaitStates = DMFMA4x4WritesVGPRFullSrcCWaitStates;
- else if (ST.hasGFX940Insts() &&
+ else if (ST.hasGFX942Insts() &&
TSchedModel.computeInstrLatency(MI1) == 2)
- NeedWaitStates = GFX940_SMFMA4x4WritesVGPRFullSrcCWaitStates;
+ NeedWaitStates = GFX942_SMFMA4x4WritesVGPRFullSrcCWaitStates;
} else {
switch (Opc1) {
case AMDGPU::V_MFMA_F64_16X16X4F64_e64:
@@ -2405,18 +2405,18 @@ int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) {
break;
default:
int NumPasses = TSchedModel.computeInstrLatency(MI1);
- if (ST.hasGFX940Insts()) {
+ if (ST.hasGFX942Insts()) {
if (isXDL(ST, *MI) && !isXDL(ST, *MI1))
break;
NeedWaitStates =
isXDL(ST, *MI1)
? (isXDL(ST, *MI)
- ? GFX940_XDL_N_PassWritesVGPROverlappedXDLOrSMFMASrcCWaitStates(
+ ? GFX942_XDL_N_PassWritesVGPROverlappedXDLOrSMFMASrcCWaitStates(
NumPasses, ST.hasGFX950Insts())
- : GFX940_XDL_N_PassWritesVGPROverlappedSGEMMDGEMMSrcCWaitStates(
+ : GFX942_XDL_N_PassWritesVGPROverlappedSGEMMDGEMMSrcCWaitStates(
NumPasses, ST.hasGFX950Insts()))
- : GFX940_SMFMA_N_PassWritesVGPROverlappedSMFMASrcCWaitStates(
+ : GFX942_SMFMA_N_PassWritesVGPROverlappedSMFMASrcCWaitStates(
NumPasses);
break;
}
@@ -2462,12 +2462,12 @@ int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) {
default:
int NumPasses = TSchedModel.computeInstrLatency(MI1);
- if (ST.hasGFX940Insts()) {
+ if (ST.hasGFX942Insts()) {
NeedWaitStates =
isXDL(ST, *MI1)
- ? GFX940_XDL_N_PassWritesVGPROverlappedSrcABWaitStates(
+ ? GFX942_XDL_N_PassWritesVGPROverlappedSrcABWaitStates(
NumPasses, ST.hasGFX950Insts())
- : GFX940_SMFMA_N_PassWritesVGPROverlappedSrcABWaitStates(
+ : GFX942_SMFMA_N_PassWritesVGPROverlappedSrcABWaitStates(
NumPasses);
break;
}
@@ -2590,7 +2590,7 @@ int GCNHazardRecognizer::checkPermlaneHazards(MachineInstr *MI) {
return WaitStatesNeeded;
}
-static int GFX940_SMFMA_N_PassWriteVgprVALUWawWaitStates(int NumPasses) {
+static int GFX942_SMFMA_N_PassWriteVgprVALUWawWaitStates(int NumPasses) {
// 2 pass -> 4
// 4 pass -> 6
// 8 pass -> 10
@@ -2598,9 +2598,9 @@ static int GFX940_SMFMA_N_PassWriteVgprVALUWawWaitStates(int NumPasses) {
return NumPasses + 2;
}
-static int GFX940_XDL_N_PassWriteVgprVALUWawWaitStates(int NumPasses,
+static int GFX942_XDL_N_PassWriteVgprVALUWawWaitStates(int NumPasses,
bool IsGFX950) {
- // xdl def cycles | gfx940 | gfx950
+ // xdl def cycles | gfx942 | gfx950
// 2 pass | 5 5
// 4 pass | 7 8
// 8 pass | 11 12
@@ -2608,9 +2608,9 @@ static int GFX940_XDL_N_PassWriteVgprVALUWawWaitStates(int NumPasses,
return NumPasses + 3 + (NumPasses != 2 && IsGFX950);
}
-static int GFX940_XDL_N_PassWriteVgprVALUMemExpReadWaitStates(int NumPasses,
+static int GFX942_XDL_N_PassWriteVgprVALUMemExpReadWaitStates(int NumPasses,
bool IsGFX950) {
- // xdl def cycles | gfx940 | gfx950
+ // xdl def cycles | gfx942 | gfx950
// 2 pass | 5 5
// 4 pass | 7 8
// 8 pass | 11 12
@@ -2618,7 +2618,7 @@ static int GFX940_XDL_N_PassWriteVgprVALUMemExpReadWaitStates(int NumPasses,
return NumPasses + 3 + (NumPasses != 2 && IsGFX950);
}
-static int GFX940_SMFMA_N_PassWriteVgprVALUMemExpReadWaitStates(int NumPasses) {
+static int GFX942_SMFMA_N_PassWriteVgprVALUMemExpReadWaitStates(int NumPasses) {
// 2 pass -> 4
// 4 pass -> 6
// 8 pass -> 10
@@ -2723,7 +2723,7 @@ int GCNHazardRecognizer::checkMAIVALUHazards(MachineInstr *MI) {
// is a DGEMM instruction in-between a VALU and a VMEM instruction it
// causes the SQ to incorrectly not insert two wait states between the two
// instructions needed to avoid data hazard.
- if (IsMem && ST.hasGFX90AInsts() && !ST.hasGFX940Insts()) {
+ if (IsMem && ST.hasGFX90AInsts() && !ST.hasGFX942Insts()) {
DGEMMAfterVALUWrite = false;
if (TRI.isVectorRegister(MRI, Reg)) {
int WaitStatesNeededForUse =
@@ -2763,12 +2763,12 @@ int GCNHazardRecognizer::checkMAIVALUHazards(MachineInstr *MI) {
default:
llvm_unreachable("unexpected dgemm");
}
- } else if (ST.hasGFX940Insts()) {
+ } else if (ST.hasGFX942Insts()) {
NeedWaitStates =
isXDL(ST, *MFMA)
- ? GFX940_XDL_N_PassWriteVgprVALUMemExpReadWaitStates(
+ ? GFX942_XDL_N_PassWriteVgprVALUMemExpReadWaitStates(
NumPasses, ST.hasGFX950Insts())
- : GFX940_SMFMA_N_PassWriteVgprVALUMemExpReadWaitStates(
+ : GFX942_SMFMA_N_PassWriteVgprVALUMemExpReadWaitStates(
NumPasses);
} else {
switch (HazardDefLatency) {
@@ -2813,7 +2813,7 @@ int GCNHazardRecognizer::checkMAIVALUHazards(MachineInstr *MI) {
const int SMFMA16x16WriteVgprVALUWawWaitStates = 11;
const int SMFMA32x32WriteVgprVALUWawWaitStates = 19;
const int SMFMA4x4ReadVgprVALUWarWaitStates = 1;
- const int GFX940_XDL4PassReadVgprVALUWarWaitStates = 3;
+ const int GFX942_XDL4PassReadVgprVALUWarWaitStates = 3;
const int SMFMA16x16ReadVgprVALUWarWaitStates = 7;
const int SMFMA32x32ReadVgprVALUWarWaitStates = 15;
const int DMFMA4x4WriteVgprVALUWriteWaitStates = 6;
@@ -2850,12 +2850,12 @@ int GCNHazardRecognizer::checkMAIVALUHazards(MachineInstr *MI) {
default:
llvm_unreachable("unexpected number of cycles for dgemm");
}
- } else if (ST.hasGFX940Insts()) {
+ } else if (ST.hasGFX942Insts()) {
NeedWaitStates =
isXDL(ST, *MFMA)
- ? GFX940_XDL_N_PassWriteVgprVALUWawWaitStates(
+ ? GFX942_XDL_N_PassWriteVgprVALUWawWaitStates(
NumPasses, ST.hasGFX950Insts())
- : GFX940_SMFMA_N_PassWriteVgprVALUWawWaitStates(NumPasses);
+ : GFX942_SMFMA_N_PassWriteVgprVALUWawWaitStates(NumPasses);
} else {
switch (NumPasses) {
case 2:
@@ -2884,7 +2884,7 @@ int GCNHazardRecognizer::checkMAIVALUHazards(MachineInstr *MI) {
!MI.readsRegister(Reg, &TRI))
return false;
- if (ST.hasGFX940Insts() && !isXDL(ST, MI))
+ if (ST.hasGFX942Insts() && !isXDL(ST, MI))
return false;
const MachineOperand *SrcC =
@@ -2906,16 +2906,21 @@ int GCNHazardRecognizer::checkMAIVALUHazards(MachineInstr *MI) {
unsigned HazardDefLatency = TSchedModel.computeInstrLatency(MFMA);
int NeedWaitStates = MaxWaitStates;
switch (HazardDefLatency) {
- case 2: NeedWaitStates = SMFMA4x4ReadVgprVALUWarWaitStates;
- break;
- case 4: assert(ST.hasGFX940Insts());
- NeedWaitStates = GFX940_XDL4PassReadVgprVALUWarWaitStates;
- break;
- case 8: NeedWaitStates = SMFMA16x16ReadVgprVALUWarWaitStates;
- break;
- case 16: [[fallthrough]];
- default: NeedWaitStates = SMFMA32x32ReadVgprVALUWarWaitStates;
- break;
+ case 2:
+ NeedWaitStates = SMFMA4x4ReadVgprVALUWarWaitStates;
+ break;
+ case 4:
+ assert(ST.hasGFX942Insts());
+ NeedWaitStates = GFX942_XDL4PassReadVgprVALUWarWaitStates;
+ break;
+ case 8:
+ NeedWaitStates = SMFMA16x16ReadVgprVALUWarWaitStates;
+ break;
+ case 16:
+ [[fallthrough]];
+ default:
+ NeedWaitStates = SMFMA32x32ReadVgprVALUWarWaitStates;
+ break;
}
int WaitStatesNeededForUse = NeedWaitStates - WaitStatesSinceUse;
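For reference, the renamed GFX942 wait-state helpers keep the same arithmetic as before; only the names and comment labels change in this hunk. A minimal standalone sketch (not part of this patch; the function name here is hypothetical) shows that the NumPasses formula reproduces the gfx942/gfx950 columns of the "xdl def cycles" tables above:

#include <cassert>

// Hypothetical stand-in for the GFX942_XDL_N_Pass* helpers above; the body is
// the same formula the patch renames but does not change.
static int xdlWaitStates(int NumPasses, bool IsGFX950) {
  return NumPasses + 3 + (NumPasses != 2 && IsGFX950);
}

int main() {
  assert(xdlWaitStates(2, /*IsGFX950=*/false) == 5);  // gfx942, 2-pass XDL
  assert(xdlWaitStates(4, /*IsGFX950=*/false) == 7);  // gfx942, 4-pass XDL
  assert(xdlWaitStates(8, /*IsGFX950=*/true) == 12);  // gfx950, 8-pass XDL
  return 0;
}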
diff --git a/llvm/lib/Target/AMDGPU/GCNProcessors.td b/llvm/lib/Target/AMDGPU/GCNProcessors.td
index a86c76bb6075e..0b372e29efe67 100644
--- a/llvm/lib/Target/AMDGPU/GCNProcessors.td
+++ b/llvm/lib/Target/AMDGPU/GCNProcessors.td
@@ -192,15 +192,7 @@ def : ProcessorModel<"gfx90c", SIQuarterSpeedModel,
FeatureISAVersion9_0_C.Features
>;
-def : ProcessorModel<"gfx940", SIDPGFX940FullSpeedModel,
- FeatureISAVersion9_4_0.Features
->;
-
-def : ProcessorModel<"gfx941", SIDPGFX940FullSpeedModel,
- FeatureISAVersion9_4_1.Features
->;
-
-def : ProcessorModel<"gfx942", SIDPGFX940FullSpeedModel,
+def : ProcessorModel<"gfx942", SIDPGFX942FullSpeedModel,
FeatureISAVersion9_4_2.Features
>;
@@ -213,8 +205,8 @@ def : ProcessorModel<"gfx9-generic", SIQuarterSpeedModel,
FeatureISAVersion9_Generic.Features
>;
-// [gfx940, gfx941, gfx942]
-def : ProcessorModel<"gfx9-4-generic", SIDPGFX940FullSpeedModel,
+// [gfx942]
+def : ProcessorModel<"gfx9-4-generic", SIDPGFX942FullSpeedModel,
FeatureISAVersion9_4_Generic.Features
>;
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index 342b211199dca..72f3d1abb82fe 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -107,7 +107,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool GFX8Insts = false;
bool GFX9Insts = false;
bool GFX90AInsts = false;
- bool GFX940Insts = false;
+ bool GFX942Insts = false;
bool GFX950Insts = false;
bool GFX10Insts = false;
bool GFX11Insts = false;
@@ -246,7 +246,6 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool HasMADIntraFwdBug = false;
bool HasVOPDInsts = false;
bool HasVALUTransUseHazard = false;
- bool HasForceStoreSC0SC1 = false;
bool HasRequiredExportPriority = false;
bool HasVmemWriteVgprInOrder = false;
bool HasAshrPkInsts = false;
@@ -654,10 +653,10 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
// The ST addressing mode means no registers are used, either VGPR or SGPR,
// but only immediate offset is swizzled and added to the FLAT scratch base.
bool hasFlatScratchSTMode() const {
- return hasFlatScratchInsts() && (hasGFX10_3Insts() || hasGFX940Insts());
+ return hasFlatScratchInsts() && (hasGFX10_3Insts() || hasGFX942Insts());
}
- bool hasFlatScratchSVSMode() const { return GFX940Insts || GFX11Insts; }
+ bool hasFlatScratchSVSMode() const { return GFX942Insts || GFX11Insts; }
bool hasScalarFlatScratchInsts() const {
return ScalarFlatScratchInsts;
@@ -676,9 +675,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
return GFX10_BEncoding;
}
- bool hasExportInsts() const {
- return !hasGFX940Insts();
- }
+ bool hasExportInsts() const { return !hasGFX942Insts(); }
bool hasVINTERPEncoding() const {
return GFX11Insts;
@@ -1073,7 +1070,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
}
bool hasFmaakFmamkF32Insts() const {
- return getGeneration() >= GFX10 || hasGFX940Insts();
+ return getGeneration() >= GFX10 || hasGFX942Insts();
}
bool hasImageInsts() const {
@@ -1130,9 +1127,9 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool hasMadF16() const;
- bool hasMovB64() const { return GFX940Insts; }
+ bool hasMovB64() const { return GFX942Insts; }
- bool hasLshlAddB64() const { return GFX940Insts; }
+ bool hasLshlAddB64() const { return GFX942Insts; }
bool enableSIScheduler() const {
return EnableSIScheduler;
@@ -1216,25 +1213,21 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
// Shift amount of a 64 bit shift cannot be a highest allocated register
// if also at the end of the allocation block.
- bool hasShift64HighRegBug() const {
- return GFX90AInsts && !GFX940Insts;
- }
+ bool hasShift64HighRegBug() const { return GFX90AInsts && !GFX942Insts; }
// Has one cycle hazard on transcendental instruction feeding a
// non transcendental VALU.
- bool hasTransForwardingHazard() const { return GFX940Insts; }
+ bool hasTransForwardingHazard() const { return GFX942Insts; }
// Has one cycle hazard on a VALU instruction partially writing dst with
// a shift of result bits feeding another VALU instruction.
- bool hasDstSelForwardingHazard() const { return GFX940Insts; }
+ bool hasDstSelForwardingHazard() const { return GFX942Insts; }
// Cannot use op_sel with v_dot instructions.
- bool hasDOTOpSelHazard() const { return GFX940Insts || GFX11Insts; }
+ bool hasDOTOpSelHazard() const { return GFX942Insts || GFX11Insts; }
// Does not have HW interlocs for VALU writing and then reading SGPRs.
- bool hasVDecCoExecHazard() const {
- return GFX940Insts;
- }
+ bool hasVDecCoExecHazard() const { return GFX942Insts; }
bool hasNSAtoVMEMBug() const {
return HasNSAtoVMEMBug;
@@ -1264,8 +1257,6 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool hasCvtScaleForwardingHazard() const { return GFX950Insts; }
- bool hasForceStoreSC0SC1() const { return HasForceStoreSC0SC1; }
-
bool requiresCodeObjectV6() const { return RequiresCOV6; }
bool hasVALUMaskWriteHazard() const { return getGeneration() == GFX11; }
@@ -1297,12 +1288,12 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool hasPackedTID() const { return HasPackedTID; }
- // GFX940 is a derivation to GFX90A. hasGFX940Insts() being true implies that
+ // GFX942 derives from GFX90A. hasGFX942Insts() being true implies that
// hasGFX90AInsts is also true.
- bool hasGFX940Insts() const { return GFX940Insts; }
+ bool hasGFX942Insts() const { return GFX942Insts; }
- // GFX950 is a derivation to GFX940. hasGFX950Insts() implies that
- // hasGFX940Insts and hasGFX90AInsts are also true.
+ // GFX950 derives from GFX942. hasGFX950Insts() implies that
+ // hasGFX942Insts and hasGFX90AInsts are also true.
bool hasGFX950Insts() const { return GFX950Insts; }
/// Returns true if the target supports
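The renamed predicates preserve the documented implication chain. As a minimal illustrative check (a sketch only, assuming the GCNSubtarget accessors named above; it is not a test added by this patch):

#include "GCNSubtarget.h"  // AMDGPU target-internal header

// Illustrative only: gfx950 implies gfx942, which implies gfx90a, matching the
// comments in GCNSubtarget.h after this rename.
static bool subtargetChainIsConsistent(const llvm::GCNSubtarget &ST) {
  if (ST.hasGFX950Insts() && !ST.hasGFX942Insts())
    return false;
  if (ST.hasGFX942Insts() && !ST.hasGFX90AInsts())
    return false;
  return true;
}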
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
index 381841f142855..5a72543c8eef1 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
@@ -151,15 +151,16 @@ void AMDGPUInstPrinter::printCPol(const MCInst *MI, unsigned OpNo,
}
if (Imm & CPol::GLC)
- O << ((AMDGPU::isGFX940(STI) &&
- !(MII.get(MI->getOpcode()).TSFlags & SIInstrFlags::SMRD)) ? " sc0"
- : " glc");
+ O << ((AMDGPU::isGFX942(STI) &&
+ !(MII.get(MI->getOpcode()).TSFlags & SIInstrFlags::SMRD))
+ ? " sc0"
+ : " glc");
if (Imm & CPol::SLC)
- O << (AMDGPU::isGFX940(STI) ? " nt" : " slc");
+ O << (AMDGPU::isGFX942(STI) ? " nt" : " slc");
if ((Imm & CPol::DLC) && AMDGPU::isGFX10Plus(STI))
O << " dlc";
if ((Imm & CPol::SCC) && AMDGPU::isGFX90A(STI))
- O << (AMDGPU::isGFX940(STI) ? " sc1" : " scc");
+ O << (AMDGPU::isGFX942(STI) ? " sc1" : " scc");
if (Imm & ~CPol::ALL_pregfx12)
O << " /* unexpected cache policy bit */";
}
@@ -629,12 +630,12 @@ void AMDGPUInstPrinter::printBLGP(const MCInst *MI, unsigned OpNo,
if (!Imm)
return;
- if (AMDGPU::isGFX940(STI)) {
+ if (AMDGPU::isGFX942(STI)) {
switch (MI->getOpcode()) {
- case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
- case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
- case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
- case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
+ case AMDGPU::V_MFMA_F64_16X16X4F64_gfx942_acd:
+ case AMDGPU::V_MFMA_F64_16X16X4F64_gfx942_vcd:
+ case AMDGPU::V_MFMA_F64_4X4X4F64_gfx942_acd:
+ case AMDGPU::V_MFMA_F64_4X4X4F64_gfx942_vcd:
O << " neg:[" << (Imm & 1) << ',' << ((Imm >> 1) & 1) << ','
<< ((Imm >> 2) & 1) << ']';
return;
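For readers comparing disassembly, the cache-policy spellings selected above can be summarized in a small sketch (a hypothetical helper, not the printer's actual interface):

// Hypothetical summary of the gfx942 cache-policy spellings used in printCPol:
// GLC prints as "sc0" (except on SMRD), SLC as "nt", and SCC as "sc1".
enum class CacheBit { GLC, SLC, SCC };  // local enum for this sketch only
static const char *cpolSpelling(CacheBit B, bool IsGFX942, bool IsSMRD) {
  switch (B) {
  case CacheBit::GLC:
    return (IsGFX942 && !IsSMRD) ? "sc0" : "glc";
  case CacheBit::SLC:
    return IsGFX942 ? "nt" : "slc";
  case CacheBit::SCC:
    return IsGFX942 ? "sc1" : "scc";  // only printed on gfx90a-based targets
  }
  return "";
}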
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
index 059bab5838526..4a4ad712e304d 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUTargetStreamer.cpp
@@ -93,8 +93,6 @@ StringRef AMDGPUTargetStreamer::getArchNameFromElfMach(unsigned ElfMach) {
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX909: AK = GK_GFX909; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX90A: AK = GK_GFX90A; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX90C: AK = GK_GFX90C; break;
- case ELF::EF_AMDGPU_MACH_AMDGCN_GFX940: AK = GK_GFX940; break;
- case ELF::EF_AMDGPU_MACH_AMDGCN_GFX941: AK = GK_GFX941; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX942: AK = GK_GFX942; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX950: AK = GK_GFX950; break;
case ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010: AK = GK_GFX1010; break;
@@ -180,8 +178,6 @@ unsigned AMDGPUTargetStreamer::getElfMach(StringRef GPU) {
case GK_GFX909: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX909;
case GK_GFX90A: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX90A;
case GK_GFX90C: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX90C;
- case GK_GFX940: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX940;
- case GK_GFX941: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX941;
case GK_GFX942: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX942;
case GK_GFX950: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX950;
case GK_GFX1010: return ELF::EF_AMDGPU_MACH_AMDGCN_GFX1010;
diff --git a/llvm/lib/Target/AMDGPU/SIDefines.h b/llvm/lib/Target/AMDGPU/SIDefines.h
index f812ae652b63d..3d5976183c10f 100644
--- a/llvm/lib/Target/AMDGPU/SIDefines.h
+++ b/llvm/lib/Target/AMDGPU/SIDefines.h
@@ -42,7 +42,7 @@ enum {
GFX10 = 6,
SDWA10 = 7,
GFX90A = 8,
- GFX940 = 9,
+ GFX942 = 9,
GFX11 = 10,
GFX12 = 11,
};
@@ -542,7 +542,7 @@ enum Id { // HwRegCode, (6) [5:0]
ID_EXCP_FLAG_USER = 18,
ID_TRAP_CTRL = 19,
- // GFX940 specific registers
+ // GFX942 specific registers
ID_XCC_ID = 20,
ID_SQ_PERF_SNAPSHOT_DATA = 21,
ID_SQ_PERF_SNAPSHOT_DATA1 = 22,
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index e09b310d107ac..909ad07782fc6 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -16823,39 +16823,39 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
// safe. The message phrasing also should be better.
if (globalMemoryFPAtomicIsLegal(*Subtarget, RMW, HasSystemScope)) {
if (AS == AMDGPUAS::FLAT_ADDRESS) {
- // gfx940, gfx12
+ // gfx942, gfx12
if (Subtarget->hasAtomicFlatPkAdd16Insts() && isV2F16OrV2BF16(Ty))
return ReportUnsafeHWInst(AtomicExpansionKind::None);
} else if (AMDGPU::isExtendedGlobalAddrSpace(AS)) {
- // gfx90a, gfx940, gfx12
+ // gfx90a, gfx942, gfx12
if (Subtarget->hasAtomicBufferGlobalPkAddF16Insts() && isV2F16(Ty))
return ReportUnsafeHWInst(AtomicExpansionKind::None);
- // gfx940, gfx12
+ // gfx942, gfx12
if (Subtarget->hasAtomicGlobalPkAddBF16Inst() && isV2BF16(Ty))
return ReportUnsafeHWInst(AtomicExpansionKind::None);
} else if (AS == AMDGPUAS::BUFFER_FAT_POINTER) {
- // gfx90a, gfx940, gfx12
+ // gfx90a, gfx942, gfx12
if (Subtarget->hasAtomicBufferGlobalPkAddF16Insts() && isV2F16(Ty))
return ReportUnsafeHWInst(AtomicExpansionKind::None);
- // While gfx90a/gfx940 supports v2bf16 for global/flat, it does not for
+ // While gfx90a/gfx942 supports v2bf16 for global/flat, it does not for
// buffer. gfx12 does have the buffer version.
if (Subtarget->hasAtomicBufferPkAddBF16Inst() && isV2BF16(Ty))
return ReportUnsafeHWInst(AtomicExpansionKind::None);
}
- // global and flat atomic fadd f64: gfx90a, gfx940.
+ // global and flat atomic fadd f64: gfx90a, gfx942.
if (Subtarget->hasFlatBufferGlobalAtomicFaddF64Inst() && Ty->isDoubleTy())
return ReportUnsafeHWInst(AtomicExpansionKind::None);
if (AS != AMDGPUAS::FLAT_ADDRESS) {
if (Ty->isFloatTy()) {
- // global/buffer atomic fadd f32 no-rtn: gfx908, gfx90a, gfx940,
+ // global/buffer atomic fadd f32 no-rtn: gfx908, gfx90a, gfx942,
// gfx11+.
if (RMW->use_empty() && Subtarget->hasAtomicFaddNoRtnInsts())
return ReportUnsafeHWInst(AtomicExpansionKind::None);
- // global/buffer atomic fadd f32 rtn: gfx90a, gfx940, gfx11+.
+ // global/buffer atomic fadd f32 rtn: gfx90a, gfx942, gfx11+.
if (!RMW->use_empty() && Subtarget->hasAtomicFaddRtnInsts())
return ReportUnsafeHWInst(AtomicExpansionKind::None);
} else {
@@ -16867,7 +16867,7 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
}
}
- // flat atomic fadd f32: gfx940, gfx11+.
+ // flat atomic fadd f32: gfx942, gfx11+.
if (AS == AMDGPUAS::FLAT_ADDRESS && Ty->isFloatTy()) {
if (Subtarget->hasFlatAtomicFaddF32Inst())
return ReportUnsafeHWInst(AtomicExpansionKind::None);
@@ -16906,7 +16906,7 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
// float, double restored in gfx10.
// double removed again in gfx11, so only f32 for gfx11/gfx12.
//
- // For gfx9, gfx90a and gfx940 support f64 for global (same as fadd), but
+ // For gfx9, gfx90a and gfx942 support f64 for global (same as fadd), but
// no f32.
if (AS == AMDGPUAS::FLAT_ADDRESS) {
if (Subtarget->hasAtomicFMinFMaxF32FlatInsts() && Ty->isFloatTy())
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 7dace11d208a0..98f22b70c444e 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -9475,8 +9475,8 @@ int SIInstrInfo::pseudoToMCOpcode(int Opcode) const {
if (ST.hasGFX90AInsts()) {
uint16_t NMCOp = (uint16_t)-1;
- if (ST.hasGFX940Insts())
- NMCOp = AMDGPU::getMCOpcode(Opcode, SIEncodingFamily::GFX940);
+ if (ST.hasGFX942Insts())
+ NMCOp = AMDGPU::getMCOpcode(Opcode, SIEncodingFamily::GFX942);
if (NMCOp == (uint16_t)-1)
NMCOp = AMDGPU::getMCOpcode(Opcode, SIEncodingFamily::GFX90A);
if (NMCOp == (uint16_t)-1)
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index e30e257da6873..ee633f1a26035 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -28,7 +28,7 @@ def SIEncodingFamily {
int GFX10 = 6;
int SDWA10 = 7;
int GFX90A = 8;
- int GFX940 = 9;
+ int GFX942 = 9;
int GFX11 = 10;
int GFX12 = 11;
}
@@ -3113,7 +3113,7 @@ def getMCOpcodeGen : InstrMapping {
[!cast<string>(SIEncodingFamily.GFX10)],
[!cast<string>(SIEncodingFamily.SDWA10)],
[!cast<string>(SIEncodingFamily.GFX90A)],
- [!cast<string>(SIEncodingFamily.GFX940)],
+ [!cast<string>(SIEncodingFamily.GFX942)],
[!cast<string>(SIEncodingFamily.GFX11)],
[!cast<string>(SIEncodingFamily.GFX12)]];
}
diff --git a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
index be6cff873532b..9a41afcd56f44 100644
--- a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
@@ -359,11 +359,6 @@ class SICacheControl {
/// Virtual destructor to allow derivations to be deleted.
virtual ~SICacheControl() = default;
-
- virtual bool tryForceStoreSC0SC1(const SIMemOpInfo &MOI,
- MachineBasicBlock::iterator &MI) const {
- return false;
- }
};
class SIGfx6CacheControl : public SICacheControl {
@@ -470,7 +465,7 @@ class SIGfx90ACacheControl : public SIGfx7CacheControl {
Position Pos) const override;
};
-class SIGfx940CacheControl : public SIGfx90ACacheControl {
+class SIGfx942CacheControl : public SIGfx90ACacheControl {
protected:
/// Sets SC0 bit to "true" if present in \p MI. Returns true if \p MI
@@ -492,8 +487,7 @@ class SIGfx940CacheControl : public SIGfx90ACacheControl {
}
public:
-
- SIGfx940CacheControl(const GCNSubtarget &ST) : SIGfx90ACacheControl(ST) {};
+ SIGfx942CacheControl(const GCNSubtarget &ST) : SIGfx90ACacheControl(ST) {};
bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI,
SIAtomicScope Scope,
@@ -518,20 +512,6 @@ class SIGfx940CacheControl : public SIGfx90ACacheControl {
bool insertRelease(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
SIAtomicAddrSpace AddrSpace, bool IsCrossAddrSpaceOrdering,
Position Pos) const override;
-
- bool tryForceStoreSC0SC1(const SIMemOpInfo &MOI,
- MachineBasicBlock::iterator &MI) const override {
- bool Changed = false;
- if (ST.hasForceStoreSC0SC1() &&
- (MOI.getInstrAddrSpace() & (SIAtomicAddrSpace::SCRATCH |
- SIAtomicAddrSpace::GLOBAL |
- SIAtomicAddrSpace::OTHER)) !=
- SIAtomicAddrSpace::NONE) {
- Changed |= enableSC0Bit(MI);
- Changed |= enableSC1Bit(MI);
- }
- return Changed;
- }
};
class SIGfx10CacheControl : public SIGfx7CacheControl {
@@ -958,8 +938,8 @@ bool SICacheControl::enableNamedBit(const MachineBasicBlock::iterator MI,
/* static */
std::unique_ptr<SICacheControl> SICacheControl::create(const GCNSubtarget &ST) {
GCNSubtarget::Generation Generation = ST.getGeneration();
- if (ST.hasGFX940Insts())
- return std::make_unique<SIGfx940CacheControl>(ST);
+ if (ST.hasGFX942Insts())
+ return std::make_unique<SIGfx942CacheControl>(ST);
if (ST.hasGFX90AInsts())
return std::make_unique<SIGfx90ACacheControl>(ST);
if (Generation <= AMDGPUSubtarget::SOUTHERN_ISLANDS)
@@ -1577,7 +1557,7 @@ bool SIGfx90ACacheControl::insertRelease(MachineBasicBlock::iterator &MI,
return Changed;
}
-bool SIGfx940CacheControl::enableLoadCacheBypass(
+bool SIGfx942CacheControl::enableLoadCacheBypass(
const MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
SIAtomicAddrSpace AddrSpace) const {
assert(MI->mayLoad() && !MI->mayStore());
@@ -1621,9 +1601,9 @@ bool SIGfx940CacheControl::enableLoadCacheBypass(
return Changed;
}
-bool SIGfx940CacheControl::enableStoreCacheBypass(
- const MachineBasicBlock::iterator &MI,
- SIAtomicScope Scope, SIAtomicAddrSpace AddrSpace) const {
+bool SIGfx942CacheControl::enableStoreCacheBypass(
+ const MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
+ SIAtomicAddrSpace AddrSpace) const {
assert(!MI->mayLoad() && MI->mayStore());
bool Changed = false;
@@ -1661,7 +1641,7 @@ bool SIGfx940CacheControl::enableStoreCacheBypass(
return Changed;
}
-bool SIGfx940CacheControl::enableRMWCacheBypass(
+bool SIGfx942CacheControl::enableRMWCacheBypass(
const MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
SIAtomicAddrSpace AddrSpace) const {
assert(MI->mayLoad() && MI->mayStore());
@@ -1690,7 +1670,7 @@ bool SIGfx940CacheControl::enableRMWCacheBypass(
return Changed;
}
-bool SIGfx940CacheControl::enableVolatileAndOrNonTemporal(
+bool SIGfx942CacheControl::enableVolatileAndOrNonTemporal(
MachineBasicBlock::iterator &MI, SIAtomicAddrSpace AddrSpace, SIMemOp Op,
bool IsVolatile, bool IsNonTemporal, bool IsLastUse = false) const {
// Only handle load and store, not atomic read-modify-write instructions. The
@@ -1730,7 +1710,7 @@ bool SIGfx940CacheControl::enableVolatileAndOrNonTemporal(
return Changed;
}
-bool SIGfx940CacheControl::insertAcquire(MachineBasicBlock::iterator &MI,
+bool SIGfx942CacheControl::insertAcquire(MachineBasicBlock::iterator &MI,
SIAtomicScope Scope,
SIAtomicAddrSpace AddrSpace,
Position Pos) const {
@@ -1816,7 +1796,7 @@ bool SIGfx940CacheControl::insertAcquire(MachineBasicBlock::iterator &MI,
return Changed;
}
-bool SIGfx940CacheControl::insertRelease(MachineBasicBlock::iterator &MI,
+bool SIGfx942CacheControl::insertRelease(MachineBasicBlock::iterator &MI,
SIAtomicScope Scope,
SIAtomicAddrSpace AddrSpace,
bool IsCrossAddrSpaceOrdering,
@@ -2821,7 +2801,6 @@ bool SIMemoryLegalizer::runOnMachineFunction(MachineFunction &MF) {
Changed |= expandLoad(*MOI, MI);
else if (const auto &MOI = MOA.getStoreInfo(MI)) {
Changed |= expandStore(*MOI, MI);
- Changed |= CC->tryForceStoreSC0SC1(*MOI, MI);
} else if (const auto &MOI = MOA.getAtomicFenceInfo(MI))
Changed |= expandAtomicFence(*MOI, MI);
else if (const auto &MOI = MOA.getAtomicCmpxchgOrRmwInfo(MI))
diff --git a/llvm/lib/Target/AMDGPU/SISchedule.td b/llvm/lib/Target/AMDGPU/SISchedule.td
index 117add324db56..2a374b360b04a 100644
--- a/llvm/lib/Target/AMDGPU/SISchedule.td
+++ b/llvm/lib/Target/AMDGPU/SISchedule.td
@@ -94,7 +94,7 @@ class SISchedMachineModel : SchedMachineModel {
def SIFullSpeedModel : SISchedMachineModel;
def SIQuarterSpeedModel : SISchedMachineModel;
def SIDPFullSpeedModel : SISchedMachineModel;
-def SIDPGFX940FullSpeedModel : SISchedMachineModel;
+def SIDPGFX942FullSpeedModel : SISchedMachineModel;
def SIDPGFX950FullSpeedModel : SISchedMachineModel;
def GFX10SpeedModel : SISchedMachineModel;
def GFX11SpeedModel : SISchedMachineModel;
@@ -276,7 +276,7 @@ def : InstRW<[Write8PassDGEMM, MIMFMARead], (instregex "^V_MFMA_.64_16X16X")>;
} // End SchedModel = SIDPFullSpeedModel
-let SchedModel = SIDPGFX940FullSpeedModel in {
+let SchedModel = SIDPGFX942FullSpeedModel in {
defm : SICommonWriteRes;
@@ -308,7 +308,7 @@ def : InstRW<[Write8PassDGEMM, MIMFMARead], (instregex "^V_MFMA_.64_16X16X")>;
def : InstRW<[Write4PassMAI, MIMFMARead], (instregex "^V_SMFMAC_.32_16X16X")>;
def : InstRW<[Write8PassMAI, MIMFMARead], (instregex "^V_SMFMAC_.32_32X32X")>;
-} // End SchedModel = SIDPGFX940FullSpeedModel
+} // End SchedModel = SIDPGFX942FullSpeedModel
let SchedModel = SIDPGFX950FullSpeedModel in {
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp
index a8e4ce133ffbc..373c6be33e8a8 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp
@@ -216,12 +216,12 @@ static constexpr CustomOperand Operands[] = {
{{"HW_REG_SCRATCH_BASE_HI"}, ID_FLAT_SCR_HI, isGFX12Plus},
{{"HW_REG_SHADER_CYCLES_LO"}, ID_SHADER_CYCLES, isGFX12Plus},
- // GFX940 specific registers
- {{"HW_REG_XCC_ID"}, ID_XCC_ID, isGFX940},
- {{"HW_REG_SQ_PERF_SNAPSHOT_DATA"}, ID_SQ_PERF_SNAPSHOT_DATA, isGFX940},
- {{"HW_REG_SQ_PERF_SNAPSHOT_DATA1"}, ID_SQ_PERF_SNAPSHOT_DATA1, isGFX940},
- {{"HW_REG_SQ_PERF_SNAPSHOT_PC_LO"}, ID_SQ_PERF_SNAPSHOT_PC_LO, isGFX940},
- {{"HW_REG_SQ_PERF_SNAPSHOT_PC_HI"}, ID_SQ_PERF_SNAPSHOT_PC_HI, isGFX940},
+ // GFX942 specific registers
+ {{"HW_REG_XCC_ID"}, ID_XCC_ID, isGFX942},
+ {{"HW_REG_SQ_PERF_SNAPSHOT_DATA"}, ID_SQ_PERF_SNAPSHOT_DATA, isGFX942},
+ {{"HW_REG_SQ_PERF_SNAPSHOT_DATA1"}, ID_SQ_PERF_SNAPSHOT_DATA1, isGFX942},
+ {{"HW_REG_SQ_PERF_SNAPSHOT_PC_LO"}, ID_SQ_PERF_SNAPSHOT_PC_LO, isGFX942},
+ {{"HW_REG_SQ_PERF_SNAPSHOT_PC_HI"}, ID_SQ_PERF_SNAPSHOT_PC_HI, isGFX942},
// Aliases
{{"HW_REG_HW_ID"}, ID_HW_ID1, isGFX10},
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index c521d0dd3ad2d..a87c1d17bc232 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -543,9 +543,9 @@ bool getMAIIsDGEMM(unsigned Opc) {
return Info ? Info->is_dgemm : false;
}
-bool getMAIIsGFX940XDL(unsigned Opc) {
+bool getMAIIsGFX942XDL(unsigned Opc) {
const MAIInstInfo *Info = getMAIInstInfoHelper(Opc);
- return Info ? Info->is_gfx940_xdl : false;
+ return Info ? Info->is_gfx942_xdl : false;
}
uint8_t mfmaScaleF8F6F4FormatToNumRegs(unsigned EncodingVal) {
@@ -2284,8 +2284,8 @@ bool isGFX90A(const MCSubtargetInfo &STI) {
return STI.hasFeature(AMDGPU::FeatureGFX90AInsts);
}
-bool isGFX940(const MCSubtargetInfo &STI) {
- return STI.hasFeature(AMDGPU::FeatureGFX940Insts);
+bool isGFX942(const MCSubtargetInfo &STI) {
+ return STI.hasFeature(AMDGPU::FeatureGFX942Insts);
}
bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI) {
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index 103993e6435de..c7b9440a7e95a 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -99,7 +99,7 @@ struct GcnBufferFormatInfo {
struct MAIInstInfo {
uint16_t Opcode;
bool is_dgemm;
- bool is_gfx940_xdl;
+ bool is_gfx942_xdl;
};
struct MFMA_F8F6F4_Info {
@@ -591,7 +591,7 @@ LLVM_READONLY
bool getMAIIsDGEMM(unsigned Opc);
LLVM_READONLY
-bool getMAIIsGFX940XDL(unsigned Opc);
+bool getMAIIsGFX942XDL(unsigned Opc);
struct CanBeVOPD {
bool X;
@@ -1370,7 +1370,7 @@ bool isGFX10_BEncoding(const MCSubtargetInfo &STI);
bool hasGFX10_3Insts(const MCSubtargetInfo &STI);
bool isGFX10_3_GFX11(const MCSubtargetInfo &STI);
bool isGFX90A(const MCSubtargetInfo &STI);
-bool isGFX940(const MCSubtargetInfo &STI);
+bool isGFX942(const MCSubtargetInfo &STI);
bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI);
bool hasMAIInsts(const MCSubtargetInfo &STI);
bool hasVOPD(const MCSubtargetInfo &STI);
diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index a407ae797a48b..1a3a7ec52c3b2 100644
--- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -238,7 +238,7 @@ def VOPProfile_MOV : VOPProfile <[i32, i32, untyped, untyped]> {
let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
defm V_MOV_B32 : VOP1Inst <"v_mov_b32", VOPProfile_MOV, null_frag, 0x8>;
-let SubtargetPredicate = isGFX940Plus, SchedRW = [Write64Bit] in
+let SubtargetPredicate = isGFX942Plus, SchedRW = [Write64Bit] in
defm V_MOV_B64 : VOP1Inst <"v_mov_b64", VOP_I64_I64>;
} // End isMoveImm = 1
@@ -1558,7 +1558,7 @@ multiclass VOP1_OpSel_Real_e32e64_gfx9 <bits<10> op> {
defm V_SCREEN_PARTITION_4SE_B32 : VOP1_Real_gfx9 <0x37>;
-let AssemblerPredicate = isGFX940Plus in
+let AssemblerPredicate = isGFX942Plus in
defm V_MOV_B64 : VOP1_Real_gfx9 <0x38>;
defm V_CVT_F32_BF16 : VOP1_Real_gfx9 <0x5b>;
diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
index 900c91731aa1b..95ff1165a4ce3 100644
--- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
@@ -2367,8 +2367,8 @@ multiclass VOP2_Real_MADK_vi <bits<6> op> {
VOP2_MADKe<op{5-0}, !cast<VOP2_Pseudo>(NAME).Pfl>;
}
-multiclass VOP2_Real_MADK_gfx940 <bits<6> op> {
- def _gfx940 : VOP2_Real<!cast<VOP2_Pseudo>(NAME), SIEncodingFamily.GFX940>,
+multiclass VOP2_Real_MADK_gfx942 <bits<6> op> {
+ def _gfx942 : VOP2_Real<!cast<VOP2_Pseudo>(NAME), SIEncodingFamily.GFX942>,
VOP2_MADKe<op{5-0}, !cast<VOP2_Pseudo>(NAME).Pfl> {
let DecoderNamespace = "GFX9";
}
@@ -2668,8 +2668,8 @@ let IsSingle = 1 in {
}
let SubtargetPredicate = HasFmaakFmamkF32Insts in {
-defm V_FMAMK_F32 : VOP2_Real_MADK_gfx940 <0x17>;
-defm V_FMAAK_F32 : VOP2_Real_MADK_gfx940 <0x18>;
+defm V_FMAMK_F32 : VOP2_Real_MADK_gfx942 <0x17>;
+defm V_FMAAK_F32 : VOP2_Real_MADK_gfx942 <0x18>;
}
multiclass VOP2_Real_DOT_ACC_gfx9<bits<6> op> : Base_VOP2_Real_e32e64_vi<op> {
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index afafc2ecccfaf..3824383aeace0 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -687,7 +687,7 @@ defm V_LSHL_OR_B32 : VOP3Inst <"v_lshl_or_b32", VOP3_Profile<VOP_I32_I32_I32_I32
// V_LSHL_ADD_U64: D0.u64 = (S0.u64 << S1.u[2:0]) + S2.u64
// src0 is shifted left by 0-4 (use “0” to get ADD_U64).
-let SubtargetPredicate = isGFX940Plus in
+let SubtargetPredicate = isGFX942Plus in
defm V_LSHL_ADD_U64 : VOP3Inst <"v_lshl_add_u64", VOP3_Profile<VOP_I64_I64_I32_I64>>;
let OtherPredicates = [HasFP8ConversionInsts], mayRaiseFPException = 0,
@@ -705,7 +705,7 @@ let OtherPredicates = [HasFP8ConversionInsts], mayRaiseFPException = 0,
// These instructions have non-standard use of op_sel. In particular they are
// using op_sel bits 2 and 3 while only having two sources. Therefore dummy
// src2 is used to hold the op_sel value.
- let Constraints = "$vdst = $src2", DisableEncoding = "$src2", SubtargetPredicate = isGFX940Plus in {
+ let Constraints = "$vdst = $src2", DisableEncoding = "$src2", SubtargetPredicate = isGFX942Plus in {
defm V_CVT_SR_FP8_F32 : VOP3Inst<"v_cvt_sr_fp8_f32", VOP3_CVT_SR_F8_F32_Profile>;
defm V_CVT_SR_BF8_F32 : VOP3Inst<"v_cvt_sr_bf8_f32", VOP3_CVT_SR_F8_F32_Profile>;
}
@@ -734,7 +734,7 @@ foreach Index = [0, -1] in {
def : Cvt_PK_F8_F32_Pat<int_amdgcn_cvt_pk_bf8_f32, Index, V_CVT_PK_BF8_F32_e64>;
}
-let SubtargetPredicate = isGFX940Plus in {
+let SubtargetPredicate = isGFX942Plus in {
foreach Index = [0, 1, 2, 3] in {
def : Cvt_SR_F8_F32_Pat<int_amdgcn_cvt_sr_fp8_f32, Index, V_CVT_SR_FP8_F32_e64>;
def : Cvt_SR_F8_F32_Pat<int_amdgcn_cvt_sr_bf8_f32, Index, V_CVT_SR_BF8_F32_e64>;
@@ -766,7 +766,7 @@ def : GCNPat<
(DivergentBinFrag<mul> i32:$src0, IsPow2Plus1:$src1),
(V_LSHL_ADD_U32_e64 i32:$src0, (i32 (Log2_32 imm:$src1)), i32:$src0)>;
-let SubtargetPredicate = isGFX940Plus in
+let SubtargetPredicate = isGFX942Plus in
def : GCNPat<
(ThreeOpFrag<shl_0_to_4, add> i64:$src0, i32:$src1, i64:$src2),
(V_LSHL_ADD_U64_e64 VSrc_b64:$src0, VSrc_b32:$src1, VSrc_b64:$src2)
diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
index d5c6e8af109f4..730fa17d998b7 100644
--- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
@@ -880,7 +880,7 @@ class MAIInst<string OpName, VOPProfile P, SDPatternOperator node, bit Scaled =
let SubtargetPredicate = HasMAIInsts;
Instruction Opcode = !cast<Instruction>(NAME);
bit is_dgemm = 0;
- bit is_gfx940_xdl = 0;
+ bit is_gfx942_xdl = 0;
let PseudoInstr = NAME; // FIXME: Why is this not the default
}
@@ -1005,7 +1005,7 @@ defm V_MFMA_F32_16X16X4F32 : MAIInst<"v_mfma_f32_16x16x4f32", "F32_F32_X4",
defm V_MFMA_F32_32X32X1F32 : MAIInst<"v_mfma_f32_32x32x1f32", "F32_F32_X32", int_amdgcn_mfma_f32_32x32x1f32>;
defm V_MFMA_F32_32X32X2F32 : MAIInst<"v_mfma_f32_32x32x2f32", "F32_F32_X16", int_amdgcn_mfma_f32_32x32x2f32>;
-let is_gfx940_xdl = 1 in {
+let is_gfx942_xdl = 1 in {
defm V_MFMA_F32_4X4X4F16 : MAIInst<"v_mfma_f32_4x4x4f16", "F32_V4F16_X4", int_amdgcn_mfma_f32_4x4x4f16>;
defm V_MFMA_I32_4X4X4I8 : MAIInst<"v_mfma_i32_4x4x4i8", "I32_I32_X4", int_amdgcn_mfma_i32_4x4x4i8>;
defm V_MFMA_F32_16X16X4F16 : MAIInst<"v_mfma_f32_16x16x4f16", "F32_V4F16_X16", int_amdgcn_mfma_f32_16x16x4f16>;
@@ -1026,7 +1026,7 @@ defm V_MFMA_F32_32X32X2BF16 : MAIInst<"v_mfma_f32_32x32x2bf16", "F32_V2I16_X32",
defm V_MFMA_F32_32X32X4BF16 : MAIInst<"v_mfma_f32_32x32x4bf16", "F32_V2I16_X16", int_amdgcn_mfma_f32_32x32x4bf16>;
}
-let SubtargetPredicate = HasGFX950Insts, is_gfx940_xdl = 1 in {
+let SubtargetPredicate = HasGFX950Insts, is_gfx942_xdl = 1 in {
defm V_MFMA_F32_16X16X32_F16 : MAIInst<"v_mfma_f32_16x16x32f16", "F32_V8F16_X32", int_amdgcn_mfma_f32_16x16x32_f16>;
defm V_MFMA_F32_32X32X16_F16 : MAIInst<"v_mfma_f32_32x32x16f16", "F32_V8F16_X16", int_amdgcn_mfma_f32_32x32x16_f16>;
defm V_MFMA_F32_16X16X32_BF16 : MAIInst<"v_mfma_f32_16x16x32bf16", "F32_V8BF16_X4", int_amdgcn_mfma_f32_16x16x32_bf16>;
@@ -1054,7 +1054,7 @@ defm V_MFMA_LD_SCALE_B32 : VOP3PInst<"v_mfma_ld_scale_b32", VOP_MFMA_LD_SCALE>;
}
let SubtargetPredicate = isGFX90APlus in {
- let is_gfx940_xdl = 1 in {
+ let is_gfx942_xdl = 1 in {
defm V_MFMA_F32_32X32X4BF16_1K : MAIInst<"v_mfma_f32_32x32x4bf16_1k", "F32_V4I16_X32", int_amdgcn_mfma_f32_32x32x4bf16_1k>;
defm V_MFMA_F32_16X16X4BF16_1K : MAIInst<"v_mfma_f32_16x16x4bf16_1k", "F32_V4I16_X16", int_amdgcn_mfma_f32_16x16x4bf16_1k>;
defm V_MFMA_F32_4X4X4BF16_1K : MAIInst<"v_mfma_f32_4x4x4bf16_1k", "F32_V4I16_X4", int_amdgcn_mfma_f32_4x4x4bf16_1k>;
@@ -1068,17 +1068,17 @@ let SubtargetPredicate = isGFX90APlus in {
}
} // End SubtargetPredicate = isGFX90APlus
-let SubtargetPredicate = isGFX940Plus, is_gfx940_xdl = 1 in {
+let SubtargetPredicate = isGFX942Plus, is_gfx942_xdl = 1 in {
defm V_MFMA_I32_32X32X16I8 : MAIInst<"v_mfma_i32_32x32x16i8", "I32_I64_X32", int_amdgcn_mfma_i32_32x32x16_i8>;
defm V_MFMA_I32_16X16X32I8 : MAIInst<"v_mfma_i32_16x16x32i8", "I32_I64_X16", int_amdgcn_mfma_i32_16x16x32_i8>;
-} // End SubtargetPredicate = isGFX940Plus, is_gfx940_xdl = 1
+} // End SubtargetPredicate = isGFX942Plus, is_gfx942_xdl = 1
-let SubtargetPredicate = HasXF32Insts, is_gfx940_xdl = 1 in {
+let SubtargetPredicate = HasXF32Insts, is_gfx942_xdl = 1 in {
defm V_MFMA_F32_16X16X8XF32 : MAIInst<"v_mfma_f32_16x16x8xf32", "F32_V2F32_X16", int_amdgcn_mfma_f32_16x16x8_xf32>;
defm V_MFMA_F32_32X32X4XF32 : MAIInst<"v_mfma_f32_32x32x4xf32", "F32_V2F32_X32", int_amdgcn_mfma_f32_32x32x4_xf32>;
-} // End SubtargetPredicate = HasXF32Insts, is_gfx940_xdl = 1
+} // End SubtargetPredicate = HasXF32Insts, is_gfx942_xdl = 1
-let SubtargetPredicate = HasFP8Insts, is_gfx940_xdl = 1 in {
+let SubtargetPredicate = HasFP8Insts, is_gfx942_xdl = 1 in {
defm V_MFMA_F32_16X16X32_BF8_BF8 : MAIInst<"v_mfma_f32_16x16x32_bf8_bf8", "F32_I64_X32", int_amdgcn_mfma_f32_16x16x32_bf8_bf8>;
defm V_MFMA_F32_16X16X32_BF8_FP8 : MAIInst<"v_mfma_f32_16x16x32_bf8_fp8", "F32_I64_X32", int_amdgcn_mfma_f32_16x16x32_bf8_fp8>;
defm V_MFMA_F32_16X16X32_FP8_BF8 : MAIInst<"v_mfma_f32_16x16x32_fp8_bf8", "F32_I64_X32", int_amdgcn_mfma_f32_16x16x32_fp8_bf8>;
@@ -1087,16 +1087,16 @@ let SubtargetPredicate = HasFP8Insts, is_gfx940_xdl = 1 in {
defm V_MFMA_F32_32X32X16_BF8_FP8 : MAIInst<"v_mfma_f32_32x32x16_bf8_fp8", "F32_I64_X16", int_amdgcn_mfma_f32_32x32x16_bf8_fp8>;
defm V_MFMA_F32_32X32X16_FP8_BF8 : MAIInst<"v_mfma_f32_32x32x16_fp8_bf8", "F32_I64_X16", int_amdgcn_mfma_f32_32x32x16_fp8_bf8>;
defm V_MFMA_F32_32X32X16_FP8_FP8 : MAIInst<"v_mfma_f32_32x32x16_fp8_fp8", "F32_I64_X16", int_amdgcn_mfma_f32_32x32x16_fp8_fp8>;
-} // End SubtargetPredicate = HasFP8Insts, is_gfx940_xdl = 1
+} // End SubtargetPredicate = HasFP8Insts, is_gfx942_xdl = 1
multiclass SMFMACInst<string OpName, string P, SDPatternOperator node> {
let Constraints = "$vdst = $src2", DisableEncoding = "$src2",
- isConvergent = 1, mayRaiseFPException = 0, ReadsModeReg = 1, is_gfx940_xdl = 1 in {
+ isConvergent = 1, mayRaiseFPException = 0, ReadsModeReg = 1, is_gfx942_xdl = 1 in {
def _e64 : MAIInst<OpName, !cast<VOPProfileSMFMAC>("VOPProfileSMFMAC_" # P), node>;
}
}
-let SubtargetPredicate = isGFX940Plus in {
+let SubtargetPredicate = isGFX942Plus in {
defm V_SMFMAC_F32_16X16X32_F16 : SMFMACInst<"v_smfmac_f32_16x16x32_f16", "F32_16X16X32_F16", int_amdgcn_smfmac_f32_16x16x32_f16>;
defm V_SMFMAC_F32_32X32X16_F16 : SMFMACInst<"v_smfmac_f32_32x32x16_f16", "F32_32X32X16_F16", int_amdgcn_smfmac_f32_32x32x16_f16>;
defm V_SMFMAC_F32_16X16X32_BF16 : SMFMACInst<"v_smfmac_f32_16x16x32_bf16", "F32_16X16X32_I16", int_amdgcn_smfmac_f32_16x16x32_bf16>;
@@ -1105,7 +1105,7 @@ defm V_SMFMAC_I32_16X16X64_I8 : SMFMACInst<"v_smfmac_i32_16x16x64_i8",
defm V_SMFMAC_I32_32X32X32_I8 : SMFMACInst<"v_smfmac_i32_32x32x32_i8", "I32_32X32X32_I8", int_amdgcn_smfmac_i32_32x32x32_i8>;
}
-let SubtargetPredicate = HasFP8Insts, is_gfx940_xdl = 1 in {
+let SubtargetPredicate = HasFP8Insts, is_gfx942_xdl = 1 in {
defm V_SMFMAC_F32_16X16X64_BF8_BF8 : SMFMACInst<"v_smfmac_f32_16x16x64_bf8_bf8", "F32_16X16X64_F8", int_amdgcn_smfmac_f32_16x16x64_bf8_bf8>;
defm V_SMFMAC_F32_16X16X64_BF8_FP8 : SMFMACInst<"v_smfmac_f32_16x16x64_bf8_fp8", "F32_16X16X64_F8", int_amdgcn_smfmac_f32_16x16x64_bf8_fp8>;
defm V_SMFMAC_F32_16X16X64_FP8_BF8 : SMFMACInst<"v_smfmac_f32_16x16x64_fp8_bf8", "F32_16X16X64_F8", int_amdgcn_smfmac_f32_16x16x64_fp8_bf8>;
@@ -1114,7 +1114,7 @@ defm V_SMFMAC_F32_32X32X32_BF8_BF8 : SMFMACInst<"v_smfmac_f32_32x32x32_bf8_bf8",
defm V_SMFMAC_F32_32X32X32_BF8_FP8 : SMFMACInst<"v_smfmac_f32_32x32x32_bf8_fp8", "F32_32X32X32_F8", int_amdgcn_smfmac_f32_32x32x32_bf8_fp8>;
defm V_SMFMAC_F32_32X32X32_FP8_BF8 : SMFMACInst<"v_smfmac_f32_32x32x32_fp8_bf8", "F32_32X32X32_F8", int_amdgcn_smfmac_f32_32x32x32_fp8_bf8>;
defm V_SMFMAC_F32_32X32X32_FP8_FP8 : SMFMACInst<"v_smfmac_f32_32x32x32_fp8_fp8", "F32_32X32X32_F8", int_amdgcn_smfmac_f32_32x32x32_fp8_fp8>;
-} // End SubtargetPredicate = HasFP8Insts, is_gfx940_xdl = 1
+} // End SubtargetPredicate = HasFP8Insts, is_gfx942_xdl = 1
let SubtargetPredicate = HasGFX950Insts in {
defm V_SMFMAC_F32_16X16X64_F16 : SMFMACInst<"v_smfmac_f32_16x16x64_f16", "F32_16X16X64_F16", int_amdgcn_smfmac_f32_16x16x64_f16>;
@@ -1137,7 +1137,7 @@ def MAIInstInfoTable : GenericTable {
let FilterClass = "MAIInst";
let CppTypeName = "MAIInstInfo";
let Fields = [
- "Opcode", "is_dgemm", "is_gfx940_xdl"
+ "Opcode", "is_dgemm", "is_gfx942_xdl"
];
let PrimaryKey = ["Opcode"];
@@ -1947,7 +1947,7 @@ multiclass VOP3P_Real_MFMA_gfx90a<bits<7> op> {
}
}
-multiclass VOP3P_Real_MFMA_gfx940_aliases<string NameFrom, string NameTo, string Op,
+multiclass VOP3P_Real_MFMA_gfx942_aliases<string NameFrom, string NameTo, string Op,
VOP3_Pseudo PS_ACD = !cast<VOP3_Pseudo>(Op # "_e64"),
VOP3_Pseudo PS_VCD = !cast<VOP3_Pseudo>(Op # "_vgprcd" # "_e64"),
VOPProfile Pfl_ACD = PS_ACD.Pfl,
@@ -1956,59 +1956,59 @@ multiclass VOP3P_Real_MFMA_gfx940_aliases<string NameFrom, string NameTo, string
let SubtargetPredicate = PS_ACD.SubtargetPredicate,
OtherPredicates = PS_ACD.OtherPredicates in {
def : InstAlias <NameTo # " " # PS_ACD.AsmOperands,
- (!cast<VOP3P_Real>(Op # "_gfx940_acd") Pfl_ACD.DstRC:$vdst,
+ (!cast<VOP3P_Real>(Op # "_gfx942_acd") Pfl_ACD.DstRC:$vdst,
Pfl_ACD.Src0RC64:$src0, Pfl_ACD.Src1RC64:$src1, Pfl_ACD.Src2RC64:$src2,
CBSZ:$cbsz, ABID:$abid, blgp:$blgp)>, PredicateControl;
def : InstAlias <NameTo # " " # PS_VCD.AsmOperands,
- (!cast<VOP3P_Real>(Op # "_gfx940_vcd") Pfl_VCD.DstRC:$vdst,
+ (!cast<VOP3P_Real>(Op # "_gfx942_vcd") Pfl_VCD.DstRC:$vdst,
Pfl_VCD.Src0RC64:$src0, Pfl_VCD.Src1RC64:$src1, Pfl_VCD.Src2RC64:$src2,
CBSZ:$cbsz, ABID:$abid, blgp:$blgp)>, PredicateControl;
}
}
}
-multiclass VOP3P_Real_MFMA_gfx940<bits<7> op, string Name = !cast<VOP3_Pseudo>(NAME#"_e64").Mnemonic,
+multiclass VOP3P_Real_MFMA_gfx942<bits<7> op, string Name = !cast<VOP3_Pseudo>(NAME#"_e64").Mnemonic,
VOP3_Pseudo PS_ACD = !cast<VOP3_Pseudo>(NAME # "_e64"),
VOP3_Pseudo PS_VCD = !cast<VOP3_Pseudo>(NAME # "_vgprcd" # "_e64")> {
- let AssemblerPredicate = isGFX940Plus,
- DecoderNamespace = "GFX940",
+ let AssemblerPredicate = isGFX942Plus,
+ DecoderNamespace = "GFX942",
AsmString = Name # PS_ACD.AsmOperands, Constraints = "" in {
- def _gfx940_acd : VOP3P_Real<PS_ACD, SIEncodingFamily.GFX940>,
+ def _gfx942_acd : VOP3P_Real<PS_ACD, SIEncodingFamily.GFX942>,
VOP3Pe_MAI <op, PS_ACD.Pfl, 1>;
- def _gfx940_vcd : VOP3P_Real<PS_VCD, SIEncodingFamily.GFX940>,
+ def _gfx942_vcd : VOP3P_Real<PS_VCD, SIEncodingFamily.GFX942>,
VOP3Pe_MAI <op, PS_VCD.Pfl, 0>;
- } // End AssemblerPredicate = isGFX940Plus, DecoderNamespace = "GFX940"
+ } // End AssemblerPredicate = isGFX942Plus, DecoderNamespace = "GFX942"
let SubtargetPredicate = PS_ACD.SubtargetPredicate,
OtherPredicates = PS_ACD.OtherPredicates,
- AssemblerPredicate = isGFX940Plus
+ AssemblerPredicate = isGFX942Plus
in {
- defm : VOP3P_Real_MFMA_gfx940_aliases<Name, PS_ACD.Mnemonic, NAME>;
+ defm : VOP3P_Real_MFMA_gfx942_aliases<Name, PS_ACD.Mnemonic, NAME>;
if !ne(!subst("_1k", "", PS_ACD.Mnemonic), PS_ACD.Mnemonic) then
- defm : VOP3P_Real_MFMA_gfx940_aliases<Name, !subst("_1k", "", PS_ACD.Mnemonic), NAME>;
+ defm : VOP3P_Real_MFMA_gfx942_aliases<Name, !subst("_1k", "", PS_ACD.Mnemonic), NAME>;
}
}
-multiclass VOP3P_Real_MFMA_F8F6F4_gfx940<bits<7> op, string Name = !cast<VOP3_Pseudo>(NAME#"_e64").Mnemonic,
+multiclass VOP3P_Real_MFMA_F8F6F4_gfx942<bits<7> op, string Name = !cast<VOP3_Pseudo>(NAME#"_e64").Mnemonic,
VOP3_Pseudo PS_ACD = !cast<VOP3_Pseudo>(NAME # "_e64"),
VOP3_Pseudo PS_VCD = !cast<VOP3_Pseudo>(NAME # "_vgprcd" # "_e64")> {
defvar F8F8Name = !substr(NAME, 0, !sub(!size(NAME), !size("_fN_fM")))#"_f8_f8";
- let AssemblerPredicate = isGFX940Plus,
- DecoderNamespace = "GFX940",
+ let AssemblerPredicate = isGFX942Plus,
+ DecoderNamespace = "GFX942",
AsmString = Name # PS_ACD.AsmOperands,
Constraints = "" in {
- def _gfx940_acd : VOP3P_Real<PS_ACD, SIEncodingFamily.GFX940>,
+ def _gfx942_acd : VOP3P_Real<PS_ACD, SIEncodingFamily.GFX942>,
VOP3Pe_MAI <op, PS_ACD.Pfl, 1>,
- MFMA_F8F6F4_WithSizeTable_Helper<PS_ACD, F8F8Name#"_gfx940_acd">;
+ MFMA_F8F6F4_WithSizeTable_Helper<PS_ACD, F8F8Name#"_gfx942_acd">;
- def _gfx940_vcd : VOP3P_Real<PS_VCD, SIEncodingFamily.GFX940>,
+ def _gfx942_vcd : VOP3P_Real<PS_VCD, SIEncodingFamily.GFX942>,
VOP3Pe_MAI <op, PS_VCD.Pfl, 0>,
- MFMA_F8F6F4_WithSizeTable_Helper<PS_VCD, F8F8Name#"_gfx940_vcd">;
- } // End AssemblerPredicate = isGFX940Plus, DecoderNamespace = "GFX940"
+ MFMA_F8F6F4_WithSizeTable_Helper<PS_VCD, F8F8Name#"_gfx942_vcd">;
+ } // End AssemblerPredicate = isGFX942Plus, DecoderNamespace = "GFX942"
}
multiclass VOP3P_Real_MFMA_gfx950<bits<7> op, string Name = !cast<VOP3_Pseudo>(NAME#"_e64").Mnemonic,
@@ -2016,23 +2016,23 @@ multiclass VOP3P_Real_MFMA_gfx950<bits<7> op, string Name = !cast<VOP3_Pseudo>(N
VOP3_Pseudo PS_VCD = !cast<VOP3_Pseudo>(NAME # "_vgprcd" # "_e64")> {
let SubtargetPredicate = HasGFX950Insts,
AssemblerPredicate = HasGFX950Insts in {
- defm "" : VOP3P_Real_MFMA_gfx940<op, Name, PS_ACD, PS_VCD>;
+ defm "" : VOP3P_Real_MFMA_gfx942<op, Name, PS_ACD, PS_VCD>;
}
}
multiclass VOP3P_Real_MFMA_F8F6F4_gfx950_mc<bits<7> op, string Name> {
- defm _f8_f8 : VOP3P_Real_MFMA_F8F6F4_gfx940<op, Name>;
+ defm _f8_f8 : VOP3P_Real_MFMA_F8F6F4_gfx942<op, Name>;
let isAsmParserOnly = true in { // Disable ambiguous disassembly.
- defm _f8_f6 : VOP3P_Real_MFMA_F8F6F4_gfx940<op, Name>;
- defm _f6_f8 : VOP3P_Real_MFMA_F8F6F4_gfx940<op, Name>;
- defm _f8_f4 : VOP3P_Real_MFMA_F8F6F4_gfx940<op, Name>;
- defm _f4_f8 : VOP3P_Real_MFMA_F8F6F4_gfx940<op, Name>;
- defm _f6_f6 : VOP3P_Real_MFMA_F8F6F4_gfx940<op, Name>;
- defm _f6_f4 : VOP3P_Real_MFMA_F8F6F4_gfx940<op, Name>;
- defm _f4_f6 : VOP3P_Real_MFMA_F8F6F4_gfx940<op, Name>;
- defm _f4_f4 : VOP3P_Real_MFMA_F8F6F4_gfx940<op, Name>;
+ defm _f8_f6 : VOP3P_Real_MFMA_F8F6F4_gfx942<op, Name>;
+ defm _f6_f8 : VOP3P_Real_MFMA_F8F6F4_gfx942<op, Name>;
+ defm _f8_f4 : VOP3P_Real_MFMA_F8F6F4_gfx942<op, Name>;
+ defm _f4_f8 : VOP3P_Real_MFMA_F8F6F4_gfx942<op, Name>;
+ defm _f6_f6 : VOP3P_Real_MFMA_F8F6F4_gfx942<op, Name>;
+ defm _f6_f4 : VOP3P_Real_MFMA_F8F6F4_gfx942<op, Name>;
+ defm _f4_f6 : VOP3P_Real_MFMA_F8F6F4_gfx942<op, Name>;
+ defm _f4_f4 : VOP3P_Real_MFMA_F8F6F4_gfx942<op, Name>;
}
}
@@ -2043,15 +2043,15 @@ multiclass VOP3PX_Real_ScaledMFMA<bits<7> op> {
defvar F8F8Name = !substr(NAME, 0, !sub(!size(NAME), !size("_fN_fM")))#"_f8_f8";
let SubtargetPredicate = HasGFX950Insts,
- DecoderNamespace = "GFX940",
+ DecoderNamespace = "GFX942",
AsmString = Name # PS_ACD.AsmOperands, Constraints = "" in {
- def _gfx940_acd : VOP3P_Real<PS_ACD, SIEncodingFamily.GFX940>,
+ def _gfx942_acd : VOP3P_Real<PS_ACD, SIEncodingFamily.GFX942>,
VOP3PXe <op, PS_ACD.Pfl, /*acc_cd=*/1>,
- MFMA_F8F6F4_WithSizeTable_Helper<PS_ACD, F8F8Name#"_gfx940_acd">;
+ MFMA_F8F6F4_WithSizeTable_Helper<PS_ACD, F8F8Name#"_gfx942_acd">;
- def _gfx940_vcd : VOP3P_Real<PS_VCD, SIEncodingFamily.GFX940>,
+ def _gfx942_vcd : VOP3P_Real<PS_VCD, SIEncodingFamily.GFX942>,
VOP3PXe <op, PS_VCD.Pfl, /*acc_cd=*/0>,
- MFMA_F8F6F4_WithSizeTable_Helper<PS_VCD, F8F8Name#"_gfx940_vcd">;
+ MFMA_F8F6F4_WithSizeTable_Helper<PS_VCD, F8F8Name#"_gfx942_vcd">;
}
}
@@ -2084,18 +2084,18 @@ multiclass VOP3P_Real_MFMA_vi_gfx90a<bits<7> op> :
VOP3P_Real_MFMA_gfx90a <op>,
VOP3P_Real_MFMA_vi <op>;
-multiclass VOP3P_Real_MFMA<bits<7> op, string GFX940Name = !cast<VOP3_Pseudo>(NAME#"_e64").Mnemonic> :
+multiclass VOP3P_Real_MFMA<bits<7> op, string GFX942Name = !cast<VOP3_Pseudo>(NAME#"_e64").Mnemonic> :
VOP3P_Real_MFMA_vi_gfx90a <op>,
- VOP3P_Real_MFMA_gfx940 <op, GFX940Name>;
+ VOP3P_Real_MFMA_gfx942 <op, GFX942Name>;
multiclass VOP3P_Real_SMFMAC<bits<7> op, string alias> {
- def _gfx940 : VOP3P_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.VI>,
+ def _gfx942 : VOP3P_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.VI>,
VOP3Pe_SMFMAC <op> {
- let AssemblerPredicate = isGFX940Plus;
+ let AssemblerPredicate = isGFX942Plus;
let DecoderNamespace = "GFX8";
}
def : AMDGPUMnemonicAlias<alias, !cast<VOP3_Pseudo>(NAME#"_e64").Mnemonic> {
- let AssemblerPredicate = isGFX940Plus;
+ let AssemblerPredicate = isGFX942Plus;
}
}
@@ -2193,28 +2193,28 @@ defm V_MFMA_F32_32X32X64_F8F6F4 : VOP3P_Real_MFMA_F8F6F4_gfx950_mc <0x2e, "v_mf
defm V_MFMA_SCALE_F32_32X32X64_F8F6F4 : VOP3PX_Real_ScaledMFMA_F8F6F4_mc <0x2e>;
defm V_DOT2_F32_BF16 : VOP3P_Real_vi<0x1a>;
-defm V_MFMA_I32_32X32X16I8 : VOP3P_Real_MFMA_gfx940 <0x56, "v_mfma_i32_32x32x16_i8">;
-defm V_MFMA_I32_16X16X32I8 : VOP3P_Real_MFMA_gfx940 <0x57, "v_mfma_i32_16x16x32_i8">;
-defm V_MFMA_F32_16X16X8XF32 : VOP3P_Real_MFMA_gfx940 <0x3e, "v_mfma_f32_16x16x8_xf32">;
-defm V_MFMA_F32_32X32X4XF32 : VOP3P_Real_MFMA_gfx940 <0x3f, "v_mfma_f32_32x32x4_xf32">;
-
-defm V_MFMA_F32_16X16X32_BF8_BF8 : VOP3P_Real_MFMA_gfx940 <0x70>;
-defm V_MFMA_F32_16X16X32_BF8_FP8 : VOP3P_Real_MFMA_gfx940 <0x71>;
-defm V_MFMA_F32_16X16X32_FP8_BF8 : VOP3P_Real_MFMA_gfx940 <0x72>;
-defm V_MFMA_F32_16X16X32_FP8_FP8 : VOP3P_Real_MFMA_gfx940 <0x73>;
-defm V_MFMA_F32_32X32X16_BF8_BF8 : VOP3P_Real_MFMA_gfx940 <0x74>;
-defm V_MFMA_F32_32X32X16_BF8_FP8 : VOP3P_Real_MFMA_gfx940 <0x75>;
-defm V_MFMA_F32_32X32X16_FP8_BF8 : VOP3P_Real_MFMA_gfx940 <0x76>;
-defm V_MFMA_F32_32X32X16_FP8_FP8 : VOP3P_Real_MFMA_gfx940 <0x77>;
-
-defm V_MFMA_F32_32X32X4BF16_1K : VOP3P_Real_MFMA_gfx940 <0x5d, "v_mfma_f32_32x32x4_2b_bf16">;
-defm V_MFMA_F32_16X16X4BF16_1K : VOP3P_Real_MFMA_gfx940 <0x5e, "v_mfma_f32_16x16x4_4b_bf16">;
-defm V_MFMA_F32_4X4X4BF16_1K : VOP3P_Real_MFMA_gfx940 <0x5f, "v_mfma_f32_4x4x4_16b_bf16">;
-defm V_MFMA_F32_32X32X8BF16_1K : VOP3P_Real_MFMA_gfx940 <0x60, "v_mfma_f32_32x32x8_bf16">;
-defm V_MFMA_F32_16X16X16BF16_1K : VOP3P_Real_MFMA_gfx940 <0x61, "v_mfma_f32_16x16x16_bf16">;
-
-defm V_MFMA_F64_16X16X4F64 : VOP3P_Real_MFMA_gfx940 <0x6e, "v_mfma_f64_16x16x4_f64">;
-defm V_MFMA_F64_4X4X4F64 : VOP3P_Real_MFMA_gfx940 <0x6f, "v_mfma_f64_4x4x4_4b_f64">;
+defm V_MFMA_I32_32X32X16I8 : VOP3P_Real_MFMA_gfx942 <0x56, "v_mfma_i32_32x32x16_i8">;
+defm V_MFMA_I32_16X16X32I8 : VOP3P_Real_MFMA_gfx942 <0x57, "v_mfma_i32_16x16x32_i8">;
+defm V_MFMA_F32_16X16X8XF32 : VOP3P_Real_MFMA_gfx942 <0x3e, "v_mfma_f32_16x16x8_xf32">;
+defm V_MFMA_F32_32X32X4XF32 : VOP3P_Real_MFMA_gfx942 <0x3f, "v_mfma_f32_32x32x4_xf32">;
+
+defm V_MFMA_F32_16X16X32_BF8_BF8 : VOP3P_Real_MFMA_gfx942 <0x70>;
+defm V_MFMA_F32_16X16X32_BF8_FP8 : VOP3P_Real_MFMA_gfx942 <0x71>;
+defm V_MFMA_F32_16X16X32_FP8_BF8 : VOP3P_Real_MFMA_gfx942 <0x72>;
+defm V_MFMA_F32_16X16X32_FP8_FP8 : VOP3P_Real_MFMA_gfx942 <0x73>;
+defm V_MFMA_F32_32X32X16_BF8_BF8 : VOP3P_Real_MFMA_gfx942 <0x74>;
+defm V_MFMA_F32_32X32X16_BF8_FP8 : VOP3P_Real_MFMA_gfx942 <0x75>;
+defm V_MFMA_F32_32X32X16_FP8_BF8 : VOP3P_Real_MFMA_gfx942 <0x76>;
+defm V_MFMA_F32_32X32X16_FP8_FP8 : VOP3P_Real_MFMA_gfx942 <0x77>;
+
+defm V_MFMA_F32_32X32X4BF16_1K : VOP3P_Real_MFMA_gfx942 <0x5d, "v_mfma_f32_32x32x4_2b_bf16">;
+defm V_MFMA_F32_16X16X4BF16_1K : VOP3P_Real_MFMA_gfx942 <0x5e, "v_mfma_f32_16x16x4_4b_bf16">;
+defm V_MFMA_F32_4X4X4BF16_1K : VOP3P_Real_MFMA_gfx942 <0x5f, "v_mfma_f32_4x4x4_16b_bf16">;
+defm V_MFMA_F32_32X32X8BF16_1K : VOP3P_Real_MFMA_gfx942 <0x60, "v_mfma_f32_32x32x8_bf16">;
+defm V_MFMA_F32_16X16X16BF16_1K : VOP3P_Real_MFMA_gfx942 <0x61, "v_mfma_f32_16x16x16_bf16">;
+
+defm V_MFMA_F64_16X16X4F64 : VOP3P_Real_MFMA_gfx942 <0x6e, "v_mfma_f64_16x16x4_f64">;
+defm V_MFMA_F64_4X4X4F64 : VOP3P_Real_MFMA_gfx942 <0x6f, "v_mfma_f64_4x4x4_4b_f64">;
defm V_SMFMAC_F32_16X16X32_F16 : VOP3P_Real_SMFMAC <0x62, "v_smfmac_f32_16x16x32f16">;
defm V_SMFMAC_F32_32X32X16_F16 : VOP3P_Real_SMFMAC <0x64, "v_smfmac_f32_32x32x16f16">;
diff --git a/llvm/lib/TargetParser/TargetParser.cpp b/llvm/lib/TargetParser/TargetParser.cpp
index 0a605dfd017cb..13cb0d6461eb8 100644
--- a/llvm/lib/TargetParser/TargetParser.cpp
+++ b/llvm/lib/TargetParser/TargetParser.cpp
@@ -104,8 +104,6 @@ constexpr GPUInfo AMDGCNGPUs[] = {
{{"gfx909"}, {"gfx909"}, GK_GFX909, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK},
{{"gfx90a"}, {"gfx90a"}, GK_GFX90A, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC},
{{"gfx90c"}, {"gfx90c"}, GK_GFX90C, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK},
- {{"gfx940"}, {"gfx940"}, GK_GFX940, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC},
- {{"gfx941"}, {"gfx941"}, GK_GFX941, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC},
{{"gfx942"}, {"gfx942"}, GK_GFX942, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC},
{{"gfx950"}, {"gfx950"}, GK_GFX950, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_XNACK|FEATURE_SRAMECC},
{{"gfx1010"}, {"gfx1010"}, GK_GFX1010, FEATURE_FAST_FMA_F32|FEATURE_FAST_DENORMAL_F32|FEATURE_WAVE32|FEATURE_XNACK|FEATURE_WGP},
@@ -260,8 +258,6 @@ AMDGPU::IsaVersion AMDGPU::getIsaVersion(StringRef GPU) {
case GK_GFX909: return {9, 0, 9};
case GK_GFX90A: return {9, 0, 10};
case GK_GFX90C: return {9, 0, 12};
- case GK_GFX940: return {9, 4, 0};
- case GK_GFX941: return {9, 4, 1};
case GK_GFX942: return {9, 4, 2};
case GK_GFX950: return {9, 5, 0};
case GK_GFX1010: return {10, 1, 0};
@@ -362,7 +358,7 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
Features["gfx8-insts"] = true;
Features["gfx9-insts"] = true;
Features["gfx90a-insts"] = true;
- Features["gfx940-insts"] = true;
+ Features["gfx942-insts"] = true;
Features["gfx950-insts"] = true;
Features["gfx10-insts"] = true;
Features["gfx10-3-insts"] = true;
@@ -506,15 +502,13 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
Features["gfx950-insts"] = true;
[[fallthrough]];
case GK_GFX942:
- case GK_GFX941:
- case GK_GFX940:
Features["fp8-insts"] = true;
Features["fp8-conversion-insts"] = true;
if (Kind != GK_GFX950)
Features["xf32-insts"] = true;
[[fallthrough]];
case GK_GFX9_4_GENERIC:
- Features["gfx940-insts"] = true;
+ Features["gfx942-insts"] = true;
Features["atomic-ds-pk-add-16-insts"] = true;
Features["atomic-flat-pk-add-16-insts"] = true;
Features["atomic-global-pk-add-bf16-inst"] = true;
diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp
index 2da08127f20a8..fdae09ac767e6 100644
--- a/llvm/tools/llvm-readobj/ELFDumper.cpp
+++ b/llvm/tools/llvm-readobj/ELFDumper.cpp
@@ -1624,8 +1624,6 @@ const EnumEntry<unsigned> ElfHeaderMipsFlags[] = {
ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX909, "gfx909"), \
ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX90A, "gfx90a"), \
ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX90C, "gfx90c"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX940, "gfx940"), \
- ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX941, "gfx941"), \
ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX942, "gfx942"), \
ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX950, "gfx950"), \
ENUM_ENT(EF_AMDGPU_MACH_AMDGCN_GFX1010, "gfx1010"), \
From 707cfbd95712689ddcbfbe9c51ec612c737c4e6e Mon Sep 17 00:00:00 2001
From: Fabian Ritter <fabian.ritter at amd.com>
Date: Wed, 12 Feb 2025 03:44:08 -0500
Subject: [PATCH 2/3] Revert part of the initial PR to leave the gfx940-insts subtarget feature alone.
Also revert the FeatureForceStoreSC0SC1 removal (the SIMemoryLegalizer hook
that forces SC0 and SC1 on stores) so that it can be handled in a separate
PR.
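For reference, a minimal standalone sketch of the policy the retained feature
implements, condensed from the SIMemoryLegalizer hunk further down in this
patch: when the feature is enabled, stores that touch the scratch, global, or
"other" address spaces get both SC0 and SC1 forced on. This is not LLVM code;
the enum values and function name below are illustrative only.

    // Standalone model; compiles on its own. Names are illustrative, not
    // the LLVM definitions.
    #include <cstdint>
    #include <cstdio>

    enum AddrSpace : uint32_t { NONE = 0, SCRATCH = 1, GLOBAL = 2, OTHER = 4, LDS = 8 };
    enum CPolBits : uint32_t { SC0 = 1, SC1 = 2 };

    // Cache-policy bits a store should carry under force-store-sc0-sc1.
    uint32_t forceStoreBits(bool hasForceStoreSC0SC1, uint32_t instrAddrSpace) {
      if (hasForceStoreSC0SC1 &&
          (instrAddrSpace & (SCRATCH | GLOBAL | OTHER)) != NONE)
        return SC0 | SC1;
      return 0;
    }

    int main() {
      std::printf("%u\n", forceStoreBits(true, GLOBAL)); // prints 3 (SC0|SC1)
      std::printf("%u\n", forceStoreBits(true, LDS));    // prints 0
      return 0;
    }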
---
clang/include/clang/Basic/BuiltinsAMDGPU.def | 4 +-
.../CodeGenCXX/dynamic-cast-address-space.cpp | 4 +-
clang/test/CodeGenOpenCL/amdgpu-features.cl | 6 +-
.../builtins-amdgcn-fp-atomics-gfx90a-err.cl | 2 +-
llvm/lib/Target/AMDGPU/AMDGPU.td | 46 +++---
.../AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 28 ++--
llvm/lib/Target/AMDGPU/BUFInstructions.td | 22 +--
.../Disassembler/AMDGPUDisassembler.cpp | 6 +-
llvm/lib/Target/AMDGPU/FLATInstructions.td | 94 +++++------
.../lib/Target/AMDGPU/GCNHazardRecognizer.cpp | 93 +++++------
llvm/lib/Target/AMDGPU/GCNSubtarget.h | 41 +++--
.../AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp | 21 ++-
llvm/lib/Target/AMDGPU/SIDefines.h | 4 +-
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 4 +-
llvm/lib/Target/AMDGPU/SIInstrInfo.td | 4 +-
llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp | 44 +++--
.../Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp | 10 +-
.../Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 8 +-
llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h | 6 +-
llvm/lib/Target/AMDGPU/VOP1Instructions.td | 4 +-
llvm/lib/Target/AMDGPU/VOP2Instructions.td | 8 +-
llvm/lib/Target/AMDGPU/VOP3Instructions.td | 8 +-
llvm/lib/Target/AMDGPU/VOP3PInstructions.td | 154 +++++++++---------
llvm/lib/TargetParser/TargetParser.cpp | 4 +-
24 files changed, 328 insertions(+), 297 deletions(-)
diff --git a/clang/include/clang/Basic/BuiltinsAMDGPU.def b/clang/include/clang/Basic/BuiltinsAMDGPU.def
index e7e5ed77f432b..39e295aced96b 100644
--- a/clang/include/clang/Basic/BuiltinsAMDGPU.def
+++ b/clang/include/clang/Basic/BuiltinsAMDGPU.def
@@ -248,13 +248,13 @@ TARGET_BUILTIN(__builtin_amdgcn_flat_atomic_fmax_f64, "dd*0d", "t", "gfx90a-inst
TARGET_BUILTIN(__builtin_amdgcn_ds_atomic_fadd_f64, "dd*3d", "t", "gfx90a-insts")
TARGET_BUILTIN(__builtin_amdgcn_ds_atomic_fadd_f32, "ff*3f", "t", "gfx8-insts")
-TARGET_BUILTIN(__builtin_amdgcn_flat_atomic_fadd_f32, "ff*0f", "t", "gfx942-insts")
+TARGET_BUILTIN(__builtin_amdgcn_flat_atomic_fadd_f32, "ff*0f", "t", "gfx940-insts")
TARGET_BUILTIN(__builtin_amdgcn_flat_atomic_fadd_v2f16, "V2hV2h*0V2h", "t", "atomic-flat-pk-add-16-insts")
TARGET_BUILTIN(__builtin_amdgcn_flat_atomic_fadd_v2bf16, "V2sV2s*0V2s", "t", "atomic-flat-pk-add-16-insts")
TARGET_BUILTIN(__builtin_amdgcn_global_atomic_fadd_v2bf16, "V2sV2s*1V2s", "t", "atomic-global-pk-add-bf16-inst")
TARGET_BUILTIN(__builtin_amdgcn_ds_atomic_fadd_v2bf16, "V2sV2s*3V2s", "t", "atomic-ds-pk-add-16-insts")
TARGET_BUILTIN(__builtin_amdgcn_ds_atomic_fadd_v2f16, "V2hV2h*3V2h", "t", "atomic-ds-pk-add-16-insts")
-TARGET_BUILTIN(__builtin_amdgcn_global_load_lds, "vv*1v*3IUiIiIUi", "t", "gfx942-insts")
+TARGET_BUILTIN(__builtin_amdgcn_global_load_lds, "vv*1v*3IUiIiIUi", "t", "gfx940-insts")
//===----------------------------------------------------------------------===//
// Deep learning builtins.
diff --git a/clang/test/CodeGenCXX/dynamic-cast-address-space.cpp b/clang/test/CodeGenCXX/dynamic-cast-address-space.cpp
index f07dbd9a29b98..0460352cf7ffc 100644
--- a/clang/test/CodeGenCXX/dynamic-cast-address-space.cpp
+++ b/clang/test/CodeGenCXX/dynamic-cast-address-space.cpp
@@ -112,9 +112,9 @@ const B& f(A *a) {
// CHECK: attributes #[[ATTR3]] = { nounwind }
// CHECK: attributes #[[ATTR4]] = { noreturn }
//.
-// WITH-NONZERO-DEFAULT-AS: attributes #[[ATTR0]] = { mustprogress noinline optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+16-bit-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+bf8-cvt-scale-insts,+bitop3-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot13-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+f16bf16-to-fp6bf6-cvt-scale-insts,+f32-to-f16bf16-cvt-sr-insts,+fp4-cvt-scale-insts,+fp6bf6-cvt-scale-insts,+fp8-conversion-insts,+fp8-cvt-scale-insts,+fp8-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx942-insts,+gfx950-insts,+gws,+image-insts,+mai-insts,+permlane16-swap,+permlane32-swap,+prng-inst,+s-memrealtime,+s-memtime-inst,+wavefrontsize32,+wavefrontsize64" }
+// WITH-NONZERO-DEFAULT-AS: attributes #[[ATTR0]] = { mustprogress noinline optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+16-bit-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+bf8-cvt-scale-insts,+bitop3-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot13-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+f16bf16-to-fp6bf6-cvt-scale-insts,+f32-to-f16bf16-cvt-sr-insts,+fp4-cvt-scale-insts,+fp6bf6-cvt-scale-insts,+fp8-conversion-insts,+fp8-cvt-scale-insts,+fp8-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+gfx950-insts,+gws,+image-insts,+mai-insts,+permlane16-swap,+permlane32-swap,+prng-inst,+s-memrealtime,+s-memtime-inst,+wavefrontsize32,+wavefrontsize64" }
// WITH-NONZERO-DEFAULT-AS: attributes #[[ATTR1:[0-9]+]] = { nounwind willreturn memory(read) }
-// WITH-NONZERO-DEFAULT-AS: attributes #[[ATTR2:[0-9]+]] = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+16-bit-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+bf8-cvt-scale-insts,+bitop3-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot13-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+f16bf16-to-fp6bf6-cvt-scale-insts,+f32-to-f16bf16-cvt-sr-insts,+fp4-cvt-scale-insts,+fp6bf6-cvt-scale-insts,+fp8-conversion-insts,+fp8-cvt-scale-insts,+fp8-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx942-insts,+gfx950-insts,+gws,+image-insts,+mai-insts,+permlane16-swap,+permlane32-swap,+prng-inst,+s-memrealtime,+s-memtime-inst,+wavefrontsize32,+wavefrontsize64" }
+// WITH-NONZERO-DEFAULT-AS: attributes #[[ATTR2:[0-9]+]] = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-features"="+16-bit-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+bf8-cvt-scale-insts,+bitop3-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot11-insts,+dot12-insts,+dot13-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dot8-insts,+dot9-insts,+dpp,+f16bf16-to-fp6bf6-cvt-scale-insts,+f32-to-f16bf16-cvt-sr-insts,+fp4-cvt-scale-insts,+fp6bf6-cvt-scale-insts,+fp8-conversion-insts,+fp8-cvt-scale-insts,+fp8-insts,+gfx10-3-insts,+gfx10-insts,+gfx11-insts,+gfx12-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+gfx950-insts,+gws,+image-insts,+mai-insts,+permlane16-swap,+permlane32-swap,+prng-inst,+s-memrealtime,+s-memtime-inst,+wavefrontsize32,+wavefrontsize64" }
// WITH-NONZERO-DEFAULT-AS: attributes #[[ATTR3]] = { nounwind }
// WITH-NONZERO-DEFAULT-AS: attributes #[[ATTR4]] = { noreturn }
//.
diff --git a/clang/test/CodeGenOpenCL/amdgpu-features.cl b/clang/test/CodeGenOpenCL/amdgpu-features.cl
index 2c9f3c78b1df2..d12dcead6fadf 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-features.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-features.cl
@@ -83,9 +83,9 @@
// GFX909: "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
// GFX90A: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-fadd-rtn-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
// GFX90C: "target-features"="+16-bit-insts,+ci-insts,+dpp,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
-// GFX942: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-conversion-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx942-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,+xf32-insts"
-// GFX9_4_Generic: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx942-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
-// GFX950: "target-features"="+16-bit-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+bf8-cvt-scale-insts,+bitop3-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot12-insts,+dot13-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+f16bf16-to-fp6bf6-cvt-scale-insts,+f32-to-f16bf16-cvt-sr-insts,+fp4-cvt-scale-insts,+fp6bf6-cvt-scale-insts,+fp8-conversion-insts,+fp8-cvt-scale-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx942-insts,+gfx950-insts,+mai-insts,+permlane16-swap,+permlane32-swap,+prng-inst,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
+// GFX942: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+fp8-conversion-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64,+xf32-insts"
+// GFX9_4_Generic: "target-features"="+16-bit-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+mai-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
+// GFX950: "target-features"="+16-bit-insts,+ashr-pk-insts,+atomic-buffer-global-pk-add-f16-insts,+atomic-buffer-pk-add-bf16-inst,+atomic-ds-pk-add-16-insts,+atomic-fadd-rtn-insts,+atomic-flat-pk-add-16-insts,+atomic-global-pk-add-bf16-inst,+bf8-cvt-scale-insts,+bitop3-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot12-insts,+dot13-insts,+dot2-insts,+dot3-insts,+dot4-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+f16bf16-to-fp6bf6-cvt-scale-insts,+f32-to-f16bf16-cvt-sr-insts,+fp4-cvt-scale-insts,+fp6bf6-cvt-scale-insts,+fp8-conversion-insts,+fp8-cvt-scale-insts,+fp8-insts,+gfx8-insts,+gfx9-insts,+gfx90a-insts,+gfx940-insts,+gfx950-insts,+mai-insts,+permlane16-swap,+permlane32-swap,+prng-inst,+s-memrealtime,+s-memtime-inst,+wavefrontsize64"
// GFX1010: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
// GFX1011: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
// GFX1012: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot10-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst,+wavefrontsize32"
diff --git a/clang/test/CodeGenOpenCL/builtins-amdgcn-fp-atomics-gfx90a-err.cl b/clang/test/CodeGenOpenCL/builtins-amdgcn-fp-atomics-gfx90a-err.cl
index 86d84005133bc..f651ce349e206 100644
--- a/clang/test/CodeGenOpenCL/builtins-amdgcn-fp-atomics-gfx90a-err.cl
+++ b/clang/test/CodeGenOpenCL/builtins-amdgcn-fp-atomics-gfx90a-err.cl
@@ -9,7 +9,7 @@ typedef short __attribute__((ext_vector_type(2))) short2;
void test_atomic_fadd(__global half2 *addrh2, __local half2 *addrh2l, half2 xh2,
__global short2 *addrs2, __local short2 *addrs2l, short2 xs2,
__global float *addrf, float xf) {
- __builtin_amdgcn_flat_atomic_fadd_f32(addrf, xf); // expected-error{{'__builtin_amdgcn_flat_atomic_fadd_f32' needs target feature gfx942-insts}}
+ __builtin_amdgcn_flat_atomic_fadd_f32(addrf, xf); // expected-error{{'__builtin_amdgcn_flat_atomic_fadd_f32' needs target feature gfx940-insts}}
__builtin_amdgcn_flat_atomic_fadd_v2f16(addrh2, xh2); // expected-error{{'__builtin_amdgcn_flat_atomic_fadd_v2f16' needs target feature atomic-flat-pk-add-16-insts}}
__builtin_amdgcn_flat_atomic_fadd_v2bf16(addrs2, xs2); // expected-error{{'__builtin_amdgcn_flat_atomic_fadd_v2bf16' needs target feature atomic-flat-pk-add-16-insts}}
__builtin_amdgcn_global_atomic_fadd_v2bf16(addrs2, xs2); // expected-error{{'__builtin_amdgcn_global_atomic_fadd_v2bf16' needs target feature atomic-global-pk-add-bf16-inst}}
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index e0da312c51a82..3aabca49b249e 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -372,10 +372,10 @@ def FeatureGFX90AInsts : SubtargetFeature<"gfx90a-insts",
// [HasAtomicFMinFMaxF64GlobalInsts, HasAtomicFMinFMaxF64FlatInsts] // TODO
>;
-def FeatureGFX942Insts : SubtargetFeature<"gfx942-insts",
- "GFX942Insts",
+def FeatureGFX940Insts : SubtargetFeature<"gfx940-insts",
+ "GFX940Insts",
"true",
- "Additional instructions for GFX942+"
+ "Additional instructions for GFX940+"
>;
def FeaturePermlane16Swap : SubtargetFeature<"permlane16-swap",
@@ -1040,6 +1040,12 @@ def FeatureVALUTransUseHazard : SubtargetFeature<"valu-trans-use-hazard",
"Hazard when TRANS instructions are closely followed by a use of the result"
>;
+def FeatureForceStoreSC0SC1 : SubtargetFeature<"force-store-sc0-sc1",
+ "HasForceStoreSC0SC1",
+ "true",
+ "Has SC0 and SC1 on stores"
+>;
+
def FeatureSALUFloatInsts : SubtargetFeature<"salu-float",
"HasSALUFloatInsts",
"true",
@@ -1558,7 +1564,7 @@ def FeatureISAVersion9_0_C : FeatureSet<
def FeatureISAVersion9_4_Common : FeatureSet<
[FeatureGFX9,
FeatureGFX90AInsts,
- FeatureGFX942Insts,
+ FeatureGFX940Insts,
FeatureFmaMixInsts,
FeatureLDSBankCount32,
FeatureDLInsts,
@@ -2047,20 +2053,20 @@ def isGFX8GFX9NotGFX90A :
AssemblerPredicate<(all_of FeatureGFX8Insts, FeatureGCN3Encoding, (not FeatureGFX90AInsts))>;
def isGFX90AOnly :
- Predicate<"Subtarget->hasGFX90AInsts() && !Subtarget->hasGFX942Insts()">,
- AssemblerPredicate<(all_of FeatureGFX90AInsts, (not FeatureGFX942Insts))>;
+ Predicate<"Subtarget->hasGFX90AInsts() && !Subtarget->hasGFX940Insts()">,
+ AssemblerPredicate<(all_of FeatureGFX90AInsts, (not FeatureGFX940Insts))>;
def isGFX908orGFX90A :
- Predicate<"Subtarget->hasMAIInsts() && !Subtarget->hasGFX942Insts()">,
- AssemblerPredicate<(all_of FeatureMAIInsts, (not FeatureGFX942Insts))>;
+ Predicate<"Subtarget->hasMAIInsts() && !Subtarget->hasGFX940Insts()">,
+ AssemblerPredicate<(all_of FeatureMAIInsts, (not FeatureGFX940Insts))>;
-def isGFX942Plus :
- Predicate<"Subtarget->hasGFX942Insts()">,
- AssemblerPredicate<(all_of FeatureGFX942Insts)>;
+def isGFX940Plus :
+ Predicate<"Subtarget->hasGFX940Insts()">,
+ AssemblerPredicate<(all_of FeatureGFX940Insts)>;
-def isNotGFX942Plus :
- Predicate<"!Subtarget->hasGFX942Insts()">,
- AssemblerPredicate<(all_of (not FeatureGFX942Insts))>;
+def isNotGFX940Plus :
+ Predicate<"!Subtarget->hasGFX940Insts()">,
+ AssemblerPredicate<(all_of (not FeatureGFX940Insts))>;
def HasGFX950Insts :
Predicate<"Subtarget->hasGFX950Insts()">,
@@ -2074,11 +2080,11 @@ def HasPermlane32Swap :
Predicate<"Subtarget->hasPermlane32Swap()">,
AssemblerPredicate<(all_of FeaturePermlane32Swap)>;
-def isGFX8GFX9NotGFX942 :
- Predicate<"!Subtarget->hasGFX942Insts() &&"
+def isGFX8GFX9NotGFX940 :
+ Predicate<"!Subtarget->hasGFX940Insts() &&"
"(Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS ||"
" Subtarget->getGeneration() == AMDGPUSubtarget::GFX9)">,
- AssemblerPredicate<(all_of FeatureGFX8Insts, FeatureGCN3Encoding, (not FeatureGFX942Insts))>;
+ AssemblerPredicate<(all_of FeatureGFX8Insts, FeatureGCN3Encoding, (not FeatureGFX940Insts))>;
def isGFX8GFX9 :
Predicate<"Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS ||"
@@ -2185,9 +2191,9 @@ def HasD16LoadStore : Predicate<"Subtarget->hasD16LoadStore()">,
AssemblerPredicate<(all_of FeatureGFX9Insts)>;
def HasFlatScratchSTMode : Predicate<"Subtarget->hasFlatScratchSTMode()">,
- AssemblerPredicate<(any_of FeatureGFX10_3Insts, FeatureGFX942Insts)>;
+ AssemblerPredicate<(any_of FeatureGFX10_3Insts, FeatureGFX940Insts)>;
def HasFlatScratchSVSMode : Predicate<"Subtarget->hasFlatScratchSVSMode()">,
- AssemblerPredicate<(any_of FeatureGFX942Insts, FeatureGFX11Insts)>;
+ AssemblerPredicate<(any_of FeatureGFX940Insts, FeatureGFX11Insts)>;
def HasGFX10_AEncoding : Predicate<"Subtarget->hasGFX10_AEncoding()">,
AssemblerPredicate<(all_of FeatureGFX10_AEncoding)>;
@@ -2295,7 +2301,7 @@ def HasPkMovB32 : Predicate<"Subtarget->hasPkMovB32()">,
def HasFmaakFmamkF32Insts :
Predicate<"Subtarget->hasFmaakFmamkF32Insts()">,
- AssemblerPredicate<(any_of FeatureGFX10Insts, FeatureGFX942Insts)>;
+ AssemblerPredicate<(any_of FeatureGFX10Insts, FeatureGFX940Insts)>;
def HasImageInsts : Predicate<"Subtarget->hasImageInsts()">,
AssemblerPredicate<(all_of FeatureImageInsts)>;
diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
index 13ace855caee4..54ed3789326cb 100644
--- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
+++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
@@ -1492,12 +1492,14 @@ class AMDGPUAsmParser : public MCTargetAsmParser {
return AMDGPU::isGFX9(getSTI());
}
- // TODO: isGFX90A is also true for GFX942. We need to clean it.
+ // TODO: isGFX90A is also true for GFX940. We need to clean it.
bool isGFX90A() const {
return AMDGPU::isGFX90A(getSTI());
}
- bool isGFX942() const { return AMDGPU::isGFX942(getSTI()); }
+ bool isGFX940() const {
+ return AMDGPU::isGFX940(getSTI());
+ }
bool isGFX9Plus() const {
return AMDGPU::isGFX9Plus(getSTI());
@@ -4631,7 +4633,7 @@ bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
uint64_t TSFlags = MII.get(Opc).TSFlags;
- if (isGFX942() && (TSFlags & SIInstrFlags::IsDOT)) {
+ if (isGFX940() && (TSFlags & SIInstrFlags::IsDOT)) {
int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
if (OpSelIdx != -1) {
if (Inst.getOperand(OpSelIdx).getImm() != 0)
@@ -4940,12 +4942,12 @@ bool AMDGPUAsmParser::validateBLGP(const MCInst &Inst,
bool IsNeg = StringRef(BLGPLoc.getPointer()).starts_with("neg:");
auto FB = getFeatureBits();
bool UsesNeg = false;
- if (FB[AMDGPU::FeatureGFX942Insts]) {
+ if (FB[AMDGPU::FeatureGFX940Insts]) {
switch (Opc) {
- case AMDGPU::V_MFMA_F64_16X16X4F64_gfx942_acd:
- case AMDGPU::V_MFMA_F64_16X16X4F64_gfx942_vcd:
- case AMDGPU::V_MFMA_F64_4X4X4F64_gfx942_acd:
- case AMDGPU::V_MFMA_F64_4X4X4F64_gfx942_vcd:
+ case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
+ case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
+ case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
+ case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
UsesNeg = true;
}
}
@@ -5060,7 +5062,7 @@ bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
}
}
- if (isGFX90A() && !isGFX942() && (CPol & CPol::SCC)) {
+ if (isGFX90A() && !isGFX940() && (CPol & CPol::SCC)) {
const uint64_t AllowSCCModifier = SIInstrFlags::MUBUF |
SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
SIInstrFlags::FLAT;
@@ -5079,7 +5081,7 @@ bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
if (TSFlags & SIInstrFlags::IsAtomicRet) {
if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
- Error(IDLoc, isGFX942() ? "instruction must use sc0"
+ Error(IDLoc, isGFX940() ? "instruction must use sc0"
: "instruction must use glc");
return false;
}
@@ -5088,8 +5090,8 @@ bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
StringRef CStr(S.getPointer());
S = SMLoc::getFromPointer(
- &CStr.data()[CStr.find(isGFX942() ? "sc0" : "glc")]);
- Error(S, isGFX942() ? "instruction must not use sc0"
+ &CStr.data()[CStr.find(isGFX940() ? "sc0" : "glc")]);
+ Error(S, isGFX940() ? "instruction must not use sc0"
: "instruction must not use glc");
return false;
}
@@ -6655,7 +6657,7 @@ unsigned AMDGPUAsmParser::getCPolKind(StringRef Id, StringRef Mnemo,
bool &Disabling) const {
Disabling = Id.consume_front("no");
- if (isGFX942() && !Mnemo.starts_with("s_")) {
+ if (isGFX940() && !Mnemo.starts_with("s_")) {
return StringSwitch<unsigned>(Id)
.Case("nt", AMDGPU::CPol::NT)
.Case("sc0", AMDGPU::CPol::SC0)
diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td
index a48115fbfb272..f2686bdf56b41 100644
--- a/llvm/lib/Target/AMDGPU/BUFInstructions.td
+++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td
@@ -1146,7 +1146,7 @@ let OtherPredicates = [HasGFX10_BEncoding] in {
>;
}
-let SubtargetPredicate = isGFX8GFX9NotGFX942 in {
+let SubtargetPredicate = isGFX8GFX9NotGFX940 in {
def BUFFER_STORE_LDS_DWORD : MUBUF_Pseudo_Store_Lds <"buffer_store_lds_dword">;
}
@@ -1228,7 +1228,7 @@ defm BUFFER_STORE_FORMAT_D16_HI_X : MUBUF_Pseudo_Stores <
} // End HasD16LoadStore
-let SubtargetPredicate = isNotGFX942Plus in
+let SubtargetPredicate = isNotGFX940Plus in
def BUFFER_WBINVL1 : MUBUF_Invalidate <
"buffer_wbinvl1", int_amdgcn_buffer_wbinvl1
>;
@@ -1311,7 +1311,7 @@ let SubtargetPredicate = isGFX7Plus in {
// Instruction definitions for CI and newer.
//===----------------------------------------------------------------------===//
-let SubtargetPredicate = isNotGFX942Plus in
+let SubtargetPredicate = isNotGFX940Plus in
def BUFFER_WBINVL1_VOL : MUBUF_Invalidate <"buffer_wbinvl1_vol",
int_amdgcn_buffer_wbinvl1_vol>;
@@ -1341,7 +1341,7 @@ let SubtargetPredicate = HasAtomicFMinFMaxF64GlobalInsts in {
}
def BUFFER_INV : MUBUF_Invalidate<"buffer_inv"> {
- let SubtargetPredicate = isGFX942Plus;
+ let SubtargetPredicate = isGFX940Plus;
let has_glc = 1;
let has_sccb = 1;
let InOperandList = (ins CPol_0:$cpol);
@@ -3095,9 +3095,9 @@ multiclass MUBUF_Real_gfx90a <bits<7> op,
}
}
-class MUBUF_Real_gfx942 <bits<7> op, MUBUF_Pseudo ps> :
- MUBUF_Real_Base_vi<op, ps, SIEncodingFamily.GFX942> {
- let AssemblerPredicate = isGFX942Plus;
+class MUBUF_Real_gfx940 <bits<7> op, MUBUF_Pseudo ps> :
+ MUBUF_Real_Base_vi<op, ps, SIEncodingFamily.GFX940> {
+ let AssemblerPredicate = isGFX940Plus;
let DecoderNamespace = "GFX9";
let AsmString = ps.Mnemonic # ps.AsmOperands;
@@ -3116,7 +3116,7 @@ multiclass MUBUF_Real_vi_gfx90a<bits<7> op, bit isTFE = 0> : MUBUF_Real_vi<op> {
let AssemblerPredicate = isGFX90AOnly in
defm NAME : MUBUF_Real_gfx90a<op, 0>;
- def _gfx942 : MUBUF_Real_gfx942<op, ps>;
+ def _gfx940 : MUBUF_Real_gfx940<op, ps>;
}
}
@@ -3314,9 +3314,9 @@ let AsmString = BUFFER_WBL2.Mnemonic, // drop flags
defm BUFFER_WBL2 : MUBUF_Real_gfx90a<0x28>;
defm BUFFER_INVL2 : MUBUF_Real_gfx90a<0x29>;
-let SubtargetPredicate = isGFX942Plus in {
-def BUFFER_WBL2_gfx942 : MUBUF_Real_gfx942<0x28, BUFFER_WBL2>;
-def BUFFER_INV_gfx942 : MUBUF_Real_gfx942<0x29, BUFFER_INV>;
+let SubtargetPredicate = isGFX940Plus in {
+def BUFFER_WBL2_gfx940 : MUBUF_Real_gfx940<0x28, BUFFER_WBL2>;
+def BUFFER_INV_gfx940 : MUBUF_Real_gfx940<0x29, BUFFER_INV>;
}
class MTBUF_Real_Base_vi <bits<4> op, MTBUF_Pseudo ps, int Enc> :
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index 6413dd0d6288a..308ab8e3b82c4 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -551,7 +551,7 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
} else if (Bytes.size() >= 16 &&
STI.hasFeature(AMDGPU::FeatureGFX950Insts)) {
DecoderUInt128 DecW = eat16Bytes(Bytes);
- if (tryDecodeInst(DecoderTableGFX942128, MI, DecW, Address, CS))
+ if (tryDecodeInst(DecoderTableGFX940128, MI, DecW, Address, CS))
break;
// Reinitialize Bytes
@@ -580,8 +580,8 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
tryDecodeInst(DecoderTableGFX9_DL64, MI, QW, Address, CS))
break;
- if (STI.hasFeature(AMDGPU::FeatureGFX942Insts) &&
- tryDecodeInst(DecoderTableGFX94264, MI, QW, Address, CS))
+ if (STI.hasFeature(AMDGPU::FeatureGFX940Insts) &&
+ tryDecodeInst(DecoderTableGFX94064, MI, QW, Address, CS))
break;
if (STI.hasFeature(AMDGPU::FeatureGFX90AInsts) &&
diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td
index 7bc668fce8fb7..7988a9ac0ce55 100644
--- a/llvm/lib/Target/AMDGPU/FLATInstructions.td
+++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td
@@ -1866,10 +1866,10 @@ multiclass FLAT_Real_AllAddr_vi<bits<7> op,
def _SADDR_vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME#"_SADDR"), has_sccb>;
}
-class FLAT_Real_gfx942 <bits<7> op, FLAT_Pseudo ps> :
+class FLAT_Real_gfx940 <bits<7> op, FLAT_Pseudo ps> :
FLAT_Real <op, ps>,
- SIMCInstr <ps.PseudoInstr, SIEncodingFamily.GFX942> {
- let AssemblerPredicate = isGFX942Plus;
+ SIMCInstr <ps.PseudoInstr, SIEncodingFamily.GFX940> {
+ let AssemblerPredicate = isGFX940Plus;
let DecoderNamespace = "GFX9";
let Inst{13} = ps.sve;
let Inst{25} = !if(ps.has_sccb, cpol{CPolBit.SCC}, ps.sccbValue);
@@ -1877,43 +1877,43 @@ class FLAT_Real_gfx942 <bits<7> op, FLAT_Pseudo ps> :
multiclass FLAT_Real_AllAddr_SVE_vi<bits<7> op> {
def _vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME)> {
- let AssemblerPredicate = isGFX8GFX9NotGFX942;
- let OtherPredicates = [isGFX8GFX9NotGFX942];
+ let AssemblerPredicate = isGFX8GFX9NotGFX940;
+ let OtherPredicates = [isGFX8GFX9NotGFX940];
}
def _SADDR_vi : FLAT_Real_vi<op, !cast<FLAT_Pseudo>(NAME#"_SADDR")> {
let DecoderNamespace = "GFX9";
}
- let AssemblerPredicate = isGFX942Plus, SubtargetPredicate = isGFX942Plus in {
- def _VE_gfx942 : FLAT_Real_gfx942<op, !cast<FLAT_Pseudo>(NAME)>;
- def _SVS_gfx942 : FLAT_Real_gfx942<op, !cast<FLAT_Pseudo>(NAME#"_SVS")>;
- def _ST_gfx942 : FLAT_Real_gfx942<op, !cast<FLAT_Pseudo>(NAME#"_ST")>;
+ let AssemblerPredicate = isGFX940Plus, SubtargetPredicate = isGFX940Plus in {
+ def _VE_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME)>;
+ def _SVS_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME#"_SVS")>;
+ def _ST_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME#"_ST")>;
}
}
-multiclass FLAT_Real_AllAddr_LDS<bits<7> op, bits<7> pre_gfx942_op,
- string pre_gfx942_name = !subst("_lds", "", !cast<FLAT_Pseudo>(NAME).Mnemonic),
+multiclass FLAT_Real_AllAddr_LDS<bits<7> op, bits<7> pre_gfx940_op,
+ string pre_gfx940_name = !subst("_lds", "", !cast<FLAT_Pseudo>(NAME).Mnemonic),
bit has_sccb = !cast<FLAT_Pseudo>(NAME).has_sccb> {
- let OtherPredicates = [isGFX8GFX9NotGFX942] in {
- def _vi : FLAT_Real_vi<pre_gfx942_op, !cast<FLAT_Pseudo>(NAME), has_sccb> {
- let AsmString = pre_gfx942_name # !cast<FLAT_Pseudo>(NAME).AsmOperands # " lds";
+ let OtherPredicates = [isGFX8GFX9NotGFX940] in {
+ def _vi : FLAT_Real_vi<pre_gfx940_op, !cast<FLAT_Pseudo>(NAME), has_sccb> {
+ let AsmString = pre_gfx940_name # !cast<FLAT_Pseudo>(NAME).AsmOperands # " lds";
}
- def _SADDR_vi : FLAT_Real_vi<pre_gfx942_op, !cast<FLAT_Pseudo>(NAME#"_SADDR"), has_sccb> {
- let AsmString = pre_gfx942_name # !cast<FLAT_Pseudo>(NAME#"_SADDR").AsmOperands # " lds";
+ def _SADDR_vi : FLAT_Real_vi<pre_gfx940_op, !cast<FLAT_Pseudo>(NAME#"_SADDR"), has_sccb> {
+ let AsmString = pre_gfx940_name # !cast<FLAT_Pseudo>(NAME#"_SADDR").AsmOperands # " lds";
}
}
- let SubtargetPredicate = isGFX942Plus in {
- def _gfx942 : FLAT_Real_gfx942<op, !cast<FLAT_Pseudo>(NAME)>;
- def _SADDR_gfx942 : FLAT_Real_gfx942<op, !cast<FLAT_Pseudo>(NAME#"_SADDR")>;
+ let SubtargetPredicate = isGFX940Plus in {
+ def _gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME)>;
+ def _SADDR_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME#"_SADDR")>;
}
}
-multiclass FLAT_Real_AllAddr_SVE_LDS<bits<7> op, bits<7> pre_gfx942_op> {
- defm "" : FLAT_Real_AllAddr_LDS<op, pre_gfx942_op>;
- let SubtargetPredicate = isGFX942Plus in {
- def _SVS_gfx942 : FLAT_Real_gfx942<op, !cast<FLAT_Pseudo>(NAME#"_SVS")>;
- def _ST_gfx942 : FLAT_Real_gfx942<op, !cast<FLAT_Pseudo>(NAME#"_ST")>;
+multiclass FLAT_Real_AllAddr_SVE_LDS<bits<7> op, bits<7> pre_gfx940_op> {
+ defm "" : FLAT_Real_AllAddr_LDS<op, pre_gfx940_op>;
+ let SubtargetPredicate = isGFX940Plus in {
+ def _SVS_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME#"_SVS")>;
+ def _ST_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME#"_ST")>;
}
}
@@ -2075,8 +2075,8 @@ defm SCRATCH_STORE_DWORDX2 : FLAT_Real_AllAddr_SVE_vi <0x1d>;
defm SCRATCH_STORE_DWORDX3 : FLAT_Real_AllAddr_SVE_vi <0x1e>;
defm SCRATCH_STORE_DWORDX4 : FLAT_Real_AllAddr_SVE_vi <0x1f>;
-let SubtargetPredicate = isGFX8GFX9NotGFX942 in {
- // These instructions are encoded differently on gfx90* and gfx942.
+let SubtargetPredicate = isGFX8GFX9NotGFX940 in {
+ // These instructions are encoded differently on gfx90* and gfx94*.
defm GLOBAL_ATOMIC_ADD_F32 : FLAT_Global_Real_Atomics_vi <0x04d, 0>;
defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Real_Atomics_vi <0x04e, 0>;
}
@@ -2090,39 +2090,39 @@ let SubtargetPredicate = isGFX90AOnly in {
defm GLOBAL_ATOMIC_MAX_F64 : FLAT_Global_Real_Atomics_vi<0x51, 0>;
} // End SubtargetPredicate = isGFX90AOnly
-multiclass FLAT_Real_AllAddr_gfx942<bits<7> op> {
- def _gfx942 : FLAT_Real_gfx942<op, !cast<FLAT_Pseudo>(NAME)>;
- def _SADDR_gfx942 : FLAT_Real_gfx942<op, !cast<FLAT_Pseudo>(NAME#"_SADDR")>;
+multiclass FLAT_Real_AllAddr_gfx940<bits<7> op> {
+ def _gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME)>;
+ def _SADDR_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(NAME#"_SADDR")>;
}
-multiclass FLAT_Real_Atomics_gfx942 <bits<7> op> {
+multiclass FLAT_Real_Atomics_gfx940 <bits<7> op> {
defvar ps = !cast<FLAT_Pseudo>(NAME);
- def _gfx942 : FLAT_Real_gfx942<op, !cast<FLAT_Pseudo>(ps.PseudoInstr)>;
- def _RTN_gfx942 : FLAT_Real_gfx942<op, !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN")>;
+ def _gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(ps.PseudoInstr)>;
+ def _RTN_gfx940 : FLAT_Real_gfx940<op, !cast<FLAT_Pseudo>(ps.PseudoInstr # "_RTN")>;
}
-multiclass FLAT_Global_Real_Atomics_gfx942<bits<7> op> :
- FLAT_Real_AllAddr_gfx942<op> {
- def _RTN_gfx942 : FLAT_Real_gfx942 <op, !cast<FLAT_Pseudo>(NAME#"_RTN")>;
- def _SADDR_RTN_gfx942 : FLAT_Real_gfx942 <op, !cast<FLAT_Pseudo>(NAME#"_SADDR_RTN")>;
+multiclass FLAT_Global_Real_Atomics_gfx940<bits<7> op> :
+ FLAT_Real_AllAddr_gfx940<op> {
+ def _RTN_gfx940 : FLAT_Real_gfx940 <op, !cast<FLAT_Pseudo>(NAME#"_RTN")>;
+ def _SADDR_RTN_gfx940 : FLAT_Real_gfx940 <op, !cast<FLAT_Pseudo>(NAME#"_SADDR_RTN")>;
}
-let SubtargetPredicate = isGFX942Plus in {
- // These instructions are encoded differently on gfx90* and gfx942.
- defm GLOBAL_ATOMIC_ADD_F32 : FLAT_Global_Real_Atomics_gfx942 <0x04d>;
- defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Real_Atomics_gfx942 <0x04e>;
+let SubtargetPredicate = isGFX940Plus in {
+ // These instructions are encoded differently on gfx90* and gfx940.
+ defm GLOBAL_ATOMIC_ADD_F32 : FLAT_Global_Real_Atomics_gfx940 <0x04d>;
+ defm GLOBAL_ATOMIC_PK_ADD_F16 : FLAT_Global_Real_Atomics_gfx940 <0x04e>;
- defm FLAT_ATOMIC_ADD_F64 : FLAT_Real_Atomics_gfx942<0x4f>;
- defm FLAT_ATOMIC_MIN_F64 : FLAT_Real_Atomics_gfx942<0x50>;
- defm FLAT_ATOMIC_MAX_F64 : FLAT_Real_Atomics_gfx942<0x51>;
- defm GLOBAL_ATOMIC_ADD_F64 : FLAT_Global_Real_Atomics_gfx942<0x4f>;
- defm GLOBAL_ATOMIC_MIN_F64 : FLAT_Global_Real_Atomics_gfx942<0x50>;
- defm GLOBAL_ATOMIC_MAX_F64 : FLAT_Global_Real_Atomics_gfx942<0x51>;
+ defm FLAT_ATOMIC_ADD_F64 : FLAT_Real_Atomics_gfx940<0x4f>;
+ defm FLAT_ATOMIC_MIN_F64 : FLAT_Real_Atomics_gfx940<0x50>;
+ defm FLAT_ATOMIC_MAX_F64 : FLAT_Real_Atomics_gfx940<0x51>;
+ defm GLOBAL_ATOMIC_ADD_F64 : FLAT_Global_Real_Atomics_gfx940<0x4f>;
+ defm GLOBAL_ATOMIC_MIN_F64 : FLAT_Global_Real_Atomics_gfx940<0x50>;
+ defm GLOBAL_ATOMIC_MAX_F64 : FLAT_Global_Real_Atomics_gfx940<0x51>;
defm FLAT_ATOMIC_ADD_F32 : FLAT_Real_Atomics_vi<0x4d>;
defm FLAT_ATOMIC_PK_ADD_F16 : FLAT_Real_Atomics_vi<0x4e>;
defm FLAT_ATOMIC_PK_ADD_BF16 : FLAT_Real_Atomics_vi<0x52>;
defm GLOBAL_ATOMIC_PK_ADD_BF16 : FLAT_Global_Real_Atomics_vi<0x52>;
-} // End SubtargetPredicate = isGFX942Plus
+} // End SubtargetPredicate = isGFX940Plus
//===----------------------------------------------------------------------===//
// GFX10.
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index 0f76b0ac5331d..1ff75095b220a 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -124,10 +124,10 @@ static bool isXDL(const GCNSubtarget &ST, const MachineInstr &MI) {
Opcode == AMDGPU::V_ACCVGPR_READ_B32_e64)
return false;
- if (!ST.hasGFX942Insts())
+ if (!ST.hasGFX940Insts())
return true;
- return AMDGPU::getMAIIsGFX942XDL(Opcode);
+ return AMDGPU::getMAIIsGFX940XDL(Opcode);
}
static bool isSendMsgTraceDataOrGDS(const SIInstrInfo &TII,
@@ -870,7 +870,7 @@ GCNHazardRecognizer::checkVALUHazardsHelper(const MachineOperand &Def,
// 8 bytes can have there store data over written by the next instruction.
const SIRegisterInfo *TRI = ST.getRegisterInfo();
- const int VALUWaitStates = ST.hasGFX942Insts() ? 2 : 1;
+ const int VALUWaitStates = ST.hasGFX940Insts() ? 2 : 1;
int WaitStatesNeeded = 0;
if (!TRI->isVectorRegister(MRI, Def.getReg()))
@@ -2251,9 +2251,9 @@ int GCNHazardRecognizer::checkMAIHazards908(MachineInstr *MI) {
}
static int
-GFX942_XDL_N_PassWritesVGPROverlappedXDLOrSMFMASrcCWaitStates(int NumPasses,
+GFX940_XDL_N_PassWritesVGPROverlappedXDLOrSMFMASrcCWaitStates(int NumPasses,
bool IsGFX950) {
- // xdl def cycles | gfx942 | gfx950
+ // xdl def cycles | gfx940 | gfx950
// 2 pass | 3 4
// 4 pass | 5 6
// 8 pass | 9 10
@@ -2262,9 +2262,9 @@ GFX942_XDL_N_PassWritesVGPROverlappedXDLOrSMFMASrcCWaitStates(int NumPasses,
}
static int
-GFX942_XDL_N_PassWritesVGPROverlappedSGEMMDGEMMSrcCWaitStates(int NumPasses,
+GFX940_XDL_N_PassWritesVGPROverlappedSGEMMDGEMMSrcCWaitStates(int NumPasses,
bool IsGFX950) {
- // xdl def cycles | gfx942 | gfx950
+ // xdl def cycles | gfx940 | gfx950
// 2 pass | 3 3
// 4 pass | 5 6
// 8 pass | 9 10
@@ -2273,7 +2273,7 @@ GFX942_XDL_N_PassWritesVGPROverlappedSGEMMDGEMMSrcCWaitStates(int NumPasses,
}
static int
-GFX942_SMFMA_N_PassWritesVGPROverlappedSMFMASrcCWaitStates(int NumPasses) {
+GFX940_SMFMA_N_PassWritesVGPROverlappedSMFMASrcCWaitStates(int NumPasses) {
// 2 pass -> 2
// 4 pass -> 4
// 8 pass -> 8
@@ -2282,7 +2282,7 @@ GFX942_SMFMA_N_PassWritesVGPROverlappedSMFMASrcCWaitStates(int NumPasses) {
}
static int
-GFX942_SMFMA_N_PassWritesVGPROverlappedSrcABWaitStates(int NumPasses) {
+GFX940_SMFMA_N_PassWritesVGPROverlappedSrcABWaitStates(int NumPasses) {
// 2 pass -> 4
// 4 pass -> 6
// 8 pass -> 10
@@ -2290,7 +2290,7 @@ GFX942_SMFMA_N_PassWritesVGPROverlappedSrcABWaitStates(int NumPasses) {
return NumPasses + 2;
}
-static int GFX942_XDL_N_PassWritesVGPROverlappedSrcABWaitStates(int NumPasses,
+static int GFX940_XDL_N_PassWritesVGPROverlappedSrcABWaitStates(int NumPasses,
bool IsGFX950) {
// xdl def cycles | gfx942 | gfx950
// 2 pass | 5 5
@@ -2343,7 +2343,7 @@ int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) {
const int DMFMA16x16WritesVGPROverlappedMFMASrcABWaitStates = 11;
const int GFX950_DMFMA16x16WritesVGPROverlappedMFMASrcABWaitStates = 19;
const int DMFMA4x4WritesVGPRFullSrcCWaitStates = 4;
- const int GFX942_SMFMA4x4WritesVGPRFullSrcCWaitStates = 2;
+ const int GFX940_SMFMA4x4WritesVGPRFullSrcCWaitStates = 2;
const int MaxWaitStates = 19;
if (!Use.isReg())
@@ -2375,7 +2375,7 @@ int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) {
unsigned Opc1 = MI1->getOpcode();
int NeedWaitStates = 0;
if (OpNo == SrcCIdx) {
- if (!isDGEMM(Opc) && (!ST.hasGFX942Insts() && isDGEMM(Opc1))) {
+ if (!isDGEMM(Opc) && (!ST.hasGFX940Insts() && isDGEMM(Opc1))) {
NeedWaitStates = 0;
} else if (FullReg) {
if ((Opc == AMDGPU::V_MFMA_F64_4X4X4F64_e64 ||
@@ -2383,9 +2383,9 @@ int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) {
(Opc1 == AMDGPU::V_MFMA_F64_4X4X4F64_e64 ||
Opc1 == AMDGPU::V_MFMA_F64_4X4X4F64_vgprcd_e64))
NeedWaitStates = DMFMA4x4WritesVGPRFullSrcCWaitStates;
- else if (ST.hasGFX942Insts() &&
+ else if (ST.hasGFX940Insts() &&
TSchedModel.computeInstrLatency(MI1) == 2)
- NeedWaitStates = GFX942_SMFMA4x4WritesVGPRFullSrcCWaitStates;
+ NeedWaitStates = GFX940_SMFMA4x4WritesVGPRFullSrcCWaitStates;
} else {
switch (Opc1) {
case AMDGPU::V_MFMA_F64_16X16X4F64_e64:
@@ -2405,18 +2405,18 @@ int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) {
break;
default:
int NumPasses = TSchedModel.computeInstrLatency(MI1);
- if (ST.hasGFX942Insts()) {
+ if (ST.hasGFX940Insts()) {
if (isXDL(ST, *MI) && !isXDL(ST, *MI1))
break;
NeedWaitStates =
isXDL(ST, *MI1)
? (isXDL(ST, *MI)
- ? GFX942_XDL_N_PassWritesVGPROverlappedXDLOrSMFMASrcCWaitStates(
+ ? GFX940_XDL_N_PassWritesVGPROverlappedXDLOrSMFMASrcCWaitStates(
NumPasses, ST.hasGFX950Insts())
- : GFX942_XDL_N_PassWritesVGPROverlappedSGEMMDGEMMSrcCWaitStates(
+ : GFX940_XDL_N_PassWritesVGPROverlappedSGEMMDGEMMSrcCWaitStates(
NumPasses, ST.hasGFX950Insts()))
- : GFX942_SMFMA_N_PassWritesVGPROverlappedSMFMASrcCWaitStates(
+ : GFX940_SMFMA_N_PassWritesVGPROverlappedSMFMASrcCWaitStates(
NumPasses);
break;
}
@@ -2462,12 +2462,12 @@ int GCNHazardRecognizer::checkMAIHazards90A(MachineInstr *MI) {
default:
int NumPasses = TSchedModel.computeInstrLatency(MI1);
- if (ST.hasGFX942Insts()) {
+ if (ST.hasGFX940Insts()) {
NeedWaitStates =
isXDL(ST, *MI1)
- ? GFX942_XDL_N_PassWritesVGPROverlappedSrcABWaitStates(
+ ? GFX940_XDL_N_PassWritesVGPROverlappedSrcABWaitStates(
NumPasses, ST.hasGFX950Insts())
- : GFX942_SMFMA_N_PassWritesVGPROverlappedSrcABWaitStates(
+ : GFX940_SMFMA_N_PassWritesVGPROverlappedSrcABWaitStates(
NumPasses);
break;
}
@@ -2590,7 +2590,7 @@ int GCNHazardRecognizer::checkPermlaneHazards(MachineInstr *MI) {
return WaitStatesNeeded;
}
-static int GFX942_SMFMA_N_PassWriteVgprVALUWawWaitStates(int NumPasses) {
+static int GFX940_SMFMA_N_PassWriteVgprVALUWawWaitStates(int NumPasses) {
// 2 pass -> 4
// 4 pass -> 6
// 8 pass -> 10
@@ -2598,7 +2598,7 @@ static int GFX942_SMFMA_N_PassWriteVgprVALUWawWaitStates(int NumPasses) {
return NumPasses + 2;
}
-static int GFX942_XDL_N_PassWriteVgprVALUWawWaitStates(int NumPasses,
+static int GFX940_XDL_N_PassWriteVgprVALUWawWaitStates(int NumPasses,
bool IsGFX950) {
// xdl def cycles | gfx942 | gfx950
// 2 pass | 5 5
@@ -2608,7 +2608,7 @@ static int GFX942_XDL_N_PassWriteVgprVALUWawWaitStates(int NumPasses,
return NumPasses + 3 + (NumPasses != 2 && IsGFX950);
}
-static int GFX942_XDL_N_PassWriteVgprVALUMemExpReadWaitStates(int NumPasses,
+static int GFX940_XDL_N_PassWriteVgprVALUMemExpReadWaitStates(int NumPasses,
bool IsGFX950) {
// xdl def cycles | gfx942 | gfx950
// 2 pass | 5 5
@@ -2618,7 +2618,7 @@ static int GFX942_XDL_N_PassWriteVgprVALUMemExpReadWaitStates(int NumPasses,
return NumPasses + 3 + (NumPasses != 2 && IsGFX950);
}
-static int GFX942_SMFMA_N_PassWriteVgprVALUMemExpReadWaitStates(int NumPasses) {
+static int GFX940_SMFMA_N_PassWriteVgprVALUMemExpReadWaitStates(int NumPasses) {
// 2 pass -> 4
// 4 pass -> 6
// 8 pass -> 10
@@ -2723,7 +2723,7 @@ int GCNHazardRecognizer::checkMAIVALUHazards(MachineInstr *MI) {
// is a DGEMM instruction in-between a VALU and a VMEM instruction it
// causes the SQ to incorrectly not insert two wait states between the two
// instructions needed to avoid data hazard.
- if (IsMem && ST.hasGFX90AInsts() && !ST.hasGFX942Insts()) {
+ if (IsMem && ST.hasGFX90AInsts() && !ST.hasGFX940Insts()) {
DGEMMAfterVALUWrite = false;
if (TRI.isVectorRegister(MRI, Reg)) {
int WaitStatesNeededForUse =
@@ -2763,12 +2763,12 @@ int GCNHazardRecognizer::checkMAIVALUHazards(MachineInstr *MI) {
default:
llvm_unreachable("unexpected dgemm");
}
- } else if (ST.hasGFX942Insts()) {
+ } else if (ST.hasGFX940Insts()) {
NeedWaitStates =
isXDL(ST, *MFMA)
- ? GFX942_XDL_N_PassWriteVgprVALUMemExpReadWaitStates(
+ ? GFX940_XDL_N_PassWriteVgprVALUMemExpReadWaitStates(
NumPasses, ST.hasGFX950Insts())
- : GFX942_SMFMA_N_PassWriteVgprVALUMemExpReadWaitStates(
+ : GFX940_SMFMA_N_PassWriteVgprVALUMemExpReadWaitStates(
NumPasses);
} else {
switch (HazardDefLatency) {
@@ -2813,7 +2813,7 @@ int GCNHazardRecognizer::checkMAIVALUHazards(MachineInstr *MI) {
const int SMFMA16x16WriteVgprVALUWawWaitStates = 11;
const int SMFMA32x32WriteVgprVALUWawWaitStates = 19;
const int SMFMA4x4ReadVgprVALUWarWaitStates = 1;
- const int GFX942_XDL4PassReadVgprVALUWarWaitStates = 3;
+ const int GFX940_XDL4PassReadVgprVALUWarWaitStates = 3;
const int SMFMA16x16ReadVgprVALUWarWaitStates = 7;
const int SMFMA32x32ReadVgprVALUWarWaitStates = 15;
const int DMFMA4x4WriteVgprVALUWriteWaitStates = 6;
@@ -2850,12 +2850,12 @@ int GCNHazardRecognizer::checkMAIVALUHazards(MachineInstr *MI) {
default:
llvm_unreachable("unexpected number of cycles for dgemm");
}
- } else if (ST.hasGFX942Insts()) {
+ } else if (ST.hasGFX940Insts()) {
NeedWaitStates =
isXDL(ST, *MFMA)
- ? GFX942_XDL_N_PassWriteVgprVALUWawWaitStates(
+ ? GFX940_XDL_N_PassWriteVgprVALUWawWaitStates(
NumPasses, ST.hasGFX950Insts())
- : GFX942_SMFMA_N_PassWriteVgprVALUWawWaitStates(NumPasses);
+ : GFX940_SMFMA_N_PassWriteVgprVALUWawWaitStates(NumPasses);
} else {
switch (NumPasses) {
case 2:
@@ -2884,7 +2884,7 @@ int GCNHazardRecognizer::checkMAIVALUHazards(MachineInstr *MI) {
!MI.readsRegister(Reg, &TRI))
return false;
- if (ST.hasGFX942Insts() && !isXDL(ST, MI))
+ if (ST.hasGFX940Insts() && !isXDL(ST, MI))
return false;
const MachineOperand *SrcC =
@@ -2906,21 +2906,16 @@ int GCNHazardRecognizer::checkMAIVALUHazards(MachineInstr *MI) {
unsigned HazardDefLatency = TSchedModel.computeInstrLatency(MFMA);
int NeedWaitStates = MaxWaitStates;
switch (HazardDefLatency) {
- case 2:
- NeedWaitStates = SMFMA4x4ReadVgprVALUWarWaitStates;
- break;
- case 4:
- assert(ST.hasGFX942Insts());
- NeedWaitStates = GFX942_XDL4PassReadVgprVALUWarWaitStates;
- break;
- case 8:
- NeedWaitStates = SMFMA16x16ReadVgprVALUWarWaitStates;
- break;
- case 16:
- [[fallthrough]];
- default:
- NeedWaitStates = SMFMA32x32ReadVgprVALUWarWaitStates;
- break;
+ case 2: NeedWaitStates = SMFMA4x4ReadVgprVALUWarWaitStates;
+ break;
+ case 4: assert(ST.hasGFX940Insts());
+ NeedWaitStates = GFX940_XDL4PassReadVgprVALUWarWaitStates;
+ break;
+ case 8: NeedWaitStates = SMFMA16x16ReadVgprVALUWarWaitStates;
+ break;
+ case 16: [[fallthrough]];
+ default: NeedWaitStates = SMFMA32x32ReadVgprVALUWarWaitStates;
+ break;
}
int WaitStatesNeededForUse = NeedWaitStates - WaitStatesSinceUse;
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index 72f3d1abb82fe..f7c5c472c93a5 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -107,7 +107,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool GFX8Insts = false;
bool GFX9Insts = false;
bool GFX90AInsts = false;
- bool GFX942Insts = false;
+ bool GFX940Insts = false;
bool GFX950Insts = false;
bool GFX10Insts = false;
bool GFX11Insts = false;
@@ -246,6 +246,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool HasMADIntraFwdBug = false;
bool HasVOPDInsts = false;
bool HasVALUTransUseHazard = false;
+ bool HasForceStoreSC0SC1 = false;
bool HasRequiredExportPriority = false;
bool HasVmemWriteVgprInOrder = false;
bool HasAshrPkInsts = false;
@@ -653,10 +654,10 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
// The ST addressing mode means no registers are used, either VGPR or SGPR,
// but only immediate offset is swizzled and added to the FLAT scratch base.
bool hasFlatScratchSTMode() const {
- return hasFlatScratchInsts() && (hasGFX10_3Insts() || hasGFX942Insts());
+ return hasFlatScratchInsts() && (hasGFX10_3Insts() || hasGFX940Insts());
}
- bool hasFlatScratchSVSMode() const { return GFX942Insts || GFX11Insts; }
+ bool hasFlatScratchSVSMode() const { return GFX940Insts || GFX11Insts; }
bool hasScalarFlatScratchInsts() const {
return ScalarFlatScratchInsts;
@@ -675,7 +676,9 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
return GFX10_BEncoding;
}
- bool hasExportInsts() const { return !hasGFX942Insts(); }
+ bool hasExportInsts() const {
+ return !hasGFX940Insts();
+ }
bool hasVINTERPEncoding() const {
return GFX11Insts;
@@ -1070,7 +1073,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
}
bool hasFmaakFmamkF32Insts() const {
- return getGeneration() >= GFX10 || hasGFX942Insts();
+ return getGeneration() >= GFX10 || hasGFX940Insts();
}
bool hasImageInsts() const {
@@ -1127,9 +1130,9 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool hasMadF16() const;
- bool hasMovB64() const { return GFX942Insts; }
+ bool hasMovB64() const { return GFX940Insts; }
- bool hasLshlAddB64() const { return GFX942Insts; }
+ bool hasLshlAddB64() const { return GFX940Insts; }
bool enableSIScheduler() const {
return EnableSIScheduler;
@@ -1213,21 +1216,25 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
// Shift amount of a 64 bit shift cannot be a highest allocated register
// if also at the end of the allocation block.
- bool hasShift64HighRegBug() const { return GFX90AInsts && !GFX942Insts; }
+ bool hasShift64HighRegBug() const {
+ return GFX90AInsts && !GFX940Insts;
+ }
// Has one cycle hazard on transcendental instruction feeding a
// non transcendental VALU.
- bool hasTransForwardingHazard() const { return GFX942Insts; }
+ bool hasTransForwardingHazard() const { return GFX940Insts; }
// Has one cycle hazard on a VALU instruction partially writing dst with
// a shift of result bits feeding another VALU instruction.
- bool hasDstSelForwardingHazard() const { return GFX942Insts; }
+ bool hasDstSelForwardingHazard() const { return GFX940Insts; }
// Cannot use op_sel with v_dot instructions.
- bool hasDOTOpSelHazard() const { return GFX942Insts || GFX11Insts; }
+ bool hasDOTOpSelHazard() const { return GFX940Insts || GFX11Insts; }
// Does not have HW interlocs for VALU writing and then reading SGPRs.
- bool hasVDecCoExecHazard() const { return GFX942Insts; }
+ bool hasVDecCoExecHazard() const {
+ return GFX940Insts;
+ }
bool hasNSAtoVMEMBug() const {
return HasNSAtoVMEMBug;
@@ -1257,6 +1264,8 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool hasCvtScaleForwardingHazard() const { return GFX950Insts; }
+ bool hasForceStoreSC0SC1() const { return HasForceStoreSC0SC1; }
+
bool requiresCodeObjectV6() const { return RequiresCOV6; }
bool hasVALUMaskWriteHazard() const { return getGeneration() == GFX11; }
@@ -1288,12 +1297,12 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
bool hasPackedTID() const { return HasPackedTID; }
- // GFX942 is a derivation to GFX90A. hasGFX942Insts() being true implies that
+ // GFX94* is a derivation to GFX90A. hasGFX940Insts() being true implies that
// hasGFX90AInsts is also true.
- bool hasGFX942Insts() const { return GFX942Insts; }
+ bool hasGFX940Insts() const { return GFX940Insts; }
- // GFX950 is a derivation to GFX942. hasGFX950Insts() implies that
- // hasGFX942Insts and hasGFX90AInsts are also true.
+ // GFX950 is a derivation to GFX94*. hasGFX950Insts() implies that
+ // hasGFX940Insts and hasGFX90AInsts are also true.
bool hasGFX950Insts() const { return GFX950Insts; }
/// Returns true if the target supports
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
index 5a72543c8eef1..381841f142855 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUInstPrinter.cpp
@@ -151,16 +151,15 @@ void AMDGPUInstPrinter::printCPol(const MCInst *MI, unsigned OpNo,
}
if (Imm & CPol::GLC)
- O << ((AMDGPU::isGFX942(STI) &&
- !(MII.get(MI->getOpcode()).TSFlags & SIInstrFlags::SMRD))
- ? " sc0"
- : " glc");
+ O << ((AMDGPU::isGFX940(STI) &&
+ !(MII.get(MI->getOpcode()).TSFlags & SIInstrFlags::SMRD)) ? " sc0"
+ : " glc");
if (Imm & CPol::SLC)
- O << (AMDGPU::isGFX942(STI) ? " nt" : " slc");
+ O << (AMDGPU::isGFX940(STI) ? " nt" : " slc");
if ((Imm & CPol::DLC) && AMDGPU::isGFX10Plus(STI))
O << " dlc";
if ((Imm & CPol::SCC) && AMDGPU::isGFX90A(STI))
- O << (AMDGPU::isGFX942(STI) ? " sc1" : " scc");
+ O << (AMDGPU::isGFX940(STI) ? " sc1" : " scc");
if (Imm & ~CPol::ALL_pregfx12)
O << " /* unexpected cache policy bit */";
}
@@ -630,12 +629,12 @@ void AMDGPUInstPrinter::printBLGP(const MCInst *MI, unsigned OpNo,
if (!Imm)
return;
- if (AMDGPU::isGFX942(STI)) {
+ if (AMDGPU::isGFX940(STI)) {
switch (MI->getOpcode()) {
- case AMDGPU::V_MFMA_F64_16X16X4F64_gfx942_acd:
- case AMDGPU::V_MFMA_F64_16X16X4F64_gfx942_vcd:
- case AMDGPU::V_MFMA_F64_4X4X4F64_gfx942_acd:
- case AMDGPU::V_MFMA_F64_4X4X4F64_gfx942_vcd:
+ case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_acd:
+ case AMDGPU::V_MFMA_F64_16X16X4F64_gfx940_vcd:
+ case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_acd:
+ case AMDGPU::V_MFMA_F64_4X4X4F64_gfx940_vcd:
O << " neg:[" << (Imm & 1) << ',' << ((Imm >> 1) & 1) << ','
<< ((Imm >> 2) & 1) << ']';
return;
diff --git a/llvm/lib/Target/AMDGPU/SIDefines.h b/llvm/lib/Target/AMDGPU/SIDefines.h
index 3d5976183c10f..721601efcc804 100644
--- a/llvm/lib/Target/AMDGPU/SIDefines.h
+++ b/llvm/lib/Target/AMDGPU/SIDefines.h
@@ -42,7 +42,7 @@ enum {
GFX10 = 6,
SDWA10 = 7,
GFX90A = 8,
- GFX942 = 9,
+ GFX940 = 9,
GFX11 = 10,
GFX12 = 11,
};
@@ -542,7 +542,7 @@ enum Id { // HwRegCode, (6) [5:0]
ID_EXCP_FLAG_USER = 18,
ID_TRAP_CTRL = 19,
- // GFX942 specific registers
+ // GFX94* specific registers
ID_XCC_ID = 20,
ID_SQ_PERF_SNAPSHOT_DATA = 21,
ID_SQ_PERF_SNAPSHOT_DATA1 = 22,
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 98f22b70c444e..7dace11d208a0 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -9475,8 +9475,8 @@ int SIInstrInfo::pseudoToMCOpcode(int Opcode) const {
if (ST.hasGFX90AInsts()) {
uint16_t NMCOp = (uint16_t)-1;
- if (ST.hasGFX942Insts())
- NMCOp = AMDGPU::getMCOpcode(Opcode, SIEncodingFamily::GFX942);
+ if (ST.hasGFX940Insts())
+ NMCOp = AMDGPU::getMCOpcode(Opcode, SIEncodingFamily::GFX940);
if (NMCOp == (uint16_t)-1)
NMCOp = AMDGPU::getMCOpcode(Opcode, SIEncodingFamily::GFX90A);
if (NMCOp == (uint16_t)-1)
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index ee633f1a26035..e30e257da6873 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -28,7 +28,7 @@ def SIEncodingFamily {
int GFX10 = 6;
int SDWA10 = 7;
int GFX90A = 8;
- int GFX942 = 9;
+ int GFX940 = 9;
int GFX11 = 10;
int GFX12 = 11;
}
@@ -3113,7 +3113,7 @@ def getMCOpcodeGen : InstrMapping {
[!cast<string>(SIEncodingFamily.GFX10)],
[!cast<string>(SIEncodingFamily.SDWA10)],
[!cast<string>(SIEncodingFamily.GFX90A)],
- [!cast<string>(SIEncodingFamily.GFX942)],
+ [!cast<string>(SIEncodingFamily.GFX940)],
[!cast<string>(SIEncodingFamily.GFX11)],
[!cast<string>(SIEncodingFamily.GFX12)]];
}
diff --git a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
index 9a41afcd56f44..79fb36acc0ea7 100644
--- a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
+++ b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp
@@ -359,6 +359,11 @@ class SICacheControl {
/// Virtual destructor to allow derivations to be deleted.
virtual ~SICacheControl() = default;
+
+ virtual bool tryForceStoreSC0SC1(const SIMemOpInfo &MOI,
+ MachineBasicBlock::iterator &MI) const {
+ return false;
+ }
};
class SIGfx6CacheControl : public SICacheControl {
@@ -465,7 +470,7 @@ class SIGfx90ACacheControl : public SIGfx7CacheControl {
Position Pos) const override;
};
-class SIGfx942CacheControl : public SIGfx90ACacheControl {
+class SIGfx940CacheControl : public SIGfx90ACacheControl {
protected:
/// Sets SC0 bit to "true" if present in \p MI. Returns true if \p MI
@@ -487,7 +492,7 @@ class SIGfx942CacheControl : public SIGfx90ACacheControl {
}
public:
- SIGfx942CacheControl(const GCNSubtarget &ST) : SIGfx90ACacheControl(ST) {};
+ SIGfx940CacheControl(const GCNSubtarget &ST) : SIGfx90ACacheControl(ST) {};
bool enableLoadCacheBypass(const MachineBasicBlock::iterator &MI,
SIAtomicScope Scope,
@@ -512,6 +517,20 @@ class SIGfx942CacheControl : public SIGfx90ACacheControl {
bool insertRelease(MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
SIAtomicAddrSpace AddrSpace, bool IsCrossAddrSpaceOrdering,
Position Pos) const override;
+
+ bool tryForceStoreSC0SC1(const SIMemOpInfo &MOI,
+ MachineBasicBlock::iterator &MI) const override {
+ bool Changed = false;
+ if (ST.hasForceStoreSC0SC1() &&
+ (MOI.getInstrAddrSpace() & (SIAtomicAddrSpace::SCRATCH |
+ SIAtomicAddrSpace::GLOBAL |
+ SIAtomicAddrSpace::OTHER)) !=
+ SIAtomicAddrSpace::NONE) {
+ Changed |= enableSC0Bit(MI);
+ Changed |= enableSC1Bit(MI);
+ }
+ return Changed;
+ }
};
class SIGfx10CacheControl : public SIGfx7CacheControl {
@@ -938,8 +957,8 @@ bool SICacheControl::enableNamedBit(const MachineBasicBlock::iterator MI,
/* static */
std::unique_ptr<SICacheControl> SICacheControl::create(const GCNSubtarget &ST) {
GCNSubtarget::Generation Generation = ST.getGeneration();
- if (ST.hasGFX942Insts())
- return std::make_unique<SIGfx942CacheControl>(ST);
+ if (ST.hasGFX940Insts())
+ return std::make_unique<SIGfx940CacheControl>(ST);
if (ST.hasGFX90AInsts())
return std::make_unique<SIGfx90ACacheControl>(ST);
if (Generation <= AMDGPUSubtarget::SOUTHERN_ISLANDS)
@@ -1557,7 +1576,7 @@ bool SIGfx90ACacheControl::insertRelease(MachineBasicBlock::iterator &MI,
return Changed;
}
-bool SIGfx942CacheControl::enableLoadCacheBypass(
+bool SIGfx940CacheControl::enableLoadCacheBypass(
const MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
SIAtomicAddrSpace AddrSpace) const {
assert(MI->mayLoad() && !MI->mayStore());
@@ -1601,9 +1620,9 @@ bool SIGfx942CacheControl::enableLoadCacheBypass(
return Changed;
}
-bool SIGfx942CacheControl::enableStoreCacheBypass(
- const MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
- SIAtomicAddrSpace AddrSpace) const {
+bool SIGfx940CacheControl::enableStoreCacheBypass(
+ const MachineBasicBlock::iterator &MI,
+ SIAtomicScope Scope, SIAtomicAddrSpace AddrSpace) const {
assert(!MI->mayLoad() && MI->mayStore());
bool Changed = false;
@@ -1660,7 +1641,7 @@ bool SIGfx940CacheControl::enableStoreCacheBypass(
return Changed;
}
-bool SIGfx940CacheControl::enableRMWCacheBypass(
+bool SIGfx942CacheControl::enableRMWCacheBypass(
const MachineBasicBlock::iterator &MI, SIAtomicScope Scope,
SIAtomicAddrSpace AddrSpace) const {
assert(MI->mayLoad() && MI->mayStore());
@@ -1689,7 +1670,7 @@ bool SIGfx940CacheControl::enableRMWCacheBypass(
return Changed;
}
-bool SIGfx940CacheControl::enableVolatileAndOrNonTemporal(
+bool SIGfx942CacheControl::enableVolatileAndOrNonTemporal(
MachineBasicBlock::iterator &MI, SIAtomicAddrSpace AddrSpace, SIMemOp Op,
bool IsVolatile, bool IsNonTemporal, bool IsLastUse = false) const {
// Only handle load and store, not atomic read-modify-write insructions. The
@@ -1729,7 +1710,7 @@ bool SIGfx940CacheControl::enableVolatileAndOrNonTemporal(
return Changed;
}
-bool SIGfx940CacheControl::insertAcquire(MachineBasicBlock::iterator &MI,
+bool SIGfx942CacheControl::insertAcquire(MachineBasicBlock::iterator &MI,
SIAtomicScope Scope,
SIAtomicAddrSpace AddrSpace,
Position Pos) const {
@@ -1815,7 +1796,7 @@ bool SIGfx940CacheControl::insertAcquire(MachineBasicBlock::iterator &MI,
return Changed;
}
-bool SIGfx940CacheControl::insertRelease(MachineBasicBlock::iterator &MI,
+bool SIGfx942CacheControl::insertRelease(MachineBasicBlock::iterator &MI,
SIAtomicScope Scope,
SIAtomicAddrSpace AddrSpace,
bool IsCrossAddrSpaceOrdering,
@@ -2820,7 +2801,6 @@ bool SIMemoryLegalizer::runOnMachineFunction(MachineFunction &MF) {
Changed |= expandLoad(*MOI, MI);
else if (const auto &MOI = MOA.getStoreInfo(MI)) {
Changed |= expandStore(*MOI, MI);
- Changed |= CC->tryForceStoreSC0SC1(*MOI, MI);
} else if (const auto &MOI = MOA.getAtomicFenceInfo(MI))
Changed |= expandAtomicFence(*MOI, MI);
else if (const auto &MOI = MOA.getAtomicCmpxchgOrRmwInfo(MI))
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp
index 373c6be33e8a8..e433b85489e6e 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUAsmUtils.cpp
@@ -217,11 +217,11 @@ static constexpr CustomOperand Operands[] = {
{{"HW_REG_SHADER_CYCLES_LO"}, ID_SHADER_CYCLES, isGFX12Plus},
// GFX942 specific registers
- {{"HW_REG_XCC_ID"}, ID_XCC_ID, isGFX942},
- {{"HW_REG_SQ_PERF_SNAPSHOT_DATA"}, ID_SQ_PERF_SNAPSHOT_DATA, isGFX942},
- {{"HW_REG_SQ_PERF_SNAPSHOT_DATA1"}, ID_SQ_PERF_SNAPSHOT_DATA1, isGFX942},
- {{"HW_REG_SQ_PERF_SNAPSHOT_PC_LO"}, ID_SQ_PERF_SNAPSHOT_PC_LO, isGFX942},
- {{"HW_REG_SQ_PERF_SNAPSHOT_PC_HI"}, ID_SQ_PERF_SNAPSHOT_PC_HI, isGFX942},
+ {{"HW_REG_XCC_ID"}, ID_XCC_ID, isGFX940},
+ {{"HW_REG_SQ_PERF_SNAPSHOT_DATA"}, ID_SQ_PERF_SNAPSHOT_DATA, isGFX940},
+ {{"HW_REG_SQ_PERF_SNAPSHOT_DATA1"}, ID_SQ_PERF_SNAPSHOT_DATA1, isGFX940},
+ {{"HW_REG_SQ_PERF_SNAPSHOT_PC_LO"}, ID_SQ_PERF_SNAPSHOT_PC_LO, isGFX940},
+ {{"HW_REG_SQ_PERF_SNAPSHOT_PC_HI"}, ID_SQ_PERF_SNAPSHOT_PC_HI, isGFX940},
// Aliases
{{"HW_REG_HW_ID"}, ID_HW_ID1, isGFX10},
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index a87c1d17bc232..c521d0dd3ad2d 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -543,9 +543,9 @@ bool getMAIIsDGEMM(unsigned Opc) {
return Info ? Info->is_dgemm : false;
}
-bool getMAIIsGFX940XDL(unsigned Opc) {
+bool getMAIIsGFX942XDL(unsigned Opc) {
const MAIInstInfo *Info = getMAIInstInfoHelper(Opc);
- return Info ? Info->is_gfx940_xdl : false;
+ return Info ? Info->is_gfx942_xdl : false;
}
uint8_t mfmaScaleF8F6F4FormatToNumRegs(unsigned EncodingVal) {
@@ -2284,8 +2284,8 @@ bool isGFX90A(const MCSubtargetInfo &STI) {
return STI.hasFeature(AMDGPU::FeatureGFX90AInsts);
}
-bool isGFX940(const MCSubtargetInfo &STI) {
- return STI.hasFeature(AMDGPU::FeatureGFX940Insts);
+bool isGFX942(const MCSubtargetInfo &STI) {
+ return STI.hasFeature(AMDGPU::FeatureGFX942Insts);
}
bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI) {
diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index c7b9440a7e95a..103993e6435de 100644
--- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -99,7 +99,7 @@ struct GcnBufferFormatInfo {
struct MAIInstInfo {
uint16_t Opcode;
bool is_dgemm;
- bool is_gfx940_xdl;
+ bool is_gfx942_xdl;
};
struct MFMA_F8F6F4_Info {
@@ -591,7 +591,7 @@ LLVM_READONLY
bool getMAIIsDGEMM(unsigned Opc);
LLVM_READONLY
-bool getMAIIsGFX940XDL(unsigned Opc);
+bool getMAIIsGFX942XDL(unsigned Opc);
struct CanBeVOPD {
bool X;
@@ -1370,7 +1370,7 @@ bool isGFX10_BEncoding(const MCSubtargetInfo &STI);
bool hasGFX10_3Insts(const MCSubtargetInfo &STI);
bool isGFX10_3_GFX11(const MCSubtargetInfo &STI);
bool isGFX90A(const MCSubtargetInfo &STI);
-bool isGFX940(const MCSubtargetInfo &STI);
+bool isGFX942(const MCSubtargetInfo &STI);
bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI);
bool hasMAIInsts(const MCSubtargetInfo &STI);
bool hasVOPD(const MCSubtargetInfo &STI);
diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index 1a3a7ec52c3b2..a407ae797a48b 100644
--- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -238,7 +238,7 @@ def VOPProfile_MOV : VOPProfile <[i32, i32, untyped, untyped]> {
let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
defm V_MOV_B32 : VOP1Inst <"v_mov_b32", VOPProfile_MOV, null_frag, 0x8>;
-let SubtargetPredicate = isGFX940Plus, SchedRW = [Write64Bit] in
+let SubtargetPredicate = isGFX942Plus, SchedRW = [Write64Bit] in
defm V_MOV_B64 : VOP1Inst <"v_mov_b64", VOP_I64_I64>;
} // End isMoveImm = 1
@@ -1558,7 +1558,7 @@ multiclass VOP1_OpSel_Real_e32e64_gfx9 <bits<10> op> {
defm V_SCREEN_PARTITION_4SE_B32 : VOP1_Real_gfx9 <0x37>;
-let AssemblerPredicate = isGFX940Plus in
+let AssemblerPredicate = isGFX942Plus in
defm V_MOV_B64 : VOP1_Real_gfx9 <0x38>;
defm V_CVT_F32_BF16 : VOP1_Real_gfx9 <0x5b>;
diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
index 95ff1165a4ce3..900c91731aa1b 100644
--- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
@@ -2367,8 +2367,8 @@ multiclass VOP2_Real_MADK_vi <bits<6> op> {
VOP2_MADKe<op{5-0}, !cast<VOP2_Pseudo>(NAME).Pfl>;
}
-multiclass VOP2_Real_MADK_gfx940 <bits<6> op> {
- def _gfx940 : VOP2_Real<!cast<VOP2_Pseudo>(NAME), SIEncodingFamily.GFX940>,
+multiclass VOP2_Real_MADK_gfx942 <bits<6> op> {
+ def _gfx942 : VOP2_Real<!cast<VOP2_Pseudo>(NAME), SIEncodingFamily.GFX942>,
VOP2_MADKe<op{5-0}, !cast<VOP2_Pseudo>(NAME).Pfl> {
let DecoderNamespace = "GFX9";
}
@@ -2668,8 +2668,8 @@ let IsSingle = 1 in {
}
let SubtargetPredicate = HasFmaakFmamkF32Insts in {
-defm V_FMAMK_F32 : VOP2_Real_MADK_gfx940 <0x17>;
-defm V_FMAAK_F32 : VOP2_Real_MADK_gfx940 <0x18>;
+defm V_FMAMK_F32 : VOP2_Real_MADK_gfx942 <0x17>;
+defm V_FMAAK_F32 : VOP2_Real_MADK_gfx942 <0x18>;
}
multiclass VOP2_Real_DOT_ACC_gfx9<bits<6> op> : Base_VOP2_Real_e32e64_vi<op> {
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index 3824383aeace0..afafc2ecccfaf 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -687,7 +687,7 @@ defm V_LSHL_OR_B32 : VOP3Inst <"v_lshl_or_b32", VOP3_Profile<VOP_I32_I32_I32_I32
// V_LSHL_ADD_U64: D0.u64 = (S0.u64 << S1.u[2:0]) + S2.u64
// src0 is shifted left by 0-4 (use “0” to get ADD_U64).
-let SubtargetPredicate = isGFX940Plus in
+let SubtargetPredicate = isGFX942Plus in
defm V_LSHL_ADD_U64 : VOP3Inst <"v_lshl_add_u64", VOP3_Profile<VOP_I64_I64_I32_I64>>;
let OtherPredicates = [HasFP8ConversionInsts], mayRaiseFPException = 0,
@@ -705,7 +705,7 @@ let OtherPredicates = [HasFP8ConversionInsts], mayRaiseFPException = 0,
// These instructions have non-standard use of op_sel. In particular they are
// using op_sel bits 2 and 3 while only having two sources. Therefore dummy
// src2 is used to hold the op_sel value.
- let Constraints = "$vdst = $src2", DisableEncoding = "$src2", SubtargetPredicate = isGFX942Plus in {
+ let Constraints = "$vdst = $src2", DisableEncoding = "$src2", SubtargetPredicate = isGFX940Plus in {
defm V_CVT_SR_FP8_F32 : VOP3Inst<"v_cvt_sr_fp8_f32", VOP3_CVT_SR_F8_F32_Profile>;
defm V_CVT_SR_BF8_F32 : VOP3Inst<"v_cvt_sr_bf8_f32", VOP3_CVT_SR_F8_F32_Profile>;
}
@@ -734,7 +734,7 @@ foreach Index = [0, -1] in {
def : Cvt_PK_F8_F32_Pat<int_amdgcn_cvt_pk_bf8_f32, Index, V_CVT_PK_BF8_F32_e64>;
}
-let SubtargetPredicate = isGFX940Plus in {
+let SubtargetPredicate = isGFX942Plus in {
foreach Index = [0, 1, 2, 3] in {
def : Cvt_SR_F8_F32_Pat<int_amdgcn_cvt_sr_fp8_f32, Index, V_CVT_SR_FP8_F32_e64>;
def : Cvt_SR_F8_F32_Pat<int_amdgcn_cvt_sr_bf8_f32, Index, V_CVT_SR_BF8_F32_e64>;
@@ -766,7 +766,7 @@ def : GCNPat<
(DivergentBinFrag<mul> i32:$src0, IsPow2Plus1:$src1),
(V_LSHL_ADD_U32_e64 i32:$src0, (i32 (Log2_32 imm:$src1)), i32:$src0)>;
-let SubtargetPredicate = isGFX940Plus in
+let SubtargetPredicate = isGFX942Plus in
def : GCNPat<
(ThreeOpFrag<shl_0_to_4, add> i64:$src0, i32:$src1, i64:$src2),
(V_LSHL_ADD_U64_e64 VSrc_b64:$src0, VSrc_b32:$src1, VSrc_b64:$src2)
diff --git a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
index 730fa17d998b7..d5c6e8af109f4 100644
--- a/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3PInstructions.td
@@ -880,7 +880,7 @@ class MAIInst<string OpName, VOPProfile P, SDPatternOperator node, bit Scaled =
let SubtargetPredicate = HasMAIInsts;
Instruction Opcode = !cast<Instruction>(NAME);
bit is_dgemm = 0;
- bit is_gfx940_xdl = 0;
+ bit is_gfx942_xdl = 0;
let PseudoInstr = NAME; // FIXME: Why is this not the default
}
@@ -1005,7 +1005,7 @@ defm V_MFMA_F32_16X16X4F32 : MAIInst<"v_mfma_f32_16x16x4f32", "F32_F32_X4",
defm V_MFMA_F32_32X32X1F32 : MAIInst<"v_mfma_f32_32x32x1f32", "F32_F32_X32", int_amdgcn_mfma_f32_32x32x1f32>;
defm V_MFMA_F32_32X32X2F32 : MAIInst<"v_mfma_f32_32x32x2f32", "F32_F32_X16", int_amdgcn_mfma_f32_32x32x2f32>;
-let is_gfx940_xdl = 1 in {
+let is_gfx942_xdl = 1 in {
defm V_MFMA_F32_4X4X4F16 : MAIInst<"v_mfma_f32_4x4x4f16", "F32_V4F16_X4", int_amdgcn_mfma_f32_4x4x4f16>;
defm V_MFMA_I32_4X4X4I8 : MAIInst<"v_mfma_i32_4x4x4i8", "I32_I32_X4", int_amdgcn_mfma_i32_4x4x4i8>;
defm V_MFMA_F32_16X16X4F16 : MAIInst<"v_mfma_f32_16x16x4f16", "F32_V4F16_X16", int_amdgcn_mfma_f32_16x16x4f16>;
@@ -1026,7 +1026,7 @@ defm V_MFMA_F32_32X32X2BF16 : MAIInst<"v_mfma_f32_32x32x2bf16", "F32_V2I16_X32",
defm V_MFMA_F32_32X32X4BF16 : MAIInst<"v_mfma_f32_32x32x4bf16", "F32_V2I16_X16", int_amdgcn_mfma_f32_32x32x4bf16>;
}
-let SubtargetPredicate = HasGFX950Insts, is_gfx940_xdl = 1 in {
+let SubtargetPredicate = HasGFX950Insts, is_gfx942_xdl = 1 in {
defm V_MFMA_F32_16X16X32_F16 : MAIInst<"v_mfma_f32_16x16x32f16", "F32_V8F16_X32", int_amdgcn_mfma_f32_16x16x32_f16>;
defm V_MFMA_F32_32X32X16_F16 : MAIInst<"v_mfma_f32_32x32x16f16", "F32_V8F16_X16", int_amdgcn_mfma_f32_32x32x16_f16>;
defm V_MFMA_F32_16X16X32_BF16 : MAIInst<"v_mfma_f32_16x16x32bf16", "F32_V8BF16_X4", int_amdgcn_mfma_f32_16x16x32_bf16>;
@@ -1054,7 +1054,7 @@ defm V_MFMA_LD_SCALE_B32 : VOP3PInst<"v_mfma_ld_scale_b32", VOP_MFMA_LD_SCALE>;
}
let SubtargetPredicate = isGFX90APlus in {
- let is_gfx940_xdl = 1 in {
+ let is_gfx942_xdl = 1 in {
defm V_MFMA_F32_32X32X4BF16_1K : MAIInst<"v_mfma_f32_32x32x4bf16_1k", "F32_V4I16_X32", int_amdgcn_mfma_f32_32x32x4bf16_1k>;
defm V_MFMA_F32_16X16X4BF16_1K : MAIInst<"v_mfma_f32_16x16x4bf16_1k", "F32_V4I16_X16", int_amdgcn_mfma_f32_16x16x4bf16_1k>;
defm V_MFMA_F32_4X4X4BF16_1K : MAIInst<"v_mfma_f32_4x4x4bf16_1k", "F32_V4I16_X4", int_amdgcn_mfma_f32_4x4x4bf16_1k>;
@@ -1068,17 +1068,17 @@ let SubtargetPredicate = isGFX90APlus in {
}
} // End SubtargetPredicate = isGFX90APlus
-let SubtargetPredicate = isGFX940Plus, is_gfx940_xdl = 1 in {
+let SubtargetPredicate = isGFX942Plus, is_gfx942_xdl = 1 in {
defm V_MFMA_I32_32X32X16I8 : MAIInst<"v_mfma_i32_32x32x16i8", "I32_I64_X32", int_amdgcn_mfma_i32_32x32x16_i8>;
defm V_MFMA_I32_16X16X32I8 : MAIInst<"v_mfma_i32_16x16x32i8", "I32_I64_X16", int_amdgcn_mfma_i32_16x16x32_i8>;
-} // End SubtargetPredicate = isGFX940Plus, is_gfx940_xdl = 1
+} // End SubtargetPredicate = isGFX942Plus, is_gfx942_xdl = 1
-let SubtargetPredicate = HasXF32Insts, is_gfx940_xdl = 1 in {
+let SubtargetPredicate = HasXF32Insts, is_gfx942_xdl = 1 in {
defm V_MFMA_F32_16X16X8XF32 : MAIInst<"v_mfma_f32_16x16x8xf32", "F32_V2F32_X16", int_amdgcn_mfma_f32_16x16x8_xf32>;
defm V_MFMA_F32_32X32X4XF32 : MAIInst<"v_mfma_f32_32x32x4xf32", "F32_V2F32_X32", int_amdgcn_mfma_f32_32x32x4_xf32>;
-} // End SubtargetPredicate = HasXF32Insts, is_gfx940_xdl = 1
+} // End SubtargetPredicate = HasXF32Insts, is_gfx942_xdl = 1
-let SubtargetPredicate = HasFP8Insts, is_gfx940_xdl = 1 in {
+let SubtargetPredicate = HasFP8Insts, is_gfx942_xdl = 1 in {
defm V_MFMA_F32_16X16X32_BF8_BF8 : MAIInst<"v_mfma_f32_16x16x32_bf8_bf8", "F32_I64_X32", int_amdgcn_mfma_f32_16x16x32_bf8_bf8>;
defm V_MFMA_F32_16X16X32_BF8_FP8 : MAIInst<"v_mfma_f32_16x16x32_bf8_fp8", "F32_I64_X32", int_amdgcn_mfma_f32_16x16x32_bf8_fp8>;
defm V_MFMA_F32_16X16X32_FP8_BF8 : MAIInst<"v_mfma_f32_16x16x32_fp8_bf8", "F32_I64_X32", int_amdgcn_mfma_f32_16x16x32_fp8_bf8>;
@@ -1087,16 +1087,16 @@ let SubtargetPredicate = HasFP8Insts, is_gfx942_xdl = 1 in {
defm V_MFMA_F32_32X32X16_BF8_FP8 : MAIInst<"v_mfma_f32_32x32x16_bf8_fp8", "F32_I64_X16", int_amdgcn_mfma_f32_32x32x16_bf8_fp8>;
defm V_MFMA_F32_32X32X16_FP8_BF8 : MAIInst<"v_mfma_f32_32x32x16_fp8_bf8", "F32_I64_X16", int_amdgcn_mfma_f32_32x32x16_fp8_bf8>;
defm V_MFMA_F32_32X32X16_FP8_FP8 : MAIInst<"v_mfma_f32_32x32x16_fp8_fp8", "F32_I64_X16", int_amdgcn_mfma_f32_32x32x16_fp8_fp8>;
-} // End SubtargetPredicate = HasFP8Insts, is_gfx940_xdl = 1
+} // End SubtargetPredicate = HasFP8Insts, is_gfx942_xdl = 1
multiclass SMFMACInst<string OpName, string P, SDPatternOperator node> {
let Constraints = "$vdst = $src2", DisableEncoding = "$src2",
- isConvergent = 1, mayRaiseFPException = 0, ReadsModeReg = 1, is_gfx940_xdl = 1 in {
+ isConvergent = 1, mayRaiseFPException = 0, ReadsModeReg = 1, is_gfx942_xdl = 1 in {
def _e64 : MAIInst<OpName, !cast<VOPProfileSMFMAC>("VOPProfileSMFMAC_" # P), node>;
}
}
-let SubtargetPredicate = isGFX940Plus in {
+let SubtargetPredicate = isGFX942Plus in {
defm V_SMFMAC_F32_16X16X32_F16 : SMFMACInst<"v_smfmac_f32_16x16x32_f16", "F32_16X16X32_F16", int_amdgcn_smfmac_f32_16x16x32_f16>;
defm V_SMFMAC_F32_32X32X16_F16 : SMFMACInst<"v_smfmac_f32_32x32x16_f16", "F32_32X32X16_F16", int_amdgcn_smfmac_f32_32x32x16_f16>;
defm V_SMFMAC_F32_16X16X32_BF16 : SMFMACInst<"v_smfmac_f32_16x16x32_bf16", "F32_16X16X32_I16", int_amdgcn_smfmac_f32_16x16x32_bf16>;
@@ -1105,7 +1105,7 @@ defm V_SMFMAC_I32_16X16X64_I8 : SMFMACInst<"v_smfmac_i32_16x16x64_i8",
defm V_SMFMAC_I32_32X32X32_I8 : SMFMACInst<"v_smfmac_i32_32x32x32_i8", "I32_32X32X32_I8", int_amdgcn_smfmac_i32_32x32x32_i8>;
}
-let SubtargetPredicate = HasFP8Insts, is_gfx940_xdl = 1 in {
+let SubtargetPredicate = HasFP8Insts, is_gfx942_xdl = 1 in {
defm V_SMFMAC_F32_16X16X64_BF8_BF8 : SMFMACInst<"v_smfmac_f32_16x16x64_bf8_bf8", "F32_16X16X64_F8", int_amdgcn_smfmac_f32_16x16x64_bf8_bf8>;
defm V_SMFMAC_F32_16X16X64_BF8_FP8 : SMFMACInst<"v_smfmac_f32_16x16x64_bf8_fp8", "F32_16X16X64_F8", int_amdgcn_smfmac_f32_16x16x64_bf8_fp8>;
defm V_SMFMAC_F32_16X16X64_FP8_BF8 : SMFMACInst<"v_smfmac_f32_16x16x64_fp8_bf8", "F32_16X16X64_F8", int_amdgcn_smfmac_f32_16x16x64_fp8_bf8>;
@@ -1114,7 +1114,7 @@ defm V_SMFMAC_F32_32X32X32_BF8_BF8 : SMFMACInst<"v_smfmac_f32_32x32x32_bf8_bf8",
defm V_SMFMAC_F32_32X32X32_BF8_FP8 : SMFMACInst<"v_smfmac_f32_32x32x32_bf8_fp8", "F32_32X32X32_F8", int_amdgcn_smfmac_f32_32x32x32_bf8_fp8>;
defm V_SMFMAC_F32_32X32X32_FP8_BF8 : SMFMACInst<"v_smfmac_f32_32x32x32_fp8_bf8", "F32_32X32X32_F8", int_amdgcn_smfmac_f32_32x32x32_fp8_bf8>;
defm V_SMFMAC_F32_32X32X32_FP8_FP8 : SMFMACInst<"v_smfmac_f32_32x32x32_fp8_fp8", "F32_32X32X32_F8", int_amdgcn_smfmac_f32_32x32x32_fp8_fp8>;
-} // End SubtargetPredicate = HasFP8Insts, is_gfx940_xdl = 1
+} // End SubtargetPredicate = HasFP8Insts, is_gfx942_xdl = 1
let SubtargetPredicate = HasGFX950Insts in {
defm V_SMFMAC_F32_16X16X64_F16 : SMFMACInst<"v_smfmac_f32_16x16x64_f16", "F32_16X16X64_F16", int_amdgcn_smfmac_f32_16x16x64_f16>;
@@ -1137,7 +1137,7 @@ def MAIInstInfoTable : GenericTable {
let FilterClass = "MAIInst";
let CppTypeName = "MAIInstInfo";
let Fields = [
- "Opcode", "is_dgemm", "is_gfx942_xdl"
+ "Opcode", "is_dgemm", "is_gfx940_xdl"
];
let PrimaryKey = ["Opcode"];
@@ -1947,7 +1947,7 @@ multiclass VOP3P_Real_MFMA_gfx90a<bits<7> op> {
}
}
-multiclass VOP3P_Real_MFMA_gfx940_aliases<string NameFrom, string NameTo, string Op,
+multiclass VOP3P_Real_MFMA_gfx942_aliases<string NameFrom, string NameTo, string Op,
VOP3_Pseudo PS_ACD = !cast<VOP3_Pseudo>(Op # "_e64"),
VOP3_Pseudo PS_VCD = !cast<VOP3_Pseudo>(Op # "_vgprcd" # "_e64"),
VOPProfile Pfl_ACD = PS_ACD.Pfl,
@@ -1956,59 +1956,59 @@ multiclass VOP3P_Real_MFMA_gfx942_aliases<string NameFrom, string NameTo, string
let SubtargetPredicate = PS_ACD.SubtargetPredicate,
OtherPredicates = PS_ACD.OtherPredicates in {
def : InstAlias <NameTo # " " # PS_ACD.AsmOperands,
- (!cast<VOP3P_Real>(Op # "_gfx942_acd") Pfl_ACD.DstRC:$vdst,
+ (!cast<VOP3P_Real>(Op # "_gfx940_acd") Pfl_ACD.DstRC:$vdst,
Pfl_ACD.Src0RC64:$src0, Pfl_ACD.Src1RC64:$src1, Pfl_ACD.Src2RC64:$src2,
CBSZ:$cbsz, ABID:$abid, blgp:$blgp)>, PredicateControl;
def : InstAlias <NameTo # " " # PS_VCD.AsmOperands,
- (!cast<VOP3P_Real>(Op # "_gfx942_vcd") Pfl_VCD.DstRC:$vdst,
+ (!cast<VOP3P_Real>(Op # "_gfx940_vcd") Pfl_VCD.DstRC:$vdst,
Pfl_VCD.Src0RC64:$src0, Pfl_VCD.Src1RC64:$src1, Pfl_VCD.Src2RC64:$src2,
CBSZ:$cbsz, ABID:$abid, blgp:$blgp)>, PredicateControl;
}
}
}
-multiclass VOP3P_Real_MFMA_gfx940<bits<7> op, string Name = !cast<VOP3_Pseudo>(NAME#"_e64").Mnemonic,
+multiclass VOP3P_Real_MFMA_gfx942<bits<7> op, string Name = !cast<VOP3_Pseudo>(NAME#"_e64").Mnemonic,
VOP3_Pseudo PS_ACD = !cast<VOP3_Pseudo>(NAME # "_e64"),
VOP3_Pseudo PS_VCD = !cast<VOP3_Pseudo>(NAME # "_vgprcd" # "_e64")> {
- let AssemblerPredicate = isGFX940Plus,
- DecoderNamespace = "GFX940",
+ let AssemblerPredicate = isGFX942Plus,
+ DecoderNamespace = "GFX942",
AsmString = Name # PS_ACD.AsmOperands, Constraints = "" in {
- def _gfx940_acd : VOP3P_Real<PS_ACD, SIEncodingFamily.GFX940>,
+ def _gfx942_acd : VOP3P_Real<PS_ACD, SIEncodingFamily.GFX942>,
VOP3Pe_MAI <op, PS_ACD.Pfl, 1>;
- def _gfx940_vcd : VOP3P_Real<PS_VCD, SIEncodingFamily.GFX940>,
+ def _gfx942_vcd : VOP3P_Real<PS_VCD, SIEncodingFamily.GFX942>,
VOP3Pe_MAI <op, PS_VCD.Pfl, 0>;
- } // End AssemblerPredicate = isGFX940Plus, DecoderNamespace = "GFX940"
+ } // End AssemblerPredicate = isGFX942Plus, DecoderNamespace = "GFX942"
let SubtargetPredicate = PS_ACD.SubtargetPredicate,
OtherPredicates = PS_ACD.OtherPredicates,
- AssemblerPredicate = isGFX940Plus
+ AssemblerPredicate = isGFX942Plus
in {
- defm : VOP3P_Real_MFMA_gfx940_aliases<Name, PS_ACD.Mnemonic, NAME>;
+ defm : VOP3P_Real_MFMA_gfx942_aliases<Name, PS_ACD.Mnemonic, NAME>;
if !ne(!subst("_1k", "", PS_ACD.Mnemonic), PS_ACD.Mnemonic) then
- defm : VOP3P_Real_MFMA_gfx940_aliases<Name, !subst("_1k", "", PS_ACD.Mnemonic), NAME>;
+ defm : VOP3P_Real_MFMA_gfx942_aliases<Name, !subst("_1k", "", PS_ACD.Mnemonic), NAME>;
}
}
-multiclass VOP3P_Real_MFMA_F8F6F4_gfx940<bits<7> op, string Name = !cast<VOP3_Pseudo>(NAME#"_e64").Mnemonic,
+multiclass VOP3P_Real_MFMA_F8F6F4_gfx942<bits<7> op, string Name = !cast<VOP3_Pseudo>(NAME#"_e64").Mnemonic,
VOP3_Pseudo PS_ACD = !cast<VOP3_Pseudo>(NAME # "_e64"),
VOP3_Pseudo PS_VCD = !cast<VOP3_Pseudo>(NAME # "_vgprcd" # "_e64")> {
defvar F8F8Name = !substr(NAME, 0, !sub(!size(NAME), !size("_fN_fM")))#"_f8_f8";
- let AssemblerPredicate = isGFX940Plus,
- DecoderNamespace = "GFX940",
+ let AssemblerPredicate = isGFX942Plus,
+ DecoderNamespace = "GFX942",
AsmString = Name # PS_ACD.AsmOperands,
Constraints = "" in {
- def _gfx940_acd : VOP3P_Real<PS_ACD, SIEncodingFamily.GFX940>,
+ def _gfx942_acd : VOP3P_Real<PS_ACD, SIEncodingFamily.GFX942>,
VOP3Pe_MAI <op, PS_ACD.Pfl, 1>,
- MFMA_F8F6F4_WithSizeTable_Helper<PS_ACD, F8F8Name#"_gfx940_acd">;
+ MFMA_F8F6F4_WithSizeTable_Helper<PS_ACD, F8F8Name#"_gfx942_acd">;
- def _gfx940_vcd : VOP3P_Real<PS_VCD, SIEncodingFamily.GFX940>,
+ def _gfx942_vcd : VOP3P_Real<PS_VCD, SIEncodingFamily.GFX942>,
VOP3Pe_MAI <op, PS_VCD.Pfl, 0>,
- MFMA_F8F6F4_WithSizeTable_Helper<PS_VCD, F8F8Name#"_gfx940_vcd">;
+ MFMA_F8F6F4_WithSizeTable_Helper<PS_VCD, F8F8Name#"_gfx942_vcd">;
- } // End AssemblerPredicate = isGFX940Plus, DecoderNamespace = "GFX940"
+ } // End AssemblerPredicate = isGFX942Plus, DecoderNamespace = "GFX942"
}
multiclass VOP3P_Real_MFMA_gfx950<bits<7> op, string Name = !cast<VOP3_Pseudo>(NAME#"_e64").Mnemonic,
@@ -2016,23 +2016,23 @@ multiclass VOP3P_Real_MFMA_gfx950<bits<7> op, string Name = !cast<VOP3_Pseudo>(N
VOP3_Pseudo PS_VCD = !cast<VOP3_Pseudo>(NAME # "_vgprcd" # "_e64")> {
let SubtargetPredicate = HasGFX950Insts,
AssemblerPredicate = HasGFX950Insts in {
- defm "" : VOP3P_Real_MFMA_gfx942<op, Name, PS_ACD, PS_VCD>;
+ defm "" : VOP3P_Real_MFMA_gfx940<op, Name, PS_ACD, PS_VCD>;
}
}
multiclass VOP3P_Real_MFMA_F8F6F4_gfx950_mc<bits<7> op, string Name> {
- defm _f8_f8 : VOP3P_Real_MFMA_F8F6F4_gfx940<op, Name>;
+ defm _f8_f8 : VOP3P_Real_MFMA_F8F6F4_gfx942<op, Name>;
let isAsmParserOnly = true in { // Disable ambiguous disassembly.
- defm _f8_f6 : VOP3P_Real_MFMA_F8F6F4_gfx940<op, Name>;
- defm _f6_f8 : VOP3P_Real_MFMA_F8F6F4_gfx940<op, Name>;
- defm _f8_f4 : VOP3P_Real_MFMA_F8F6F4_gfx940<op, Name>;
- defm _f4_f8 : VOP3P_Real_MFMA_F8F6F4_gfx940<op, Name>;
- defm _f6_f6 : VOP3P_Real_MFMA_F8F6F4_gfx940<op, Name>;
- defm _f6_f4 : VOP3P_Real_MFMA_F8F6F4_gfx940<op, Name>;
- defm _f4_f6 : VOP3P_Real_MFMA_F8F6F4_gfx940<op, Name>;
- defm _f4_f4 : VOP3P_Real_MFMA_F8F6F4_gfx940<op, Name>;
+ defm _f8_f6 : VOP3P_Real_MFMA_F8F6F4_gfx942<op, Name>;
+ defm _f6_f8 : VOP3P_Real_MFMA_F8F6F4_gfx942<op, Name>;
+ defm _f8_f4 : VOP3P_Real_MFMA_F8F6F4_gfx942<op, Name>;
+ defm _f4_f8 : VOP3P_Real_MFMA_F8F6F4_gfx942<op, Name>;
+ defm _f6_f6 : VOP3P_Real_MFMA_F8F6F4_gfx942<op, Name>;
+ defm _f6_f4 : VOP3P_Real_MFMA_F8F6F4_gfx942<op, Name>;
+ defm _f4_f6 : VOP3P_Real_MFMA_F8F6F4_gfx942<op, Name>;
+ defm _f4_f4 : VOP3P_Real_MFMA_F8F6F4_gfx942<op, Name>;
}
}
@@ -2043,15 +2043,15 @@ multiclass VOP3PX_Real_ScaledMFMA<bits<7> op> {
defvar F8F8Name = !substr(NAME, 0, !sub(!size(NAME), !size("_fN_fM")))#"_f8_f8";
let SubtargetPredicate = HasGFX950Insts,
- DecoderNamespace = "GFX942",
+ DecoderNamespace = "GFX940",
AsmString = Name # PS_ACD.AsmOperands, Constraints = "" in {
- def _gfx940_acd : VOP3P_Real<PS_ACD, SIEncodingFamily.GFX940>,
+ def _gfx942_acd : VOP3P_Real<PS_ACD, SIEncodingFamily.GFX942>,
VOP3PXe <op, PS_ACD.Pfl, /*acc_cd=*/1>,
- MFMA_F8F6F4_WithSizeTable_Helper<PS_ACD, F8F8Name#"_gfx940_acd">;
+ MFMA_F8F6F4_WithSizeTable_Helper<PS_ACD, F8F8Name#"_gfx942_acd">;
- def _gfx940_vcd : VOP3P_Real<PS_VCD, SIEncodingFamily.GFX940>,
+ def _gfx942_vcd : VOP3P_Real<PS_VCD, SIEncodingFamily.GFX942>,
VOP3PXe <op, PS_VCD.Pfl, /*acc_cd=*/0>,
- MFMA_F8F6F4_WithSizeTable_Helper<PS_VCD, F8F8Name#"_gfx940_vcd">;
+ MFMA_F8F6F4_WithSizeTable_Helper<PS_VCD, F8F8Name#"_gfx942_vcd">;
}
}
@@ -2084,18 +2084,18 @@ multiclass VOP3P_Real_MFMA_vi_gfx90a<bits<7> op> :
VOP3P_Real_MFMA_gfx90a <op>,
VOP3P_Real_MFMA_vi <op>;
-multiclass VOP3P_Real_MFMA<bits<7> op, string GFX940Name = !cast<VOP3_Pseudo>(NAME#"_e64").Mnemonic> :
+multiclass VOP3P_Real_MFMA<bits<7> op, string GFX942Name = !cast<VOP3_Pseudo>(NAME#"_e64").Mnemonic> :
VOP3P_Real_MFMA_vi_gfx90a <op>,
- VOP3P_Real_MFMA_gfx940 <op, GFX940Name>;
+ VOP3P_Real_MFMA_gfx942 <op, GFX942Name>;
multiclass VOP3P_Real_SMFMAC<bits<7> op, string alias> {
- def _gfx942 : VOP3P_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.VI>,
+ def _gfx940 : VOP3P_Real<!cast<VOP3_Pseudo>(NAME#"_e64"), SIEncodingFamily.VI>,
VOP3Pe_SMFMAC <op> {
- let AssemblerPredicate = isGFX940Plus;
+ let AssemblerPredicate = isGFX942Plus;
let DecoderNamespace = "GFX8";
}
def : AMDGPUMnemonicAlias<alias, !cast<VOP3_Pseudo>(NAME#"_e64").Mnemonic> {
- let AssemblerPredicate = isGFX940Plus;
+ let AssemblerPredicate = isGFX942Plus;
}
}
@@ -2193,28 +2193,28 @@ defm V_MFMA_F32_32X32X64_F8F6F4 : VOP3P_Real_MFMA_F8F6F4_gfx950_mc <0x2e, "v_mf
defm V_MFMA_SCALE_F32_32X32X64_F8F6F4 : VOP3PX_Real_ScaledMFMA_F8F6F4_mc <0x2e>;
defm V_DOT2_F32_BF16 : VOP3P_Real_vi<0x1a>;
-defm V_MFMA_I32_32X32X16I8 : VOP3P_Real_MFMA_gfx940 <0x56, "v_mfma_i32_32x32x16_i8">;
-defm V_MFMA_I32_16X16X32I8 : VOP3P_Real_MFMA_gfx940 <0x57, "v_mfma_i32_16x16x32_i8">;
-defm V_MFMA_F32_16X16X8XF32 : VOP3P_Real_MFMA_gfx940 <0x3e, "v_mfma_f32_16x16x8_xf32">;
-defm V_MFMA_F32_32X32X4XF32 : VOP3P_Real_MFMA_gfx940 <0x3f, "v_mfma_f32_32x32x4_xf32">;
-
-defm V_MFMA_F32_16X16X32_BF8_BF8 : VOP3P_Real_MFMA_gfx940 <0x70>;
-defm V_MFMA_F32_16X16X32_BF8_FP8 : VOP3P_Real_MFMA_gfx940 <0x71>;
-defm V_MFMA_F32_16X16X32_FP8_BF8 : VOP3P_Real_MFMA_gfx940 <0x72>;
-defm V_MFMA_F32_16X16X32_FP8_FP8 : VOP3P_Real_MFMA_gfx940 <0x73>;
-defm V_MFMA_F32_32X32X16_BF8_BF8 : VOP3P_Real_MFMA_gfx940 <0x74>;
-defm V_MFMA_F32_32X32X16_BF8_FP8 : VOP3P_Real_MFMA_gfx940 <0x75>;
-defm V_MFMA_F32_32X32X16_FP8_BF8 : VOP3P_Real_MFMA_gfx940 <0x76>;
-defm V_MFMA_F32_32X32X16_FP8_FP8 : VOP3P_Real_MFMA_gfx940 <0x77>;
-
-defm V_MFMA_F32_32X32X4BF16_1K : VOP3P_Real_MFMA_gfx940 <0x5d, "v_mfma_f32_32x32x4_2b_bf16">;
-defm V_MFMA_F32_16X16X4BF16_1K : VOP3P_Real_MFMA_gfx940 <0x5e, "v_mfma_f32_16x16x4_4b_bf16">;
-defm V_MFMA_F32_4X4X4BF16_1K : VOP3P_Real_MFMA_gfx940 <0x5f, "v_mfma_f32_4x4x4_16b_bf16">;
-defm V_MFMA_F32_32X32X8BF16_1K : VOP3P_Real_MFMA_gfx940 <0x60, "v_mfma_f32_32x32x8_bf16">;
-defm V_MFMA_F32_16X16X16BF16_1K : VOP3P_Real_MFMA_gfx940 <0x61, "v_mfma_f32_16x16x16_bf16">;
-
-defm V_MFMA_F64_16X16X4F64 : VOP3P_Real_MFMA_gfx940 <0x6e, "v_mfma_f64_16x16x4_f64">;
-defm V_MFMA_F64_4X4X4F64 : VOP3P_Real_MFMA_gfx940 <0x6f, "v_mfma_f64_4x4x4_4b_f64">;
+defm V_MFMA_I32_32X32X16I8 : VOP3P_Real_MFMA_gfx942 <0x56, "v_mfma_i32_32x32x16_i8">;
+defm V_MFMA_I32_16X16X32I8 : VOP3P_Real_MFMA_gfx942 <0x57, "v_mfma_i32_16x16x32_i8">;
+defm V_MFMA_F32_16X16X8XF32 : VOP3P_Real_MFMA_gfx942 <0x3e, "v_mfma_f32_16x16x8_xf32">;
+defm V_MFMA_F32_32X32X4XF32 : VOP3P_Real_MFMA_gfx942 <0x3f, "v_mfma_f32_32x32x4_xf32">;
+
+defm V_MFMA_F32_16X16X32_BF8_BF8 : VOP3P_Real_MFMA_gfx942 <0x70>;
+defm V_MFMA_F32_16X16X32_BF8_FP8 : VOP3P_Real_MFMA_gfx942 <0x71>;
+defm V_MFMA_F32_16X16X32_FP8_BF8 : VOP3P_Real_MFMA_gfx942 <0x72>;
+defm V_MFMA_F32_16X16X32_FP8_FP8 : VOP3P_Real_MFMA_gfx942 <0x73>;
+defm V_MFMA_F32_32X32X16_BF8_BF8 : VOP3P_Real_MFMA_gfx942 <0x74>;
+defm V_MFMA_F32_32X32X16_BF8_FP8 : VOP3P_Real_MFMA_gfx942 <0x75>;
+defm V_MFMA_F32_32X32X16_FP8_BF8 : VOP3P_Real_MFMA_gfx942 <0x76>;
+defm V_MFMA_F32_32X32X16_FP8_FP8 : VOP3P_Real_MFMA_gfx942 <0x77>;
+
+defm V_MFMA_F32_32X32X4BF16_1K : VOP3P_Real_MFMA_gfx942 <0x5d, "v_mfma_f32_32x32x4_2b_bf16">;
+defm V_MFMA_F32_16X16X4BF16_1K : VOP3P_Real_MFMA_gfx942 <0x5e, "v_mfma_f32_16x16x4_4b_bf16">;
+defm V_MFMA_F32_4X4X4BF16_1K : VOP3P_Real_MFMA_gfx942 <0x5f, "v_mfma_f32_4x4x4_16b_bf16">;
+defm V_MFMA_F32_32X32X8BF16_1K : VOP3P_Real_MFMA_gfx942 <0x60, "v_mfma_f32_32x32x8_bf16">;
+defm V_MFMA_F32_16X16X16BF16_1K : VOP3P_Real_MFMA_gfx942 <0x61, "v_mfma_f32_16x16x16_bf16">;
+
+defm V_MFMA_F64_16X16X4F64 : VOP3P_Real_MFMA_gfx942 <0x6e, "v_mfma_f64_16x16x4_f64">;
+defm V_MFMA_F64_4X4X4F64 : VOP3P_Real_MFMA_gfx942 <0x6f, "v_mfma_f64_4x4x4_4b_f64">;
defm V_SMFMAC_F32_16X16X32_F16 : VOP3P_Real_SMFMAC <0x62, "v_smfmac_f32_16x16x32f16">;
defm V_SMFMAC_F32_32X32X16_F16 : VOP3P_Real_SMFMAC <0x64, "v_smfmac_f32_32x32x16f16">;
diff --git a/llvm/lib/TargetParser/TargetParser.cpp b/llvm/lib/TargetParser/TargetParser.cpp
index 13cb0d6461eb8..8731a16b88a5c 100644
--- a/llvm/lib/TargetParser/TargetParser.cpp
+++ b/llvm/lib/TargetParser/TargetParser.cpp
@@ -358,7 +358,7 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
Features["gfx8-insts"] = true;
Features["gfx9-insts"] = true;
Features["gfx90a-insts"] = true;
- Features["gfx942-insts"] = true;
+ Features["gfx940-insts"] = true;
Features["gfx950-insts"] = true;
Features["gfx10-insts"] = true;
Features["gfx10-3-insts"] = true;
@@ -508,7 +508,7 @@ void AMDGPU::fillAMDGPUFeatureMap(StringRef GPU, const Triple &T,
Features["xf32-insts"] = true;
[[fallthrough]];
case GK_GFX9_4_GENERIC:
- Features["gfx942-insts"] = true;
+ Features["gfx940-insts"] = true;
Features["atomic-ds-pk-add-16-insts"] = true;
Features["atomic-flat-pk-add-16-insts"] = true;
Features["atomic-global-pk-add-bf16-inst"] = true;
>From c69444cfae45a5367eb5c5cb600ceff4f4bbc649 Mon Sep 17 00:00:00 2001
From: Fabian Ritter <fabian.ritter at amd.com>
Date: Tue, 18 Feb 2025 03:18:33 -0500
Subject: [PATCH 3/3] Update
llvm/test/CodeGen/AMDGPU/preload-implicit-kernargs-debug-info.ll
---
.../test/CodeGen/AMDGPU/preload-implicit-kernargs-debug-info.ll | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/test/CodeGen/AMDGPU/preload-implicit-kernargs-debug-info.ll b/llvm/test/CodeGen/AMDGPU/preload-implicit-kernargs-debug-info.ll
index b008f397318e8..89c9801b5e466 100644
--- a/llvm/test/CodeGen/AMDGPU/preload-implicit-kernargs-debug-info.ll
+++ b/llvm/test/CodeGen/AMDGPU/preload-implicit-kernargs-debug-info.ll
@@ -1,4 +1,4 @@
-; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 -passes='amdgpu-attributor,function(amdgpu-lower-kernel-arguments)' -amdgpu-kernarg-preload-count=16 -S < %s 2>&1 \
+; RUN: opt -mtriple=amdgcn-amd-amdhsa -mcpu=gfx942 -passes='amdgpu-attributor,function(amdgpu-lower-kernel-arguments)' -amdgpu-kernarg-preload-count=16 -S < %s 2>&1 \
; RUN: | FileCheck --match-full-lines --implicit-check-not='declare' %s
; Confirms we do not leave behind a declaration which references the same
More information about the cfe-commits
mailing list