[llvm] 2c830c8 - [AMDGPU] gfx940: support V_FMAMK_F32 and V_FMAAK_F32
Stanislav Mekhanoshin via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 7 11:31:07 PST 2022
Author: Stanislav Mekhanoshin
Date: 2022-03-07T11:31:01-08:00
New Revision: 2c830c8fab8f27c739f6f98ab537a2670ac0d1aa
URL: https://github.com/llvm/llvm-project/commit/2c830c8fab8f27c739f6f98ab537a2670ac0d1aa
DIFF: https://github.com/llvm/llvm-project/commit/2c830c8fab8f27c739f6f98ab537a2670ac0d1aa.diff
LOG: [AMDGPU] gfx940: support V_FMAMK_F32 and V_FMAAK_F32
Differential Revision: https://reviews.llvm.org/D120769
Added:
llvm/test/MC/AMDGPU/gfx940_asm_features.s
llvm/test/MC/Disassembler/AMDGPU/gfx940_dasm_features.txt
Modified:
llvm/lib/Target/AMDGPU/AMDGPU.td
llvm/lib/Target/AMDGPU/GCNSubtarget.h
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
llvm/lib/Target/AMDGPU/SIInstrInfo.td
llvm/lib/Target/AMDGPU/VOP2Instructions.td
llvm/test/CodeGen/AMDGPU/madak.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 5dc6874779279..dcaf02524bbc9 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -1459,7 +1459,7 @@ def HasPackedFP32Ops : Predicate<"Subtarget->hasPackedFP32Ops()">,
def HasFmaakFmamkF32Insts :
Predicate<"Subtarget->hasFmaakFmamkF32Insts()">,
- AssemblerPredicate<(any_of FeatureGFX10Insts)>;
+ AssemblerPredicate<(any_of FeatureGFX10Insts, FeatureGFX940Insts)>;
def HasImageInsts : Predicate<"Subtarget->hasImageInsts()">,
AssemblerPredicate<(all_of FeatureImageInsts)>;
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index fc9e42418f448..bf90007afddd3 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -843,7 +843,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
}
bool hasFmaakFmamkF32Insts() const {
- return getGeneration() >= GFX10;
+ return getGeneration() >= GFX10 || hasGFX940Insts();
}
bool hasImageInsts() const {
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 547256f311339..9076f9e60296b 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -7769,7 +7769,8 @@ enum SIEncodingFamily {
GFX9 = 5,
GFX10 = 6,
SDWA10 = 7,
- GFX90A = 8
+ GFX90A = 8,
+ GFX940 = 9
};
static SIEncodingFamily subtargetEncodingFamily(const GCNSubtarget &ST) {
@@ -7849,6 +7850,9 @@ int SIInstrInfo::pseudoToMCOpcode(int Opcode) const {
if (ST.hasGFX90AInsts()) {
uint16_t NMCOp = (uint16_t)-1;
+ if (ST.hasGFX940Insts())
+ NMCOp = AMDGPU::getMCOpcode(Opcode, SIEncodingFamily::GFX940);
+ if (NMCOp == (uint16_t)-1)
NMCOp = AMDGPU::getMCOpcode(Opcode, SIEncodingFamily::GFX90A);
if (NMCOp == (uint16_t)-1)
NMCOp = AMDGPU::getMCOpcode(Opcode, SIEncodingFamily::GFX9);
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index 67e055d59707d..cd2176802ca4e 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -29,6 +29,7 @@ def SIEncodingFamily {
int GFX10 = 6;
int SDWA10 = 7;
int GFX90A = 8;
+ int GFX940 = 9;
}
//===----------------------------------------------------------------------===//
@@ -2543,7 +2544,8 @@ def getMCOpcodeGen : InstrMapping {
[!cast<string>(SIEncodingFamily.GFX9)],
[!cast<string>(SIEncodingFamily.GFX10)],
[!cast<string>(SIEncodingFamily.SDWA10)],
- [!cast<string>(SIEncodingFamily.GFX90A)]];
+ [!cast<string>(SIEncodingFamily.GFX90A)],
+ [!cast<string>(SIEncodingFamily.GFX940)]];
}
// Get equivalent SOPK instruction.
diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
index b9ff814a4dc5c..cdeb4e48f584e 100644
--- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
@@ -1436,6 +1436,13 @@ multiclass VOP2_Real_MADK_vi <bits<6> op> {
VOP2_MADKe<op{5-0}, !cast<VOP2_Pseudo>(NAME).Pfl>;
}
+multiclass VOP2_Real_MADK_gfx940 <bits<6> op> {
+ def _gfx940 : VOP2_Real<!cast<VOP2_Pseudo>(NAME), SIEncodingFamily.GFX940>,
+ VOP2_MADKe<op{5-0}, !cast<VOP2_Pseudo>(NAME).Pfl> {
+ let DecoderNamespace = "GFX9";
+ }
+}
+
multiclass VOP2_Real_e32_vi <bits<6> op> {
def _e32_vi :
VOP2_Real<!cast<VOP2_Pseudo>(NAME#"_e32"), SIEncodingFamily.VI>,
@@ -1736,6 +1743,11 @@ let SubtargetPredicate = isGFX90APlus in {
}
} // End SubtargetPredicate = isGFX90APlus
+let SubtargetPredicate = HasFmaakFmamkF32Insts in {
+defm V_FMAMK_F32 : VOP2_Real_MADK_gfx940 <0x17>;
+defm V_FMAAK_F32 : VOP2_Real_MADK_gfx940 <0x18>;
+}
+
multiclass VOP2_Real_DOT_ACC_gfx9<bits<6> op> : VOP2_Real_e32_vi<op> {
def _dpp_vi : VOP2_DPP<op, !cast<VOP2_DPP_Pseudo>(NAME#"_dpp")>;
}
diff --git a/llvm/test/CodeGen/AMDGPU/madak.ll b/llvm/test/CodeGen/AMDGPU/madak.ll
index bb33a33cdb8c7..28b8c846982fa 100644
--- a/llvm/test/CodeGen/AMDGPU/madak.ll
+++ b/llvm/test/CodeGen/AMDGPU/madak.ll
@@ -2,7 +2,8 @@
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX8,GFX6_8_9,GFX8_9,GFX8_9_10,MAD %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9,GFX6_8_9,GFX8_9,GFX8_9_10,MAD %s
; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10,GFX8_9_10,GFX10-MAD %s
-; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -fp-contract=fast < %s | FileCheck -check-prefixes=GCN,GFX10,GFX8_9_10,FMA %s
+; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -fp-contract=fast < %s | FileCheck -check-prefixes=GCN,GFX10,GFX8_9_10,FMA,GFX10-FMA %s
+; RUN: llc -march=amdgcn -mcpu=gfx940 -verify-machineinstrs -fp-contract=fast < %s | FileCheck -check-prefixes=GCN,GFX9,GFX8_9_10,FMA,GFX940-FMA %s
declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
declare float @llvm.fabs.f32(float) nounwind readnone
@@ -135,7 +136,8 @@ define amdgpu_kernel void @madak_inline_imm_f32(float addrspace(1)* noalias %out
; GCN-NOT: v_madak_f32
; GFX6_8_9: v_mac_f32_e32 [[VK]], [[SB]], [[VA]]
; GFX10-MAD: v_mad_f32 v{{[0-9]+}}, [[VA]], [[SB]], 0x41200000
-; FMA: v_fma_f32 v{{[0-9]+}}, [[VA]], [[SB]], 0x41200000
+; GFX10-FMA: v_fma_f32 v{{[0-9]+}}, [[VA]], [[SB]], 0x41200000
+; GFX940-FMA: v_fmac_f32_e32 v{{[0-9]+}}, [[SB]], [[VA]]
define amdgpu_kernel void @s_v_madak_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float %b) #0 {
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%in.a.gep = getelementptr float, float addrspace(1)* %in.a, i32 %tid
@@ -156,7 +158,8 @@ define amdgpu_kernel void @s_v_madak_f32(float addrspace(1)* noalias %out, float
; GFX6_8_9-NOT: v_madak_f32
; GFX6_8_9: v_mac_f32_e32 [[VK]], [[SB]], [[VA]]
; GFX10-MAD: v_madak_f32 v{{[0-9]+}}, [[SB]], [[VA]], 0x41200000
-; FMA: v_fmaak_f32 v{{[0-9]+}}, [[SB]], [[VA]], 0x41200000
+; GFX10-FMA: v_fmaak_f32 v{{[0-9]+}}, [[SB]], [[VA]], 0x41200000
+; GFX940-FMA: v_fmac_f32_e32 v{{[0-9]+}}, [[SB]], [[VA]]
define amdgpu_kernel void @v_s_madak_f32(float addrspace(1)* noalias %out, float %a, float addrspace(1)* noalias %in.b) #0 {
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%in.b.gep = getelementptr float, float addrspace(1)* %in.b, i32 %tid
@@ -174,7 +177,8 @@ define amdgpu_kernel void @v_s_madak_f32(float addrspace(1)* noalias %out, float
; GCN-NOT: v_madak_f32
; GFX8_9: v_mac_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
; GFX10-MAD: v_mac_f32_e64 {{v[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
-; FMA: v_fmac_f32_e64 {{v[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+; GFX10-FMA: v_fmac_f32_e64 {{v[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
+; GFX940-FMA: v_fmac_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
define amdgpu_kernel void @s_s_madak_f32(float addrspace(1)* %out, float %a, float %b) #0 {
%mul = fmul float %a, %b
%madak = fadd float %mul, 10.0
@@ -189,7 +193,8 @@ define amdgpu_kernel void @s_s_madak_f32(float addrspace(1)* %out, float %a, flo
; GFX8_9_10: {{flat|global}}_load_dword [[VA:v[0-9]+]]
; GFX6_8_9: v_mad_f32 {{v[0-9]+}}, |{{v[0-9]+}}|, {{v[0-9]+}}, {{[sv][0-9]+}}
; GFX10-MAD: v_mad_f32 {{v[0-9]+}}, |{{v[0-9]+}}|, {{v[0-9]+}}, 0x41200000
-; FMA: v_fma_f32 {{v[0-9]+}}, |{{v[0-9]+}}|, {{v[0-9]+}}, 0x41200000
+; GFX10-FMA: v_fma_f32 {{v[0-9]+}}, |{{v[0-9]+}}|, {{v[0-9]+}}, 0x41200000
+; GFX940-FMA: v_fma_f32 {{v[0-9]+}}, |{{v[0-9]+}}|, {{v[0-9]+}}, {{s[0-9]+}}
; GCN: s_endpgm
define amdgpu_kernel void @no_madak_src0_modifier_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float addrspace(1)* noalias %in.b) #0 {
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
@@ -215,7 +220,8 @@ define amdgpu_kernel void @no_madak_src0_modifier_f32(float addrspace(1)* noalia
; GFX8_9_10: {{flat|global}}_load_dword [[VA:v[0-9]+]]
; GFX6_8_9: v_mad_f32 {{v[0-9]+}}, {{v[0-9]+}}, |{{v[0-9]+}}|, {{[sv][0-9]+}}
; GFX10-MAD: v_mad_f32 {{v[0-9]+}}, {{v[0-9]+}}, |{{v[0-9]+}}|, 0x41200000
-; FMA: v_fma_f32 {{v[0-9]+}}, {{v[0-9]+}}, |{{v[0-9]+}}|, 0x41200000
+; GFX10-FMA: v_fma_f32 {{v[0-9]+}}, {{v[0-9]+}}, |{{v[0-9]+}}|, 0x41200000
+; GFX940-FMA: v_fma_f32 {{v[0-9]+}}, {{v[0-9]+}}, |{{v[0-9]+}}|, {{s[0-9]+}}
; GCN: s_endpgm
define amdgpu_kernel void @no_madak_src1_modifier_f32(float addrspace(1)* noalias %out, float addrspace(1)* noalias %in.a, float addrspace(1)* noalias %in.b) #0 {
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
@@ -245,7 +251,8 @@ define amdgpu_kernel void @no_madak_src1_modifier_f32(float addrspace(1)* noalia
; MAD: v_mac_f32_e64 [[MADAK]], [[SGPR0]], 0.5
; GFX10: v_mov_b32_e32 [[SGPR0_VCOPY:v[0-9]+]], [[SGPR0]]
; GFX10-MAD: v_madak_f32 [[MADAK:v[0-9]+]], 0.5, [[SGPR0_VCOPY]], 0x42280000
-; FMA: v_fmaak_f32 [[MADAK:v[0-9]+]], 0.5, [[SGPR0_VCOPY]], 0x42280000
+; GFX10-FMA: v_fmaak_f32 [[MADAK:v[0-9]+]], 0.5, [[SGPR0_VCOPY]], 0x42280000
+; GFX940-FMA: v_fmac_f32_e64 [[MADAK:v[0-9]+]], [[SGPR0]], 0.5
; GCN: v_mul_f32_e32 [[MUL:v[0-9]+]], [[MADAK]], [[VGPR]]
; GFX6: buffer_store_dword [[MUL]]
; GFX8_9_10: {{flat|global}}_store_dword v[{{[0-9:]+}}], [[MUL]]
diff --git a/llvm/test/MC/AMDGPU/gfx940_asm_features.s b/llvm/test/MC/AMDGPU/gfx940_asm_features.s
new file mode 100644
index 0000000000000..7b905aa48b64d
--- /dev/null
+++ b/llvm/test/MC/AMDGPU/gfx940_asm_features.s
@@ -0,0 +1,10 @@
+// RUN: llvm-mc -arch=amdgcn -mcpu=gfx940 -show-encoding %s | FileCheck --check-prefix=GFX940 --strict-whitespace %s
+// RUN: not llvm-mc -arch=amdgcn -mcpu=gfx90a %s 2>&1 | FileCheck --check-prefixes=GFX90A --implicit-check-not=error: %s
+
+// GFX90A: error: instruction not supported on this GPU
+// GFX940: v_fmamk_f32 v0, v2, 0x42c80000, v3 ; encoding: [0x02,0x07,0x00,0x2e,0x00,0x00,0xc8,0x42]
+v_fmamk_f32 v0, v2, 100.0, v3
+
+// GFX90A: error: instruction not supported on this GPU
+// GFX940: v_fmaak_f32 v0, v2, v3, 0x42c80000 ; encoding: [0x02,0x07,0x00,0x30,0x00,0x00,0xc8,0x42]
+v_fmaak_f32 v0, v2, v3, 100.0
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx940_dasm_features.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx940_dasm_features.txt
new file mode 100644
index 0000000000000..edfff233133be
--- /dev/null
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx940_dasm_features.txt
@@ -0,0 +1,7 @@
+# RUN: llvm-mc -arch=amdgcn -mcpu=gfx940 -disassemble -show-encoding %s | FileCheck -strict-whitespace --check-prefix=GFX940 %s
+
+# GFX940: v_fmamk_f32 v0, v2, 0x42c80000, v3 ; encoding: [0x02,0x07,0x00,0x2e,0x00,0x00,0xc8,0x42]
+0x02,0x07,0x00,0x2e,0x00,0x00,0xc8,0x42
+
+# GFX940: v_fmaak_f32 v0, v2, v3, 0x42c80000 ; encoding: [0x02,0x07,0x00,0x30,0x00,0x00,0xc8,0x42]
+0x02,0x07,0x00,0x30,0x00,0x00,0xc8,0x42
More information about the llvm-commits
mailing list